|
|
|
|
@ -0,0 +1,887 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"metadata": {
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
"end_time": "2026-04-27T13:04:33.988621800Z",
|
|
|
|
|
"start_time": "2026-04-27T13:04:10.669090800Z"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"source": "!pip install pandas jieba",
|
|
|
|
|
"id": "6e2e7982cb88a612",
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Requirement already satisfied: pandas in .\\.venv\\Lib\\site-packages (3.0.2)\n",
|
|
|
|
|
"Collecting jieba\n",
|
|
|
|
|
" Downloading jieba-0.42.1.tar.gz (19.2 MB)\n",
|
|
|
|
|
" ---------------------------------------- 0.0/19.2 MB ? eta -:--:--\n",
|
|
|
|
|
" ---------------------------------------- 0.0/19.2 MB ? eta -:--:--\n",
|
|
|
|
|
" -- ------------------------------------- 1.0/19.2 MB 5.0 MB/s eta 0:00:04\n",
|
|
|
|
|
" ----- ---------------------------------- 2.6/19.2 MB 6.6 MB/s eta 0:00:03\n",
|
|
|
|
|
" -------- ------------------------------- 3.9/19.2 MB 6.2 MB/s eta 0:00:03\n",
|
|
|
|
|
" ---------- ----------------------------- 5.0/19.2 MB 6.0 MB/s eta 0:00:03\n",
|
|
|
|
|
" ------------ --------------------------- 5.8/19.2 MB 5.5 MB/s eta 0:00:03\n",
|
|
|
|
|
" ------------- -------------------------- 6.6/19.2 MB 5.2 MB/s eta 0:00:03\n",
|
|
|
|
|
" --------------- ------------------------ 7.3/19.2 MB 5.1 MB/s eta 0:00:03\n",
|
|
|
|
|
" ----------------- ---------------------- 8.4/19.2 MB 4.9 MB/s eta 0:00:03\n",
|
|
|
|
|
" ------------------- -------------------- 9.2/19.2 MB 4.8 MB/s eta 0:00:03\n",
|
|
|
|
|
" -------------------- ------------------- 10.0/19.2 MB 4.6 MB/s eta 0:00:02\n",
|
|
|
|
|
" ---------------------- ----------------- 10.7/19.2 MB 4.6 MB/s eta 0:00:02\n",
|
|
|
|
|
" ------------------------ --------------- 11.5/19.2 MB 4.5 MB/s eta 0:00:02\n",
|
|
|
|
|
" ------------------------- -------------- 12.3/19.2 MB 4.5 MB/s eta 0:00:02\n",
|
|
|
|
|
" --------------------------- ------------ 13.1/19.2 MB 4.5 MB/s eta 0:00:02\n",
|
|
|
|
|
" ---------------------------- ----------- 13.6/19.2 MB 4.5 MB/s eta 0:00:02\n",
|
|
|
|
|
" ------------------------------ --------- 14.7/19.2 MB 4.3 MB/s eta 0:00:02\n",
|
|
|
|
|
" ------------------------------- -------- 15.2/19.2 MB 4.2 MB/s eta 0:00:01\n",
|
|
|
|
|
" -------------------------------- ------- 15.5/19.2 MB 4.1 MB/s eta 0:00:01\n",
|
|
|
|
|
" --------------------------------- ------ 16.0/19.2 MB 4.0 MB/s eta 0:00:01\n",
|
|
|
|
|
" --------------------------------- ------ 16.3/19.2 MB 3.8 MB/s eta 0:00:01\n",
|
|
|
|
|
" ---------------------------------- ----- 16.5/19.2 MB 3.7 MB/s eta 0:00:01\n",
|
|
|
|
|
" ---------------------------------- ----- 16.5/19.2 MB 3.7 MB/s eta 0:00:01\n",
|
|
|
|
|
" ---------------------------------- ----- 16.8/19.2 MB 3.5 MB/s eta 0:00:01\n",
|
|
|
|
|
" ----------------------------------- ---- 17.0/19.2 MB 3.3 MB/s eta 0:00:01\n",
|
|
|
|
|
" ----------------------------------- ---- 17.0/19.2 MB 3.3 MB/s eta 0:00:01\n",
|
|
|
|
|
" ------------------------------------ --- 17.3/19.2 MB 3.2 MB/s eta 0:00:01\n",
|
|
|
|
|
" ------------------------------------ --- 17.6/19.2 MB 3.1 MB/s eta 0:00:01\n",
|
|
|
|
|
" ------------------------------------ --- 17.6/19.2 MB 3.1 MB/s eta 0:00:01\n",
|
|
|
|
|
" ------------------------------------- -- 17.8/19.2 MB 3.0 MB/s eta 0:00:01\n",
|
|
|
|
|
" ------------------------------------- -- 18.1/19.2 MB 2.9 MB/s eta 0:00:01\n",
|
|
|
|
|
" -------------------------------------- - 18.4/19.2 MB 2.8 MB/s eta 0:00:01\n",
|
|
|
|
|
" -------------------------------------- - 18.6/19.2 MB 2.8 MB/s eta 0:00:01\n",
|
|
|
|
|
" --------------------------------------- 18.9/19.2 MB 2.7 MB/s eta 0:00:01\n",
|
|
|
|
|
" --------------------------------------- 19.1/19.2 MB 2.7 MB/s eta 0:00:01\n",
|
|
|
|
|
" ---------------------------------------- 19.2/19.2 MB 2.6 MB/s 0:00:07\n",
|
|
|
|
|
" Installing build dependencies: started\n",
|
|
|
|
|
" Installing build dependencies: finished with status 'done'\n",
|
|
|
|
|
" Getting requirements to build wheel: started\n",
|
|
|
|
|
" Getting requirements to build wheel: finished with status 'done'\n",
|
|
|
|
|
" Preparing metadata (pyproject.toml): started\n",
|
|
|
|
|
" Preparing metadata (pyproject.toml): finished with status 'done'\n",
|
|
|
|
|
"Requirement already satisfied: numpy>=1.26.0 in .\\.venv\\Lib\\site-packages (from pandas) (2.4.4)\n",
|
|
|
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in .\\.venv\\Lib\\site-packages (from pandas) (2.9.0.post0)\n",
|
|
|
|
|
"Requirement already satisfied: tzdata in .\\.venv\\Lib\\site-packages (from pandas) (2026.2)\n",
|
|
|
|
|
"Requirement already satisfied: six>=1.5 in .\\.venv\\Lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
|
|
|
|
|
"Building wheels for collected packages: jieba\n",
|
|
|
|
|
" Building wheel for jieba (pyproject.toml): started\n",
|
|
|
|
|
" Building wheel for jieba (pyproject.toml): finished with status 'done'\n",
|
|
|
|
|
" Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314527 sha256=9c6c0b82134284c2022dd7832428b0c1355cf813225c5e3869e51e23ece4666b\n",
|
|
|
|
|
" Stored in directory: c:\\users\\administrator\\appdata\\local\\pip\\cache\\wheels\\ac\\60\\cf\\538a1f183409caf1fc136b5d2c2dee329001ef6da2c5084bef\n",
|
|
|
|
|
"Successfully built jieba\n",
|
|
|
|
|
"Installing collected packages: jieba\n",
|
|
|
|
|
"Successfully installed jieba-0.42.1\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"\n",
|
|
|
|
|
"[notice] A new release of pip is available: 26.0.1 -> 26.1\n",
|
|
|
|
|
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"execution_count": 9
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
"end_time": "2026-04-27T13:05:18.748847300Z",
|
|
|
|
|
"start_time": "2026-04-27T13:05:18.691833Z"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import random\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"df = pd.read_csv(\"task-a-zh.tsv\", sep=\"\\t\", dtype=str)\n",
|
|
|
|
|
"COL_ID = \"id\"\n",
|
|
|
|
|
"COL_WORD1 = \"word1\"\n",
|
|
|
|
|
"COL_WORD2 = \"word2\"\n",
|
|
|
|
|
"COL_HEADLINE = \"headline\"\n",
|
|
|
|
|
"COL_RESULT = \"rule_based_joke\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"WORD_TEMPLATES = [\n",
|
|
|
|
|
" \"千万不要用{w1}去碰{w2},不然你会收获一个这辈子都忘不掉的名场面。\",\n",
|
|
|
|
|
" \"我试着用{w1}处理{w2},结果场面一度失控到我想原地找个地缝钻进去。\",\n",
|
|
|
|
|
" \"谁能想到,用{w1}和{w2}组合在一起,居然能搞出这么离谱的事情。\",\n",
|
|
|
|
|
" \"朋友让我用{w1}去打理{w2},我做完之后他直接和我绝交了。\",\n",
|
|
|
|
|
" \"你知道用{w1}对{w2}做什么最可怕吗?是完全不受控制的意外。\",\n",
|
|
|
|
|
" \"第一次用{w1}操作{w2},直接给我整出了这辈子都不想再经历的社死现场。\"\n",
|
|
|
|
|
"]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"NEWS_TEMPLATES = [\n",
|
|
|
|
|
" \"看完【{news}】这则新闻,我只能说这波操作属实是我万万没想到的。\",\n",
|
|
|
|
|
" \"本来以为【{news}】是个常规消息,结果看完细节直接给我整笑了。\",\n",
|
|
|
|
|
" \"【{news}】,这不就是现实版的大型反转现场吗?主打一个意想不到。\",\n",
|
|
|
|
|
" \"刷到【{news}】的新闻,我和朋友讨论了半天,只能说格局真的太大了。\",\n",
|
|
|
|
|
" \"看到【{news}】这则消息,我只能说高手过招,招招都在意料之外。\",\n",
|
|
|
|
|
" \"【{news}】,只能说现在的新闻真的比电视剧还精彩。\"\n",
|
|
|
|
|
"]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def generate_rule_joke(row):\n",
|
|
|
|
|
" word1 = str(row[COL_WORD1]).strip()\n",
|
|
|
|
|
" word2 = str(row[COL_WORD2]).strip()\n",
|
|
|
|
|
" headline = str(row[COL_HEADLINE]).strip()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" if headline == \"-\" and word1 != \"-\" and word2 != \"-\":\n",
|
|
|
|
|
" template = random.choice(WORD_TEMPLATES)\n",
|
|
|
|
|
" return template.format(w1=word1, w2=word2)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" elif word1 == \"-\" and word2 == \"-\" and headline != \"-\":\n",
|
|
|
|
|
" template = random.choice(NEWS_TEMPLATES)\n",
|
|
|
|
|
" return template.format(news=headline)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" return \"生成失败,数据格式异常\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"df = df[(df.index >= 275) | (df.index.isin(range(0, 20)))]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(\"开始批量生成规则基线结果...\")\n",
|
|
|
|
|
"df[COL_RESULT] = df.apply(generate_rule_joke, axis=1)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"df.to_csv(\"task_a_zh_rule_baseline_full.tsv\", sep=\"\\t\", index=False)\n",
|
|
|
|
|
"print(\"✅ 生成完成!结果已保存为 task_a_zh_rule_baseline_full.tsv\")"
|
|
|
|
|
],
|
|
|
|
|
"id": "38820c6edd5cde51",
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"开始批量生成规则基线结果...\n",
|
|
|
|
|
"✅ 生成完成!结果已保存为 task_a_zh_rule_baseline_full.tsv\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"execution_count": 10
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
"end_time": "2026-04-27T13:54:55.240350200Z",
|
|
|
|
|
"start_time": "2026-04-27T13:54:54.187588500Z"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"source": [
|
|
|
|
|
"import jieba\n",
|
|
|
|
|
"from collections import defaultdict, Counter\n",
|
|
|
|
|
"import random\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"df = pd.read_csv(\"task-a-zh.tsv\", sep=\"\\t\", dtype=str)\n",
|
|
|
|
|
"# 固定列名\n",
|
|
|
|
|
"COL_ID = \"id\"\n",
|
|
|
|
|
"COL_W1 = \"word1\"\n",
|
|
|
|
|
"COL_W2 = \"word2\"\n",
|
|
|
|
|
"COL_HEAD = \"headline\"\n",
|
|
|
|
|
"COL_OUT = \"ngram_based_joke\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"CORPUS_WORD = [\n",
|
|
|
|
|
" \"用喷洒去碰键盘,结果场面一度失控。\",\n",
|
|
|
|
|
" \"用摇晃处理椅子,朋友看完直接笑了。\",\n",
|
|
|
|
|
" \"用移动打理毛巾,结果闹出了大笑话。\",\n",
|
|
|
|
|
" \"用钻操作笔记本电脑,直接翻车了。\",\n",
|
|
|
|
|
" \"用喷洒处理香蕉,结果越弄越糟。\",\n",
|
|
|
|
|
" \"用滚动处理香蕉,结果滚到了沟里。\",\n",
|
|
|
|
|
" \"用去籽处理胡椒,结果辣到了自己。\",\n",
|
|
|
|
|
" \"用滚动处理衬衫,结果越弄越皱。\",\n",
|
|
|
|
|
" \"用喷洒处理笔记本电脑,直接报废了。\",\n",
|
|
|
|
|
" \"用混合处理玉米,结果味道很奇怪。\",\n",
|
|
|
|
|
" \"用切割处理椅子,结果直接散架了。\",\n",
|
|
|
|
|
" \"用混合处理鞋子,结果变成了限量款。\",\n",
|
|
|
|
|
" \"用测量处理头发,结果发现掉了一半。\",\n",
|
|
|
|
|
" \"用锤击处理花朵,结果花盆碎了一地。\",\n",
|
|
|
|
|
" \"用冲洗处理自行车,结果链条掉了。\",\n",
|
|
|
|
|
" \"用摇晃处理衣服,结果纸屑撒了一地。\",\n",
|
|
|
|
|
" \"用钻处理番茄,结果汁水喷了一身。\",\n",
|
|
|
|
|
" \"用喷洒处理冰箱,结果食材全串味了。\",\n",
|
|
|
|
|
" \"用钻井处理南瓜,结果南瓜裂开了。\",\n",
|
|
|
|
|
" \"用钻处理书本,结果书被钻烂了。\",\n",
|
|
|
|
|
" \"用洗涤处理笔记本电脑,结果短路了。\",\n",
|
|
|
|
|
" \"用测量处理鸡蛋,结果手滑摔碎了。\",\n",
|
|
|
|
|
" \"用锤击处理笔记本电脑,结果成了废品。\",\n",
|
|
|
|
|
" \"用测量处理冰箱,结果塞不进厨房。\",\n",
|
|
|
|
|
" \"用喷洒处理南瓜,结果外皮烂掉了。\",\n",
|
|
|
|
|
" \"千万不要用喷洒碰键盘,不然会出大事。\",\n",
|
|
|
|
|
" \"谁能想到用摇晃处理椅子,会这么搞笑。\",\n",
|
|
|
|
|
" \"用移动打理毛巾,我直接社死了。\",\n",
|
|
|
|
|
" \"用钻碰笔记本电脑,我人都傻了。\",\n",
|
|
|
|
|
" \"用喷洒碰香蕉,结果笑料百出。\",\n",
|
|
|
|
|
" \"用滚动碰香蕉,结果尴尬到抠脚。\",\n",
|
|
|
|
|
" \"用去籽处理胡椒,我打了一下午喷嚏。\",\n",
|
|
|
|
|
" \"用滚动处理衬衫,熨了半小时都没用。\",\n",
|
|
|
|
|
" \"用喷洒碰笔记本电脑,数据全没了。\",\n",
|
|
|
|
|
" \"用混合处理玉米,辣到我灵魂出窍。\",\n",
|
|
|
|
|
" \"用切割处理椅子,摔了个屁股墩。\",\n",
|
|
|
|
|
" \"用混合处理鞋子,朋友笑了一整天。\",\n",
|
|
|
|
|
" \"用测量处理头发,我当场就emo了。\",\n",
|
|
|
|
|
" \"用锤击处理花朵,我妈追着我打。\",\n",
|
|
|
|
|
" \"用冲洗处理自行车,推了三公里回家。\",\n",
|
|
|
|
|
" \"用摇晃处理衣服,捡了半小时硬币。\",\n",
|
|
|
|
|
" \"用钻处理番茄,汁溅了一墙。\",\n",
|
|
|
|
|
" \"用喷洒处理冰箱,菜全烂了。\",\n",
|
|
|
|
|
" \"用钻井处理南瓜,籽撒了一地。\",\n",
|
|
|
|
|
" \"用钻处理书本,被老师骂了一顿。\",\n",
|
|
|
|
|
" \"用洗涤处理电脑,作业全没了。\",\n",
|
|
|
|
|
" \"用测量处理鸡蛋,蛋黄流了一地。\",\n",
|
|
|
|
|
" \"用锤击处理电脑,游戏全没了。\",\n",
|
|
|
|
|
" \"用测量处理冰箱,我当场自闭了。\",\n",
|
|
|
|
|
" \"用喷洒处理南瓜,晚饭都没了。\"\n",
|
|
|
|
|
"]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"CORPUS_NEWS = [\n",
|
|
|
|
|
" \"这则新闻,真的比电视剧还精彩。\",\n",
|
|
|
|
|
" \"看完这则新闻,我直接笑出声了。\",\n",
|
|
|
|
|
" \"这则新闻的走向,完全超出我的预期。\",\n",
|
|
|
|
|
" \"看完这则新闻,我只能说太离谱了。\",\n",
|
|
|
|
|
" \"这则新闻,真的越看越有意思。\",\n",
|
|
|
|
|
" \"看完这则新闻,我大开眼界了。\",\n",
|
|
|
|
|
" \"这则新闻,现实比剧本还魔幻。\",\n",
|
|
|
|
|
" \"看完这则新闻,我直接愣住了。\",\n",
|
|
|
|
|
" \"这则新闻,真的太有戏剧性了。\",\n",
|
|
|
|
|
" \"看完这则新闻,我蚌埠住了。\",\n",
|
|
|
|
|
" \"这则新闻,反转来的猝不及防。\",\n",
|
|
|
|
|
" \"看完这则新闻,我只能说太会玩了。\",\n",
|
|
|
|
|
" \"这则新闻,真的刷新了我的认知。\",\n",
|
|
|
|
|
" \"看完这则新闻,我服了。\",\n",
|
|
|
|
|
" \"这则新闻,高手在民间啊。\",\n",
|
|
|
|
|
" \"看完这则新闻,我笑到肚子疼。\",\n",
|
|
|
|
|
" \"这则新闻,真的万万没想到。\",\n",
|
|
|
|
|
" \"看完这则新闻,我直呼内行。\",\n",
|
|
|
|
|
" \"这则新闻,真的太秀了。\",\n",
|
|
|
|
|
" \"看完这则新闻,我人麻了。\",\n",
|
|
|
|
|
" \"这则新闻,真的比过山车还刺激。\",\n",
|
|
|
|
|
" \"看完这则新闻,我直接笑不活了。\",\n",
|
|
|
|
|
" \"这则新闻,真的太有梗了。\",\n",
|
|
|
|
|
" \"看完这则新闻,我直呼离谱。\",\n",
|
|
|
|
|
" \"这则新闻,真的太绝了。\"\n",
|
|
|
|
|
"]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"class StandardNGram:\n",
|
|
|
|
|
" def __init__(self, n=2):\n",
|
|
|
|
|
" self.n = n\n",
|
|
|
|
|
" self.ngram_counts = defaultdict(Counter)\n",
|
|
|
|
|
" self.context_total = defaultdict(int)\n",
|
|
|
|
|
" self.vocab = set()\n",
|
|
|
|
|
" self.START = \"<s>\"\n",
|
|
|
|
|
" self.END = \"</s>\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" def train(self, corpus):\n",
|
|
|
|
|
" for sentence in corpus:\n",
|
|
|
|
|
" words = list(jieba.cut(sentence.strip()))\n",
|
|
|
|
|
" words = [self.START] + words + [self.END]\n",
|
|
|
|
|
" self.vocab.update(words)\n",
|
|
|
|
|
" for i in range(len(words) - self.n + 1):\n",
|
|
|
|
|
" context = tuple(words[i:i+self.n-1])\n",
|
|
|
|
|
" target = words[i+self.n-1]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" self.ngram_counts[context][target] += 1\n",
|
|
|
|
|
" self.context_total[context] += 1\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" def get_smoothed_prob(self, context, word):\n",
|
|
|
|
|
" count_cw = self.ngram_counts[context][word] + 1\n",
|
|
|
|
|
" count_c = self.context_total[context] + len(self.vocab)\n",
|
|
|
|
|
" return count_cw / count_c\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" def sample_next_word(self, context, top_k=3, repeat_punish=None):\n",
|
|
|
|
|
" if repeat_punish is None:\n",
|
|
|
|
|
" repeat_punish = set()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" if context not in self.ngram_counts:\n",
|
|
|
|
|
" return self.END\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" word_probs = []\n",
|
|
|
|
|
" for word in self.ngram_counts[context]:\n",
|
|
|
|
|
" punish = 0.1 if word in repeat_punish else 1.0\n",
|
|
|
|
|
" prob = self.get_smoothed_prob(context, word) * punish\n",
|
|
|
|
|
" word_probs.append((word, prob))\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" word_probs.sort(key=lambda x: x[1], reverse=True)\n",
|
|
|
|
|
" top_candidates = word_probs[:top_k]\n",
|
|
|
|
|
" words, probs = zip(*top_candidates)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" return random.choices(words, weights=probs, k=1)[0]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" def generate(self, start_keyword, max_len=22):\n",
|
|
|
|
|
" generated = [self.START, start_keyword]\n",
|
|
|
|
|
" repeat_punish = set([start_keyword])\n",
|
|
|
|
|
" current_context = tuple(generated[-1:])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" for _ in range(max_len):\n",
|
|
|
|
|
" next_word = self.sample_next_word(current_context, repeat_punish=repeat_punish)\n",
|
|
|
|
|
" if next_word == self.END:\n",
|
|
|
|
|
" break\n",
|
|
|
|
|
" generated.append(next_word)\n",
|
|
|
|
|
" repeat_punish.add(next_word)\n",
|
|
|
|
|
" current_context = tuple(generated[-1:])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" final_sentence = \"\".join([w for w in generated if w != self.START])\n",
|
|
|
|
|
" if len(final_sentence) < 6:\n",
|
|
|
|
|
" final_sentence = f\"{start_keyword}这事儿,真的太有意思了。\"\n",
|
|
|
|
|
" return final_sentence\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(\"正在训练词汇任务2-gram模型...\")\n",
|
|
|
|
|
"model_word = StandardNGram(n=2)\n",
|
|
|
|
|
"model_word.train(CORPUS_WORD)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(\"正在训练新闻任务2-gram模型...\")\n",
|
|
|
|
|
"model_news = StandardNGram(n=2)\n",
|
|
|
|
|
"model_news.train(CORPUS_NEWS)\n",
|
|
|
|
|
"print(\"模型训练完成!\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def get_task_type(row):\n",
|
|
|
|
|
" w1 = str(row[COL_W1]).strip()\n",
|
|
|
|
|
" w2 = str(row[COL_W2]).strip()\n",
|
|
|
|
|
" head = str(row[COL_HEAD]).strip()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" if head == \"-\" and w1 != \"-\" and w2 != \"-\" and len(w1) > 0 and len(w2) > 0:\n",
|
|
|
|
|
" return \"WORD\", [w1, w2]\n",
|
|
|
|
|
" elif w1 == \"-\" and w2 == \"-\" and head != \"-\" and len(head) > 5:\n",
|
|
|
|
|
" return \"NEWS\", head\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" return \"ERROR\", None\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def extract_news_keyword(headline):\n",
|
|
|
|
|
" stop_words = set([\",\", \"。\", \"!\", \"?\", \"、\", \":\", \";\", \"「\", \"」\", \"【\", \"】\", \"《\", \"》\", \"#\", \"|\", \"/\", \" \", \".\", \"·\", \"0\", \"1\", \"2\", \"3\", \"4\", \"5\", \"6\", \"7\", \"8\", \"9\"])\n",
|
|
|
|
|
" try:\n",
|
|
|
|
|
" words = list(jieba.cut(headline))\n",
|
|
|
|
|
" valid_keywords = [w for w in words if len(w) >= 2 and w not in stop_words]\n",
|
|
|
|
|
" return valid_keywords[0] if len(valid_keywords) > 0 else \"这则新闻\"\n",
|
|
|
|
|
" except:\n",
|
|
|
|
|
" return \"这则新闻\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def generate_final_joke(row):\n",
|
|
|
|
|
" task_type, task_data = get_task_type(row)\n",
|
|
|
|
|
" if task_type == \"WORD\":\n",
|
|
|
|
|
" w1, w2 = task_data\n",
|
|
|
|
|
" start_word = random.choice([w1, w2])\n",
|
|
|
|
|
" base_sentence = model_word.generate(start_word)\n",
|
|
|
|
|
" if w1 not in base_sentence:\n",
|
|
|
|
|
" base_sentence = f\"{w1}和{base_sentence}\"\n",
|
|
|
|
|
" if w2 not in base_sentence:\n",
|
|
|
|
|
" base_sentence = f\"{base_sentence},还扯上了{w2}\"\n",
|
|
|
|
|
" return base_sentence\n",
|
|
|
|
|
" elif task_type == \"NEWS\":\n",
|
|
|
|
|
" headline = task_data\n",
|
|
|
|
|
" keyword = extract_news_keyword(headline)\n",
|
|
|
|
|
" return model_news.generate(keyword)\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" return \"这事儿真的太有意思了。\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(\"开始批量生成最终版2-gram基线结果...\")\n",
|
|
|
|
|
"df[COL_OUT] = df.apply(generate_final_joke, axis=1)\n",
|
|
|
|
|
"# 保存结果\n",
|
|
|
|
|
"df.to_csv(\"task_a_zh_ngram_final_standard.tsv\", sep=\"\\t\", index=False)\n",
|
|
|
|
|
"print(\"✅ 生成完成!结果已保存为 task_a_zh_ngram_final_standard.tsv\")"
|
|
|
|
|
],
|
|
|
|
|
"id": "6119c1b7f47da649",
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Building prefix dict from the default dictionary ...\n",
|
|
|
|
|
"Loading model from cache C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\jieba.cache\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"正在训练词汇任务2-gram模型...\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Loading model cost 0.882 seconds.\n",
|
|
|
|
|
"Prefix dict has been built successfully.\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"正在训练新闻任务2-gram模型...\n",
|
|
|
|
|
"模型训练完成!\n",
|
|
|
|
|
"开始批量生成最终版2-gram基线结果...\n",
|
|
|
|
|
"✅ 生成完成!结果已保存为 task_a_zh_ngram_final_standard.tsv\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"execution_count": 2
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
"end_time": "2026-04-27T14:30:25.028415300Z",
|
|
|
|
|
"start_time": "2026-04-27T14:30:22.452443600Z"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"source": "!pip install pandas requests tqdm",
|
|
|
|
|
"id": "cb4a5e4932e1e3c8",
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Requirement already satisfied: pandas in .\\.venv\\Lib\\site-packages (3.0.2)\n",
|
|
|
|
|
"Requirement already satisfied: requests in .\\.venv\\Lib\\site-packages (2.33.1)\n",
|
|
|
|
|
"Requirement already satisfied: tqdm in .\\.venv\\Lib\\site-packages (4.67.3)\n",
|
|
|
|
|
"Requirement already satisfied: numpy>=1.26.0 in .\\.venv\\Lib\\site-packages (from pandas) (2.4.4)\n",
|
|
|
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in .\\.venv\\Lib\\site-packages (from pandas) (2.9.0.post0)\n",
|
|
|
|
|
"Requirement already satisfied: tzdata in .\\.venv\\Lib\\site-packages (from pandas) (2026.2)\n",
|
|
|
|
|
"Requirement already satisfied: charset_normalizer<4,>=2 in .\\.venv\\Lib\\site-packages (from requests) (3.4.7)\n",
|
|
|
|
|
"Requirement already satisfied: idna<4,>=2.5 in .\\.venv\\Lib\\site-packages (from requests) (3.13)\n",
|
|
|
|
|
"Requirement already satisfied: urllib3<3,>=1.26 in .\\.venv\\Lib\\site-packages (from requests) (2.6.3)\n",
|
|
|
|
|
"Requirement already satisfied: certifi>=2023.5.7 in .\\.venv\\Lib\\site-packages (from requests) (2026.4.22)\n",
|
|
|
|
|
"Requirement already satisfied: colorama in .\\.venv\\Lib\\site-packages (from tqdm) (0.4.6)\n",
|
|
|
|
|
"Requirement already satisfied: six>=1.5 in .\\.venv\\Lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"\n",
|
|
|
|
|
"[notice] A new release of pip is available: 26.0.1 -> 26.1\n",
|
|
|
|
|
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"execution_count": 6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
"end_time": "2026-04-27T14:44:00.369207900Z",
|
|
|
|
|
"start_time": "2026-04-27T14:36:24.437356600Z"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import requests\n",
|
|
|
|
|
"from tqdm import tqdm\n",
|
|
|
|
|
"import time\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"API_KEY = \"YOUR_DEEPSEEK_API_KEY\"  # NOTE(review): real key was committed here — rotate it and load from an env var instead\n",
|
|
|
|
|
"API_URL = \"https://api.deepseek.com/v1/chat/completions\"\n",
|
|
|
|
|
"INPUT_FILE = \"task-a-zh.tsv\"\n",
|
|
|
|
|
"OUTPUT_FILE = \"task_a_zh_deepseek_baseline.tsv\"\n",
|
|
|
|
|
"MODEL_NAME = \"deepseek-chat\"\n",
|
|
|
|
|
"MAX_RETRY = 3\n",
|
|
|
|
|
"GENERATION_CONFIG = {\n",
|
|
|
|
|
" \"temperature\": 0.7,\n",
|
|
|
|
|
" \"top_p\": 0.9,\n",
|
|
|
|
|
" \"max_tokens\": 60\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"PROMPT_WORD = \"\"\"\n",
|
|
|
|
|
"你是一个擅长生成中文幽默笑话的专家。\n",
|
|
|
|
|
"任务:生成一句简短、自然、有幽默感的中文笑话,必须严格同时包含「{w1}」和「{w2}」两个词。\n",
|
|
|
|
|
"要求:\n",
|
|
|
|
|
"1. 句子自然通顺,有网感、趣味性,不能生硬堆砌词语\n",
|
|
|
|
|
"2. 长度控制在 10-30 个字之间\n",
|
|
|
|
|
"3. 只输出笑话本身,不要任何额外解释、前缀后缀\n",
|
|
|
|
|
"\"\"\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"PROMPT_NEWS = \"\"\"\n",
|
|
|
|
|
"你是一个擅长针对新闻标题生成幽默吐槽评论的专家。\n",
|
|
|
|
|
"任务:针对下面的新闻标题,生成一句简短、贴合主题、有幽默感/吐槽感的中文评论。\n",
|
|
|
|
|
"新闻标题:{headline}\n",
|
|
|
|
|
"要求:\n",
|
|
|
|
|
"1. 评论和新闻内容强相关,有网感、吐槽感,不能跑题\n",
|
|
|
|
|
"2. 长度控制在 10-30 个字之间\n",
|
|
|
|
|
"3. 只输出评论本身,不要任何额外解释、前缀后缀\n",
|
|
|
|
|
"\"\"\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def deepseek_generate(prompt):\n",
|
|
|
|
|
" retry_count = 0\n",
|
|
|
|
|
" headers = {\n",
|
|
|
|
|
" \"Content-Type\": \"application/json\",\n",
|
|
|
|
|
" \"Authorization\": f\"Bearer {API_KEY}\"\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
" payload = {\n",
|
|
|
|
|
" \"model\": MODEL_NAME,\n",
|
|
|
|
|
" \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
|
|
|
|
|
" **GENERATION_CONFIG\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" while retry_count < MAX_RETRY:\n",
|
|
|
|
|
" try:\n",
|
|
|
|
|
" response = requests.post(API_URL, headers=headers, json=payload, timeout=30)\n",
|
|
|
|
|
" if response.status_code == 200:\n",
|
|
|
|
|
" result = response.json()[\"choices\"][0][\"message\"][\"content\"].strip()\n",
|
|
|
|
|
" result = result.replace(\"\\n\", \"\").replace('\"', \"\").replace(\"'\", \"\")\n",
|
|
|
|
|
" return result\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" print(f\"API 调用失败,状态码:{response.status_code},重试中...\")\n",
|
|
|
|
|
" retry_count += 1\n",
|
|
|
|
|
" time.sleep(2)\n",
|
|
|
|
|
" except Exception as e:\n",
|
|
|
|
|
" print(f\"调用异常:{e},重试中...\")\n",
|
|
|
|
|
" retry_count += 1\n",
|
|
|
|
|
" time.sleep(2)\n",
|
|
|
|
|
" return \"这事儿真的太有梗了,我直接蚌埠住了。\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def generate_joke(row):\n",
|
|
|
|
|
" w1 = str(row[\"word1\"]).strip()\n",
|
|
|
|
|
" w2 = str(row[\"word2\"]).strip()\n",
|
|
|
|
|
" headline = str(row[\"headline\"]).strip()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" if headline == \"-\" and w1 != \"-\" and w2 != \"-\":\n",
|
|
|
|
|
" prompt = PROMPT_WORD.format(w1=w1, w2=w2)\n",
|
|
|
|
|
" return deepseek_generate(prompt)\n",
|
|
|
|
|
" elif w1 == \"-\" and w2 == \"-\" and headline != \"-\":\n",
|
|
|
|
|
" prompt = PROMPT_NEWS.format(headline=headline)\n",
|
|
|
|
|
" return deepseek_generate(prompt)\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" return \"这内容也太有意思了吧。\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"if __name__ == \"__main__\":\n",
|
|
|
|
|
" print(f\"正在读取数据集:{INPUT_FILE}\")\n",
|
|
|
|
|
" df = pd.read_csv(INPUT_FILE, sep=\"\\t\", dtype=str)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" print(\"开始批量生成 DeepSeek 幽默内容...\")\n",
|
|
|
|
|
" tqdm.pandas(desc=\"生成进度\")\n",
|
|
|
|
|
" df[\"deepseek_based_joke\"] = df.progress_apply(generate_joke, axis=1)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" df.to_csv(OUTPUT_FILE, sep=\"\\t\", index=False)\n",
|
|
|
|
|
" print(f\"✅ 生成完成!结果已保存到:{OUTPUT_FILE}\")"
|
|
|
|
|
],
|
|
|
|
|
"id": "3be6a2ca9dee1b34",
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"正在读取数据集:task-a-zh.tsv\n",
|
|
|
|
|
"开始批量生成 DeepSeek 幽默内容...\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"生成进度: 100%|██████████| 300/300 [07:35<00:00, 1.52s/it]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"✅ 生成完成!结果已保存到:task_a_zh_deepseek_baseline.tsv\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"execution_count": 8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
"start_time": "2026-05-05T14:53:41.636014Z"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import requests\n",
|
|
|
|
|
"from tqdm import tqdm\n",
|
|
|
|
|
"import time\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"API_KEY = \"YOUR_DEEPSEEK_API_KEY\"  # NOTE(review): real key was committed here — rotate it and load from an env var instead\n",
|
|
|
|
|
"API_URL = \"https://api.deepseek.com/v1/chat/completions\"\n",
|
|
|
|
|
"MODEL_NAME = \"deepseek-chat\"\n",
|
|
|
|
|
"MAX_RETRY = 3\n",
|
|
|
|
|
"GENERATION_CONFIG = {\n",
|
|
|
|
|
" \"temperature\": 0.2,\n",
|
|
|
|
|
" \"top_p\": 0.8,\n",
|
|
|
|
|
" \"max_tokens\": 150,\n",
|
|
|
|
|
" \"seed\": 42\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"FILES = {\n",
|
|
|
|
|
" \"规则基线\": \"task_a_zh_rule_baseline_full.tsv\",\n",
|
|
|
|
|
" \"N-gram基线\": \"task_a_zh_ngram_final_standard.tsv\",\n",
|
|
|
|
|
" \"DeepSeek大模型\": \"task_a_zh_deepseek_baseline.tsv\"\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"RESULT_COLS = {\n",
|
|
|
|
|
" \"规则基线\": \"rule_based_joke\",\n",
|
|
|
|
|
" \"N-gram基线\": \"ngram_based_joke\",\n",
|
|
|
|
|
" \"DeepSeek大模型\": \"deepseek_based_joke\"\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"PROMPT_WORD = \"\"\"\n",
|
|
|
|
|
"你是一个专业的NLP任务评估专家,负责评估中文幽默生成结果的质量。\n",
|
|
|
|
|
"本次评估对象是词汇包含任务的幽默生成结果,任务要求:生成内容必须同时包含「{w1}」和「{w2}」两个词,且有幽默感、通顺自然。\n",
|
|
|
|
|
"请你从以下5个维度,对生成结果进行1-5分打分(1分最差,5分最优),并给出1句简短质化点评(10-20字),点评需贴合打分结果。\n",
|
|
|
|
|
"评估维度及打分标准:\n",
|
|
|
|
|
"1. 任务约束满足度(30%):是否同时包含「{w1}」和「{w2}」,无遗漏。5分=完全满足;3分=基本满足;1分=不满足。\n",
|
|
|
|
|
"2. 幽默度(30%):是否有幽默感、有梗,无生硬感。5分=幽默自然;3分=轻微幽默;1分=无幽默。\n",
|
|
|
|
|
"3. 语义通顺度(20%):是否通顺、无语法错误、无逻辑断裂。5分=完全通顺;3分=基本通顺;1分=不通顺。\n",
|
|
|
|
|
"4. 生成多样性(10%):结合同类任务,该结果是否无明显重复句式。5分=多样性高;3分=一般;1分=重复。\n",
|
|
|
|
|
"5. 语言自然度(10%):是否自然、无机械模板感。5分=自然;3分=基本自然;1分=机械。\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"请严格按照以下格式输出,不要添加任何额外内容,格式:\n",
|
|
|
|
|
"约束满足度:X分,幽默度:X分,通顺度:X分,多样性:X分,自然度:X分,点评:XXX\n",
|
|
|
|
|
"生成结果:{generated_text}\n",
|
|
|
|
|
"词汇要求:{w1}、{w2}\n",
|
|
|
|
|
"\"\"\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"PROMPT_NEWS = \"\"\"\n",
|
|
|
|
|
"你是一个专业的NLP任务评估专家,负责评估中文幽默生成结果的质量。\n",
|
|
|
|
|
"本次评估对象是新闻标题幽默评论生成结果,任务要求:生成内容需贴合原新闻标题,有幽默感、通顺自然。\n",
|
|
|
|
|
"请你从以下5个维度,对生成结果进行1-5分打分(1分最差,5分最优),并给出1句简短质化点评(10-20字),点评需贴合打分结果。\n",
|
|
|
|
|
"评估维度及打分标准:\n",
|
|
|
|
|
"1. 任务约束满足度(30%):是否贴合原新闻标题,无跑题。5分=完全贴合;3分=基本贴合;1分=跑题。\n",
|
|
|
|
|
"2. 幽默度(30%):是否有幽默感、吐槽感,无生硬感。5分=幽默自然;3分=轻微幽默;1分=无幽默。\n",
|
|
|
|
|
"3. 语义通顺度(20%):是否通顺、无语法错误、无逻辑断裂。5分=完全通顺;3分=基本通顺;1分=不通顺。\n",
|
|
|
|
|
"4. 生成多样性(10%):结合同类任务,该结果是否无明显重复句式。5分=多样性高;3分=一般;1分=重复。\n",
|
|
|
|
|
"5. 语言自然度(10%):是否自然、无机械模板感。5分=自然;3分=基本自然;1分=机械。\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"请严格按照以下格式输出,不要添加任何额外内容,格式:\n",
|
|
|
|
|
"约束满足度:X分,幽默度:X分,通顺度:X分,多样性:X分,自然度:X分,点评:XXX\n",
|
|
|
|
|
"生成结果:{generated_text}\n",
|
|
|
|
|
"原新闻标题:{headline}\n",
|
|
|
|
|
"\"\"\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def deepseek_evaluate(prompt):\n",
|
|
|
|
|
" retry_count = 0\n",
|
|
|
|
|
" headers = {\n",
|
|
|
|
|
" \"Content-Type\": \"application/json\",\n",
|
|
|
|
|
" \"Authorization\": f\"Bearer {API_KEY}\"\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
" payload = {\n",
|
|
|
|
|
" \"model\": MODEL_NAME,\n",
|
|
|
|
|
" \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
|
|
|
|
|
" **GENERATION_CONFIG\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" while retry_count < MAX_RETRY:\n",
|
|
|
|
|
" try:\n",
|
|
|
|
|
" response = requests.post(API_URL, headers=headers, json=payload, timeout=30)\n",
|
|
|
|
|
" if response.status_code == 200:\n",
|
|
|
|
|
" result = response.json()[\"choices\"][0][\"message\"][\"content\"].strip()\n",
|
|
|
|
|
" return result\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" print(f\"API调用失败,状态码:{response.status_code},重试中...\")\n",
|
|
|
|
|
" retry_count += 1\n",
|
|
|
|
|
" time.sleep(2)\n",
|
|
|
|
|
" except Exception as e:\n",
|
|
|
|
|
" print(f\"调用异常:{e},重试中...\")\n",
|
|
|
|
|
" retry_count += 1\n",
|
|
|
|
|
" time.sleep(2)\n",
|
|
|
|
|
" return \"约束满足度:3分,幽默度:3分,通顺度:3分,多样性:3分,自然度:3分,点评:评估失败,默认打分\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def parse_evaluation_result(eval_text):\n",
|
|
|
|
|
" try:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" constraint_score = int(eval_text.split(\"约束满足度:\")[1].split(\"分\")[0])\n",
|
|
|
|
|
" humor_score = int(eval_text.split(\"幽默度:\")[1].split(\"分\")[0])\n",
|
|
|
|
|
" fluency_score = int(eval_text.split(\"通顺度:\")[1].split(\"分\")[0])\n",
|
|
|
|
|
" diversity_score = int(eval_text.split(\"多样性:\")[1].split(\"分\")[0])\n",
|
|
|
|
|
" natural_score = int(eval_text.split(\"自然度:\")[1].split(\"分\")[0])\n",
|
|
|
|
|
" comment = eval_text.split(\"点评:\")[1].strip()\n",
|
|
|
|
|
" return constraint_score, humor_score, fluency_score, diversity_score, natural_score, comment\n",
|
|
|
|
|
" except:\n",
|
|
|
|
|
" return 3, 3, 3, 3, 3, \"解析失败,默认点评\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def evaluate_single_file(file_path, result_col, method_name):\n",
"    \"\"\"Score one method's TSV of generated texts with DeepSeek.\n",
"\n",
"    Reads `file_path` (TSV, all columns as str), prompts the evaluator once per\n",
"    row, writes the per-row scores plus a weighted total back out to\n",
"    `{method_name}_评估结果.tsv`, and returns (scored_df, summary_dict).\n",
"    \"\"\"\n",
"    print(f\"\\n开始评估【{method_name}】,文件路径:{file_path}\")\n",
"    df = pd.read_csv(file_path, sep=\"\\t\", dtype=str)\n",
"\n",
"    # Initialise the five score columns (int placeholders) and the comment column.\n",
"    for score_col in [\"约束满足度\", \"幽默度\", \"通顺度\", \"多样性\", \"自然度\"]:\n",
"        df[score_col] = 0\n",
"    df[\"评估点评\"] = \"\"\n",
"\n",
"    # Fix: tqdm.pandas(desc=...) was registered here but progress_apply was never\n",
"    # called, so the description never showed; pass it to the loop's tqdm instead.\n",
"    for idx, row in tqdm(df.iterrows(), total=len(df), desc=f\"{method_name} 评估进度\"):\n",
"        w1 = str(row[\"word1\"]).strip()\n",
"        w2 = str(row[\"word2\"]).strip()\n",
"        headline = str(row[\"headline\"]).strip()\n",
"        generated_text = str(row[result_col]).strip()\n",
"\n",
"        # Row shape decides the prompt: word-pair task vs. news-headline task;\n",
"        # anything else is malformed data and gets neutral default scores.\n",
"        if headline == \"-\" and w1 != \"-\" and w2 != \"-\":\n",
"            prompt = PROMPT_WORD.format(w1=w1, w2=w2, generated_text=generated_text)\n",
"        elif w1 == \"-\" and w2 == \"-\" and headline != \"-\":\n",
"            prompt = PROMPT_NEWS.format(headline=headline, generated_text=generated_text)\n",
"        else:\n",
"            df.loc[idx, [\"约束满足度\", \"幽默度\", \"通顺度\", \"多样性\", \"自然度\"]] = [3, 3, 3, 3, 3]\n",
"            df.loc[idx, \"评估点评\"] = \"异常数据,默认点评\"\n",
"            continue\n",
"\n",
"        eval_result = deepseek_evaluate(prompt)\n",
"        constraint, humor, fluency, diversity, natural, comment = parse_evaluation_result(eval_result)\n",
"        df.loc[idx, \"约束满足度\"] = constraint\n",
"        df.loc[idx, \"幽默度\"] = humor\n",
"        df.loc[idx, \"通顺度\"] = fluency\n",
"        df.loc[idx, \"多样性\"] = diversity\n",
"        df.loc[idx, \"自然度\"] = natural\n",
"        df.loc[idx, \"评估点评\"] = comment\n",
"\n",
"    # Weighted total: constraint 30%, humor 30%, fluency 20%, diversity 10%, naturalness 10%.\n",
"    df[\"加权总分\"] = (df[\"约束满足度\"] * 0.3 +\n",
"                  df[\"幽默度\"] * 0.3 +\n",
"                  df[\"通顺度\"] * 0.2 +\n",
"                  df[\"多样性\"] * 0.1 +\n",
"                  df[\"自然度\"] * 0.1).round(2)\n",
"\n",
"    eval_output_path = f\"{method_name}_评估结果.tsv\"\n",
"    df.to_csv(eval_output_path, sep=\"\\t\", index=False)\n",
"    print(f\"【{method_name}】评估完成,结果保存至:{eval_output_path}\")\n",
"\n",
"    summary = {\n",
"        \"方法名称\": method_name,\n",
"        \"总样本数\": len(df),\n",
"        \"约束满足度平均分\": round(df[\"约束满足度\"].mean(), 2),\n",
"        \"幽默度平均分\": round(df[\"幽默度\"].mean(), 2),\n",
"        \"通顺度平均分\": round(df[\"通顺度\"].mean(), 2),\n",
"        \"多样性平均分\": round(df[\"多样性\"].mean(), 2),\n",
"        \"自然度平均分\": round(df[\"自然度\"].mean(), 2),\n",
"        \"加权总分平均分\": round(df[\"加权总分\"].mean(), 2)\n",
"    }\n",
"    return df, summary\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def batch_evaluate_all():\n",
"    \"\"\"Evaluate every configured method and produce a ranked summary report.\n",
"\n",
"    Iterates over FILES, scores each file via evaluate_single_file, writes the\n",
"    combined summary (sorted by weighted average, best first) to a TSV report,\n",
"    prints it, and returns (per-method scored DataFrames dict, summary DataFrame).\n",
"    \"\"\"\n",
"    dfs_by_method = {}\n",
"    summary_rows = []\n",
"    for name, path in FILES.items():\n",
"        scored_df, row = evaluate_single_file(path, RESULT_COLS[name], name)\n",
"        dfs_by_method[name] = scored_df\n",
"        summary_rows.append(row)\n",
"\n",
"    # Best-scoring method first.\n",
"    summary_df = (pd.DataFrame(summary_rows)\n",
"                  .sort_values(by=\"加权总分平均分\", ascending=False)\n",
"                  .reset_index(drop=True))\n",
"\n",
"    summary_report_path = \"三种方法评估汇总报告.tsv\"\n",
"    summary_df.to_csv(summary_report_path, sep=\"\\t\", index=False)\n",
"    print(f\"\\n✅ 所有方法评估完成!汇总报告保存至:{summary_report_path}\")\n",
"\n",
"    print(\"\\n=== 三种方法评估汇总(按加权总分排序)===\")\n",
"    print(summary_df.to_string(index=False))\n",
"\n",
"    return dfs_by_method, summary_df\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"if __name__ == \"__main__\":\n",
"    # Entry point: run the full three-method evaluation pipeline.\n",
"    print(\"开始执行三种方法的批量评估(基于DeepSeek-V4-Pro)\")\n",
"    # Fix: was an f-string with no placeholders; plain literal, same output.\n",
"    print(\"评估样本数:每个方法约300条,预计耗时15-20分钟,请耐心等待...\")\n",
"    all_evaluated_dfs, summary_df = batch_evaluate_all()\n"
|
|
|
|
|
],
|
|
|
|
|
"id": "597c96096f5008b4",
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"开始执行三种方法的批量评估(基于DeepSeek-V4-Pro)\n",
|
|
|
|
|
"评估样本数:每个方法约300条,预计耗时15-20分钟,请耐心等待...\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"开始评估【规则基线】,文件路径:task_a_zh_rule_baseline_full.tsv\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"100%|██████████| 45/45 [01:38<00:00, 2.20s/it]\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"【规则基线】评估完成,结果保存至:规则基线_评估结果.tsv\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"开始评估【N-gram基线】,文件路径:task_a_zh_ngram_final_standard.tsv\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"100%|██████████| 300/300 [10:16<00:00, 2.06s/it]\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"【N-gram基线】评估完成,结果保存至:N-gram基线_评估结果.tsv\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"开始评估【DeepSeek大模型】,文件路径:task_a_zh_deepseek_baseline.tsv\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
" 40%|████ | 120/300 [04:08<05:49, 1.94s/it]"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"execution_count": null
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"source": "",
|
|
|
|
|
"id": "9ae712f5206f6482"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"source": "",
|
|
|
|
|
"id": "b06c46d127418850"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 2
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython2",
|
|
|
|
|
"version": "2.7.6"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|