{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-27T13:04:33.988621800Z",
"start_time": "2026-04-27T13:04:10.669090800Z"
}
},
"cell_type": "code",
"source": "!pip install pandas jieba",
"id": "6e2e7982cb88a612",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in .\\.venv\\Lib\\site-packages (3.0.2)\n",
"Collecting jieba\n",
" Downloading jieba-0.42.1.tar.gz (19.2 MB)\n",
" ---------------------------------------- 0.0/19.2 MB ? eta -:--:--\n",
" ---------------------------------------- 0.0/19.2 MB ? eta -:--:--\n",
" -- ------------------------------------- 1.0/19.2 MB 5.0 MB/s eta 0:00:04\n",
" ----- ---------------------------------- 2.6/19.2 MB 6.6 MB/s eta 0:00:03\n",
" -------- ------------------------------- 3.9/19.2 MB 6.2 MB/s eta 0:00:03\n",
" ---------- ----------------------------- 5.0/19.2 MB 6.0 MB/s eta 0:00:03\n",
" ------------ --------------------------- 5.8/19.2 MB 5.5 MB/s eta 0:00:03\n",
" ------------- -------------------------- 6.6/19.2 MB 5.2 MB/s eta 0:00:03\n",
" --------------- ------------------------ 7.3/19.2 MB 5.1 MB/s eta 0:00:03\n",
" ----------------- ---------------------- 8.4/19.2 MB 4.9 MB/s eta 0:00:03\n",
" ------------------- -------------------- 9.2/19.2 MB 4.8 MB/s eta 0:00:03\n",
" -------------------- ------------------- 10.0/19.2 MB 4.6 MB/s eta 0:00:02\n",
" ---------------------- ----------------- 10.7/19.2 MB 4.6 MB/s eta 0:00:02\n",
" ------------------------ --------------- 11.5/19.2 MB 4.5 MB/s eta 0:00:02\n",
" ------------------------- -------------- 12.3/19.2 MB 4.5 MB/s eta 0:00:02\n",
" --------------------------- ------------ 13.1/19.2 MB 4.5 MB/s eta 0:00:02\n",
" ---------------------------- ----------- 13.6/19.2 MB 4.5 MB/s eta 0:00:02\n",
" ------------------------------ --------- 14.7/19.2 MB 4.3 MB/s eta 0:00:02\n",
" ------------------------------- -------- 15.2/19.2 MB 4.2 MB/s eta 0:00:01\n",
" -------------------------------- ------- 15.5/19.2 MB 4.1 MB/s eta 0:00:01\n",
" --------------------------------- ------ 16.0/19.2 MB 4.0 MB/s eta 0:00:01\n",
" --------------------------------- ------ 16.3/19.2 MB 3.8 MB/s eta 0:00:01\n",
" ---------------------------------- ----- 16.5/19.2 MB 3.7 MB/s eta 0:00:01\n",
" ---------------------------------- ----- 16.5/19.2 MB 3.7 MB/s eta 0:00:01\n",
" ---------------------------------- ----- 16.8/19.2 MB 3.5 MB/s eta 0:00:01\n",
" ----------------------------------- ---- 17.0/19.2 MB 3.3 MB/s eta 0:00:01\n",
" ----------------------------------- ---- 17.0/19.2 MB 3.3 MB/s eta 0:00:01\n",
" ------------------------------------ --- 17.3/19.2 MB 3.2 MB/s eta 0:00:01\n",
" ------------------------------------ --- 17.6/19.2 MB 3.1 MB/s eta 0:00:01\n",
" ------------------------------------ --- 17.6/19.2 MB 3.1 MB/s eta 0:00:01\n",
" ------------------------------------- -- 17.8/19.2 MB 3.0 MB/s eta 0:00:01\n",
" ------------------------------------- -- 18.1/19.2 MB 2.9 MB/s eta 0:00:01\n",
" -------------------------------------- - 18.4/19.2 MB 2.8 MB/s eta 0:00:01\n",
" -------------------------------------- - 18.6/19.2 MB 2.8 MB/s eta 0:00:01\n",
" --------------------------------------- 18.9/19.2 MB 2.7 MB/s eta 0:00:01\n",
" --------------------------------------- 19.1/19.2 MB 2.7 MB/s eta 0:00:01\n",
" ---------------------------------------- 19.2/19.2 MB 2.6 MB/s 0:00:07\n",
" Installing build dependencies: started\n",
" Installing build dependencies: finished with status 'done'\n",
" Getting requirements to build wheel: started\n",
" Getting requirements to build wheel: finished with status 'done'\n",
" Preparing metadata (pyproject.toml): started\n",
" Preparing metadata (pyproject.toml): finished with status 'done'\n",
"Requirement already satisfied: numpy>=1.26.0 in .\\.venv\\Lib\\site-packages (from pandas) (2.4.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in .\\.venv\\Lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: tzdata in .\\.venv\\Lib\\site-packages (from pandas) (2026.2)\n",
"Requirement already satisfied: six>=1.5 in .\\.venv\\Lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Building wheels for collected packages: jieba\n",
" Building wheel for jieba (pyproject.toml): started\n",
" Building wheel for jieba (pyproject.toml): finished with status 'done'\n",
" Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314527 sha256=9c6c0b82134284c2022dd7832428b0c1355cf813225c5e3869e51e23ece4666b\n",
" Stored in directory: c:\\users\\administrator\\appdata\\local\\pip\\cache\\wheels\\ac\\60\\cf\\538a1f183409caf1fc136b5d2c2dee329001ef6da2c5084bef\n",
"Successfully built jieba\n",
"Installing collected packages: jieba\n",
"Successfully installed jieba-0.42.1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 26.0.1 -> 26.1\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"execution_count": 9
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-27T13:05:18.748847300Z",
"start_time": "2026-04-27T13:05:18.691833Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import ran\n",
"\n",
"df = pd.read_csv(\"task-a-zh.tsv\", sep=\"\\t\", dtype=str)\n",
"COL_ID = \"id\"\n",
"COL_WORD1 = \"word1\"\n",
"COL_WORD2 = \"word2\"\n",
"COL_HEADLINE = \"headline\"\n",
"COL_RESULT = \"rule_based_joke\"\n",
"\n",
"WORD_TEMPLATES = [\n",
" \"千万不要用{w1}去碰{w2},不然你会收获一个这辈子都忘不掉的名场面。\",\n",
" \"我试着用{w1}处理{w2},结果场面一度失控到我想原地找个地缝钻进去。\",\n",
" \"谁能想到,用{w1}和{w2}组合在一起,居然能搞出这么离谱的事情。\",\n",
" \"朋友让我用{w1}去打理{w2},我做完之后他直接和我绝交了。\",\n",
" \"你知道用{w1}对{w2}做什么最可怕吗?是完全不受控制的意外。\",\n",
" \"第一次用{w1}操作{w2},直接给我整出了这辈子都不想再经历的社死现场。\"\n",
"]\n",
"\n",
"NEWS_TEMPLATES = [\n",
" \"看完【{news}】这则新闻,我只能说这波操作属实是我万万没想到的。\",\n",
" \"本来以为【{news}】是个常规消息,结果看完细节直接给我整笑了。\",\n",
" \"【{news}】,这不就是现实版的大型反转现场吗?主打一个意想不到。\",\n",
" \"刷到【{news}】的新闻,我和朋友讨论了半天,只能说格局真的太大了。\",\n",
" \"看到【{news}】这则消息,我只能说高手过招,招招都在意料之外。\",\n",
" \"【{news}】,只能说现在的新闻真的比电视剧还精彩。\"\n",
"]\n",
"\n",
"def generate_rule_joke(row):\n",
" word1 = str(row[COL_WORD1]).strip()\n",
" word2 = str(row[COL_WORD2]).strip()\n",
" headline = str(row[COL_HEADLINE]).strip()\n",
"\n",
" if headline == \"-\" and word1 != \"-\" and word2 != \"-\":\n",
" template = random.choice(WORD_TEMPLATES)\n",
" return template.format(w1=word1, w2=word2)\n",
"\n",
" elif word1 == \"-\" and word2 == \"-\" and headline != \"-\":\n",
" template = random.choice(NEWS_TEMPLATES)\n",
" return template.format(news=headline)\n",
"\n",
" else:\n",
" return \"生成失败,数据格式异常\"\n",
"\n",
"df = df[(df.index >= 275) | (df.index.isin(range(0, 20)))]\n",
"\n",
"print(\"开始批量生成规则基线结果...\")\n",
"df[COL_RESULT] = df.apply(generate_rule_joke, axis=1)\n",
"\n",
"df.to_csv(\"task_a_zh_rule_baseline_full.tsv\", sep=\"\\t\", index=False)\n",
"print(\"✅ 生成完成!结果已保存为 task_a_zh_rule_baseline_full.tsv\")"
],
"id": "38820c6edd5cde51",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始批量生成规则基线结果...\n",
"✅ 生成完成!结果已保存为 task_a_zh_rule_baseline_full.tsv\n"
]
}
],
"execution_count": 10
},
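{
"metadata": {},
"cell_type": "code",
"source": [
"# A minimal sanity check of the template baseline (illustrative only; the example\n",
"# inputs below are made up, and it assumes the cell above has already run).\n",
"print(random.choice(WORD_TEMPLATES).format(w1=\"锤子\", w2=\"键盘\"))\n",
"print(random.choice(NEWS_TEMPLATES).format(news=\"男子徒手修好自家电梯\"))"
],
"id": "f1a2b3c4d5e6a7b8",
"outputs": [],
"execution_count": null
},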
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-27T13:54:55.240350200Z",
"start_time": "2026-04-27T13:54:54.187588500Z"
}
},
"cell_type": "code",
"source": [
"import jieba\n",
"from collections import defaultdict, Counter\n",
"import random\n",
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"task-a-zh.tsv\", sep=\"\\t\", dtype=str)\n",
"# 固定列名\n",
"COL_ID = \"id\"\n",
"COL_W1 = \"word1\"\n",
"COL_W2 = \"word2\"\n",
"COL_HEAD = \"headline\"\n",
"COL_OUT = \"ngram_based_joke\"\n",
"\n",
"\n",
"CORPUS_WORD = [\n",
" \"用喷洒去碰键盘,结果场面一度失控。\",\n",
" \"用摇晃处理椅子,朋友看完直接笑了。\",\n",
" \"用移动打理毛巾,结果闹出了大笑话。\",\n",
" \"用钻操作笔记本电脑,直接翻车了。\",\n",
" \"用喷洒处理香蕉,结果越弄越糟。\",\n",
" \"用滚动处理香蕉,结果滚到了沟里。\",\n",
" \"用去籽处理胡椒,结果辣到了自己。\",\n",
" \"用滚动处理衬衫,结果越弄越皱。\",\n",
" \"用喷洒处理笔记本电脑,直接报废了。\",\n",
" \"用混合处理玉米,结果味道很奇怪。\",\n",
" \"用切割处理椅子,结果直接散架了。\",\n",
" \"用混合处理鞋子,结果变成了限量款。\",\n",
" \"用测量处理头发,结果发现掉了一半。\",\n",
" \"用锤击处理花朵,结果花盆碎了一地。\",\n",
" \"用冲洗处理自行车,结果链条掉了。\",\n",
" \"用摇晃处理衣服,结果纸屑撒了一地。\",\n",
" \"用钻处理番茄,结果汁水喷了一身。\",\n",
" \"用喷洒处理冰箱,结果食材全串味了。\",\n",
" \"用钻井处理南瓜,结果南瓜裂开了。\",\n",
" \"用钻处理书本,结果书被钻烂了。\",\n",
" \"用洗涤处理笔记本电脑,结果短路了。\",\n",
" \"用测量处理鸡蛋,结果手滑摔碎了。\",\n",
" \"用锤击处理笔记本电脑,结果成了废品。\",\n",
" \"用测量处理冰箱,结果塞不进厨房。\",\n",
" \"用喷洒处理南瓜,结果外皮烂掉了。\",\n",
" \"千万不要用喷洒碰键盘,不然会出大事。\",\n",
" \"谁能想到用摇晃处理椅子,会这么搞笑。\",\n",
" \"用移动打理毛巾,我直接社死了。\",\n",
" \"用钻碰笔记本电脑,我人都傻了。\",\n",
" \"用喷洒碰香蕉,结果笑料百出。\",\n",
" \"用滚动碰香蕉,结果尴尬到抠脚。\",\n",
" \"用去籽处理胡椒,我打了一下午喷嚏。\",\n",
" \"用滚动处理衬衫,熨了半小时都没用。\",\n",
" \"用喷洒碰笔记本电脑,数据全没了。\",\n",
" \"用混合处理玉米,辣到我灵魂出窍。\",\n",
" \"用切割处理椅子,摔了个屁股墩。\",\n",
" \"用混合处理鞋子,朋友笑了一整天。\",\n",
" \"用测量处理头发我当场就emo了。\",\n",
" \"用锤击处理花朵,我妈追着我打。\",\n",
" \"用冲洗处理自行车,推了三公里回家。\",\n",
" \"用摇晃处理衣服,捡了半小时硬币。\",\n",
" \"用钻处理番茄,汁溅了一墙。\",\n",
" \"用喷洒处理冰箱,菜全烂了。\",\n",
" \"用钻井处理南瓜,籽撒了一地。\",\n",
" \"用钻处理书本,被老师骂了一顿。\",\n",
" \"用洗涤处理电脑,作业全没了。\",\n",
" \"用测量处理鸡蛋,蛋黄流了一地。\",\n",
" \"用锤击处理电脑,游戏全没了。\",\n",
" \"用测量处理冰箱,我当场自闭了。\",\n",
" \"用喷洒处理南瓜,晚饭都没了。\"\n",
"]\n",
"\n",
"CORPUS_NEWS = [\n",
" \"这则新闻,真的比电视剧还精彩。\",\n",
" \"看完这则新闻,我直接笑出声了。\",\n",
" \"这则新闻的走向,完全超出我的预期。\",\n",
" \"看完这则新闻,我只能说太离谱了。\",\n",
" \"这则新闻,真的越看越有意思。\",\n",
" \"看完这则新闻,我大开眼界了。\",\n",
" \"这则新闻,现实比剧本还魔幻。\",\n",
" \"看完这则新闻,我直接愣住了。\",\n",
" \"这则新闻,真的太有戏剧性了。\",\n",
" \"看完这则新闻,我蚌埠住了。\",\n",
" \"这则新闻,反转来的猝不及防。\",\n",
" \"看完这则新闻,我只能说太会玩了。\",\n",
" \"这则新闻,真的刷新了我的认知。\",\n",
" \"看完这则新闻,我服了。\",\n",
" \"这则新闻,高手在民间啊。\",\n",
" \"看完这则新闻,我笑到肚子疼。\",\n",
" \"这则新闻,真的万万没想到。\",\n",
" \"看完这则新闻,我直呼内行。\",\n",
" \"这则新闻,真的太秀了。\",\n",
" \"看完这则新闻,我人麻了。\",\n",
" \"这则新闻,真的比过山车还刺激。\",\n",
" \"看完这则新闻,我直接笑不活了。\",\n",
" \"这则新闻,真的太有梗了。\",\n",
" \"看完这则新闻,我直呼离谱。\",\n",
" \"这则新闻,真的太绝了。\"\n",
"]\n",
"\n",
"class StandardNGram:\n",
" def __init__(self, n=2):\n",
" self.n = n\n",
" self.ngram_counts = defaultdict(Counter)\n",
" self.context_total = defaultdict(int)\n",
" self.vocab = set()\n",
" self.START = \"<s>\"\n",
" self.END = \"</s>\"\n",
"\n",
" def train(self, corpus):\n",
" for sentence in corpus:\n",
" words = list(jieba.cut(sentence.strip()))\n",
" words = [self.START] + words + [self.END]\n",
" self.vocab.update(words)\n",
" for i in range(len(words) - self.n + 1):\n",
" context = tuple(words[i:i+self.n-1])\n",
" target = words[i+self.n-1]\n",
"\n",
" self.ngram_counts[context][target] += 1\n",
" self.context_total[context] += 1\n",
"\n",
" def get_smoothed_prob(self, context, word):\n",
" count_cw = self.ngram_counts[context][word] + 1\n",
" count_c = self.context_total[context] + len(self.vocab)\n",
" return count_cw / count_c\n",
"\n",
" def sample_next_word(self, context, top_k=3, repeat_punish=None):\n",
" if repeat_punish is None:\n",
" repeat_punish = set()\n",
"\n",
" if context not in self.ngram_counts:\n",
" return self.END\n",
"\n",
"\n",
" word_probs = []\n",
" for word in self.ngram_counts[context]:\n",
" punish = 0.1 if word in repeat_punish else 1.0\n",
" prob = self.get_smoothed_prob(context, word) * punish\n",
" word_probs.append((word, prob))\n",
"\n",
" word_probs.sort(key=lambda x: x[1], reverse=True)\n",
" top_candidates = word_probs[:top_k]\n",
" words, probs = zip(*top_candidates)\n",
"\n",
" return random.choices(words, weights=probs, k=1)[0]\n",
"\n",
" def generate(self, start_keyword, max_len=22):\n",
" generated = [self.START, start_keyword]\n",
" repeat_punish = set([start_keyword])\n",
" current_context = tuple(generated[-1:])\n",
"\n",
" for _ in range(max_len):\n",
" next_word = self.sample_next_word(current_context, repeat_punish=repeat_punish)\n",
" if next_word == self.END:\n",
" break\n",
" generated.append(next_word)\n",
" repeat_punish.add(next_word)\n",
" current_context = tuple(generated[-1:])\n",
"\n",
" final_sentence = \"\".join([w for w in generated if w != self.START])\n",
" if len(final_sentence) < 6:\n",
" final_sentence = f\"{start_keyword}这事儿,真的太有意思了。\"\n",
" return final_sentence\n",
"\n",
"print(\"正在训练词汇任务2-gram模型...\")\n",
"model_word = StandardNGram(n=2)\n",
"model_word.train(CORPUS_WORD)\n",
"\n",
"print(\"正在训练新闻任务2-gram模型...\")\n",
"model_news = StandardNGram(n=2)\n",
"model_news.train(CORPUS_NEWS)\n",
"print(\"模型训练完成!\")\n",
"\n",
"def get_task_type(row):\n",
" w1 = str(row[COL_W1]).strip()\n",
" w2 = str(row[COL_W2]).strip()\n",
" head = str(row[COL_HEAD]).strip()\n",
"\n",
" if head == \"-\" and w1 != \"-\" and w2 != \"-\" and len(w1) > 0 and len(w2) > 0:\n",
" return \"WORD\", [w1, w2]\n",
" elif w1 == \"-\" and w2 == \"-\" and head != \"-\" and len(head) > 5:\n",
" return \"NEWS\", head\n",
" else:\n",
" return \"ERROR\", None\n",
"\n",
"def extract_news_keyword(headline):\n",
" stop_words = set([\"\", \"。\", \"\", \"\", \"、\", \"\", \"\", \"「\", \"」\", \"【\", \"】\", \"《\", \"》\", \"#\", \"\", \"/\", \" \", \".\", \"·\", \"0\", \"1\", \"2\", \"3\", \"4\", \"5\", \"6\", \"7\", \"8\", \"9\"])\n",
" try:\n",
" words = list(jieba.cut(headline))\n",
" valid_keywords = [w for w in words if len(w) >= 2 and w not in stop_words]\n",
" return valid_keywords[0] if len(valid_keywords) > 0 else \"这则新闻\"\n",
" except:\n",
" return \"这则新闻\"\n",
"\n",
"def generate_final_joke(row):\n",
" task_type, task_data = get_task_type(row)\n",
" if task_type == \"WORD\":\n",
" w1, w2 = task_data\n",
" start_word = random.choice([w1, w2])\n",
" base_sentence = model_word.generate(start_word)\n",
" if w1 not in base_sentence:\n",
" base_sentence = f\"{w1}和{base_sentence}\"\n",
" if w2 not in base_sentence:\n",
" base_sentence = f\"{base_sentence},还扯上了{w2}\"\n",
" return base_sentence\n",
" elif task_type == \"NEWS\":\n",
" headline = task_data\n",
" keyword = extract_news_keyword(headline)\n",
" return model_news.generate(keyword)\n",
" else:\n",
" return \"这事儿真的太有意思了。\"\n",
"\n",
"print(\"开始批量生成最终版2-gram基线结果...\")\n",
"df[COL_OUT] = df.apply(generate_final_joke, axis=1)\n",
"# 保存结果\n",
"df.to_csv(\"task_a_zh_ngram_final_standard.tsv\", sep=\"\\t\", index=False)\n",
"print(\"✅ 生成完成!结果已保存为 task_a_zh_ngram_final_standard.tsv\")"
],
"id": "6119c1b7f47da649",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\jieba.cache\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"正在训练词汇任务2-gram模型...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading model cost 0.882 seconds.\n",
"Prefix dict has been built successfully.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"正在训练新闻任务2-gram模型...\n",
"模型训练完成!\n",
"开始批量生成最终版2-gram基线结果...\n",
"✅ 生成完成!结果已保存为 task_a_zh_ngram_final_standard.tsv\n"
]
}
],
"execution_count": 2
},
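{
"metadata": {},
"cell_type": "code",
"source": [
"# A minimal smoke test for the two trained 2-gram generators (illustrative only;\n",
"# assumes the training cell above has run, and sampling is random so outputs vary).\n",
"print(model_word.generate(\"喷洒\"))\n",
"print(model_news.generate(\"新闻\"))"
],
"id": "a9b8c7d6e5f40312",
"outputs": [],
"execution_count": null
},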
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-27T14:30:25.028415300Z",
"start_time": "2026-04-27T14:30:22.452443600Z"
}
},
"cell_type": "code",
"source": "!pip install pandas requests tqdm",
"id": "cb4a5e4932e1e3c8",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in .\\.venv\\Lib\\site-packages (3.0.2)\n",
"Requirement already satisfied: requests in .\\.venv\\Lib\\site-packages (2.33.1)\n",
"Requirement already satisfied: tqdm in .\\.venv\\Lib\\site-packages (4.67.3)\n",
"Requirement already satisfied: numpy>=1.26.0 in .\\.venv\\Lib\\site-packages (from pandas) (2.4.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in .\\.venv\\Lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: tzdata in .\\.venv\\Lib\\site-packages (from pandas) (2026.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in .\\.venv\\Lib\\site-packages (from requests) (3.4.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in .\\.venv\\Lib\\site-packages (from requests) (3.13)\n",
"Requirement already satisfied: urllib3<3,>=1.26 in .\\.venv\\Lib\\site-packages (from requests) (2.6.3)\n",
"Requirement already satisfied: certifi>=2023.5.7 in .\\.venv\\Lib\\site-packages (from requests) (2026.4.22)\n",
"Requirement already satisfied: colorama in .\\.venv\\Lib\\site-packages (from tqdm) (0.4.6)\n",
"Requirement already satisfied: six>=1.5 in .\\.venv\\Lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 26.0.1 -> 26.1\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-27T14:44:00.369207900Z",
"start_time": "2026-04-27T14:36:24.437356600Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm\n",
"import time\n",
"\n",
"API_KEY = \"sk-b6a990cb2dc94a10947401b3589172f8\"\n",
"API_URL = \"https://api.deepseek.com/v1/chat/completions\"\n",
"INPUT_FILE = \"task-a-zh.tsv\"\n",
"OUTPUT_FILE = \"task_a_zh_deepseek_baseline.tsv\"\n",
"MODEL_NAME = \"deepseek-chat\"\n",
"MAX_RETRY = 3\n",
"GENERATION_CONFIG = {\n",
" \"temperature\": 0.7,\n",
" \"top_p\": 0.9,\n",
" \"max_tokens\": 60\n",
"}\n",
"\n",
"PROMPT_WORD = \"\"\"\n",
"你是一个擅长生成中文幽默笑话的专家。\n",
"任务:生成一句简短、自然、有幽默感的中文笑话,必须严格同时包含「{w1}」和「{w2}」两个词。\n",
"要求:\n",
"1. 句子自然通顺,有网感、趣味性,不能生硬堆砌词语\n",
"2. 长度控制在 10-30 个字之间\n",
"3. 只输出笑话本身,不要任何额外解释、前缀后缀\n",
"\"\"\"\n",
"\n",
"PROMPT_NEWS = \"\"\"\n",
"你是一个擅长针对新闻标题生成幽默吐槽评论的专家。\n",
"任务:针对下面的新闻标题,生成一句简短、贴合主题、有幽默感/吐槽感的中文评论。\n",
"新闻标题:{headline}\n",
"要求:\n",
"1. 评论和新闻内容强相关,有网感、吐槽感,不能跑题\n",
"2. 长度控制在 10-30 个字之间\n",
"3. 只输出评论本身,不要任何额外解释、前缀后缀\n",
"\"\"\"\n",
"\n",
"def deepseek_generate(prompt):\n",
" retry_count = 0\n",
" headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Authorization\": f\"Bearer {API_KEY}\"\n",
" }\n",
" payload = {\n",
" \"model\": MODEL_NAME,\n",
" \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
" **GENERATION_CONFIG\n",
" }\n",
"\n",
" while retry_count < MAX_RETRY:\n",
" try:\n",
" response = requests.post(API_URL, headers=headers, json=payload, timeout=30)\n",
" if response.status_code == 200:\n",
" result = response.json()[\"choices\"][0][\"message\"][\"content\"].strip()\n",
" result = result.replace(\"\\n\", \"\").replace('\"', \"\").replace(\"'\", \"\")\n",
" return result\n",
" else:\n",
" print(f\"API 调用失败,状态码:{response.status_code},重试中...\")\n",
" retry_count += 1\n",
" time.sleep(2)\n",
" except Exception as e:\n",
" print(f\"调用异常:{e},重试中...\")\n",
" retry_count += 1\n",
" time.sleep(2)\n",
" return \"这事儿真的太有梗了,我直接蚌埠住了。\"\n",
"\n",
"def generate_joke(row):\n",
" w1 = str(row[\"word1\"]).strip()\n",
" w2 = str(row[\"word2\"]).strip()\n",
" headline = str(row[\"headline\"]).strip()\n",
"\n",
" if headline == \"-\" and w1 != \"-\" and w2 != \"-\":\n",
" prompt = PROMPT_WORD.format(w1=w1, w2=w2)\n",
" return deepseek_generate(prompt)\n",
" elif w1 == \"-\" and w2 == \"-\" and headline != \"-\":\n",
" prompt = PROMPT_NEWS.format(headline=headline)\n",
" return deepseek_generate(prompt)\n",
" else:\n",
" return \"这内容也太有意思了吧。\"\n",
"\n",
"if __name__ == \"__main__\":\n",
" print(f\"正在读取数据集:{INPUT_FILE}\")\n",
" df = pd.read_csv(INPUT_FILE, sep=\"\\t\", dtype=str)\n",
"\n",
" print(\"开始批量生成 DeepSeek 幽默内容...\")\n",
" tqdm.pandas(desc=\"生成进度\")\n",
" df[\"deepseek_based_joke\"] = df.progress_apply(generate_joke, axis=1)\n",
"\n",
" df.to_csv(OUTPUT_FILE, sep=\"\\t\", index=False)\n",
" print(f\"✅ 生成完成!结果已保存到:{OUTPUT_FILE}\")"
],
"id": "3be6a2ca9dee1b34",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"正在读取数据集task-a-zh.tsv\n",
"开始批量生成 DeepSeek 幽默内容...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"生成进度: 100%|██████████| 300/300 [07:35<00:00, 1.52s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 生成完成结果已保存到task_a_zh_deepseek_baseline.tsv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"execution_count": 8
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2026-05-05T14:53:41.636014Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm\n",
"import time\n",
"\n",
"API_KEY = \"sk-b6a990cb2dc94a10947401b3589172f8\"\n",
"API_URL = \"https://api.deepseek.com/v1/chat/completions\"\n",
"MODEL_NAME = \"deepseek-chat\"\n",
"MAX_RETRY = 3\n",
"GENERATION_CONFIG = {\n",
" \"temperature\": 0.2,\n",
" \"top_p\": 0.8,\n",
" \"max_tokens\": 150,\n",
" \"seed\": 42\n",
"}\n",
"\n",
"FILES = {\n",
" \"规则基线\": \"task_a_zh_rule_baseline_full.tsv\",\n",
" \"N-gram基线\": \"task_a_zh_ngram_final_standard.tsv\",\n",
" \"DeepSeek大模型\": \"task_a_zh_deepseek_baseline.tsv\"\n",
"}\n",
"\n",
"RESULT_COLS = {\n",
" \"规则基线\": \"rule_based_joke\",\n",
" \"N-gram基线\": \"ngram_based_joke\",\n",
" \"DeepSeek大模型\": \"deepseek_based_joke\"\n",
"}\n",
"\n",
"PROMPT_WORD = \"\"\"\n",
"你是一个专业的NLP任务评估专家负责评估中文幽默生成结果的质量。\n",
"本次评估对象是词汇包含任务的幽默生成结果,任务要求:生成内容必须同时包含「{w1}」和「{w2}」两个词,且有幽默感、通顺自然。\n",
"请你从以下5个维度对生成结果进行1-5分打分1分最差5分最优并给出1句简短质化点评10-20字点评需贴合打分结果。\n",
"评估维度及打分标准:\n",
"1. 任务约束满足度30%):是否同时包含「{w1}」和「{w2}」无遗漏。5分=完全满足3分=基本满足1分=不满足。\n",
"2. 幽默度30%是否有幽默感、有梗无生硬感。5分=幽默自然3分=轻微幽默1分=无幽默。\n",
"3. 语义通顺度20%是否通顺、无语法错误、无逻辑断裂。5分=完全通顺3分=基本通顺1分=不通顺。\n",
"4. 生成多样性10%结合同类任务该结果是否无明显重复句式。5分=多样性高3分=一般1分=重复。\n",
"5. 语言自然度10%是否自然、无机械模板感。5分=自然3分=基本自然1分=机械。\n",
"\n",
"请严格按照以下格式输出,不要添加任何额外内容,格式:\n",
"约束满足度X分幽默度X分通顺度X分多样性X分自然度X分点评XXX\n",
"生成结果:{generated_text}\n",
"词汇要求:{w1}、{w2}\n",
"\"\"\"\n",
"\n",
"PROMPT_NEWS = \"\"\"\n",
"你是一个专业的NLP任务评估专家负责评估中文幽默生成结果的质量。\n",
"本次评估对象是新闻标题幽默评论生成结果,任务要求:生成内容需贴合原新闻标题,有幽默感、通顺自然。\n",
"请你从以下5个维度对生成结果进行1-5分打分1分最差5分最优并给出1句简短质化点评10-20字点评需贴合打分结果。\n",
"评估维度及打分标准:\n",
"1. 任务约束满足度30%是否贴合原新闻标题无跑题。5分=完全贴合3分=基本贴合1分=跑题。\n",
"2. 幽默度30%是否有幽默感、吐槽感无生硬感。5分=幽默自然3分=轻微幽默1分=无幽默。\n",
"3. 语义通顺度20%是否通顺、无语法错误、无逻辑断裂。5分=完全通顺3分=基本通顺1分=不通顺。\n",
"4. 生成多样性10%结合同类任务该结果是否无明显重复句式。5分=多样性高3分=一般1分=重复。\n",
"5. 语言自然度10%是否自然、无机械模板感。5分=自然3分=基本自然1分=机械。\n",
"\n",
"请严格按照以下格式输出,不要添加任何额外内容,格式:\n",
"约束满足度X分幽默度X分通顺度X分多样性X分自然度X分点评XXX\n",
"生成结果:{generated_text}\n",
"原新闻标题:{headline}\n",
"\"\"\"\n",
"\n",
"def deepseek_evaluate(prompt):\n",
" retry_count = 0\n",
" headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Authorization\": f\"Bearer {API_KEY}\"\n",
" }\n",
" payload = {\n",
" \"model\": MODEL_NAME,\n",
" \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
" **GENERATION_CONFIG\n",
" }\n",
"\n",
" while retry_count < MAX_RETRY:\n",
" try:\n",
" response = requests.post(API_URL, headers=headers, json=payload, timeout=30)\n",
" if response.status_code == 200:\n",
" result = response.json()[\"choices\"][0][\"message\"][\"content\"].strip()\n",
" return result\n",
" else:\n",
" print(f\"API调用失败状态码{response.status_code},重试中...\")\n",
" retry_count += 1\n",
" time.sleep(2)\n",
" except Exception as e:\n",
" print(f\"调用异常:{e},重试中...\")\n",
" retry_count += 1\n",
" time.sleep(2)\n",
" return \"约束满足度3分幽默度3分通顺度3分多样性3分自然度3分点评评估失败默认打分\"\n",
"\n",
"def parse_evaluation_result(eval_text):\n",
" try:\n",
"\n",
" constraint_score = int(eval_text.split(\"约束满足度:\")[1].split(\"分\")[0])\n",
" humor_score = int(eval_text.split(\"幽默度:\")[1].split(\"分\")[0])\n",
" fluency_score = int(eval_text.split(\"通顺度:\")[1].split(\"分\")[0])\n",
" diversity_score = int(eval_text.split(\"多样性:\")[1].split(\"分\")[0])\n",
" natural_score = int(eval_text.split(\"自然度:\")[1].split(\"分\")[0])\n",
" comment = eval_text.split(\"点评:\")[1].strip()\n",
" return constraint_score, humor_score, fluency_score, diversity_score, natural_score, comment\n",
" except:\n",
" return 3, 3, 3, 3, 3, \"解析失败,默认点评\"\n",
"\n",
"def evaluate_single_file(file_path, result_col, method_name):\n",
" print(f\"\\n开始评估【{method_name}】,文件路径:{file_path}\")\n",
" df = pd.read_csv(file_path, sep=\"\\t\", dtype=str)\n",
"\n",
" df[\"约束满足度\"] = 0\n",
" df[\"幽默度\"] = 0\n",
" df[\"通顺度\"] = 0\n",
" df[\"多样性\"] = 0\n",
" df[\"自然度\"] = 0\n",
" df[\"评估点评\"] = \"\"\n",
"\n",
" tqdm.pandas(desc=f\"{method_name} 评估进度\")\n",
" for idx, row in tqdm(df.iterrows(), total=len(df)):\n",
" w1 = str(row[\"word1\"]).strip()\n",
" w2 = str(row[\"word2\"]).strip()\n",
" headline = str(row[\"headline\"]).strip()\n",
" generated_text = str(row[result_col]).strip()\n",
"\n",
" if headline == \"-\" and w1 != \"-\" and w2 != \"-\":\n",
" prompt = PROMPT_WORD.format(w1=w1, w2=w2, generated_text=generated_text)\n",
" elif w1 == \"-\" and w2 == \"-\" and headline != \"-\":\n",
" prompt = PROMPT_NEWS.format(headline=headline, generated_text=generated_text)\n",
" else:\n",
" df.loc[idx, [\"约束满足度\", \"幽默度\", \"通顺度\", \"多样性\", \"自然度\"]] = [3, 3, 3, 3, 3]\n",
" df.loc[idx, \"评估点评\"] = \"异常数据,默认点评\"\n",
" continue\n",
"\n",
" eval_result = deepseek_evaluate(prompt)\n",
" constraint, humor, fluency, diversity, natural, comment = parse_evaluation_result(eval_result)\n",
" df.loc[idx, \"约束满足度\"] = constraint\n",
" df.loc[idx, \"幽默度\"] = humor\n",
" df.loc[idx, \"通顺度\"] = fluency\n",
" df.loc[idx, \"多样性\"] = diversity\n",
" df.loc[idx, \"自然度\"] = natural\n",
" df.loc[idx, \"评估点评\"] = comment\n",
"\n",
" df[\"加权总分\"] = (df[\"约束满足度\"] * 0.3 +\n",
" df[\"幽默度\"] * 0.3 +\n",
" df[\"通顺度\"] * 0.2 +\n",
" df[\"多样性\"] * 0.1 +\n",
" df[\"自然度\"] * 0.1).round(2)\n",
"\n",
" eval_output_path = f\"{method_name}_评估结果.tsv\"\n",
" df.to_csv(eval_output_path, sep=\"\\t\", index=False)\n",
" print(f\"【{method_name}】评估完成,结果保存至:{eval_output_path}\")\n",
"\n",
" summary = {\n",
" \"方法名称\": method_name,\n",
" \"总样本数\": len(df),\n",
" \"约束满足度平均分\": round(df[\"约束满足度\"].mean(), 2),\n",
" \"幽默度平均分\": round(df[\"幽默度\"].mean(), 2),\n",
" \"通顺度平均分\": round(df[\"通顺度\"].mean(), 2),\n",
" \"多样性平均分\": round(df[\"多样性\"].mean(), 2),\n",
" \"自然度平均分\": round(df[\"自然度\"].mean(), 2),\n",
" \"加权总分平均分\": round(df[\"加权总分\"].mean(), 2)\n",
" }\n",
" return df, summary\n",
"\n",
"\n",
"def batch_evaluate_all():\n",
" all_evaluated_dfs = {}\n",
" all_summaries = []\n",
"\n",
" for method_name, file_path in FILES.items():\n",
" evaluated_df, summary = evaluate_single_file(file_path, RESULT_COLS[method_name], method_name)\n",
" all_evaluated_dfs[method_name] = evaluated_df\n",
" all_summaries.append(summary)\n",
"\n",
" summary_df = pd.DataFrame(all_summaries)\n",
"\n",
" summary_df = summary_df.sort_values(by=\"加权总分平均分\", ascending=False).reset_index(drop=True)\n",
"\n",
" summary_report_path = \"三种方法评估汇总报告.tsv\"\n",
" summary_df.to_csv(summary_report_path, sep=\"\\t\", index=False)\n",
" print(f\"\\n✅ 所有方法评估完成!汇总报告保存至:{summary_report_path}\")\n",
"\n",
" print(\"\\n=== 三种方法评估汇总(按加权总分排序)===\")\n",
" print(summary_df.to_string(index=False))\n",
"\n",
" return all_evaluated_dfs, summary_df\n",
"\n",
"if __name__ == \"__main__\":\n",
" print(\"开始执行三种方法的批量评估基于DeepSeek-V4-Pro\")\n",
" print(f\"评估样本数每个方法约300条预计耗时15-20分钟请耐心等待...\")\n",
" all_evaluated_dfs, summary_df = batch_evaluate_all()\n"
],
"id": "597c96096f5008b4",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始执行三种方法的批量评估基于DeepSeek-V4-Pro\n",
"评估样本数每个方法约300条预计耗时15-20分钟请耐心等待...\n",
"\n",
"开始评估【规则基线】文件路径task_a_zh_rule_baseline_full.tsv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 45/45 [01:38<00:00, 2.20s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"【规则基线】评估完成结果保存至规则基线_评估结果.tsv\n",
"\n",
"开始评估【N-gram基线】文件路径task_a_zh_ngram_final_standard.tsv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 300/300 [10:16<00:00, 2.06s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"【N-gram基线】评估完成结果保存至N-gram基线_评估结果.tsv\n",
"\n",
"开始评估【DeepSeek大模型】文件路径task_a_zh_deepseek_baseline.tsv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 40%|████ | 120/300 [04:08<05:49, 1.94s/it]"
]
}
],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "9ae712f5206f6482"
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "b06c46d127418850"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}