From 779ac3333f81920d448fb3ed68e0dbc093ed445d Mon Sep 17 00:00:00 2001 From: pjtocqbk6 <1873401205@qq.com> Date: Tue, 5 May 2026 23:10:21 +0800 Subject: [PATCH] ADD file via upload --- task1.ipynb | 887 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 887 insertions(+) create mode 100644 task1.ipynb diff --git a/task1.ipynb b/task1.ipynb new file mode 100644 index 0000000..f4afce3 --- /dev/null +++ b/task1.ipynb @@ -0,0 +1,887 @@ +{ + "cells": [ + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T13:04:33.988621800Z", + "start_time": "2026-04-27T13:04:10.669090800Z" + } + }, + "cell_type": "code", + "source": "!pip install pandas jieba", + "id": "6e2e7982cb88a612", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in .\\.venv\\Lib\\site-packages (3.0.2)\n", + "Collecting jieba\n", + " Downloading jieba-0.42.1.tar.gz (19.2 MB)\n", + " ---------------------------------------- 0.0/19.2 MB ? eta -:--:--\n", + " ---------------------------------------- 0.0/19.2 MB ? 
eta -:--:--\n", + " -- ------------------------------------- 1.0/19.2 MB 5.0 MB/s eta 0:00:04\n", + " ----- ---------------------------------- 2.6/19.2 MB 6.6 MB/s eta 0:00:03\n", + " -------- ------------------------------- 3.9/19.2 MB 6.2 MB/s eta 0:00:03\n", + " ---------- ----------------------------- 5.0/19.2 MB 6.0 MB/s eta 0:00:03\n", + " ------------ --------------------------- 5.8/19.2 MB 5.5 MB/s eta 0:00:03\n", + " ------------- -------------------------- 6.6/19.2 MB 5.2 MB/s eta 0:00:03\n", + " --------------- ------------------------ 7.3/19.2 MB 5.1 MB/s eta 0:00:03\n", + " ----------------- ---------------------- 8.4/19.2 MB 4.9 MB/s eta 0:00:03\n", + " ------------------- -------------------- 9.2/19.2 MB 4.8 MB/s eta 0:00:03\n", + " -------------------- ------------------- 10.0/19.2 MB 4.6 MB/s eta 0:00:02\n", + " ---------------------- ----------------- 10.7/19.2 MB 4.6 MB/s eta 0:00:02\n", + " ------------------------ --------------- 11.5/19.2 MB 4.5 MB/s eta 0:00:02\n", + " ------------------------- -------------- 12.3/19.2 MB 4.5 MB/s eta 0:00:02\n", + " --------------------------- ------------ 13.1/19.2 MB 4.5 MB/s eta 0:00:02\n", + " ---------------------------- ----------- 13.6/19.2 MB 4.5 MB/s eta 0:00:02\n", + " ------------------------------ --------- 14.7/19.2 MB 4.3 MB/s eta 0:00:02\n", + " ------------------------------- -------- 15.2/19.2 MB 4.2 MB/s eta 0:00:01\n", + " -------------------------------- ------- 15.5/19.2 MB 4.1 MB/s eta 0:00:01\n", + " --------------------------------- ------ 16.0/19.2 MB 4.0 MB/s eta 0:00:01\n", + " --------------------------------- ------ 16.3/19.2 MB 3.8 MB/s eta 0:00:01\n", + " ---------------------------------- ----- 16.5/19.2 MB 3.7 MB/s eta 0:00:01\n", + " ---------------------------------- ----- 16.5/19.2 MB 3.7 MB/s eta 0:00:01\n", + " ---------------------------------- ----- 16.8/19.2 MB 3.5 MB/s eta 0:00:01\n", + " ----------------------------------- ---- 17.0/19.2 MB 3.3 MB/s eta 0:00:01\n", 
+ " ----------------------------------- ---- 17.0/19.2 MB 3.3 MB/s eta 0:00:01\n", + " ------------------------------------ --- 17.3/19.2 MB 3.2 MB/s eta 0:00:01\n", + " ------------------------------------ --- 17.6/19.2 MB 3.1 MB/s eta 0:00:01\n", + " ------------------------------------ --- 17.6/19.2 MB 3.1 MB/s eta 0:00:01\n", + " ------------------------------------- -- 17.8/19.2 MB 3.0 MB/s eta 0:00:01\n", + " ------------------------------------- -- 18.1/19.2 MB 2.9 MB/s eta 0:00:01\n", + " -------------------------------------- - 18.4/19.2 MB 2.8 MB/s eta 0:00:01\n", + " -------------------------------------- - 18.6/19.2 MB 2.8 MB/s eta 0:00:01\n", + " --------------------------------------- 18.9/19.2 MB 2.7 MB/s eta 0:00:01\n", + " --------------------------------------- 19.1/19.2 MB 2.7 MB/s eta 0:00:01\n", + " ---------------------------------------- 19.2/19.2 MB 2.6 MB/s 0:00:07\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing metadata (pyproject.toml): started\n", + " Preparing metadata (pyproject.toml): finished with status 'done'\n", + "Requirement already satisfied: numpy>=1.26.0 in .\\.venv\\Lib\\site-packages (from pandas) (2.4.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in .\\.venv\\Lib\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: tzdata in .\\.venv\\Lib\\site-packages (from pandas) (2026.2)\n", + "Requirement already satisfied: six>=1.5 in .\\.venv\\Lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Building wheels for collected packages: jieba\n", + " Building wheel for jieba (pyproject.toml): started\n", + " Building wheel for jieba (pyproject.toml): finished with status 'done'\n", + " Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314527 
sha256=9c6c0b82134284c2022dd7832428b0c1355cf813225c5e3869e51e23ece4666b\n", + " Stored in directory: c:\\users\\administrator\\appdata\\local\\pip\\cache\\wheels\\ac\\60\\cf\\538a1f183409caf1fc136b5d2c2dee329001ef6da2c5084bef\n", + "Successfully built jieba\n", + "Installing collected packages: jieba\n", + "Successfully installed jieba-0.42.1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 26.0.1 -> 26.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "execution_count": 9 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T13:05:18.748847300Z", + "start_time": "2026-04-27T13:05:18.691833Z" + } + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import random\n", + "\n", + "df = pd.read_csv(\"task-a-zh.tsv\", sep=\"\\t\", dtype=str)\n", + "COL_ID = \"id\"\n", + "COL_WORD1 = \"word1\"\n", + "COL_WORD2 = \"word2\"\n", + "COL_HEADLINE = \"headline\"\n", + "COL_RESULT = \"rule_based_joke\"\n", + "\n", + "WORD_TEMPLATES = [\n", + " \"千万不要用{w1}去碰{w2},不然你会收获一个这辈子都忘不掉的名场面。\",\n", + " \"我试着用{w1}处理{w2},结果场面一度失控到我想原地找个地缝钻进去。\",\n", + " \"谁能想到,用{w1}和{w2}组合在一起,居然能搞出这么离谱的事情。\",\n", + " \"朋友让我用{w1}去打理{w2},我做完之后他直接和我绝交了。\",\n", + " \"你知道用{w1}对{w2}做什么最可怕吗?是完全不受控制的意外。\",\n", + " \"第一次用{w1}操作{w2},直接给我整出了这辈子都不想再经历的社死现场。\"\n", + "]\n", + "\n", + "NEWS_TEMPLATES = [\n", + " \"看完【{news}】这则新闻,我只能说这波操作属实是我万万没想到的。\",\n", + " \"本来以为【{news}】是个常规消息,结果看完细节直接给我整笑了。\",\n", + " \"【{news}】,这不就是现实版的大型反转现场吗?主打一个意想不到。\",\n", + " \"刷到【{news}】的新闻,我和朋友讨论了半天,只能说格局真的太大了。\",\n", + " \"看到【{news}】这则消息,我只能说高手过招,招招都在意料之外。\",\n", + " \"【{news}】,只能说现在的新闻真的比电视剧还精彩。\"\n", + "]\n", + "\n", + "def generate_rule_joke(row):\n", + " word1 = str(row[COL_WORD1]).strip()\n", + " word2 = str(row[COL_WORD2]).strip()\n", + " headline = str(row[COL_HEADLINE]).strip()\n", + "\n", + " if headline == \"-\" and word1 != \"-\" and word2 != \"-\":\n", + " template = 
random.choice(WORD_TEMPLATES)\n", + " return template.format(w1=word1, w2=word2)\n", + "\n", + " elif word1 == \"-\" and word2 == \"-\" and headline != \"-\":\n", + " template = random.choice(NEWS_TEMPLATES)\n", + " return template.format(news=headline)\n", + "\n", + " else:\n", + " return \"生成失败,数据格式异常\"\n", + "\n", + "df = df[(df.index >= 275) | (df.index.isin(range(0, 20)))]\n", + "\n", + "print(\"开始批量生成规则基线结果...\")\n", + "df[COL_RESULT] = df.apply(generate_rule_joke, axis=1)\n", + "\n", + "df.to_csv(\"task_a_zh_rule_baseline_full.tsv\", sep=\"\\t\", index=False)\n", + "print(\"✅ 生成完成!结果已保存为 task_a_zh_rule_baseline_full.tsv\")" + ], + "id": "38820c6edd5cde51", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始批量生成规则基线结果...\n", + "✅ 生成完成!结果已保存为 task_a_zh_rule_baseline_full.tsv\n" + ] + } + ], + "execution_count": 10 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T13:54:55.240350200Z", + "start_time": "2026-04-27T13:54:54.187588500Z" + } + }, + "cell_type": "code", + "source": [ + "import jieba\n", + "from collections import defaultdict, Counter\n", + "import random\n", + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"task-a-zh.tsv\", sep=\"\\t\", dtype=str)\n", + "# 固定列名\n", + "COL_ID = \"id\"\n", + "COL_W1 = \"word1\"\n", + "COL_W2 = \"word2\"\n", + "COL_HEAD = \"headline\"\n", + "COL_OUT = \"ngram_based_joke\"\n", + "\n", + "\n", + "CORPUS_WORD = [\n", + " \"用喷洒去碰键盘,结果场面一度失控。\",\n", + " \"用摇晃处理椅子,朋友看完直接笑了。\",\n", + " \"用移动打理毛巾,结果闹出了大笑话。\",\n", + " \"用钻操作笔记本电脑,直接翻车了。\",\n", + " \"用喷洒处理香蕉,结果越弄越糟。\",\n", + " \"用滚动处理香蕉,结果滚到了沟里。\",\n", + " \"用去籽处理胡椒,结果辣到了自己。\",\n", + " \"用滚动处理衬衫,结果越弄越皱。\",\n", + " \"用喷洒处理笔记本电脑,直接报废了。\",\n", + " \"用混合处理玉米,结果味道很奇怪。\",\n", + " \"用切割处理椅子,结果直接散架了。\",\n", + " \"用混合处理鞋子,结果变成了限量款。\",\n", + " \"用测量处理头发,结果发现掉了一半。\",\n", + " \"用锤击处理花朵,结果花盆碎了一地。\",\n", + " \"用冲洗处理自行车,结果链条掉了。\",\n", + " \"用摇晃处理衣服,结果纸屑撒了一地。\",\n", + " \"用钻处理番茄,结果汁水喷了一身。\",\n", + " \"用喷洒处理冰箱,结果食材全串味了。\",\n", + " 
\"用钻井处理南瓜,结果南瓜裂开了。\",\n", + " \"用钻处理书本,结果书被钻烂了。\",\n", + " \"用洗涤处理笔记本电脑,结果短路了。\",\n", + " \"用测量处理鸡蛋,结果手滑摔碎了。\",\n", + " \"用锤击处理笔记本电脑,结果成了废品。\",\n", + " \"用测量处理冰箱,结果塞不进厨房。\",\n", + " \"用喷洒处理南瓜,结果外皮烂掉了。\",\n", + " \"千万不要用喷洒碰键盘,不然会出大事。\",\n", + " \"谁能想到用摇晃处理椅子,会这么搞笑。\",\n", + " \"用移动打理毛巾,我直接社死了。\",\n", + " \"用钻碰笔记本电脑,我人都傻了。\",\n", + " \"用喷洒碰香蕉,结果笑料百出。\",\n", + " \"用滚动碰香蕉,结果尴尬到抠脚。\",\n", + " \"用去籽处理胡椒,我打了一下午喷嚏。\",\n", + " \"用滚动处理衬衫,熨了半小时都没用。\",\n", + " \"用喷洒碰笔记本电脑,数据全没了。\",\n", + " \"用混合处理玉米,辣到我灵魂出窍。\",\n", + " \"用切割处理椅子,摔了个屁股墩。\",\n", + " \"用混合处理鞋子,朋友笑了一整天。\",\n", + " \"用测量处理头发,我当场就emo了。\",\n", + " \"用锤击处理花朵,我妈追着我打。\",\n", + " \"用冲洗处理自行车,推了三公里回家。\",\n", + " \"用摇晃处理衣服,捡了半小时硬币。\",\n", + " \"用钻处理番茄,汁溅了一墙。\",\n", + " \"用喷洒处理冰箱,菜全烂了。\",\n", + " \"用钻井处理南瓜,籽撒了一地。\",\n", + " \"用钻处理书本,被老师骂了一顿。\",\n", + " \"用洗涤处理电脑,作业全没了。\",\n", + " \"用测量处理鸡蛋,蛋黄流了一地。\",\n", + " \"用锤击处理电脑,游戏全没了。\",\n", + " \"用测量处理冰箱,我当场自闭了。\",\n", + " \"用喷洒处理南瓜,晚饭都没了。\"\n", + "]\n", + "\n", + "CORPUS_NEWS = [\n", + " \"这则新闻,真的比电视剧还精彩。\",\n", + " \"看完这则新闻,我直接笑出声了。\",\n", + " \"这则新闻的走向,完全超出我的预期。\",\n", + " \"看完这则新闻,我只能说太离谱了。\",\n", + " \"这则新闻,真的越看越有意思。\",\n", + " \"看完这则新闻,我大开眼界了。\",\n", + " \"这则新闻,现实比剧本还魔幻。\",\n", + " \"看完这则新闻,我直接愣住了。\",\n", + " \"这则新闻,真的太有戏剧性了。\",\n", + " \"看完这则新闻,我蚌埠住了。\",\n", + " \"这则新闻,反转来的猝不及防。\",\n", + " \"看完这则新闻,我只能说太会玩了。\",\n", + " \"这则新闻,真的刷新了我的认知。\",\n", + " \"看完这则新闻,我服了。\",\n", + " \"这则新闻,高手在民间啊。\",\n", + " \"看完这则新闻,我笑到肚子疼。\",\n", + " \"这则新闻,真的万万没想到。\",\n", + " \"看完这则新闻,我直呼内行。\",\n", + " \"这则新闻,真的太秀了。\",\n", + " \"看完这则新闻,我人麻了。\",\n", + " \"这则新闻,真的比过山车还刺激。\",\n", + " \"看完这则新闻,我直接笑不活了。\",\n", + " \"这则新闻,真的太有梗了。\",\n", + " \"看完这则新闻,我直呼离谱。\",\n", + " \"这则新闻,真的太绝了。\"\n", + "]\n", + "\n", + "class StandardNGram:\n", + " def __init__(self, n=2):\n", + " self.n = n\n", + " self.ngram_counts = defaultdict(Counter)\n", + " self.context_total = defaultdict(int)\n", + " self.vocab = set()\n", + " self.START = \"\"\n", + " self.END = \"\"\n", + "\n", + " def train(self, corpus):\n", + " for sentence 
in corpus:\n", + " words = list(jieba.cut(sentence.strip()))\n", + " words = [self.START] + words + [self.END]\n", + " self.vocab.update(words)\n", + " for i in range(len(words) - self.n + 1):\n", + " context = tuple(words[i:i+self.n-1])\n", + " target = words[i+self.n-1]\n", + "\n", + " self.ngram_counts[context][target] += 1\n", + " self.context_total[context] += 1\n", + "\n", + " def get_smoothed_prob(self, context, word):\n", + " count_cw = self.ngram_counts[context][word] + 1\n", + " count_c = self.context_total[context] + len(self.vocab)\n", + " return count_cw / count_c\n", + "\n", + " def sample_next_word(self, context, top_k=3, repeat_punish=None):\n", + " if repeat_punish is None:\n", + " repeat_punish = set()\n", + "\n", + " if context not in self.ngram_counts:\n", + " return self.END\n", + "\n", + "\n", + " word_probs = []\n", + " for word in self.ngram_counts[context]:\n", + " punish = 0.1 if word in repeat_punish else 1.0\n", + " prob = self.get_smoothed_prob(context, word) * punish\n", + " word_probs.append((word, prob))\n", + "\n", + " word_probs.sort(key=lambda x: x[1], reverse=True)\n", + " top_candidates = word_probs[:top_k]\n", + " words, probs = zip(*top_candidates)\n", + "\n", + " return random.choices(words, weights=probs, k=1)[0]\n", + "\n", + " def generate(self, start_keyword, max_len=22):\n", + " generated = [self.START, start_keyword]\n", + " repeat_punish = set([start_keyword])\n", + " current_context = tuple(generated[-1:])\n", + "\n", + " for _ in range(max_len):\n", + " next_word = self.sample_next_word(current_context, repeat_punish=repeat_punish)\n", + " if next_word == self.END:\n", + " break\n", + " generated.append(next_word)\n", + " repeat_punish.add(next_word)\n", + " current_context = tuple(generated[-1:])\n", + "\n", + " final_sentence = \"\".join([w for w in generated if w != self.START])\n", + " if len(final_sentence) < 6:\n", + " final_sentence = f\"{start_keyword}这事儿,真的太有意思了。\"\n", + " return final_sentence\n", + "\n", + 
"print(\"正在训练词汇任务2-gram模型...\")\n", + "model_word = StandardNGram(n=2)\n", + "model_word.train(CORPUS_WORD)\n", + "\n", + "print(\"正在训练新闻任务2-gram模型...\")\n", + "model_news = StandardNGram(n=2)\n", + "model_news.train(CORPUS_NEWS)\n", + "print(\"模型训练完成!\")\n", + "\n", + "def get_task_type(row):\n", + " w1 = str(row[COL_W1]).strip()\n", + " w2 = str(row[COL_W2]).strip()\n", + " head = str(row[COL_HEAD]).strip()\n", + "\n", + " if head == \"-\" and w1 != \"-\" and w2 != \"-\" and len(w1) > 0 and len(w2) > 0:\n", + " return \"WORD\", [w1, w2]\n", + " elif w1 == \"-\" and w2 == \"-\" and head != \"-\" and len(head) > 5:\n", + " return \"NEWS\", head\n", + " else:\n", + " return \"ERROR\", None\n", + "\n", + "def extract_news_keyword(headline):\n", + " stop_words = set([\",\", \"。\", \"!\", \"?\", \"、\", \":\", \";\", \"「\", \"」\", \"【\", \"】\", \"《\", \"》\", \"#\", \"|\", \"/\", \" \", \".\", \"·\", \"0\", \"1\", \"2\", \"3\", \"4\", \"5\", \"6\", \"7\", \"8\", \"9\"])\n", + " try:\n", + " words = list(jieba.cut(headline))\n", + " valid_keywords = [w for w in words if len(w) >= 2 and w not in stop_words]\n", + " return valid_keywords[0] if len(valid_keywords) > 0 else \"这则新闻\"\n", + " except:\n", + " return \"这则新闻\"\n", + "\n", + "def generate_final_joke(row):\n", + " task_type, task_data = get_task_type(row)\n", + " if task_type == \"WORD\":\n", + " w1, w2 = task_data\n", + " start_word = random.choice([w1, w2])\n", + " base_sentence = model_word.generate(start_word)\n", + " if w1 not in base_sentence:\n", + " base_sentence = f\"{w1}和{base_sentence}\"\n", + " if w2 not in base_sentence:\n", + " base_sentence = f\"{base_sentence},还扯上了{w2}\"\n", + " return base_sentence\n", + " elif task_type == \"NEWS\":\n", + " headline = task_data\n", + " keyword = extract_news_keyword(headline)\n", + " return model_news.generate(keyword)\n", + " else:\n", + " return \"这事儿真的太有意思了。\"\n", + "\n", + "print(\"开始批量生成最终版2-gram基线结果...\")\n", + "df[COL_OUT] = df.apply(generate_final_joke, 
axis=1)\n", + "# 保存结果\n", + "df.to_csv(\"task_a_zh_ngram_final_standard.tsv\", sep=\"\\t\", index=False)\n", + "print(\"✅ 生成完成!结果已保存为 task_a_zh_ngram_final_standard.tsv\")" + ], + "id": "6119c1b7f47da649", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Building prefix dict from the default dictionary ...\n", + "Loading model from cache C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\jieba.cache\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "正在训练词汇任务2-gram模型...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading model cost 0.882 seconds.\n", + "Prefix dict has been built successfully.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "正在训练新闻任务2-gram模型...\n", + "模型训练完成!\n", + "开始批量生成最终版2-gram基线结果...\n", + "✅ 生成完成!结果已保存为 task_a_zh_ngram_final_standard.tsv\n" + ] + } + ], + "execution_count": 2 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:30:25.028415300Z", + "start_time": "2026-04-27T14:30:22.452443600Z" + } + }, + "cell_type": "code", + "source": "!pip install pandas requests tqdm", + "id": "cb4a5e4932e1e3c8", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in .\\.venv\\Lib\\site-packages (3.0.2)\n", + "Requirement already satisfied: requests in .\\.venv\\Lib\\site-packages (2.33.1)\n", + "Requirement already satisfied: tqdm in .\\.venv\\Lib\\site-packages (4.67.3)\n", + "Requirement already satisfied: numpy>=1.26.0 in .\\.venv\\Lib\\site-packages (from pandas) (2.4.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in .\\.venv\\Lib\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: tzdata in .\\.venv\\Lib\\site-packages (from pandas) (2026.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in .\\.venv\\Lib\\site-packages (from requests) (3.4.7)\n", + "Requirement already satisfied: 
idna<4,>=2.5 in .\\.venv\\Lib\\site-packages (from requests) (3.13)\n", + "Requirement already satisfied: urllib3<3,>=1.26 in .\\.venv\\Lib\\site-packages (from requests) (2.6.3)\n", + "Requirement already satisfied: certifi>=2023.5.7 in .\\.venv\\Lib\\site-packages (from requests) (2026.4.22)\n", + "Requirement already satisfied: colorama in .\\.venv\\Lib\\site-packages (from tqdm) (0.4.6)\n", + "Requirement already satisfied: six>=1.5 in .\\.venv\\Lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 26.0.1 -> 26.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "execution_count": 6 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:44:00.369207900Z", + "start_time": "2026-04-27T14:36:24.437356600Z" + } + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import requests\n", + "from tqdm import tqdm\n", + "import time\n", + "\n", + "API_KEY = \"sk-b6a990cb2dc94a10947401b3589172f8\"\n", + "API_URL = \"https://api.deepseek.com/v1/chat/completions\"\n", + "INPUT_FILE = \"task-a-zh.tsv\"\n", + "OUTPUT_FILE = \"task_a_zh_deepseek_baseline.tsv\"\n", + "MODEL_NAME = \"deepseek-chat\"\n", + "MAX_RETRY = 3\n", + "GENERATION_CONFIG = {\n", + " \"temperature\": 0.7,\n", + " \"top_p\": 0.9,\n", + " \"max_tokens\": 60\n", + "}\n", + "\n", + "PROMPT_WORD = \"\"\"\n", + "你是一个擅长生成中文幽默笑话的专家。\n", + "任务:生成一句简短、自然、有幽默感的中文笑话,必须严格同时包含「{w1}」和「{w2}」两个词。\n", + "要求:\n", + "1. 句子自然通顺,有网感、趣味性,不能生硬堆砌词语\n", + "2. 长度控制在 10-30 个字之间\n", + "3. 只输出笑话本身,不要任何额外解释、前缀后缀\n", + "\"\"\"\n", + "\n", + "PROMPT_NEWS = \"\"\"\n", + "你是一个擅长针对新闻标题生成幽默吐槽评论的专家。\n", + "任务:针对下面的新闻标题,生成一句简短、贴合主题、有幽默感/吐槽感的中文评论。\n", + "新闻标题:{headline}\n", + "要求:\n", + "1. 评论和新闻内容强相关,有网感、吐槽感,不能跑题\n", + "2. 长度控制在 10-30 个字之间\n", + "3. 
只输出评论本身,不要任何额外解释、前缀后缀\n", + "\"\"\"\n", + "\n", + "def deepseek_generate(prompt):\n", + " retry_count = 0\n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Authorization\": f\"Bearer {API_KEY}\"\n", + " }\n", + " payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n", + " **GENERATION_CONFIG\n", + " }\n", + "\n", + " while retry_count < MAX_RETRY:\n", + " try:\n", + " response = requests.post(API_URL, headers=headers, json=payload, timeout=30)\n", + " if response.status_code == 200:\n", + " result = response.json()[\"choices\"][0][\"message\"][\"content\"].strip()\n", + " result = result.replace(\"\\n\", \"\").replace('\"', \"\").replace(\"'\", \"\")\n", + " return result\n", + " else:\n", + " print(f\"API 调用失败,状态码:{response.status_code},重试中...\")\n", + " retry_count += 1\n", + " time.sleep(2)\n", + " except Exception as e:\n", + " print(f\"调用异常:{e},重试中...\")\n", + " retry_count += 1\n", + " time.sleep(2)\n", + " return \"这事儿真的太有梗了,我直接蚌埠住了。\"\n", + "\n", + "def generate_joke(row):\n", + " w1 = str(row[\"word1\"]).strip()\n", + " w2 = str(row[\"word2\"]).strip()\n", + " headline = str(row[\"headline\"]).strip()\n", + "\n", + " if headline == \"-\" and w1 != \"-\" and w2 != \"-\":\n", + " prompt = PROMPT_WORD.format(w1=w1, w2=w2)\n", + " return deepseek_generate(prompt)\n", + " elif w1 == \"-\" and w2 == \"-\" and headline != \"-\":\n", + " prompt = PROMPT_NEWS.format(headline=headline)\n", + " return deepseek_generate(prompt)\n", + " else:\n", + " return \"这内容也太有意思了吧。\"\n", + "\n", + "if __name__ == \"__main__\":\n", + " print(f\"正在读取数据集:{INPUT_FILE}\")\n", + " df = pd.read_csv(INPUT_FILE, sep=\"\\t\", dtype=str)\n", + "\n", + " print(\"开始批量生成 DeepSeek 幽默内容...\")\n", + " tqdm.pandas(desc=\"生成进度\")\n", + " df[\"deepseek_based_joke\"] = df.progress_apply(generate_joke, axis=1)\n", + "\n", + " df.to_csv(OUTPUT_FILE, sep=\"\\t\", index=False)\n", + " print(f\"✅ 
生成完成!结果已保存到:{OUTPUT_FILE}\")" + ], + "id": "3be6a2ca9dee1b34", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "正在读取数据集:task-a-zh.tsv\n", + "开始批量生成 DeepSeek 幽默内容...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "生成进度: 100%|██████████| 300/300 [07:35<00:00, 1.52s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ 生成完成!结果已保存到:task_a_zh_deepseek_baseline.tsv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "execution_count": 8 + }, + { + "metadata": { + "ExecuteTime": { + "start_time": "2026-05-05T14:53:41.636014Z" + } + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import requests\n", + "from tqdm import tqdm\n", + "import time\n", + "\n", + "API_KEY = \"sk-b6a990cb2dc94a10947401b3589172f8\"\n", + "API_URL = \"https://api.deepseek.com/v1/chat/completions\"\n", + "MODEL_NAME = \"deepseek-chat\"\n", + "MAX_RETRY = 3\n", + "GENERATION_CONFIG = {\n", + " \"temperature\": 0.2,\n", + " \"top_p\": 0.8,\n", + " \"max_tokens\": 150,\n", + " \"seed\": 42\n", + "}\n", + "\n", + "FILES = {\n", + " \"规则基线\": \"task_a_zh_rule_baseline_full.tsv\",\n", + " \"N-gram基线\": \"task_a_zh_ngram_final_standard.tsv\",\n", + " \"DeepSeek大模型\": \"task_a_zh_deepseek_baseline.tsv\"\n", + "}\n", + "\n", + "RESULT_COLS = {\n", + " \"规则基线\": \"rule_based_joke\",\n", + " \"N-gram基线\": \"ngram_based_joke\",\n", + " \"DeepSeek大模型\": \"deepseek_based_joke\"\n", + "}\n", + "\n", + "PROMPT_WORD = \"\"\"\n", + "你是一个专业的NLP任务评估专家,负责评估中文幽默生成结果的质量。\n", + "本次评估对象是词汇包含任务的幽默生成结果,任务要求:生成内容必须同时包含「{w1}」和「{w2}」两个词,且有幽默感、通顺自然。\n", + "请你从以下5个维度,对生成结果进行1-5分打分(1分最差,5分最优),并给出1句简短质化点评(10-20字),点评需贴合打分结果。\n", + "评估维度及打分标准:\n", + "1. 任务约束满足度(30%):是否同时包含「{w1}」和「{w2}」,无遗漏。5分=完全满足;3分=基本满足;1分=不满足。\n", + "2. 幽默度(30%):是否有幽默感、有梗,无生硬感。5分=幽默自然;3分=轻微幽默;1分=无幽默。\n", + "3. 语义通顺度(20%):是否通顺、无语法错误、无逻辑断裂。5分=完全通顺;3分=基本通顺;1分=不通顺。\n", + "4. 
生成多样性(10%):结合同类任务,该结果是否无明显重复句式。5分=多样性高;3分=一般;1分=重复。\n", + "5. 语言自然度(10%):是否自然、无机械模板感。5分=自然;3分=基本自然;1分=机械。\n", + "\n", + "请严格按照以下格式输出,不要添加任何额外内容,格式:\n", + "约束满足度:X分,幽默度:X分,通顺度:X分,多样性:X分,自然度:X分,点评:XXX\n", + "生成结果:{generated_text}\n", + "词汇要求:{w1}、{w2}\n", + "\"\"\"\n", + "\n", + "PROMPT_NEWS = \"\"\"\n", + "你是一个专业的NLP任务评估专家,负责评估中文幽默生成结果的质量。\n", + "本次评估对象是新闻标题幽默评论生成结果,任务要求:生成内容需贴合原新闻标题,有幽默感、通顺自然。\n", + "请你从以下5个维度,对生成结果进行1-5分打分(1分最差,5分最优),并给出1句简短质化点评(10-20字),点评需贴合打分结果。\n", + "评估维度及打分标准:\n", + "1. 任务约束满足度(30%):是否贴合原新闻标题,无跑题。5分=完全贴合;3分=基本贴合;1分=跑题。\n", + "2. 幽默度(30%):是否有幽默感、吐槽感,无生硬感。5分=幽默自然;3分=轻微幽默;1分=无幽默。\n", + "3. 语义通顺度(20%):是否通顺、无语法错误、无逻辑断裂。5分=完全通顺;3分=基本通顺;1分=不通顺。\n", + "4. 生成多样性(10%):结合同类任务,该结果是否无明显重复句式。5分=多样性高;3分=一般;1分=重复。\n", + "5. 语言自然度(10%):是否自然、无机械模板感。5分=自然;3分=基本自然;1分=机械。\n", + "\n", + "请严格按照以下格式输出,不要添加任何额外内容,格式:\n", + "约束满足度:X分,幽默度:X分,通顺度:X分,多样性:X分,自然度:X分,点评:XXX\n", + "生成结果:{generated_text}\n", + "原新闻标题:{headline}\n", + "\"\"\"\n", + "\n", + "def deepseek_evaluate(prompt):\n", + " retry_count = 0\n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Authorization\": f\"Bearer {API_KEY}\"\n", + " }\n", + " payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n", + " **GENERATION_CONFIG\n", + " }\n", + "\n", + " while retry_count < MAX_RETRY:\n", + " try:\n", + " response = requests.post(API_URL, headers=headers, json=payload, timeout=30)\n", + " if response.status_code == 200:\n", + " result = response.json()[\"choices\"][0][\"message\"][\"content\"].strip()\n", + " return result\n", + " else:\n", + " print(f\"API调用失败,状态码:{response.status_code},重试中...\")\n", + " retry_count += 1\n", + " time.sleep(2)\n", + " except Exception as e:\n", + " print(f\"调用异常:{e},重试中...\")\n", + " retry_count += 1\n", + " time.sleep(2)\n", + " return \"约束满足度:3分,幽默度:3分,通顺度:3分,多样性:3分,自然度:3分,点评:评估失败,默认打分\"\n", + "\n", + "def parse_evaluation_result(eval_text):\n", + " try:\n", + "\n", + " constraint_score = 
int(eval_text.split(\"约束满足度:\")[1].split(\"分\")[0])\n", + " humor_score = int(eval_text.split(\"幽默度:\")[1].split(\"分\")[0])\n", + " fluency_score = int(eval_text.split(\"通顺度:\")[1].split(\"分\")[0])\n", + " diversity_score = int(eval_text.split(\"多样性:\")[1].split(\"分\")[0])\n", + " natural_score = int(eval_text.split(\"自然度:\")[1].split(\"分\")[0])\n", + " comment = eval_text.split(\"点评:\")[1].strip()\n", + " return constraint_score, humor_score, fluency_score, diversity_score, natural_score, comment\n", + " except:\n", + " return 3, 3, 3, 3, 3, \"解析失败,默认点评\"\n", + "\n", + "def evaluate_single_file(file_path, result_col, method_name):\n", + " print(f\"\\n开始评估【{method_name}】,文件路径:{file_path}\")\n", + " df = pd.read_csv(file_path, sep=\"\\t\", dtype=str)\n", + "\n", + " df[\"约束满足度\"] = 0\n", + " df[\"幽默度\"] = 0\n", + " df[\"通顺度\"] = 0\n", + " df[\"多样性\"] = 0\n", + " df[\"自然度\"] = 0\n", + " df[\"评估点评\"] = \"\"\n", + "\n", + " tqdm.pandas(desc=f\"{method_name} 评估进度\")\n", + " for idx, row in tqdm(df.iterrows(), total=len(df)):\n", + " w1 = str(row[\"word1\"]).strip()\n", + " w2 = str(row[\"word2\"]).strip()\n", + " headline = str(row[\"headline\"]).strip()\n", + " generated_text = str(row[result_col]).strip()\n", + "\n", + " if headline == \"-\" and w1 != \"-\" and w2 != \"-\":\n", + " prompt = PROMPT_WORD.format(w1=w1, w2=w2, generated_text=generated_text)\n", + " elif w1 == \"-\" and w2 == \"-\" and headline != \"-\":\n", + " prompt = PROMPT_NEWS.format(headline=headline, generated_text=generated_text)\n", + " else:\n", + " df.loc[idx, [\"约束满足度\", \"幽默度\", \"通顺度\", \"多样性\", \"自然度\"]] = [3, 3, 3, 3, 3]\n", + " df.loc[idx, \"评估点评\"] = \"异常数据,默认点评\"\n", + " continue\n", + "\n", + " eval_result = deepseek_evaluate(prompt)\n", + " constraint, humor, fluency, diversity, natural, comment = parse_evaluation_result(eval_result)\n", + " df.loc[idx, \"约束满足度\"] = constraint\n", + " df.loc[idx, \"幽默度\"] = humor\n", + " df.loc[idx, \"通顺度\"] = fluency\n", + " df.loc[idx, \"多样性\"] = 
diversity\n", + " df.loc[idx, \"自然度\"] = natural\n", + " df.loc[idx, \"评估点评\"] = comment\n", + "\n", + " df[\"加权总分\"] = (df[\"约束满足度\"] * 0.3 +\n", + " df[\"幽默度\"] * 0.3 +\n", + " df[\"通顺度\"] * 0.2 +\n", + " df[\"多样性\"] * 0.1 +\n", + " df[\"自然度\"] * 0.1).round(2)\n", + "\n", + " eval_output_path = f\"{method_name}_评估结果.tsv\"\n", + " df.to_csv(eval_output_path, sep=\"\\t\", index=False)\n", + " print(f\"【{method_name}】评估完成,结果保存至:{eval_output_path}\")\n", + "\n", + " summary = {\n", + " \"方法名称\": method_name,\n", + " \"总样本数\": len(df),\n", + " \"约束满足度平均分\": round(df[\"约束满足度\"].mean(), 2),\n", + " \"幽默度平均分\": round(df[\"幽默度\"].mean(), 2),\n", + " \"通顺度平均分\": round(df[\"通顺度\"].mean(), 2),\n", + " \"多样性平均分\": round(df[\"多样性\"].mean(), 2),\n", + " \"自然度平均分\": round(df[\"自然度\"].mean(), 2),\n", + " \"加权总分平均分\": round(df[\"加权总分\"].mean(), 2)\n", + " }\n", + " return df, summary\n", + "\n", + "\n", + "def batch_evaluate_all():\n", + " all_evaluated_dfs = {}\n", + " all_summaries = []\n", + "\n", + " for method_name, file_path in FILES.items():\n", + " evaluated_df, summary = evaluate_single_file(file_path, RESULT_COLS[method_name], method_name)\n", + " all_evaluated_dfs[method_name] = evaluated_df\n", + " all_summaries.append(summary)\n", + "\n", + " summary_df = pd.DataFrame(all_summaries)\n", + "\n", + " summary_df = summary_df.sort_values(by=\"加权总分平均分\", ascending=False).reset_index(drop=True)\n", + "\n", + " summary_report_path = \"三种方法评估汇总报告.tsv\"\n", + " summary_df.to_csv(summary_report_path, sep=\"\\t\", index=False)\n", + " print(f\"\\n✅ 所有方法评估完成!汇总报告保存至:{summary_report_path}\")\n", + "\n", + " print(\"\\n=== 三种方法评估汇总(按加权总分排序)===\")\n", + " print(summary_df.to_string(index=False))\n", + "\n", + " return all_evaluated_dfs, summary_df\n", + "\n", + "if __name__ == \"__main__\":\n", + " print(\"开始执行三种方法的批量评估(基于DeepSeek-V4-Pro)\")\n", + " print(f\"评估样本数:每个方法约300条,预计耗时15-20分钟,请耐心等待...\")\n", + " all_evaluated_dfs, summary_df = batch_evaluate_all()\n" + ], + "id": 
"597c96096f5008b4", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始执行三种方法的批量评估(基于DeepSeek-V4-Pro)\n", + "评估样本数:每个方法约300条,预计耗时15-20分钟,请耐心等待...\n", + "\n", + "开始评估【规则基线】,文件路径:task_a_zh_rule_baseline_full.tsv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 45/45 [01:38<00:00, 2.20s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "【规则基线】评估完成,结果保存至:规则基线_评估结果.tsv\n", + "\n", + "开始评估【N-gram基线】,文件路径:task_a_zh_ngram_final_standard.tsv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 300/300 [10:16<00:00, 2.06s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "【N-gram基线】评估完成,结果保存至:N-gram基线_评估结果.tsv\n", + "\n", + "开始评估【DeepSeek大模型】,文件路径:task_a_zh_deepseek_baseline.tsv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 40%|████ | 120/300 [04:08<05:49, 1.94s/it]" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "9ae712f5206f6482" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "b06c46d127418850" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}