From d542128f5d6a9d7fd5a172d2a04681a630365c77 Mon Sep 17 00:00:00 2001
From: pkjq8ohf2 <2100488276@qq.com>
Date: Tue, 17 Sep 2024 14:05:37 +0800
Subject: [PATCH] ADD file via upload

---
 ciyuntu.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 ciyuntu.py

diff --git a/ciyuntu.py b/ciyuntu.py
new file mode 100644
index 0000000..0862e53
--- /dev/null
+++ b/ciyuntu.py
@@ -0,0 +1,39 @@
+import jieba
+
+# Read the source text, then drop punctuation, the BOM and all whitespace before tokenizing.
+with open(r'C:\Users\86158\爬虫作业\ai吧.txt', 'r', encoding='UTF-8') as src:
+    txt = src.read()
+rp_str = '： ， ； 。 、 ？ ———— ‘’ “” （） ！ # 《》\n\ufeff'
+# One translate pass deletes every character listed in rp_str.
+txt = txt.translate(str.maketrans('', '', rp_str))
+txt = ''.join(txt.split())
+jieba.load_userdict(r'C:\Users\86158\爬虫作业\userdict.txt')
+words = jieba.lcut(txt)
+
+with open(r'C:\Users\86158\爬虫作业\stopwords.txt', 'r', encoding='UTF-8') as sw_file:  # closed after reading
+    stopwords = sw_file.read()
+stopwords_list = stopwords.split()  # whole stop words; list(stopwords) yielded single characters
+# Count words, skipping single characters, the noise words below and stop words.
+remove_words = ['哈哈','可以','紫薯','整齐','开始','以为','这人','我们','好像']
+words_counts = {}
+for token in words:
+    keep = (
+        len(token) != 1
+        and token not in remove_words
+        and token not in stopwords_list
+    )
+    if keep:
+        words_counts[token] = words_counts.get(token, 0) + 1
+words_list = sorted(words_counts.items(), key=lambda pair: pair[1], reverse=True)  # most frequent first
+
+ranking8_list = words_list[:8]  # first eight (word, count) pairs of the ranked list
+ranking8_dict = {word: count for word, count in ranking8_list}
+print(ranking8_dict)
+
+# Save every (word, count) pair of words_list to a text file, one pair per line.
+# Each row: word left-aligned to 8 columns, count right-aligned to 2.
+# The with-block closes the file even if a write fails.
+with open(r'C:\Users\86158\爬虫作业\ciyuntu.txt', 'w', encoding='UTF-8') as f:
+    for k, v in words_list:
+        f.write('{:<8}{:>2}\n'.format(k, v))
+