Update get_wordcloud_pic.py

11 months ago · 5a756a6b88
parent 0ed6867ef9
commit 5a756a6b88
1 changed file with 56 additions and 56 deletions
--- a/get_wordcloud_pic.py
+++ b/get_wordcloud_pic.py
@@ -1,56 +1,56 @@
-import pandas as pd
+import pandas as pd
-import jieba
+import jieba
-from wordcloud import WordCloud
+from wordcloud import WordCloud
-import matplotlib.pyplot as plt
+import matplotlib.pyplot as plt
-from PIL import Image
+from PIL import Image
-import numpy as np
+import numpy as np
-
+
-# 1. 读取停用词表
+# 读取停用词表
-def load_stopwords(file_path):
+def load_stopwords(file_path):
-    with open(file_path, 'r', encoding='utf-8') as f:
+    with open(file_path, 'r', encoding='utf-8') as f:
-        stopwords = set(line.strip() for line in f)
+        stopwords = set(line.strip() for line in f)
-    return stopwords
+    return stopwords
-
+
-# 2. 过滤停用词
+# 过滤停用词
-def remove_stopwords(words_list, stopwords):
+def remove_stopwords(words_list, stopwords):
-    return [word for word in words_list if word not in stopwords and len(word) > 1]
+    return [word for word in words_list if word not in stopwords and len(word) > 1]
-
+
-# 3. 读取Excel文件并提取弹幕内容
+# 读取Excel文件并提取弹幕内容
-file_path = "danmu_data.xlsx"
+file_path = "danmu_data.xlsx"
-df = pd.read_excel(file_path)
+df = pd.read_excel(file_path)
-comments = df['danmu'].astype(str)
+comments = df['danmu'].astype(str)
-text = ' '.join(comments)
+text = ' '.join(comments)
-
+
-# 4. 使用 jieba 分词
+# 使用 jieba 分词
-words = jieba.cut(text, cut_all=False)
+words = jieba.cut(text, cut_all=False)
-
+
-# 5. 加载停用词表
+# 加载停用词表
-stopwords_file = "D://edge//stop.txt"  # 替换为实际路径
+stopwords_file = "D://edge//stop.txt"  # 替换为实际路径
-stopwords = load_stopwords(stopwords_file)
+stopwords = load_stopwords(stopwords_file)
-
+
-# 6. 去除停用词
+# 去除停用词
-filtered_words = remove_stopwords(words, stopwords)
+filtered_words = remove_stopwords(words, stopwords)
-
+
-# 7. 将过滤后的词汇重新拼接为一个字符串
+# 将过滤后的词汇重新拼接为一个字符串
-words_list = ' '.join(filtered_words)
+words_list = ' '.join(filtered_words)
-
+
-# 8. 加载形状图片并生成词云
+# 加载形状图片并生成词云
-mask = np.array(Image.open("D://edge//kk.png"))
+mask = np.array(Image.open("D://edge//kk.png"))
-
+
-wordcloud = WordCloud(
+wordcloud = WordCloud(
-    font_path='simhei.ttf',  # 确保支持中文
+    font_path='simhei.ttf',  # 确保支持中文
-    background_color='white',
+    background_color='white',
-    mask=mask,
+    mask=mask,
-    contour_width=1,
+    contour_width=1,
-    contour_color='black',
+    contour_color='black',
-    width=800,
+    width=800,
-    height=600
+    height=600
-).generate(words_list)
+).generate(words_list)
-
+
-# 9. 显示词云图
+# 显示词云图
-plt.imshow(wordcloud, interpolation='bilinear')
+plt.imshow(wordcloud, interpolation='bilinear')
-plt.axis("off")
+plt.axis("off")
-plt.show()
+plt.show()
-
+
-# 10. 保存词云图
+# 保存词云图
-wordcloud.to_file("filtered_wordcloud.png")
+wordcloud.to_file("filtered_wordcloud.png")