词云图之代码

1 year ago · d1b8e8c95d
parent 57b584a6c9
commit d1b8e8c95d
1 changed file with 40 additions and 0 deletions
--- a/WordCloud.py
+++ b/WordCloud.py
@@ -0,0 +1,40 @@
+import matplotlib.pyplot as plt
+import wordcloud
+import jieba
+import pyecharts
+import pandas as pd
+import numpy as np
+from PIL import Image
+import nltk
+from nltk import FreqDist
+
+font_path = r"C:\Users\26716\AppData\Local\Microsoft\Windows\Fonts\HanYiDieYuTiJian-1.ttf"
+
+# Word-cloud script: read the danmu text, tokenize it, drop stop words, plot.
+with open("danmu.txt", 'r', encoding='utf-8') as f:
+    danmu_text = f.read()
+
+# Stop words are loaded into column "w"; build the lookup set a single time
+# rather than converting the column to a list again for every token.
+stopword = pd.read_csv("stopwords.txt", names=["w"], sep="\t", encoding="utf-8")
+stop_set = set(stopword['w'].tolist())
+
+jieba.add_word("发来贺电")  # add this phrase to jieba's dictionary before cutting
+words = jieba.cut(danmu_text)
+# Keep tokens longer than one character that are not stop words.
+filtered_words = [w for w in words if len(w) > 1 and w not in stop_set]
+
+top_words = FreqDist(filtered_words)  # token -> occurrence count
+
+# Raw string: "\C", "\软" and "\奥" are invalid escapes in a normal literal.
+background = np.array(Image.open(r"D:\CODING\软工\奥运五环.png").convert("RGB"))
+
+cloudobj = wordcloud.WordCloud(
+    font_path=font_path, scale=4, width=1920, height=1080, max_words=100,
+    background_color="white", max_font_size=200, min_font_size=10,
+    mask=background,
+).fit_words(top_words)
+
+plt.imshow(cloudobj)
+plt.axis("off")
+plt.show()