From d1b8e8c95d8c75ec0a0a6f3463b3959b7df8792c Mon Sep 17 00:00:00 2001
From: p2fx74kt6 <2671606322@qq.com>
Date: Wed, 18 Sep 2024 20:54:26 +0800
Subject: [PATCH] =?UTF-8?q?=E8=AF=8D=E4=BA=91=E5=9B=BE=E4=B9=8B=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 WordCloud.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 WordCloud.py

diff --git a/WordCloud.py b/WordCloud.py
new file mode 100644
index 0000000..d037451
--- /dev/null
+++ b/WordCloud.py
@@ -0,0 +1,57 @@
+"""Render a mask-shaped word cloud from the danmu text in danmu.txt."""
+
+import csv
+
+import matplotlib.pyplot as plt
+import wordcloud
+import jieba
+import pyecharts
+import pandas as pd
+import numpy as np
+from PIL import Image
+import nltk
+from nltk import FreqDist
+
+# Machine-specific absolute paths; raw strings keep the backslashes literal.
+font_path = r"C:\Users\26716\AppData\Local\Microsoft\Windows\Fonts\HanYiDieYuTiJian-1.ttf"
+mask_path = r"D:\CODING\软工\奥运五环.png"
+
+with open("danmu.txt", "r", encoding="utf-8") as f:
+    danmu_text = f.read()
+
+# One stopword per line. QUOTE_NONE stops a bare '"' entry from opening a
+# quoted field; keep_default_na=False keeps words like "null" as strings.
+stopword = pd.read_csv(
+    "stopwords.txt", names=["w"], sep="\t", encoding="utf-8",
+    quoting=csv.QUOTE_NONE, keep_default_na=False,
+)
+# Build the lookup set once instead of a fresh list for every token.
+stopword_set = set(stopword["w"])
+
+# Add the phrase to jieba's dictionary before segmenting.
+jieba.add_word("发来贺电")
+words = jieba.cut(danmu_text)
+
+# Keep tokens longer than one character that are not stopwords.
+filtered_words = [w for w in words if len(w) > 1 and w not in stopword_set]
+
+top_words = FreqDist(filtered_words)
+
+background = np.array(Image.open(mask_path).convert("RGB"))
+
+# NOTE: per the wordcloud docs, width/height are ignored when a mask is given.
+cloudobj = wordcloud.WordCloud(
+    font_path=font_path,
+    scale=4,
+    width=1920,
+    height=1080,
+    max_words=100,
+    background_color="white",
+    max_font_size=200,
+    min_font_size=10,
+    mask=background,
+).fit_words(top_words)
+
+plt.imshow(cloudobj)
+plt.axis("off")
+plt.show()