Update get_wordcloud_pic.py

main
luoyonghuang 5 months ago
parent 0ed6867ef9
commit 5a756a6b88

@ -1,56 +1,56 @@
import pandas as pd import pandas as pd
import jieba import jieba
from wordcloud import WordCloud from wordcloud import WordCloud
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from PIL import Image from PIL import Image
import numpy as np import numpy as np
# 1. 读取停用词表 # 读取停用词表
def load_stopwords(file_path):
    """Read a stopword file and return its entries as a set.

    The file is read line by line; surrounding whitespace is stripped from
    every line and lines that are empty after stripping are skipped.

    Args:
        file_path: Path of the UTF-8 encoded stopword file (one word per line).

    Returns:
        A set containing the non-empty, stripped lines of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 too, but also discards a leading
    # byte-order mark. With plain 'utf-8' the BOM would stay glued to the
    # first stopword ('\ufeff<word>'), so that word would never match.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return {word for word in stripped_lines if word}
# 2. 过滤停用词 # 过滤停用词
def remove_stopwords(words_list, stopwords, min_length=2):
    """Return the tokens that are neither stopwords nor too short.

    Args:
        words_list: Iterable of token strings; it is consumed once, so a
            generator is acceptable.
        stopwords: Container of tokens to discard (a set gives O(1) lookups).
        min_length: Minimum number of characters a token must have to be
            kept. The default of 2 drops single-character tokens, matching
            the previous hard-coded ``len(word) > 1`` check.

    Returns:
        A list of the surviving tokens in their original order.
    """
    return [
        word
        for word in words_list
        if word not in stopwords and len(word) >= min_length
    ]
# Read the Excel file and extract the danmu (bullet-comment) text.
file_path = "danmu_data.xlsx"
df = pd.read_excel(file_path)
# Drop missing cells before casting: astype(str) would otherwise turn every
# NaN into the literal token "nan", which then shows up in the word cloud.
comments = df['danmu'].dropna().astype(str)
text = ' '.join(comments)

# Segment the text with jieba (cut_all=False, i.e. not full mode).
words = jieba.cut(text, cut_all=False)

# Load the stopword list.
stopwords_file = "D://edge//stop.txt"  # replace with the actual path
stopwords = load_stopwords(stopwords_file)

# Remove stopwords (single-character tokens are dropped as well).
filtered_words = remove_stopwords(words, stopwords)

# Join the remaining tokens back into one space-separated string.
words_list = ' '.join(filtered_words)

# Load the shape image and generate the word cloud.
mask = np.array(Image.open("D://edge//kk.png"))
# NOTE(review): per the wordcloud docs, width/height are ignored when a mask
# is supplied (the mask's shape is used instead) -- confirm and drop if so.
wordcloud = WordCloud(
    font_path='simhei.ttf',  # a font with Chinese glyphs is required
    background_color='white',
    mask=mask,
    contour_width=1,
    contour_color='black',
    width=800,
    height=600
).generate(words_list)

# Display the word cloud.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# Save the word cloud image.
wordcloud.to_file("filtered_wordcloud.png")

Loading…
Cancel
Save