Update Scraping Bilibili Danmaku (bullet comments).py

main
pux36pf8t 2 months ago
parent fc3864a0e0
commit 29677d320b

@ -186,7 +186,7 @@ def analyze_keywords_in_comments(comments_file, keywords_file, output_excel_file
def generate_wordcloud(text_file, stopwords_file, output_image_file, font_path='msyh.ttc'):
def generate_wordcloud(text_file, stopwords_file, output_image_file, mask_image_file=None, font_path='msyh.ttc'):
# 加载停用词
def load_stopwords(file_path):
with open(file_path, encoding='utf-8') as f:
@ -207,15 +207,29 @@ def generate_wordcloud(text_file, stopwords_file, output_image_file, font_path='
# 将处理后的词汇拼接成字符串
word_string = ' '.join(filtered_words)
# 生成词云
wc = wordcloud.WordCloud(
width=700,
height=700,
background_color='white',
font_path=font_path
)
wc.generate(word_string)
# 设置词云参数
wc_kwargs = {
'width': 700,
'height': 700,
'background_color': 'white',
'font_path': font_path,
}
if mask_image_file:
# Load the mask image as 8-bit grayscale (it is binarized in the next step)
mask_image = np.array(Image.open(mask_image_file).convert('L'))
# Binarize the mask (no inversion): pixels that are exactly 0 stay 0, every other value becomes 255
mask = np.where(mask_image == 0, 0, 255).astype(np.uint8)
wc_kwargs.update({
'mask': mask, # 应用掩模图像
'contour_color': 'white', # 轮廓颜色
'contour_width': 0,
})
wc = wordcloud.WordCloud(**wc_kwargs)
wc.generate(word_string)
# 保存词云图
wc.to_file(output_image_file)
@ -259,7 +273,7 @@ def main():
analyze_keywords_in_comments('cleaned_comment.txt', 'keywords.txt', 'ai_technologies_count.xlsx')
#输出词云图
print("开始构建词云图")
generate_wordcloud('cleaned_comment.txt', 'stopwords.txt', '词云.png')
generate_wordcloud('cleaned_comment.txt', 'stopwords.txt', '词云.png', 'img.png')
print("构建词云图完毕")
if __name__ == "__main__":
main()

Loading…
Cancel
Save