|
|
import collections
|
|
|
import matplotlib.pyplot as plt
|
|
|
from wordcloud import WordCloud
|
|
|
from PIL import Image
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
# 读取弹幕文件
|
|
|
def read_comments(file_path):
    """Read a danmaku (bullet-comment) file and return one entry per line.

    Args:
        file_path: Path to a UTF-8 encoded text file holding one comment
            per line.

    Returns:
        A list with one string per line of the file, in file order, each
        with leading/trailing whitespace (including the newline) removed.
        Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also discards a leading
    # byte-order mark, which Windows editors commonly prepend. str.strip()
    # does not treat U+FEFF as whitespace, so with plain 'utf-8' the mark
    # would stay glued to the first comment and skew the word counts.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return [line.strip() for line in file]
|
|
|
|
|
|
|
|
|
# 统计词频
|
|
|
def count_words(comments):
    """Tally how often each whitespace-delimited token occurs.

    Tokenisation is deliberately simple: every comment is split on runs of
    whitespace, so text without spaces stays a single token.

    Args:
        comments: Iterable of comment strings.

    Returns:
        A ``collections.Counter`` mapping each token to its number of
        occurrences across all comments.
    """
    tokens = (token for line in comments for token in line.split())
    return collections.Counter(tokens)
|
|
|
|
|
|
|
|
|
# 生成词云图
|
|
|
def generate_wordcloud(word_frequencies, output_file, mask_image=None,
                       font_path='C:/Windows/Fonts/simhei.ttf'):
    """Render a word cloud from word frequencies, save it, and display it.

    Args:
        word_frequencies: Mapping of word -> frequency, handed to
            ``WordCloud.generate_from_frequencies``.
        output_file: Path the rendered image is written to.
        mask_image: Optional mask forwarded as ``WordCloud(mask=...)``;
            ``None`` means no custom shape.
        font_path: Font file used to draw the words. Defaults to the SimHei
            font in the Windows font directory; pass another font path on
            systems where that file does not exist.
    """
    cloud = WordCloud(
        width=1600,                 # high-resolution canvas
        height=800,
        background_color='white',
        max_words=150,              # cap the word count to limit clutter
        min_font_size=10,           # keep the smallest words legible
        relative_scaling=0.5,       # font-size scaling ratio
        colormap='cool',            # cool-toned colour scheme
        mask=mask_image,
        contour_width=3,            # outline width
        contour_color='steelblue',  # outline colour
        font_path=font_path,
    )
    cloud.generate_from_frequencies(word_frequencies)

    # Save before showing: plt.show() may block until the window is closed
    # or fail without a display, and the image file should not depend on it.
    cloud.to_file(output_file)
    print(f"词云图已保存至 {output_file}")

    # Display the rendered cloud without axes.
    plt.figure(figsize=(12, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
|
|
|
|
|
|
|
|
|
# 主函数:读取文件、统计词频、生成词云
|
|
|
# Script configuration.
file_path = '弹幕.txt'  # path to the danmaku (bullet-comment) file
output_wordcloud_file = 'bullet_wordcloud.png'  # output image file name
# Optional custom shape for the word cloud; None means no custom shape.
mask_image = None

# Run the pipeline only when executed as a script, so importing this module
# for its helper functions does not read files or open a plot window.
if __name__ == "__main__":
    # 1. Read all comments.
    comments = read_comments(file_path)

    # 2. Count word frequencies.
    word_frequencies = count_words(comments)

    # 3. Generate the word cloud from the frequencies.
    generate_wordcloud(word_frequencies, output_wordcloud_file, mask_image)
|