You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

41 lines
1.3 KiB

import csv
import os
from wordcloud import WordCloud, STOPWORDS
import jieba
# Paths to the directory of danmaku CSV files and to the Chinese stop-word list
csv_dir = "./danmaku_files"
stop_words_file = "./cn_stopwords.txt"
# Load the Chinese stop words, one entry per line
with open(stop_words_file, encoding="utf8") as f:
    stop_words = f.read().splitlines()
# Create the folder that stores the results. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another process
# creating the directory between the check and the makedirs() call.
os.makedirs("word_clouds", exist_ok=True)
# Merge wordcloud's built-in stop words with the Chinese list once, up front;
# the set is identical for every file, so there is no need to rebuild it
# inside the loop.
stopwords = set(STOPWORDS)
stopwords.update(stop_words)
# Walk through every file in danmaku_files
for filename in os.listdir(csv_dir):
    if not filename.endswith(".csv"):
        continue
    # Read the CSV file and extract the danmaku text from the first column.
    # newline="" is what the csv module expects for files it parses itself.
    with open(os.path.join(csv_dir, filename), encoding="utf8", newline="") as f:
        reader = csv.reader(f)
        # Blank lines come back from csv.reader as empty rows; skip them
        # instead of failing with IndexError on row[0].
        text = ' '.join(row[0] for row in reader if row)
    # Segment the text with jieba and drop the stop words
    text = ' '.join(word for word in jieba.cut(text) if word not in stopwords)
    # Build the word cloud
    wc = WordCloud(background_color="white",
                   font_path='simsun.ttc',
                   max_words=100,
                   width=800,
                   height=600)
    try:
        wc.generate(text)
    except ValueError as err:
        # WordCloud raises ValueError when no plottable word is left (for
        # example an empty CSV); report it and carry on with the other files
        # rather than aborting the whole run.
        print(f"Skipping {filename}: {err}")
        continue
    # Save the word cloud as a PNG named after the CSV file
    wc.to_file(os.path.join("word_clouds", os.path.splitext(filename)[0] + ".png"))