|
|
|
@ -186,7 +186,7 @@ def analyze_keywords_in_comments(comments_file, keywords_file, output_excel_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_wordcloud(text_file, stopwords_file, output_image_file, font_path='msyh.ttc'):
|
|
|
|
|
def generate_wordcloud(text_file, stopwords_file, output_image_file, mask_image_file=None, font_path='msyh.ttc'):
|
|
|
|
|
# 加载停用词
|
|
|
|
|
def load_stopwords(file_path):
|
|
|
|
|
with open(file_path, encoding='utf-8') as f:
|
|
|
|
@ -207,15 +207,29 @@ def generate_wordcloud(text_file, stopwords_file, output_image_file, font_path='
|
|
|
|
|
# 将处理后的词汇拼接成字符串
|
|
|
|
|
word_string = ' '.join(filtered_words)
|
|
|
|
|
|
|
|
|
|
# 生成词云
|
|
|
|
|
wc = wordcloud.WordCloud(
|
|
|
|
|
width=700,
|
|
|
|
|
height=700,
|
|
|
|
|
background_color='white',
|
|
|
|
|
font_path=font_path
|
|
|
|
|
)
|
|
|
|
|
wc.generate(word_string)
|
|
|
|
|
# 设置词云参数
|
|
|
|
|
wc_kwargs = {
|
|
|
|
|
'width': 700,
|
|
|
|
|
'height': 700,
|
|
|
|
|
'background_color': 'white',
|
|
|
|
|
'font_path': font_path,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if mask_image_file:
|
|
|
|
|
# Load the mask image as grayscale ('L' mode); it is thresholded to two levels further down
|
|
|
|
|
mask_image = np.array(Image.open(mask_image_file).convert('L'))
|
|
|
|
|
|
|
|
|
|
# Binarize the mask (no inversion): pure-black pixels (0) stay 0, every other pixel becomes 255
|
|
|
|
|
mask = np.where(mask_image == 0, 0, 255).astype(np.uint8)
|
|
|
|
|
|
|
|
|
|
wc_kwargs.update({
|
|
|
|
|
'mask': mask, # 应用掩模图像
|
|
|
|
|
'contour_color': 'white', # 轮廓颜色
|
|
|
|
|
'contour_width': 0,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
wc = wordcloud.WordCloud(**wc_kwargs)
|
|
|
|
|
wc.generate(word_string)
|
|
|
|
|
# 保存词云图
|
|
|
|
|
wc.to_file(output_image_file)
|
|
|
|
|
|
|
|
|
@ -259,7 +273,7 @@ def main():
|
|
|
|
|
analyze_keywords_in_comments('cleaned_comment.txt', 'keywords.txt', 'ai_technologies_count.xlsx')
|
|
|
|
|
#输出词云图
|
|
|
|
|
print("开始构建词云图")
|
|
|
|
|
generate_wordcloud('cleaned_comment.txt', 'stopwords.txt', '词云.png')
|
|
|
|
|
generate_wordcloud('cleaned_comment.txt', 'stopwords.txt', '词云.png', 'img.png')
|
|
|
|
|
print("构建词云图完毕")
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
main()
|
|
|
|
|