You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

39 lines
952 B

from collections import Counter

import jieba
from pyecharts import options as opts
from pyecharts.charts import WordCloud
# Build a word-cloud chart from the review comments.
#
# Steps: read the cleaned comment text, segment it with jieba, count how often
# each token occurs, drop tokens listed in the stop-word file, print the
# remaining (word, count) pairs, and render them to an HTML word cloud.

# Read the whole comment corpus as a single string. Newlines are removed so
# the segmenter receives one continuous piece of text.
with open('./dataCleanFile/评论.txt', 'r', encoding='utf-8') as comment_file:
    strt = comment_file.read().replace("\n", '').strip()

# Load the stop-word table (one entry per line). Stored as a set so that each
# membership test in the filter below is O(1) instead of a linear list scan.
with open('./visualizationFile/停用词表.txt', 'r', encoding='utf-8') as stop_file:
    stop_words = set(stop_file.read().split("\n"))

# Segment the text (cut_all=False, HMM=True) and count every token.
# Counter is a dict subclass, so first-seen order of the tokens is preserved.
ciyun = Counter(jieba.cut(strt, cut_all=False, HMM=True))

# Keep (word, count) pairs for every token that is not a stop word.
data = [(word, count) for word, count in ciyun.items() if word not in stop_words]
print(data)

# Create the word-cloud chart object.
wordcloud = WordCloud()
# Add the data; the series name is intentionally left empty.
wordcloud.add("", data, word_size_range=[20, 100])
# Set the global options (centered chart title).
wordcloud.set_global_opts(
    title_opts=opts.TitleOpts(title="评论词云图", pos_left="center"),
)
# Render the chart to an HTML file.
wordcloud.render("./visualizationFile/wordcloud.html")