"""Render a word cloud from the ``.txt`` files collected in ``DIRECTORY``.

Variant 1 (active): a plain rectangular word cloud built from word
frequencies after jieba segmentation and stop-word filtering.

Variant 2 (trophy-shaped, mask-based word cloud) is kept at the bottom of
the file, temporarily commented out.
"""
import os
from collections import Counter

# Folder holding the collected text files. A raw string is used so that every
# backslash is literal (the previous literal contained the invalid escape
# sequence ``\软``); the resulting path is unchanged.
DIRECTORY = r'E:\前端\软件工程\莎莎和陈梦\弹幕收集'

# Tokens shorter than this many characters are dropped.
MIN_WORD_LENGTH = 2

# Stop words (duplicates from the earlier list removed). The single-character
# entries are already excluded by MIN_WORD_LENGTH; they are kept so the list
# stays valid if that threshold is ever lowered.
STOP_WORDS = frozenset([
    "我", "你", "他", "她", "它", "是", "的", "了", "在", "吗", "啊", "吧",
    "也", "有", "这", "那", "从", "为", "上", "下", "和", "与", "就", "不",
    "中", "还", "要", "会", "能", "对", "着", "个", "把", "所以", "但", "如",
    "哦", "?", "!", ",", "。", "哈哈哈",
])


def read_corpus(directory):
    """Return the contents of every ``.txt`` file in *directory* as one string.

    Files are read as UTF-8 in sorted name order and joined with a newline, so
    the last word of one file can never fuse with the first word of the next
    during segmentation (whitespace-only tokens are discarded by
    ``filter_words``).

    Raises:
        OSError: if *directory* or one of its files cannot be read.
    """
    parts = []
    for filename in sorted(os.listdir(directory)):
        if filename.endswith('.txt'):
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
                parts.append(file.read())
    return "\n".join(parts)


def filter_words(words, stop_words=STOP_WORDS, min_length=MIN_WORD_LENGTH):
    """Return the tokens of *words* that are worth counting, as a list.

    A token is kept when it is not whitespace-only, is not in *stop_words*
    and has at least *min_length* characters. Order and repetitions are
    preserved.
    """
    return [
        word
        for word in words
        if word.strip() and word not in stop_words and len(word) >= min_length
    ]


def count_frequencies(words):
    """Return a ``{word: occurrences}`` dict for the iterable *words*."""
    return dict(Counter(words))


def show_word_cloud(word_freq):
    """Build an 800x400 white-background word cloud and display it.

    *word_freq* maps each word to its frequency and must not be empty.
    """
    # Imported here rather than at module level so the text helpers above can
    # be imported without the plotting stack installed.
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    cloud = WordCloud(
        font_path='simsun.ttc',  # font file used to render the Chinese text
        width=800,
        height=400,
        background_color='white',
    ).generate_from_frequencies(word_freq)

    # Display the word cloud without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()


def main():
    """Read the corpus, segment and filter it, then show the word cloud."""
    import jieba  # third-party segmenter; only needed when the script runs

    text = read_corpus(DIRECTORY)
    word_freq = count_frequencies(filter_words(jieba.cut(text)))
    if not word_freq:
        # generate_from_frequencies() rejects an empty mapping; stop with a
        # clear message instead of an error raised deep inside the library.
        raise SystemExit(
            "No words left after filtering the .txt files in {!r}; "
            "nothing to draw.".format(DIRECTORY)
        )
    show_word_cloud(word_freq)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Variant 2: trophy-shaped word cloud (temporarily commented out)
# ---------------------------------------------------------------------------
# from wordcloud import WordCloud, STOPWORDS
# import matplotlib.pyplot as plt
# import numpy as np
# import jieba.posseg as pseg
# from collections import Counter
# import PIL.Image as Image
# from matplotlib import colors
# import os
#
# directory = 'E:\\前端\\软件工程\\弹幕收集按序'
#
# # Merge the contents of all files
# text = ""
# for filename in os.listdir(directory):
#     if filename.endswith('.txt'):
#         with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
#             text += file.read()
#
# words = pseg.cut(text)
#
# # Extract words by minimum length and part of speech
# report_words = []
# for word, flag in words:
#     if (len(word) >= 2) and ('n' in flag):  # keep nouns (flag contains 'n')
#         report_words.append(word)
#
# # Count the high-frequency words
# result = Counter(report_words).most_common(300)
#
# # Build the word -> frequency dictionary
# content = dict(result)
#
# # Print the word-frequency table
# # NOTE(review): raises IndexError when fewer than 50 distinct words exist.
# for i in range(50):
#     word,flag=result[i]
#     print("{0:<10}{1:>5}".format(word,flag))
#
# # Set up the stop words
# # NOTE(review): the wordcloud docs state `stopwords` is ignored by
# # generate_from_frequencies(), so filter `content` directly if needed.
# stopwords = set(STOPWORDS)
# stopwords.update(["我", "你", "他", "她", "它", "是", "的", "了", "在", "吗", "啊", "吧",
#                   "也", "有", "这", "那", "从", "为", "上", "下", "和", "与", "就", "不",
#                   "中", "还", "要", "会", "能", "对", "着", "个", "把", "所以", "但", "也",
#                   "所以", "从", "如", "她", "他", "它", "还", "也", "吗", "啊", "哦", "?", "!", ",", "。"])
#
# # Load the PNG mask
# # NOTE(review): backslashes in this path are not escaped; prefer a raw string.
# background = Image.open("E:\前端\奖杯4.png").convert('RGB')
# mask = np.array(background)
# font_path = r"C:\Windows\Fonts\STLITI.TTF"
# max_font_size =100
# min_font_size =10
#
# # Color list; change freely to alter the displayed colors
# color_list = ['#FF274B']
#
# # Build the colormap from the color list
# colormap = colors.ListedColormap(color_list)
#
# # Generate the word cloud
# wordcloud = WordCloud(scale=4,                        # output sharpness
#                       font_path=font_path,            # font file
#                       colormap=colormap,              # font colors
#                       width=1600,                     # output image width
#                       height=900,                     # output image height
#                       background_color='white',       # image background color
#                       stopwords=stopwords,            # stop words
#                       mask=mask,                      # mask
#                       max_font_size=max_font_size,    # largest font size
#                       min_font_size=min_font_size)    # smallest font size
# wordcloud.generate_from_frequencies(content)
#
# # Display the word cloud with matplotlib
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.show()
#
# # Save the word cloud image
# wordcloud.to_file("wordcloud.png")