You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

41 lines
1.3 KiB

import matplotlib.pyplot as plt
import wordcloud
import jieba
import pyecharts
import pandas as pd
import numpy as np
from PIL import Image
import nltk
from nltk import FreqDist
# Font file handed to WordCloud for rendering the words (machine-specific path).
font_path = r"C:\Users\26716\AppData\Local\Microsoft\Windows\Fonts\HanYiDieYuTiJian-1.ttf"
# Image whose shape is used as the word-cloud mask. Written as a raw string so
# the backslashes are never interpreted as escape sequences; the runtime value
# is the same as the original non-raw literal.
mask_path = r"D:\CODING\软工\奥运五环.png"


def load_stopwords(path: str = "stopwords.txt") -> set:
    """Return the stop words stored one per line in the UTF-8 file *path*.

    The file is read as plain text instead of being parsed as CSV, so entries
    that contain quote characters or that look like NA markers (e.g. "null")
    are kept verbatim. Blank lines are skipped.
    """
    with open(path, "r", encoding="utf-8") as f:
        lines = (line.rstrip("\r\n") for line in f)
        # A set gives O(1) membership tests when filtering the tokens.
        return {line for line in lines if line}


def filter_words(words, stopwords) -> list:
    """Return the tokens of *words* that are longer than one character and not in *stopwords*.

    *words* may be any iterable of strings (including a generator);
    *stopwords* is any container supporting ``in``. Order and duplicates of
    the surviving tokens are preserved.
    """
    return [w for w in words if len(w) > 1 and w not in stopwords]


def main():
    """Build a word cloud from ``danmu.txt`` and display it with matplotlib."""
    with open("danmu.txt", "r", encoding="utf-8") as f:
        danmu_text = f.read()

    stopwords = load_stopwords("stopwords.txt")

    # Register the phrase with jieba's dictionary before segmenting.
    jieba.add_word("发来贺电")
    filtered_words = filter_words(jieba.cut(danmu_text), stopwords)
    top_words = FreqDist(filtered_words)

    # Close the image file as soon as the pixel data has been copied out.
    with Image.open(mask_path) as img:
        background = np.array(img.convert("RGB"))

    # NOTE: per the wordcloud documentation, width/height are ignored when a
    # mask is supplied; they are kept here to mirror the original settings.
    cloudobj = wordcloud.WordCloud(
        font_path=font_path,
        scale=4,
        width=1920,
        height=1080,
        max_words=100,
        background_color="white",
        max_font_size=200,
        min_font_size=10,
        mask=background,
    ).fit_words(top_words)

    plt.imshow(cloudobj)
    plt.axis("off")
    plt.show()


if __name__ == "__main__":
    main()