You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.3 KiB
41 lines
1.3 KiB
import matplotlib.pyplot as plt
|
|
import wordcloud
|
|
import jieba
|
|
import pyecharts
|
|
import pandas as pd
|
|
import numpy as np
|
|
from PIL import Image
|
|
import nltk
|
|
from nltk import FreqDist
|
|
|
|
# Font file handed to WordCloud for rendering.
# NOTE(review): absolute, machine-specific path; presumably chosen because it
# contains CJK glyphs for the Chinese tokens -- confirm and adjust per machine.
font_path = r"C:\Users\26716\AppData\Local\Microsoft\Windows\Fonts\HanYiDieYuTiJian-1.ttf"

# Read the whole input text into memory in one go.
with open("danmu.txt", 'r', encoding='utf-8') as f:
    danmu_text = f.read()

# Stop words are loaded into a one-column DataFrame (column "w"), using tab
# as the field separator.
stopword = pd.read_csv("stopwords.txt", names=["w"], sep="\t", encoding="utf-8")

# Build the lookup set once: the previous code called .tolist() and scanned
# the resulting list for every single token.
stopword_set = set(stopword['w'].tolist())

# Register the phrase with jieba so the tokenizer keeps it as a single token.
jieba.add_word("发来贺电")
words = jieba.cut(danmu_text)

# Keep tokens longer than one character that are not stop words.
filtered_words = [w for w in words if len(w) > 1 and w not in stopword_set]
|
|
|
|
# Token -> occurrence count for the filtered tokens.
top_words = FreqDist(filtered_words)

# Mask image for the cloud, loaded as an RGB pixel array.
# Raw string: the path contains backslashes that are not valid escape
# sequences ("\C", "\软", "\奥"); a plain literal triggers an invalid-escape
# warning and is slated to become a syntax error. The runtime value is the same.
# NOTE(review): absolute, machine-specific path -- adjust per machine.
background = np.array(Image.open(r"D:\CODING\软工\奥运五环.png").convert("RGB"))

# Build the word cloud from the frequency table.
# NOTE(review): per the WordCloud documentation, width/height are ignored when
# a mask is supplied and the mask's shape is used instead -- confirm whether
# the 1920x1080 values are still wanted.
cloudobj = wordcloud.WordCloud(
    font_path=font_path,
    scale=4,
    width=1920,
    height=1080,
    max_words=100,
    background_color="white",
    max_font_size=200,
    min_font_size=10,
    mask=background,
).fit_words(top_words)
|
|
|
|
# Draw the generated cloud on the current axes, hide the axis decorations,
# and open the matplotlib window.
plt.imshow(cloudobj)
plt.gca().axis("off")
plt.show()
|