parent
b76889d9f9
commit
7b9883b235
@ -1,80 +1,86 @@
|
||||
import pandas as pd
|
||||
import jieba
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import imageio
|
||||
import os
|
||||
|
||||
# Read the stop-word list
def load_stopwords(file_path):
    """Load a stop-word list from a text file.

    The file is expected to hold one stop word per line. Surrounding
    whitespace is stripped from every line and blank lines are skipped.

    Args:
        file_path: Path of the UTF-8 encoded stop-word file.

    Returns:
        A set containing every non-empty stop word found in the file.
    """
    # 'utf-8-sig' decodes plain UTF-8 and additionally drops a leading
    # byte-order mark, so the first word of a BOM-prefixed file is not
    # silently stored as '\ufeff<word>'.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return {word for word in (line.strip() for line in f) if word}
|
||||
|
||||
# Filter out stop words
def remove_stopwords(words_list, stopwords, min_length=2):
    """Drop stop words and too-short tokens from a sequence of words.

    Args:
        words_list: Iterable of tokens (a list or a generator both work).
        stopwords: Container of words to discard; membership is tested
            with ``in``.
        min_length: Minimum number of non-whitespace-padded characters a
            token needs in order to be kept. The default of 2 discards
            single-character tokens.

    Returns:
        A list with the surviving tokens, in their original order and
        unmodified.
    """
    # The length is measured on the stripped token so that whitespace-only
    # tokens such as "\r\n" or "  " are not mistaken for real words.
    return [
        word
        for word in words_list
        if word not in stopwords and len(word.strip()) >= min_length
    ]
|
||||
|
||||
|
||||
|
||||
def generate_(
    file_path,
    stopwords_file="D://edge//stop.txt",
    mask_path="D://edge//kk.png",
    gif_path="wordcloud_animation.gif",
    frame_count=10,
    frame_duration=0.5,
):
    """Build an animated word-cloud GIF from the danmu comments in an Excel file.

    The ``danmu`` column of the workbook is segmented with jieba, stop words
    and single-character tokens are removed, and ``frame_count`` word clouds
    (each with a different layout seed and random colours) are rendered to
    temporary PNG files and stitched into a single GIF.

    Args:
        file_path: Path of the Excel workbook; a ``danmu`` column is read
            from it.
        stopwords_file: Text file with one stop word per line.
        mask_path: Image passed to WordCloud as the drawing mask.
        gif_path: Where the resulting GIF is written.
        frame_count: Number of frames to render.
        frame_duration: Per-frame display time handed to the imageio writer.

    Returns:
        0 on success.

    Raises:
        ValueError: If no word is left after stop-word filtering.
    """
    # Read the Excel file and extract the danmu text. Empty cells are dropped
    # first; otherwise astype(str) would turn them into the literal word "nan".
    df = pd.read_excel(file_path)
    comments = df['danmu'].dropna().astype(str)
    text = ' '.join(comments)

    # Segment with jieba, then remove the stop words.
    words = jieba.cut(text, cut_all=False)
    stopwords = load_stopwords(stopwords_file)
    filtered_words = remove_stopwords(words, stopwords)
    if not filtered_words:
        raise ValueError("no words left to draw after stop-word filtering")

    # Join the surviving words back into one string for WordCloud.
    words_list = ' '.join(filtered_words)

    # Load the shape image; the file handle is released as soon as the pixel
    # data has been copied into the array.
    with Image.open(mask_path) as mask_image:
        mask = np.array(mask_image)

    # Folder that temporarily holds the individual frames.
    frames_dir = "wordcloud_frames"
    os.makedirs(frames_dir, exist_ok=True)

    def random_rgb(*args, **kwargs):
        # One random colour per word, each channel drawn from [50, 255),
        # returned as plain Python ints.
        return tuple(int(channel) for channel in np.random.randint(50, 255, size=3))

    filenames = []
    try:
        for i in range(frame_count):
            filename = os.path.join(frames_dir, f"frame_{i}.png")
            # Registered before rendering so a partially written frame is
            # still cleaned up if this iteration fails.
            filenames.append(filename)

            # NOTE(review): the wordcloud docs say width/height are ignored
            # when a mask is supplied; they are kept from the original call.
            wordcloud = WordCloud(
                font_path='simhei.ttf',  # font with Chinese glyph support
                background_color='white',
                mask=mask,
                contour_width=1,
                contour_color='black',
                width=800,
                height=600,
                color_func=random_rgb,
                random_state=i,  # a different layout seed for every frame
            ).generate(words_list)
            wordcloud.to_file(filename)

        # Combine all frames into the GIF.
        # NOTE(review): some imageio releases interpret GIF `duration` in
        # milliseconds rather than seconds; confirm for the installed version.
        with imageio.get_writer(gif_path, mode="I", duration=frame_duration) as writer:
            for filename in filenames:
                writer.append_data(imageio.imread(filename))
    finally:
        # Remove the temporary frame files even when rendering or GIF
        # writing failed part-way through.
        for filename in filenames:
            if os.path.exists(filename):
                os.remove(filename)

    print(f"词云动图生成成功,保存为 '{gif_path}'")
    return 0
|
||||
import pandas as pd
|
||||
import jieba
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import imageio.v2 as imageio
|
||||
import os
|
||||
|
||||
def load_stopwords(file_path):
    """Load a stop-word list from a text file.

    Each line of the file is treated as one stop word. Leading and trailing
    whitespace is removed from every line, and lines that are empty after
    stripping are ignored.

    Args:
        file_path: Path of the UTF-8 encoded stop-word file.

    Returns:
        A set with every non-empty stop word read from the file.
    """
    # 'utf-8-sig' reads ordinary UTF-8 and also discards a leading
    # byte-order mark, which would otherwise be glued to the first word.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return {word for word in stripped_lines if word}
|
||||
|
||||
def remove_stopwords(words_list, stopwords, min_length=2):
    """Remove stop words and too-short tokens from a sequence of words.

    Args:
        words_list: Iterable of tokens (a list or a generator both work).
        stopwords: Container of words to discard; membership is tested
            with ``in``.
        min_length: Minimum length, measured after stripping surrounding
            whitespace, that a token needs in order to be kept. The default
            of 2 discards single-character tokens.

    Returns:
        A list of the tokens that were kept, in input order and unmodified.
    """
    # Measuring the stripped token keeps whitespace-only tokens such as
    # "\r\n" or "  " from passing the length check.
    return [
        word
        for word in words_list
        if word not in stopwords and len(word.strip()) >= min_length
    ]
|
||||
|
||||
def generate_(
    file_path,
    stopwords_file="D://edge//stop.txt",
    mask_path="D://edge//kk.png",
    gif_path="wordcloud.gif",
    frame_count=10,
    frame_duration=0.5,
):
    """Generate a word-cloud GIF from the danmu data in an Excel file.

    The ``danmu`` column of the workbook is segmented with jieba, stop words
    and single-character tokens are removed, and ``frame_count`` word clouds
    (each with a different layout seed and random colours) are rendered to
    temporary PNG files and combined into a single GIF.

    Args:
        file_path: Path of the Excel workbook; a ``danmu`` column is read
            from it.
        stopwords_file: Text file with one stop word per line.
        mask_path: Image passed to WordCloud as the drawing mask.
        gif_path: Where the resulting GIF is written.
        frame_count: Number of frames to render.
        frame_duration: Per-frame display time handed to the imageio writer.

    Returns:
        0 on success.

    Raises:
        ValueError: If no word is left after stop-word filtering.
    """
    # Read the Excel file and extract the danmu text. Empty cells are dropped
    # first; otherwise astype(str) would turn them into the literal word "nan".
    df = pd.read_excel(file_path)
    comments = df['danmu'].dropna().astype(str)
    text = ' '.join(comments)

    # Segment with jieba, then remove the stop words.
    words = jieba.cut(text, cut_all=False)
    stopwords = load_stopwords(stopwords_file)
    filtered_words = remove_stopwords(words, stopwords)
    if not filtered_words:
        raise ValueError("no words left to draw after stop-word filtering")

    # Join the filtered words back into one string for WordCloud.
    words_list = ' '.join(filtered_words)

    # Load the shape image; the file handle is released as soon as the pixel
    # data has been copied into the array.
    with Image.open(mask_path) as mask_image:
        mask = np.array(mask_image)

    # Folder that temporarily holds the individual frames.
    frames_dir = "wordcloud_frames"
    os.makedirs(frames_dir, exist_ok=True)

    def random_rgb(*args, **kwargs):
        # One random colour per word, each channel drawn from [50, 255),
        # returned as plain Python ints.
        return tuple(int(channel) for channel in np.random.randint(50, 255, size=3))

    filenames = []
    try:
        for i in range(frame_count):
            filename = os.path.join(frames_dir, f"frame_{i}.png")
            # Registered before rendering so a partially written frame is
            # still cleaned up if this iteration fails.
            filenames.append(filename)

            # NOTE(review): the wordcloud docs say width/height are ignored
            # when a mask is supplied; they are kept from the original call.
            wordcloud = WordCloud(
                font_path='simhei.ttf',  # font with Chinese glyph support
                background_color='white',
                mask=mask,
                contour_width=1,
                contour_color='black',
                width=800,
                height=600,
                color_func=random_rgb,
                random_state=i,  # a different layout seed for every frame
            ).generate(words_list)
            wordcloud.to_file(filename)

        # Combine all frames into the GIF.
        # NOTE(review): some imageio releases interpret GIF `duration` in
        # milliseconds rather than seconds; confirm for the installed version.
        with imageio.get_writer(gif_path, mode="I", duration=frame_duration) as writer:
            for filename in filenames:
                writer.append_data(imageio.imread(filename))
    finally:
        # Remove the temporary frame files even when rendering or GIF
        # writing failed part-way through.
        for filename in filenames:
            if os.path.exists(filename):
                os.remove(filename)

    print(f"GIF created at {gif_path}")
    return 0
|
||||
|
Loading…
Reference in new issue