# o_game/cloud.py

import numpy as np
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import plotly.express as px

def read_data(file_path):
    """Load the AI-related danmaku column from a CSV file.

    Args:
        file_path: Location of the CSV; passed straight to ``pandas.read_csv``.

    Returns:
        A pandas Series with the ``ai_related_danmakus`` column, missing
        entries dropped and every remaining value converted to ``str``.

    Raises:
        KeyError: If the CSV has no ``ai_related_danmakus`` column.
    """
    frame = pd.read_csv(file_path)
    danmaku_column = frame['ai_related_danmakus']
    present_only = danmaku_column.dropna()
    return present_only.astype(str)

def filter_keywords(text_list, stopwords):
    """Drop stopwords from whitespace-separated texts and re-join the rest.

    Args:
        text_list: Iterable of strings; each is split on whitespace.
        stopwords: Container of tokens to discard (tested with ``in``).

    Returns:
        A single string holding every kept token, in input order,
        separated by single spaces. Empty when nothing is kept.
    """
    def kept_tokens():
        # Stream tokens in their original order, skipping stopwords.
        for line in text_list:
            for token in line.split():
                if token not in stopwords:
                    yield token

    return ' '.join(kept_tokens())

def generate_wordcloud(text, stopwords, colormap='viridis',
                       font_path='/usr/share/fonts/truetype/simhei.ttf',
                       width=800, height=400, background_color='white'):
    """Build a word cloud from ``text``.

    The rendering options that used to be hard-coded (font file, canvas
    size, background colour) are now keyword parameters; their defaults
    are the former fixed values, so existing callers behave the same.

    Args:
        text: Source text handed to ``WordCloud.generate``.
        stopwords: Words the ``WordCloud`` should ignore.
        colormap: Matplotlib colormap name used to colour the words.
        font_path: Font file used for rendering. The default is a SimHei
            font at a Linux-style path; pass another path on systems
            where that file does not exist.
        width: Canvas width in pixels.
        height: Canvas height in pixels.
        background_color: Background colour of the image.

    Returns:
        The object returned by ``WordCloud(...).generate(text)``.
    """
    cloud_builder = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        colormap=colormap,
        font_path=font_path,
    )
    return cloud_builder.generate(text)

def show_wordcloud(wordcloud, freq_dist):
    """Display the word cloud image, then a bar chart of the top 8 words.

    Args:
        wordcloud: Image-like object passed to ``plt.imshow``.
        freq_dist: Object exposing ``most_common(n)`` (e.g. a ``Counter``)
            that yields ``(word, count)`` pairs.
    """
    # Word cloud, drawn with matplotlib and with the axes hidden.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    # Keyword-frequency bar chart, drawn with Plotly.
    top_words = freq_dist.most_common(8)
    top_frame = pd.DataFrame(top_words, columns=['Word', 'Frequency'])
    px.bar(
        top_frame,
        x='Word',
        y='Frequency',
        title='Top 8 ai_related_danmakus',
    ).show()

def cloud(file_path):
    """Run the full pipeline: load danmakus, filter, count, and visualise.

    Args:
        file_path: CSV file handed to ``read_data``.

    Returns:
        Always ``0``.
    """
    # Custom stopword list; extend as needed.
    stopwords = {'的', '是', '在', '我', '你', '5', '哈'}

    # Load the danmaku texts and strip the stopwords from them.
    filtered_text = filter_keywords(read_data(file_path), stopwords)

    # Count how often each remaining token occurs.
    freq_dist = Counter(filtered_text.split())

    # Build the word cloud, then show it together with the frequency chart.
    show_wordcloud(generate_wordcloud(filtered_text, stopwords), freq_dist)

    return 0