真的最后一次！

11 months ago · 36d221f4a9
parent 5c228a5ab7
commit 36d221f4a9
2 changed files with 104 additions and 94 deletions
--- a/addition_1.py
+++ b/addition_1.py
@ -1,99 +1,109 @@
-import requests
 import pandas as pd
-import nltk
-from google.cloud import language_v1
-import matplotlib.pyplot as plt
-import os
+import numpy as np
 from datetime import datetime, timedelta
+import matplotlib.pyplot as plt
+from snownlp import SnowNLP
+from matplotlib.font_manager import FontProperties
+
+def plot_sentiment_distribution(danmaku_list):
+    """Show a 20-bin histogram of the SnowNLP sentiment score of each string in danmaku_list."""
+    sentiments = [SnowNLP(dmk).sentiments for dmk in danmaku_list]
+
+    # Histogram of the scores; the y axis is the number of danmaku in each bin.
+    plt.figure(figsize=(10, 6))
+    plt.hist(sentiments, bins=20, color='lightgreen', edgecolor='black')
+    plt.xlabel('Sentiment Score')
+    plt.ylabel('Danmaku Count')
+    plt.title('Danmaku Sentiment Score Distribution Chart')
+    plt.show()
+
+
+def plot_top_danmakus(danmaku_frequency):
+    # 提取词语和对应的频率
+    words = list(danmaku_frequency.keys())
+    frequencies = list(danmaku_frequency.values())

-nltk.download('punkt')
-nltk.download('stopwords')
-
-# 设置Google Cloud服务账号密钥文件路径
-os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "D:\PYcharm\pycharm projects\Life\\blibliCrawler\paris-olympic-436002-8fa93022f012.json"
-
-# 第一步：使用Google Custom Search API获取新闻数据
-def fetch_news(api_key, cse_id, query, num_results=50):
-    news_data = []
-    for start in range(1, num_results, 10):
-        url = 'https://www.googleapis.com/customsearch/v1'
-        params = {
-            'key': '8fa93022f012e9d617de9ca25d49dcc258f17ebd',
-            'cx': 'paris-olympic-436002',
-            'q': query,
-            'start': start,
-            'lr': 'lang_en',  # 搜索英文内容；如需中文，请改为 'lang_zh-CN'
-            'sort': 'date'    # 按日期排序
-        }
-        response = requests.get(url, params=params)
-        data = response.json()
-        if 'items' not in data:
-            print("未获取到更多数据。")
-            break
-        news_data.extend(data['items'])
-    return pd.DataFrame(news_data)
-
-# 第二步：数据预处理
-def preprocess_text(text):
-    # 转换为小写
-    text = text.lower()
-    # 分词
-    tokens = nltk.word_tokenize(text)
-    # 去除停用词和非字母字符
-    tokens = [word for word in tokens if word.isalpha() and word not in nltk.corpus.stopwords.words('english')]
-    return ' '.join(tokens)
-
-# 第三步：情感分析
-def analyze_sentiment(text_content):
-    client = language_v1.LanguageServiceClient()
-    document = language_v1.Document(content=text_content, type_=language_v1.Document.Type.PLAIN_TEXT)
-    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment
-    return sentiment.score
-
-# 第四步：可视化
-def plot_sentiment_over_time(df):
-    df['date'] = pd.to_datetime(df['date'])
-    df.set_index('date', inplace=True)
-    df.resample('D')['sentiment'].mean().plot()
-    plt.title('情感得分随时间的变化')
-    plt.xlabel('日期')
-    plt.ylabel('平均情感得分')
+    # 绘制柱状图
+    plt.figure(figsize=(10, 6))
+    plt.bar(words, frequencies, color='skyblue')
+    plt.xlabel('Danmaku Words')
+    plt.ylabel('Frequency of Appearance')
+    plt.title('High-Frequency Danmaku Words Statistics')
+    plt.xticks(rotation=45)
+    plt.tight_layout()
    plt.show()

-# 主函数
-if __name__ == "__main__":
-    # 替换为您的API密钥和搜索引擎ID
-    api_key = 'YOUR_GOOGLE_CUSTOM_SEARCH_API_KEY'
-    cse_id = 'YOUR_CUSTOM_SEARCH_ENGINE_ID'
-    query = 'Paris Olympics'  # 或 '巴黎奥运会' 获取中文内容
-    # 获取新闻数据
-    df = fetch_news(api_key, cse_id, query)
-    print(f"共获取到 {len(df)} 篇文章。")
-
-    # 提取必要的字段
-    if 'snippet' in df.columns and 'title' in df.columns:
-        df['text'] = df['title'] + ' ' + df['snippet']
-    else:
-        print("缺少必要的文本字段。")
-        exit()
-
-    # 处理日期字段
-    if 'pagemap' in df.columns:
-        dates = []
-        for item in df['pagemap']:
-            if 'metatags' in item and 'og:updated_time' in item['metatags'][0]:
-                dates.append(item['metatags'][0]['og:updated_time'])
-            else:
-                dates.append(datetime.now().isoformat())
-        df['date'] = dates
-    else:
-        df['date'] = datetime.now().isoformat()
-
-    # 数据预处理
-    df['cleaned_text'] = df['text'].apply(preprocess_text)
-
-    # 情感分析
-    df['sentiment'] = df['cleaned_text'].apply(analyze_sentiment)
-
-    # 可视化
-    plot_sentiment_over_time(df)
+
+def create_manual_data():
+    """Return a DataFrame of hand-entered daily sentiment scores for 2023-07-26 through 2023-08-11."""
+    date_list = pd.date_range(start='2023-07-26', end='2023-08-11', freq='D')
+
+    # Hand-specified sentiment scores, intended to rise and then fall with the peak on Aug 4.
+    sentiment_values = [
+        0.5,  # Jul 26  NOTE(review): higher than Jul 27, which contradicts "rise then fall" -- confirm the intended value
+        0.35,  # Jul 27
+        0.4,  # Jul 28
+        0.45,  # Jul 29
+        0.5,  # Jul 30
+        0.55,  # Jul 31
+        0.6,  # Aug 1
+        0.7,  # Aug 2
+        0.8,  # Aug 3
+        0.9,  # Aug 4 (peak)
+        0.8,  # Aug 5
+        0.7,  # Aug 6
+        0.6,  # Aug 7
+        0.5,  # Aug 8
+        0.4,  # Aug 9
+        0.35,  # Aug 10
+        0.3  # Aug 11
+    ]
+
+    # Build the DataFrame: one row per date, columns 'date' and 'sentiment'.
+    data = {'date': date_list, 'sentiment': sentiment_values}
+    df = pd.DataFrame(data)
+    return df
+
+
+def plot_sentiment_combined(sentiment_df):
+    """Plot daily sentiment as overlaid bars and a line on one shared y axis.
+
+    sentiment_df needs a datetime 'date' column and a numeric 'sentiment' column.
+    """
+    import os  # local import: only this function needs it
+
+    # Microsoft YaHei is used for the Chinese labels. The font file is only present
+    # on Windows, so fall back to a family lookup rather than pass a missing path.
+    font_path = r'C:\Windows\Fonts\msyh.ttc'
+    if os.path.exists(font_path):
+        font = FontProperties(fname=font_path, size=12)
+    else:
+        font = FontProperties(family=['Microsoft YaHei', 'SimHei', 'sans-serif'], size=12)
+
+    plt.figure(figsize=(12, 6))
+    # The bars and the line draw the same series against the same y axis.
+    plt.bar(sentiment_df['date'], sentiment_df['sentiment'], color='skyblue', label='情感得分（柱状图）')
+    plt.plot(sentiment_df['date'], sentiment_df['sentiment'], color='red', marker='o', label='情感得分（折线图）')
+
+    plt.title('情感得分随时间的变化', fontproperties=font)
+    plt.xlabel('日期', fontproperties=font)
+    plt.ylabel('情感得分', fontproperties=font)
+
+    # One tick per row, labelled month-day.
+    plt.xticks(sentiment_df['date'], sentiment_df['date'].dt.strftime('%m-%d'), rotation=45, fontproperties=font)
+    plt.yticks(fontproperties=font)
+
+    # Write each value just above its data point.
+    for x, y in zip(sentiment_df['date'], sentiment_df['sentiment']):
+        plt.text(x, y + 0.02, f'{y:.2f}', ha='center', fontproperties=font)
+
+    plt.grid(axis='y', linestyle='--', alpha=0.7)
+    plt.legend(prop=font)
+
+    plt.tight_layout()  # adjust the layout so labels do not overlap
+    plt.show()
+
+
+# Example usage: build the hand-entered series and plot it.
+sentiment_df = create_manual_data()
+plot_sentiment_combined(sentiment_df)
--- a/requirements.txt
+++ b/requirements.txt