Compare commits

...

11 Commits

@ -1,2 +0,0 @@
# 102301535

@ -0,0 +1,100 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1c0ffee",
   "metadata": {},
   "source": [
    "# Bilibili danmaku analysis pipeline\n",
    "\n",
    "Run the cells from top to bottom (Restart Kernel → Run All): imports, mock danmaku generation, data processing, chart generation, chart display."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29015b15",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "# Put the scripts/ directory on the import path before the project imports below.\n",
    "sys.path.append('scripts')\n",
    "\n",
    "from crawler import BilibiliDanmuCrawler\n",
    "from data_processor import DataProcessor\n",
    "from visualizer import Visualizer\n",
    "\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.display import Image, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db65ea20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 1: build the raw danmaku DataFrame from the crawler's mock-data generator.\n",
    "print(\"开始数据爬取...\")\n",
    "crawler = BilibiliDanmuCrawler()\n",
    "raw_df = crawler.generate_mock_data()\n",
    "print(f\"获取到 {len(raw_df)} 条弹幕数据\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da2f2518",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 2: run the processing pipeline.\n",
    "# NOTE(review): raw_df from the previous cell is not passed in; presumably\n",
    "# DataProcessor.main() loads the data itself - confirm.\n",
    "print(\"开始数据处理...\")\n",
    "processor = DataProcessor()\n",
    "processed_df, top_apps, word_freq = processor.main()\n",
    "\n",
    "# Show the processing results\n",
    "print(\"应用领域排名:\")\n",
    "display(top_apps)\n",
    "\n",
    "print(\"\\n词频统计前10:\")\n",
    "display(word_freq.head(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7756a032",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 3: generate the charts from the outputs of the processing cell.\n",
    "print(\"生成可视化图表...\")\n",
    "visualizer = Visualizer()\n",
    "visualizer.create_comprehensive_visualization(processed_df, top_apps, word_freq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e34f22b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 4: show the chart images inline.\n",
    "# NOTE(review): assumes the previous cell wrote these PNG files under\n",
    "# visualization/ - confirm against Visualizer.\n",
    "print(\"生成的图表:\")\n",
    "\n",
    "print(\"词云图:\")\n",
    "display(Image(filename='visualization/wordcloud.png'))\n",
    "\n",
    "print(\"应用领域分布:\")\n",
    "display(Image(filename='visualization/applications_distribution.png'))\n",
    "\n",
    "print(\"情感分析:\")\n",
    "display(Image(filename='visualization/sentiment_analysis.png'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.13.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

@ -0,0 +1,12 @@
2025-11-14 16:31:10,335 - INFO - 开始生成模拟弹幕数据...
2025-11-14 16:31:10,349 - INFO - 数据生成完成,共 250 条记录
2025-11-14 16:31:10,349 - INFO -
数据预览:
2025-11-14 16:31:10,349 - INFO - 列名: ['bvid', 'danmu', 'keyword', 'timestamp']
2025-11-14 16:31:10,351 - INFO - 前5条数据:
bvid danmu keyword timestamp
0 BV14J2X6UHQ9 大语言模型在商业办公方面真的很实用 大语言模型 2025-11-14 10:25:10.336313
1 BV1DOTN63OAF 大模型在编程开发的准确性有待提高 LLM 2025-11-14 10:02:10.336553
2 BV1G9XCAIAAG 大模型在娱乐创作的准确性有待提高 大语言模型 2025-11-14 09:34:10.336611
3 BV1VQZHWO8VT 大模型在商业办公领域潜力巨大 大语言模型 2025-11-14 03:37:10.336655
4 BV1CMSGNOET8 商业办公应用的隐私保护很重要 大语言模型 2025-11-14 00:20:10.336697

@ -0,0 +1,14 @@
# requirements.txt
requests>=2.31.0
pandas>=2.0.0
matplotlib>=3.7.0
wordcloud>=1.9.0
jieba>=0.42.1
openpyxl>=3.1.0
pillow>=10.0.0
numpy>=1.24.0
scipy>=1.10.0
selenium>=4.15.0
scrapy>=2.11.0
jupyter>=1.0.0
ipykernel>=6.25.0
Loading…
Cancel
Save