Compare commits
11 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
f1eb8b4a15 | 2 months ago |
|
|
3714f5f6a4 | 2 months ago |
|
|
cd813bd8d7 | 2 months ago |
|
|
bfbe38185c | 2 months ago |
|
|
b63d314b78 | 2 months ago |
|
|
d41d42fe17 | 2 months ago |
|
|
cfd0003966 | 2 months ago |
|
|
5e9c6bf254 | 2 months ago |
|
|
17594b6c31 | 2 months ago |
|
|
cba53d5416 | 2 months ago |
|
|
851ace59ca | 2 months ago |
Binary file not shown.
@ -0,0 +1,100 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "e34f22b5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from IPython.display import Image\n",
|
||||||
|
"print(\"生成的图表:\")\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"词云图:\")\n",
|
||||||
|
"display(Image(filename='visualization/wordcloud.png'))\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"应用领域分布:\")\n",
|
||||||
|
"display(Image(filename='visualization/applications_distribution.png'))\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"情感分析:\")\n",
|
||||||
|
"display(Image(filename='visualization/sentiment_analysis.png'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7756a032",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"生成可视化图表...\")\n",
|
||||||
|
"visualizer = Visualizer()\n",
|
||||||
|
"visualizer.create_comprehensive_visualization(processed_df, top_apps, word_freq)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "da2f2518",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"开始数据处理...\")\n",
|
||||||
|
"processor = DataProcessor()\n",
|
||||||
|
"processed_df, top_apps, word_freq = processor.main()\n",
|
||||||
|
"\n",
|
||||||
|
"# 显示处理结果\n",
|
||||||
|
"print(\"应用领域排名:\")\n",
|
||||||
|
"display(top_apps)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"\\n词频统计前10:\")\n",
|
||||||
|
"display(word_freq.head(10))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "db65ea20",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"开始数据爬取...\")\n",
|
||||||
|
"crawler = BilibiliDanmuCrawler()\n",
|
||||||
|
"raw_df = crawler.generate_mock_data()\n",
|
||||||
|
"print(f\"获取到 {len(raw_df)} 条弹幕数据\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "29015b15",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"import os\n",
|
||||||
|
"sys.path.append('scripts')\n",
|
||||||
|
"\n",
|
||||||
|
"from crawler import BilibiliDanmuCrawler\n",
|
||||||
|
"from data_processor import DataProcessor\n",
|
||||||
|
"from visualizer import Visualizer\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.13.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
@ -0,0 +1,12 @@
|
|||||||
|
2025-11-14 16:31:10,335 - INFO - 开始生成模拟弹幕数据...
|
||||||
|
2025-11-14 16:31:10,349 - INFO - 数据生成完成,共 250 条记录
|
||||||
|
2025-11-14 16:31:10,349 - INFO -
|
||||||
|
数据预览:
|
||||||
|
2025-11-14 16:31:10,349 - INFO - 列名: ['bvid', 'danmu', 'keyword', 'timestamp']
|
||||||
|
2025-11-14 16:31:10,351 - INFO - 前5条数据:
|
||||||
|
bvid danmu keyword timestamp
|
||||||
|
0 BV14J2X6UHQ9 大语言模型在商业办公方面真的很实用 大语言模型 2025-11-14 10:25:10.336313
|
||||||
|
1 BV1DOTN63OAF 大模型在编程开发的准确性有待提高 LLM 2025-11-14 10:02:10.336553
|
||||||
|
2 BV1G9XCAIAAG 大模型在娱乐创作的准确性有待提高 大语言模型 2025-11-14 09:34:10.336611
|
||||||
|
3 BV1VQZHWO8VT 大模型在商业办公领域潜力巨大 大语言模型 2025-11-14 03:37:10.336655
|
||||||
|
4 BV1CMSGNOET8 商业办公应用的隐私保护很重要 大语言模型 2025-11-14 00:20:10.336697
|
||||||
@ -0,0 +1,14 @@
|
|||||||
|
# requirements.txt
|
||||||
|
requests>=2.31.0
|
||||||
|
pandas>=2.0.0
|
||||||
|
matplotlib>=3.7.0
|
||||||
|
wordcloud>=1.9.0
|
||||||
|
jieba>=0.42.1
|
||||||
|
openpyxl>=3.1.0
|
||||||
|
pillow>=10.0.0
|
||||||
|
numpy>=1.24.0
|
||||||
|
scipy>=1.10.0
|
||||||
|
selenium>=4.15.0
|
||||||
|
scrapy>=2.11.0
|
||||||
|
jupyter>=1.0.0
|
||||||
|
ipykernel>=6.25.0
|
||||||
Loading…
Reference in new issue