Compare commits
3 Commits
main
...
__pycache_
| Author | SHA1 | Date |
|---|---|---|
|
|
2f168bffd0 | 2 months ago |
|
|
a13933c604 | 2 months ago |
|
|
2d1cf0c687 | 2 months ago |
Binary file not shown.
@ -1,100 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e34f22b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import Image\n",
|
||||
"print(\"生成的图表:\")\n",
|
||||
"\n",
|
||||
"print(\"词云图:\")\n",
|
||||
"display(Image(filename='visualization/wordcloud.png'))\n",
|
||||
"\n",
|
||||
"print(\"应用领域分布:\")\n",
|
||||
"display(Image(filename='visualization/applications_distribution.png'))\n",
|
||||
"\n",
|
||||
"print(\"情感分析:\")\n",
|
||||
"display(Image(filename='visualization/sentiment_analysis.png'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7756a032",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"生成可视化图表...\")\n",
|
||||
"visualizer = Visualizer()\n",
|
||||
"visualizer.create_comprehensive_visualization(processed_df, top_apps, word_freq)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da2f2518",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"开始数据处理...\")\n",
|
||||
"processor = DataProcessor()\n",
|
||||
"processed_df, top_apps, word_freq = processor.main()\n",
|
||||
"\n",
|
||||
"# 显示处理结果\n",
|
||||
"print(\"应用领域排名:\")\n",
|
||||
"display(top_apps)\n",
|
||||
"\n",
|
||||
"print(\"\\n词频统计前10:\")\n",
|
||||
"display(word_freq.head(10))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db65ea20",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"开始数据爬取...\")\n",
|
||||
"crawler = BilibiliDanmuCrawler()\n",
|
||||
"raw_df = crawler.generate_mock_data()\n",
|
||||
"print(f\"获取到 {len(raw_df)} 条弹幕数据\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29015b15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append('scripts')\n",
|
||||
"\n",
|
||||
"from crawler import BilibiliDanmuCrawler\n",
|
||||
"from data_processor import DataProcessor\n",
|
||||
"from visualizer import Visualizer\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Binary file not shown.
@ -1,12 +0,0 @@
|
||||
2025-11-14 16:31:10,335 - INFO - 开始生成模拟弹幕数据...
|
||||
2025-11-14 16:31:10,349 - INFO - 数据生成完成,共 250 条记录
|
||||
2025-11-14 16:31:10,349 - INFO -
|
||||
数据预览:
|
||||
2025-11-14 16:31:10,349 - INFO - 列名: ['bvid', 'danmu', 'keyword', 'timestamp']
|
||||
2025-11-14 16:31:10,351 - INFO - 前5条数据:
|
||||
bvid danmu keyword timestamp
|
||||
0 BV14J2X6UHQ9 大语言模型在商业办公方面真的很实用 大语言模型 2025-11-14 10:25:10.336313
|
||||
1 BV1DOTN63OAF 大模型在编程开发的准确性有待提高 LLM 2025-11-14 10:02:10.336553
|
||||
2 BV1G9XCAIAAG 大模型在娱乐创作的准确性有待提高 大语言模型 2025-11-14 09:34:10.336611
|
||||
3 BV1VQZHWO8VT 大模型在商业办公领域潜力巨大 大语言模型 2025-11-14 03:37:10.336655
|
||||
4 BV1CMSGNOET8 商业办公应用的隐私保护很重要 大语言模型 2025-11-14 00:20:10.336697
|
||||
Binary file not shown.
@ -1,14 +0,0 @@
|
||||
# requirements.txt
|
||||
requests>=2.31.0
|
||||
pandas>=2.0.0
|
||||
matplotlib>=3.7.0
|
||||
wordcloud>=1.9.0
|
||||
jieba>=0.42.1
|
||||
openpyxl>=3.1.0
|
||||
pillow>=10.0.0
|
||||
numpy>=1.24.0
|
||||
scipy>=1.10.0
|
||||
selenium>=4.15.0
|
||||
scrapy>=2.11.0
|
||||
jupyter>=1.0.0
|
||||
ipykernel>=6.25.0
|
||||
Binary file not shown.
Loading…
Reference in new issue