Compare commits
16 Commits
Author | SHA1 | Date |
---|---|---|
|
1973f58287 | 5 months ago |
|
be62c0356d | 5 months ago |
|
99a7f3587d | 5 months ago |
|
cce5370169 | 5 months ago |
|
ff7b6e8702 | 5 months ago |
|
adee86021e | 5 months ago |
|
235dedf111 | 5 months ago |
|
f80d80b091 | 5 months ago |
|
6d30d34b44 | 5 months ago |
|
4db4b37a16 | 5 months ago |
|
872a9d2fde | 5 months ago |
|
712ab4fb0f | 5 months ago |
|
da23d1b336 | 5 months ago |
|
422e1fe9db | 5 months ago |
|
454eeb27bd | 5 months ago |
|
d7b1403538 | 5 months ago |
@ -1,2 +1,9 @@
|
|||||||
# spider
|
# spider
|
||||||
|
中途有修改
|
||||||
|
注:本仓库分为"图片"和"main"两个分支;如果点开后看不到所有文件,请在左上角的分支选择处切换到"图片"分支。
|
||||||
|
|
||||||
|
spider_main.py-弹幕爬虫代码
|
||||||
|
cloudimage.py-词云图代码
|
||||||
|
select_content.py-筛选AI弹幕代码
|
||||||
|
ballgame.py-附加题代码
|
||||||
|
其中弹幕生成文件**all_content.txt**超过5MB无法上传
|
@ -0,0 +1,20 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# Text file holding every collected danmu (bullet comment), one per line.
DANMU_PATH = 'all_content.txt'

# Keywords for the six ball sports of interest.
keywords = ['乒乓球', '羽毛球', '排球', '篮球', '足球', '网球']

# Destination workbook for the ranking.
excel_path = 'top_ball_danmu.xlsx'

# How many of the most frequent matching danmu to report.
TOP_N = 20


def top_matching_danmu(lines, wanted, limit):
    """Return the ``limit`` most frequent danmu that mention any keyword.

    Args:
        lines: Iterable of raw danmu strings, typically lines read from a file.
        wanted: Keywords; a danmu is kept if it contains at least one of them
            (plain, case-sensitive substring match).
        limit: Maximum number of ``(danmu, count)`` pairs to return.

    Returns:
        A list of ``(danmu, count)`` tuples ordered from most to least common.
    """
    # Drop line terminators before counting: otherwise the same danmu with and
    # without a trailing newline (e.g. the last line of the file) is counted as
    # two different entries, and the newline leaks into the exported cells.
    cleaned = (line.rstrip('\r\n') for line in lines)
    matched = [
        text for text in cleaned
        if text and any(word in text for word in wanted)
    ]
    return Counter(matched).most_common(limit)


def main():
    """Rank the ball-sport danmu, print the ranking and export it to Excel."""
    with open(DANMU_PATH, mode='r', encoding='utf-8') as f:
        top_common = top_matching_danmu(f, keywords, TOP_N)

    # Show the most frequent matching danmu.
    print(top_common)

    # Export the ranking as an Excel file.
    t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
    t.to_excel(excel_path, index=False)


if __name__ == "__main__":
    main()
|
Binary file not shown.
@ -0,0 +1,21 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# Text file holding every collected danmu (bullet comment), one per line.
DANMU_PATH = 'all_content.txt'

# Keywords related to applications of AI technology.
# Matching is case-sensitive: 'AI' only matches the upper-case form, which is
# why specific lower-case phrases ('ai音效', 'ai视频', ...) are listed as well.
ai_keywords = [
    'AI', '人工智能', 'ai音效', 'ai视频', 'ai技术', '机器学习', '深度学习',
    '自然语言处理', 'ai训练', '大模型', '云计算', '神经网络', '自动驾驶',
    'ai设计', 'ai图', 'AI软件',
]

# Destination workbook for the ranking.
excel_path = 'top8_ai_danmu.xlsx'

# How many of the most frequent matching danmu to report.
TOP_N = 8


def top_matching_danmu(lines, wanted, limit):
    """Return the ``limit`` most frequent danmu that mention any keyword.

    Args:
        lines: Iterable of raw danmu strings, typically lines read from a file.
        wanted: Keywords; a danmu is kept if it contains at least one of them
            (plain, case-sensitive substring match).
        limit: Maximum number of ``(danmu, count)`` pairs to return.

    Returns:
        A list of ``(danmu, count)`` tuples ordered from most to least common.
    """
    # Drop line terminators before counting: otherwise the same danmu with and
    # without a trailing newline (e.g. the last line of the file) is counted as
    # two different entries, and the newline leaks into the exported cells.
    cleaned = (line.rstrip('\r\n') for line in lines)
    matched = [
        text for text in cleaned
        if text and any(word in text for word in wanted)
    ]
    return Counter(matched).most_common(limit)


def main():
    """Rank the AI-related danmu, print the top eight and export to Excel."""
    with open(DANMU_PATH, mode='r', encoding='utf-8') as f:
        top_common = top_matching_danmu(f, ai_keywords, TOP_N)

    # Show the eight most frequent matching danmu.
    print(top_common)

    # Export the ranking as an Excel file.
    t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
    t.to_excel(excel_path, index=False)


if __name__ == "__main__":
    main()
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue