|
|
@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
from wordcloud import WordCloud
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
from openpyxl import Workbook
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_video_id(href):
    """Return the last path segment of *href*, ignoring query, fragment and trailing slash."""
    path = href.split('?', 1)[0].split('#', 1)[0]
    return path.rstrip('/').rsplit('/', 1)[-1]


def fetch_videos(keyword, max_videos=300, timeout=10):
    """
    Search Bilibili for a keyword and return the IDs of the top-ranked videos.

    Result pages are requested one after another (via the ``page`` query
    parameter) until ``max_videos`` unique IDs have been collected or a page
    contributes no new ID.

    :param keyword: search keyword
    :param max_videos: maximum number of video IDs to return (default 300)
    :param timeout: per-request timeout in seconds
    :return: list of unique video IDs in the order they were found; if a
        request fails, the error is printed and the IDs collected so far are
        returned (an empty list when the very first page fails)
    """
    url = "https://search.bilibili.com/all"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    video_ids = []
    seen = set()
    page = 1
    try:
        while len(video_ids) < max_videos:
            params = {
                'keyword': keyword,
                'order': 'totalrank',
                'page': str(page),
            }
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()  # raise on HTTP error status
            soup = BeautifulSoup(response.text, 'html.parser')

            # NOTE(review): this selector assumes the search page is rendered
            # server-side with `.video-item .title` anchors -- confirm against
            # the current page markup.
            found_new = False
            for anchor in soup.select('.video-item .title'):
                if not anchor.has_attr('href'):
                    continue
                video_id = _extract_video_id(anchor['href'])
                if video_id and video_id not in seen:
                    seen.add(video_id)
                    video_ids.append(video_id)
                    found_new = True

            # Stop when a page adds nothing new: either the results are
            # exhausted or the site keeps serving the same page.
            if not found_new:
                break
            page += 1
    except Exception as e:
        # Deliberately broad: callers rely on this function never raising.
        # The failure is reported and whatever was collected is returned.
        print(f"Error fetching videos: {e}")

    video_ids = video_ids[:max_videos]
    print(f"Fetched {len(video_ids)} video IDs.")
    return video_ids
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_danmaku(video_id, timeout=10):
    """
    Fetch the danmaku (bullet comments) of one video.

    The session cookie is read from the ``BILIBILI_COOKIE`` environment
    variable instead of being embedded in the source. When the variable is
    unset, the request is sent without a cookie.

    :param video_id: video ID, sent as the ``oid`` query parameter
    :param timeout: request timeout in seconds
    :return: list of danmaku texts with markup tags stripped; an empty list
        if the request fails or the response does not have the expected shape
    """
    import os  # local import keeps this block self-contained

    # NOTE(review): the seg.so endpoint is understood to expect a cid (not a
    # BV/av id) as `oid` and to answer with protobuf rather than JSON --
    # confirm against the API documentation. The JSON parsing below keeps the
    # original assumption and fails gracefully if it does not hold.
    url = "https://api.bilibili.com/x/v2/dm/web/seg.so"
    params = {'type': 1, 'oid': video_id}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Referer': f'https://www.bilibili.com/video/{video_id}',
    }
    # Credentials must never be committed. Any session cookie that was
    # previously hard-coded here should be treated as leaked and revoked.
    cookie = os.environ.get('BILIBILI_COOKIE')
    if cookie:
        headers['Cookie'] = cookie

    # Phase 1: network. Only transport/HTTP errors are handled here.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()  # raise on HTTP error status
    except requests.RequestException as e:
        print(f"Error fetching danmakus for video {video_id}: {e}")
        return []

    # Phase 2: parsing. ValueError covers a non-JSON body; KeyError/TypeError
    # cover missing keys, a null `data` field, or non-string content.
    try:
        segments = response.json()['data']['dm_seg_list']
        danmakus = [re.sub(r'<[^>]+>', '', dm['content']) for dm in segments]
    except (ValueError, KeyError, TypeError) as e:
        print(f"Invalid response format for video {video_id}: {e}")
        return []

    print(f"Fetched {len(danmakus)} danmakus for video {video_id}.")
    return danmakus
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_danmakus(danmakus, keywords=None):
    """
    Count how often each AI-related danmaku occurs.

    A danmaku is AI-related when it contains at least one keyword as a
    case-sensitive substring. Each occurrence is counted exactly once, even
    when the text matches several keywords (e.g. "人工智能" also contains
    "智能").

    :param danmakus: iterable of danmaku strings
    :param keywords: optional iterable of keywords to match; defaults to the
        built-in AI keyword list
    :return: dict mapping each matching danmaku text to its number of
        occurrences, in order of first appearance
    """
    if keywords is None:
        ai_keywords = ('AI', '人工智能', '机器学习', '深度学习', '算法', '大数据', '智能', '技术')
    else:
        # Materialise once so a generator argument survives repeated scans.
        ai_keywords = tuple(keywords)

    keyword_counts = {}
    for danmaku in danmakus:
        # any() stops at the first hit, so overlapping keywords cannot
        # inflate the count of a single occurrence.
        if any(keyword in danmaku for keyword in ai_keywords):
            keyword_counts[danmaku] = keyword_counts.get(danmaku, 0) + 1
    return keyword_counts
|