import re

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
from wordcloud import WordCloud

# Seconds to wait on each HTTP call; without a timeout requests.get() can
# block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10

# Browser-like User-Agent sent with every request.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
)

# Cookie value sent with danmaku requests.
# NOTE(review): this looks like a real logged-in session (SESSDATA / bili_jct)
# committed to source. Consider rotating it and loading it from configuration
# or the environment instead.
_COOKIE = (
    'buvid3=0C047DB7-FB67-6565-B853-68B19196AEE053166infoc; '
    'buvid4=D2E32722-EB31-8B5B-8BC7-420F049CDE3657801-022071821-mG8+jYWtWHQ35A9yqIgZIA%3D%3D; '
    'buvid_fp=60e37bdf4fe67cde89d283db25adff46; '
    '_uuid=FCEA6C48-BB82-123A-61106-3F5410106BB410B03170infoc; '
    'b_nut=100; '
    'header_theme_version=CLOSE; '
    'enable_web_push=DISABLE; '
    'bmg_af_switch=1; '
    'bmg_src_def_domain=i0.hdslb.com; '
    'bsource=search_bing; '
    'bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY5MTczNjUsImlhdCI6MTcyNjY1ODEwNSwicGx0IjotMX0.Aa-1_tfEk0rFqyzRFZ-vIsSUSbvUfyR7woQA3IC3h0s; '
    'bili_ticket_expires=1726917305; '
    'CURRENT_FNVAL=4048; '
    'SESSDATA=aa6a6590%2C1742210524%2C7b1c4%2A92CjCxud8rqp6tuF7AYkzmJF0YS7_L4_80iMI3NuY5q-M7BEW3cf0_bVyhIcnZMJapP7YSVnJiQ2NVcTJZZ1ZIMFduRURJXzZXOWtaTTl2WnBFSHkwckM0UzdwY2xHMG9MNVl4c1pUSHlFaFJ4RnQ5WjY3ZHRtcm5qcDhNSVo3eXZORDczc0VlYlF3IIEC; '
    'bili_jct=5232d057d308c18c1419d19271a3b85e; '
    'DedeUserID=1576579979; '
    'DedeUserID__ckMd5=da7d6054e70acbba; '
    'b_lsid=674C255B_19204FCE528; '
    'sid=g83mkv7a; '
    'home_feed_column=4; '
    'browser_resolution=435-748'
)


def fetch_videos(keyword):
    """Search for *keyword* and return the video IDs found on the results page.

    Only the first results page (``page=1``, ordered by ``totalrank``) is
    requested, so the result holds whatever that single page exposes, capped
    at 300 entries.

    :param keyword: search keyword
    :return: list of video ID strings; an empty list if the request or the
        parsing fails for any reason
    """
    url = "https://search.bilibili.com/all"
    headers = {
        'User-Agent': _USER_AGENT,
    }
    params = {
        'keyword': keyword,
        'order': 'totalrank',
        'page': '1',
    }
    try:
        response = requests.get(
            url, headers=headers, params=params, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()  # turn HTTP error statuses into exceptions
        soup = BeautifulSoup(response.text, 'html.parser')
        # The ID is taken as the last '/'-separated piece of each title link;
        # links without an href are skipped. Any query string on the href is
        # kept as part of that piece.
        video_ids = [
            a['href'].split('/')[-1]
            for a in soup.select('.video-item .title')
            if a.has_attr('href')
        ]
        print(f"Fetched {len(video_ids)} video IDs.")
        return video_ids[:300]
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller continue with no videos.
        print(f"Error fetching videos: {e}")
        return []


def fetch_danmaku(video_id):
    """Fetch the danmaku (bullet comments) for one video.

    Requests the ``seg.so`` danmaku endpoint with ``oid=video_id``, decodes
    the body as JSON, reads ``data.dm_seg_list`` and strips anything that
    looks like an HTML tag from each entry's ``content`` field.

    :param video_id: value used as ``oid`` in the query and in the Referer URL
    :return: list of danmaku strings; an empty list on a network/HTTP failure
        or when the response does not have the expected shape
    """
    url = f"https://api.bilibili.com/x/v2/dm/web/seg.so?type=1&oid={video_id}"
    headers = {
        'User-Agent': _USER_AGENT,
        'Cookie': _COOKIE,
        'Referer': f'https://www.bilibili.com/video/{video_id}',
    }
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()  # turn HTTP error statuses into exceptions
        # NOTE(review): community API docs describe seg.so as a protobuf
        # endpoint rather than JSON - confirm. If so, decoding fails here and
        # the function returns [] through one of the handlers below.
        data = response.json()
        danmakus = [
            re.sub(r'<[^>]+>', '', dm['content'])
            for dm in data['data']['dm_seg_list']
        ]
        print(f"Fetched {len(danmakus)} danmakus for video {video_id}.")
        return danmakus
    except requests.RequestException as e:
        print(f"Error fetching danmakus for video {video_id}: {e}")
        return []
    except (KeyError, TypeError, ValueError) as e:
        # KeyError: an expected field is missing.
        # TypeError: a field is null or has the wrong type (e.g. "data": null).
        # ValueError: the body is not valid JSON (raised as a plain ValueError
        # subclass by older requests versions).
        print(f"Invalid response format for video {video_id}: {e}")
        return []


def analyze_danmakus(danmakus):
    """Count how often each AI-related danmaku occurs.

    A danmaku is treated as AI-related when it contains at least one of the
    keywords below (plain, case-sensitive substring match).

    :param danmakus: list of danmaku strings
    :return: dict mapping each matching danmaku text to the number of times
        it appears in *danmakus*
    """
    ai_keywords = ['AI', '人工智能', '机器学习', '深度学习', '算法', '大数据', '智能', '技术']
    keyword_counts = {}
    for danmaku in danmakus:
        # One occurrence must add exactly 1 even when several keywords match
        # (e.g. any text containing '人工智能' also contains '智能'); any()
        # stops at the first hit instead of counting once per keyword.
        if any(keyword in danmaku for keyword in ai_keywords):
            keyword_counts[danmaku] = keyword_counts.get(danmaku, 0) + 1
    return keyword_counts