# Repository web-page residue captured together with this file (not part of the program):
# - You cannot select more than 25 topics.
# - Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
# - File size: 69 lines, 2.4 KiB.
import re

import jieba
import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud
# Request headers that mimic a desktop browser; passed as ``headers=`` to the
# requests.get calls in this file.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
}
# Check whether a text mentions "AI" or "人工智能" (artificial intelligence).
def contains_ai_or_artificial_intelligence(text):
    """Return True if *text* mentions AI, either as "AI" or as "人工智能".

    A plain case-insensitive substring test for "ai" also fires on ordinary
    words such as "wait" or "kawaii", so the Latin spelling is accepted only
    when it is either

    * upper-case "AI" anywhere in the text (keeps "OpenAI", "AIGC", "AI技术"), or
    * "ai" in any letter case standing on its own, i.e. not directly preceded
      or followed by another ASCII letter (keeps "ai裁判", "use Ai now").

    An all-caps word that happens to contain "AI" (e.g. "WAIT") still matches;
    that residual false positive is accepted to keep the acronym forms.

    Args:
        text: The comment text to inspect. Empty or None yields False.

    Returns:
        bool: True when the text mentions AI, False otherwise.
    """
    if not text:
        return False
    if "人工智能" in text or "AI" in text:
        return True
    # Lower-cased stand-alone token: no ASCII letter on either side.
    return re.search(r"(?<![a-z])ai(?![a-z])", text.lower()) is not None
# Fetch the content of a web page.
def get_html(url, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout requests.get may block indefinitely on a stalled
            connection.

    Returns:
        str: The response body as text. The HTTP status code is not checked,
        so the body of an error response is returned as-is.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode as UTF-8 explicitly instead of using the encoding requests infers.
    response.encoding = 'utf-8'
    return response.text
# Parse a page and collect the video links it contains.
def parse_video_links(html):
    """Return the distinct ``/video/...`` hrefs found in *html*.

    Every ``<a>`` element carrying an ``href`` attribute is inspected and the
    href is kept when it starts with ``/video/``. A page can link to the same
    video from several anchors, so repeated hrefs are dropped to avoid
    fetching (and counting) the same video more than once.

    NOTE(review): only site-relative hrefs are matched; absolute or
    protocol-relative video URLs are ignored. Confirm this matches the markup
    of the pages being parsed.

    Args:
        html: HTML source of the page.

    Returns:
        list[str]: Matching hrefs in first-seen order, without duplicates.
    """
    soup = BeautifulSoup(html, 'html.parser')
    hrefs = (anchor['href'] for anchor in soup.find_all('a', href=True))
    # dict.fromkeys removes repeats while preserving document order.
    return list(dict.fromkeys(
        href for href in hrefs if href.startswith('/video/')
    ))
# Fetch the danmaku (bullet comments) of one video.
def get_danmaku_data(video_id, timeout=10):
    """Return the danmaku texts for *video_id*, or ``[]`` when unavailable.

    The function is best-effort: a failed request, a non-200 status or a body
    in an unexpected shape yields an empty list instead of aborting the run.

    Two body formats are understood:

    * JSON shaped like ``{"data": {"list": [{"content": ...}, ...]}}``;
    * otherwise the body is scanned for ``<d>`` elements and their text is
      returned.

    NOTE(review): community API notes describe ``list.so`` as returning XML
    with one ``<d>`` element per danmaku and as expecting the video's *cid*
    in ``oid`` (not a BV id). Confirm against the live API and adjust the
    caller if an id translation step is needed.

    Args:
        video_id: Identifier placed in the ``oid`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[str]: Danmaku texts; empty when nothing could be retrieved.
    """
    danmaku_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}"
    try:
        response = requests.get(danmaku_url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # One unreachable video must not abort the whole collection run.
        return []
    if response.status_code != 200:
        return []

    # Decode as UTF-8 explicitly so non-ASCII comment text is not garbled.
    response.encoding = 'utf-8'

    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON; fall through to the markup scan.
        payload = None

    if payload is not None:
        try:
            return [entry['content'] for entry in payload['data']['list']]
        except (KeyError, TypeError):
            # JSON, but not the expected structure (e.g. an error object).
            return []

    markup = BeautifulSoup(response.text, 'html.parser')
    return [node.get_text() for node in markup.find_all('d')]
# Main program.
def main():
    """Collect AI-related danmaku from search results and report on them.

    Steps:
        1. Fetch the Bilibili video search page for the percent-encoded
           keyword "巴黎奥运会" and extract its video links.
        2. Fetch the danmaku of every distinct video and keep those that
           mention AI.
        3. Print how many were kept and write them to ``ai_danmaku.xlsx``.
        4. Show a word cloud of the kept danmaku (skipped when none were
           found, because WordCloud cannot build a cloud from empty text).
    """
    search_url = "https://search.bilibili.com/video?keyword=%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A"
    video_links = parse_video_links(get_html(search_url))

    ai_danmaku_list = []  # AI-related danmaku collected across all videos
    seen_ids = set()
    for link in video_links:
        # '/video/<id>/...' -> '<id>'; drop any query string glued to the id.
        video_id = link.split('/')[2].split('?', 1)[0]
        if not video_id or video_id in seen_ids:
            continue
        seen_ids.add(video_id)
        ai_danmaku_list.extend(
            danmaku
            for danmaku in get_danmaku_data(video_id)
            if contains_ai_or_artificial_intelligence(danmaku)
        )

    # Report the number of AI-related danmaku.
    print(f"AI相关的弹幕数量: {len(ai_danmaku_list)}")

    # Write the results to an Excel file (also when the list is empty).
    df = pd.DataFrame(ai_danmaku_list, columns=['弹幕'])
    df.to_excel('ai_danmaku.xlsx', index=False)

    if not ai_danmaku_list:
        # WordCloud.generate raises ValueError when it is given no words.
        return

    # Chinese text has no spaces between words, so segment it with jieba
    # first; otherwise whole sentences end up as single word-cloud tokens.
    text = ' '.join(jieba.lcut(' '.join(ai_danmaku_list)))

    # NOTE(review): the default WordCloud font may lack CJK glyphs; pass
    # font_path=<a CJK-capable font> if Chinese words render as boxes.
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()