parent
5b42a764d4
commit
7018ae8655
@ -0,0 +1,69 @@
|
||||
import re

import jieba
import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud
|
||||
|
||||
# 模拟浏览器请求头
|
||||
# Browser-like User-Agent string; passed as the request headers by the
# HTTP helpers in this file.
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"

headers = {"User-Agent": _USER_AGENT}
|
||||
|
||||
# 检查文本是否包含“AI”或“人工智能”
|
||||
# "ai" as a standalone token: not glued to another ASCII letter on either
# side. re.ASCII keeps the letter class (and case folding) ASCII-only, so
# Chinese characters adjacent to "AI" do not block the match.
_AI_TOKEN = re.compile(r"(?<![a-z])ai(?![a-z])", re.IGNORECASE | re.ASCII)


def contains_ai_or_artificial_intelligence(text):
    """Return True if *text* mentions "AI" (any case) or "人工智能".

    "AI" is matched only when it is not part of a longer run of ASCII
    letters, so words that merely contain the letters "ai" (for example
    "Spain" or "again") are not treated as AI-related.

    Args:
        text: The string to inspect.

    Returns:
        True when either keyword is present, otherwise False.
    """
    if "人工智能" in text:
        return True
    return _AI_TOKEN.search(text) is not None
|
||||
|
||||
# 获取网页内容
|
||||
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may wait indefinitely on a stalled
            connection.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode as UTF-8 explicitly instead of relying on the encoding that
    # requests infers from the response headers.
    response.encoding = 'utf-8'
    return response.text
|
||||
|
||||
# 解析网页获取视频链接
|
||||
def parse_video_links(html):
    """Collect the distinct ``/video/...`` links found in an HTML document.

    Args:
        html: HTML markup to scan.

    Returns:
        A list of ``href`` values from ``<a>`` tags that start with
        ``/video/``, in document order, each appearing once.
    """
    soup = BeautifulSoup(html, 'html.parser')
    hrefs = (anchor['href'] for anchor in soup.find_all('a', href=True))
    # NOTE(review): only site-relative links are matched; absolute or
    # protocol-relative URLs (e.g. "//host/video/...") are ignored --
    # confirm against the real page markup.
    matching = (href for href in hrefs if href.startswith('/video/'))
    # dict.fromkeys drops repeated links while keeping first-seen order, so
    # a video linked several times on the page is only processed once.
    return list(dict.fromkeys(matching))
|
||||
|
||||
# 获取弹幕数据
|
||||
def get_danmaku_data(video_id, timeout=10):
    """Fetch the danmaku (bullet comment) texts for one video.

    This is a best-effort lookup: any response that is not a 200 with the
    expected JSON layout yields an empty list rather than an exception.

    Args:
        video_id: Identifier placed in the ``oid`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``content`` value of every entry under
        ``data.list`` in the JSON response, or ``[]`` when the status is
        not 200, the body is not JSON, or that structure is missing.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            or times out.
    """
    danmaku_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}"
    response = requests.get(danmaku_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return []

    # NOTE(review): this endpoint may serve XML rather than JSON, and `oid`
    # may need to be a different id than the one taken from the video
    # link -- confirm against the API's actual behaviour.
    try:
        payload = response.json()
    except ValueError:
        # Body is not valid JSON.
        return []

    try:
        entries = payload['data']['list']
    except (KeyError, TypeError):
        # 'data' / 'list' missing, or an intermediate value is null.
        return []
    if not isinstance(entries, list):
        return []

    return [
        entry['content']
        for entry in entries
        if isinstance(entry, dict) and 'content' in entry
    ]
|
||||
|
||||
# 主程序
|
||||
def main():
    """Scrape AI-related danmaku for a fixed search and report on them.

    Steps:
        1. Fetch the search results page and extract the video links.
        2. Download each video's danmaku and keep the AI-related ones.
        3. Print how many were found and write them to ``ai_danmaku.xlsx``.
        4. Show a word cloud of the collected danmaku (skipped when none
           were found, because a word cloud cannot be built from no text).
    """
    search_url = "https://search.bilibili.com/video?keyword=%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A"

    ai_danmaku_list = []
    for link in parse_video_links(get_html(search_url)):
        # '/video/<id>/...' splits into ['', 'video', '<id>', ...].
        video_id = link.split('/')[2]
        if not video_id:
            # A bare '/video/' link carries no id to query.
            continue
        ai_danmaku_list.extend(
            danmaku
            for danmaku in get_danmaku_data(video_id)
            if contains_ai_or_artificial_intelligence(danmaku)
        )

    # Report how many AI-related danmaku were collected.
    print(f"AI相关的弹幕数量: {len(ai_danmaku_list)}")

    # Write the results to an Excel file (done even when the list is empty).
    df = pd.DataFrame(ai_danmaku_list, columns=['弹幕'])
    df.to_excel('ai_danmaku.xlsx', index=False)

    if not ai_danmaku_list:
        # WordCloud.generate() raises ValueError when given no words.
        print("没有AI相关的弹幕，跳过词云生成")
        return

    # Chinese text has no spaces between words, so segment it with jieba
    # first; otherwise each whole danmaku would count as a single "word".
    text = ' '.join(
        word
        for danmaku in ai_danmaku_list
        for word in jieba.cut(danmaku)
    )

    # NOTE(review): WordCloud's default font may lack CJK glyphs; pass
    # font_path=<a Chinese-capable font> if the words render as empty
    # boxes -- confirm on the target machine.
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
|
||||
|
||||
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|
Loading…
Reference in new issue