You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
4.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import html
import re
import time

import requests
# HTTP headers passed to every requests.get() call in this script (search
# pages, the pagelist API and the danmu XML endpoint).
# NOTE(review): the Cookie value embeds what look like account session tokens
# (SESSDATA, bili_jct, DedeUserID). Keeping them in source exposes the account,
# and they presumably expire (see bili_ticket_expires) — consider loading the
# cookie from an environment variable or an untracked config file, and rotate
# these credentials.
headers = {
# Browser-like User-Agent string (Chrome/Edge 128 on Windows).
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
# Cookie string, presumably copied from a logged-in browser session — TODO confirm it is still valid.
"Cookie":"buvid4=D16319A3-9CCF-7099-A512-0A135AD5785B47915-023052812-b1nz50QSFWB5YJLoC8St7Q%3D%3D; buvid_fp_plain=undefined; enable_web_push=DISABLE; header_theme_version=CLOSE; DedeUserID=5493983; DedeUserID__ckMd5=5158623efb606499; CURRENT_FNVAL=16; blackside_state=0; CURRENT_BLACKGAP=0; FEED_LIVE_VERSION=V_WATCHLATER_PIP_WINDOW3; _uuid=65D18E104-D381-1D7B-4191-BB14FBF71016F05662infoc; CURRENT_QUALITY=116; buvid3=5C4356FC-CED4-B797-D0E9-0D465C57237867779infoc; b_nut=1723395567; rpdid=|(u)~Ju~J|kl0J'u~kJJml~u); LIVE_BUVID=AUTO8217257278253243; home_feed_column=5; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY2NzExOTIsImlhdCI6MTcyNjQxMTkzMiwicGx0IjotMX0.HL4benw1AXqZyCPuhdJ0gWt_ntTJ11SaspK8rVkM4Nw; bili_ticket_expires=1726671132; SESSDATA=62b3d1f9%2C1741976682%2C01ac3%2A92CjBKLlT2PXjk2hmj_l4GXirVUtBEUJu-ii7ymQtB_b7urFREHxDukjCy1zRg6t4Fq_USVld0eW1pNGtoZ2prQWRCRUg3aTZZTzRUUE1wc1M2VkpGZDhTZzY3OHhwZ3BGTEQ4am9tOTJLMlUtZUduS2lCMjhpZS1HQU1laGRReUpoQ3VwdWdYS3FBIIEC; bili_jct=f1753715b455ce2db99ec05354cf2c00; fingerprint=262de2a503b05140a20bda05841f9755; buvid_fp=262de2a503b05140a20bda05841f9755; PVID=2; browser_resolution=2040-1026; bp_t_offset_5493983=978040246019031040; b_lsid=62D18E35_19203235E8B; sid=86fq5dr0",
# Referer pointing at the search page for the same URL-encoded keyword that
# get_videos_cid() uses in its search URL.
"Referer":"https://search.bilibili.com/all?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=3"
}
def has_duplicates(lst):
    """Return True if at least one element of ``lst`` occurs more than once.

    Elements must be hashable because the check builds a set from ``lst``.
    """
    distinct_items = set(lst)
    # A set can never be larger than its source, so "smaller" means duplicates.
    return len(distinct_items) < len(lst)
def get_videos_cid(max_videos=300, *, max_failures=5):
    """Collect up to ``max_videos`` unique video cids from Bilibili search results.

    The function pages through the search results for the hard-coded keyword,
    extracts BV ids from each page with a regex, and then queries the player
    ``pagelist`` API for every BV id to obtain its cid(s). A video that has
    several parts contributes several cids.

    Args:
        max_videos: Upper bound on the number of BV ids collected and on the
            number of cids returned.
        max_failures: Number of consecutive unproductive search requests
            (network error, non-200 status, or a page that yields no new BV id)
            tolerated before paging stops. This bounds the loop when the site
            keeps rejecting requests or the search results are exhausted.

    Returns:
        A list of unique cid strings in the order they were discovered, at
        most ``max_videos`` long.
    """
    bv_id_re = re.compile(r'BV\w+')
    c_id_re = re.compile(r'"cid"\s*:\s*(\d+)')
    request_timeout = 10  # seconds; prevents a stalled connection from hanging forever

    # ---- Phase 1: gather BV ids from the search result pages ----
    bv_list = []
    seen_bv_ids = set()
    page = 1
    failures = 0
    while len(bv_list) < max_videos and failures < max_failures:
        url = f"https://search.bilibili.com/all?vt=45910958&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.788&search_source=3&page={page}"
        # Pause between search requests to reduce the chance of being blocked.
        time.sleep(2)
        try:
            resp = requests.get(url, headers=headers, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f"{page}页视频请求失败,异常: {exc}")
            failures += 1
            continue
        resp.encoding = "utf-8"
        if resp.status_code != 200:
            print(f"{page}页视频请求失败,状态码: {resp.status_code}")
            # The same page is retried, but only a bounded number of times.
            failures += 1
            continue

        page += 1
        found_new = False
        for bv_id in bv_id_re.findall(resp.text):
            if len(bv_list) >= max_videos:
                break
            if bv_id in seen_bv_ids:
                continue
            seen_bv_ids.add(bv_id)
            bv_list.append(bv_id)
            found_new = True
        # A page without any new BV id counts as unproductive so that running
        # out of search results cannot spin the loop forever.
        failures = 0 if found_new else failures + 1

    # ---- Phase 2: resolve each BV id to its cid(s) ----
    c_list = []
    seen_c_ids = set()
    for bv_id in bv_list:
        if len(c_list) >= max_videos:
            # Cap reached: no point in requesting the remaining videos.
            break
        try:
            resp1 = requests.get(
                f"https://api.bilibili.com/x/player/pagelist?bvid={bv_id}",
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            print(f"BV号为{bv_id}的视频获取cid失败,异常: {exc}")
            continue
        resp1.encoding = "utf-8"
        if resp1.status_code != 200:
            # Best effort: a video whose page list cannot be fetched is skipped.
            continue
        # A multi-part video lists one cid per part, so every match is visited.
        for c_id in c_id_re.findall(resp1.text):
            if len(c_list) >= max_videos:
                break
            if c_id not in seen_c_ids:
                seen_c_ids.add(c_id)
                c_list.append(c_id)

    print(f"最终cid列表: {c_list}")
    print(f"最终cid数量: {len(c_list)}")
    # Sanity check of the de-duplication above.
    if has_duplicates(c_list):
        print("列表中有重复cid")
    else:
        print("列表中没有重复cid")
    return c_list
def get_danmu(c_list):
    """Download the danmu (bullet comments) of every video part in ``c_list``.

    For each cid the danmu XML is fetched from ``comment.bilibili.com`` and the
    text content of every ``<d ...>...</d>`` element is extracted.

    Args:
        c_list: Iterable of cids (as produced by ``get_videos_cid``).

    Returns:
        A flat list with the danmu texts of all cids, in request order. XML
        character entities (``&amp;``, ``&lt;``, ``&#...;``) are decoded so the
        texts read as the users wrote them. Cids whose request fails are
        reported on stdout and skipped.
    """
    danmu_re = re.compile(r'<d[^>]*>(.*?)</d>')
    danmu_list = []
    for c_ids in c_list:
        url = f"https://comment.bilibili.com/{c_ids}.xml"
        try:
            # The timeout keeps one stalled download from blocking the whole run.
            resp = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print(f"cid为{c_ids}的视频获取弹幕失败,异常: {exc}")
            continue
        resp.encoding = "utf-8"
        if resp.status_code != 200:
            print(f"cid为{c_ids}的视频获取弹幕失败,状态码: {resp.status_code}")
            continue
        # The regex captures the raw, still XML-escaped element text; unescape
        # it so "&amp;" is stored as "&" and so on.
        danmu_list.extend(
            html.unescape(raw_text) for raw_text in danmu_re.findall(resp.text)
        )
        print(f"cid为{c_ids}的视频获取弹幕成功!")
    return danmu_list
# Script body: collect the cids, download their danmu, then persist them.
c_list = get_videos_cid()
danmu_list = get_danmu(c_list)

# Write the danmu to danmu.txt as UTF-8, one entry per line.
with open('danmu.txt', 'w', encoding="utf-8") as out_file:
    out_file.writelines(f"{dm}\n" for dm in danmu_list)