parent
cd6f526073
commit
e617857dc7
@ -0,0 +1,59 @@
|
||||
import html
import json
import os
import re

import requests
|
||||
|
||||
# Browser-like request headers shared by every Bilibili API call in this file.
#
# SECURITY: the login cookie is deliberately NOT stored in the source.  A
# cookie string carries session credentials (SESSDATA, bili_jct), and any
# value that was ever committed to a repository should be treated as leaked
# and revoked by logging out of that session.  Supply the cookie at runtime
# through the BILIBILI_COOKIE environment variable instead; when the variable
# is unset or empty, no cookie header is sent at all.
# NOTE(review): the endpoints called in this file are assumed to answer
# without a login cookie -- confirm before relying on anonymous access.
headers = {
    'authority': 'api.bilibili.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'origin': 'https://www.bilibili.com',
    'referer': 'https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
}

# Attach the cookie only when one was actually provided, so an empty
# "cookie:" header is never sent.
if os.environ.get('BILIBILI_COOKIE'):
    headers['cookie'] = os.environ['BILIBILI_COOKIE']
|
||||
|
||||
|
||||
# Read the BV ids from a file
|
||||
def load_bv_numbers(file_path):
    """Read BV ids from a text file, one id per line.

    Leading and trailing whitespace is stripped from every line, and lines
    that are empty after stripping are skipped so that blank or trailing
    lines in the file do not turn into empty ids.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Returns:
        A list of the non-empty, stripped lines, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly instead of materialising
        # readlines(); the list is fully built before the file is closed.
        stripped_lines = (line.strip() for line in f)
        return [bv for bv in stripped_lines if bv]
|
||||
|
||||
# Look up the CID of each video
|
||||
def get_video_cids(bv_numbers, timeout=10):
    """Look up the CID of the first page of each video.

    For every BV id, the ``x/player/pagelist`` endpoint is queried with the
    module-level ``headers`` and the ``cid`` of the first entry in the
    response's ``data`` list is collected.  Ids whose response carries no
    page list are reported on stdout and skipped instead of aborting the
    whole run.

    Args:
        bv_numbers: Iterable of BV id strings.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of CIDs, in input order, for the ids that could be resolved.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    video_cids = []
    for bvid in bv_numbers:
        # Passing the query via ``params`` lets requests URL-encode the id
        # instead of splicing raw text into the URL.
        response = requests.get(
            url='https://api.bilibili.com/x/player/pagelist',
            params={'bvid': bvid, 'jsonp': 'jsonp'},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        pages = json.loads(response.text).get('data')
        if not pages:
            # A missing or empty "data" field means there is nothing to
            # index into; skip this id rather than fail on pages[0].
            print(f'Skipping {bvid}: no page list in API response')
            continue
        # Only the first page of a video is used.
        video_cids.append(pages[0]['cid'])
    return video_cids
|
||||
|
||||
# Crawl the danmaku (bullet comments) of each video
|
||||
def crawl_danmu(cids, output_file, timeout=10):
    """Download the danmaku of each CID and append them to a text file.

    For every CID, the ``x/v1/dm/list.so`` endpoint is fetched with the
    module-level ``headers``, the text of each ``<d p="...">...</d>`` element
    is extracted, and the texts are appended to ``output_file`` one per line.

    Args:
        cids: Iterable of CIDs to fetch.
        output_file: Path of the UTF-8 text file to append to; it is opened
            once per CID, so nothing is created when ``cids`` is empty.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        OSError: If the output file cannot be opened for appending.
    """
    # Compiled once outside the loop; the single capture group is the text
    # between the opening <d ...> tag and its closing </d>.
    danmu_pattern = re.compile(r'<d p=".*?">(.*?)</d>')
    for cid in cids:
        response = requests.get(
            url=f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}',
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        # Let requests guess the charset from the body before decoding.
        response.encoding = response.apparent_encoding
        # The regex pulls raw markup text, so entity references such as
        # &amp; or &lt; must be decoded to recover what the viewer typed.
        comments = [
            html.unescape(raw_text)
            for raw_text in danmu_pattern.findall(response.text)
        ]
        with open(output_file, mode='a', encoding='utf-8') as f:
            f.writelines(f'{comment}\n' for comment in comments)
|
||||
|
||||
if __name__ == '__main__':
    # Input file holding the BV ids, one per line.
    bv_file_path = r'E:\Crawler\bv_numbers.txt'
    # Output file the danmaku are appended to.
    output_file = r'E:\Crawler\danmu.txt'

    # Pipeline: BV ids -> CIDs -> danmaku written to the output file.
    crawl_danmu(get_video_cids(load_bv_numbers(bv_file_path)), output_file)

    print("弹幕数据爬取完成")
|
Loading…
Reference in new issue