You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
1.5 KiB

import requests
import re
import datetime
# Crawl Bilibili danmaku (bullet comments) for one video over an inclusive
# date range, keep only the Chinese text fragments, and append them to a
# text file, one fragment per line.

# content_list holds every danmaku fragment collected across all days.
content_list = []
# Inclusive date range of the danmaku to crawl.
begin = datetime.date(2024, 8, 30)
end = datetime.date(2024, 9, 6)

# Video identifier sent as the `oid` query parameter.
# NOTE(review): presumably the video's cid -- confirm against the target video.
OID = 1515208599

# Per-day historical danmaku endpoint. The request must carry the day being
# crawled: a URL without a date returns the same payload on every iteration,
# so the collected list would just contain the same fragments repeated.
# NOTE(review): endpoint and parameter names follow the community API docs,
# which also state that it needs a logged-in SESSDATA cookie -- confirm.
HISTORY_URL = 'https://api.bilibili.com/x/v2/dm/web/history/seg.so?type=1&oid={oid}&date={day}'

# Request headers are identical for every day, so they are built once.
# NOTE(review): the cookie below is a hard-coded session credential; it should
# be rotated and loaded from the environment or a secrets store instead.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'cookie': 'SESSDATA=cb9496bb%2C1741095634%2C97c03%2A91CjCtPq1vVd9lWddQl2H5M3rNPiY4lDcI_BVf7-ju2N1QhHNW1xAwJc1Eaw4YYPuuizISVjhfTUNlWTZzLTNGYlhzN0pzdHk5X1BwVlhobGtiX001bUpsdHdhZkthRDVOSy1nWkZ5aXcwOVk4WndHcXd1OENwbmhNZm5mNTNjeGEyb1QwMGJPTHRRIIEC; bili_jct=ab64ec210bbe14d6e4f44d0fd479c0b9;',
}

# Seconds to wait for the server before giving up on one request; without a
# timeout a stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 10

# Matches maximal runs of characters in the range U+4E00..U+9FA5. Anything
# else (digits, Latin letters, punctuation, binary framing) acts as a
# separator and is discarded.
CHINESE_RUN = re.compile('[\u4e00-\u9fa5]+')

# Output file; opened in append mode so repeated runs accumulate.
OUTPUT_PATH = '../弹幕1.txt'


def iter_days(first, last):
    """Yield every date from *first* to *last*, both ends inclusive.

    Yields nothing when *last* is earlier than *first*.
    """
    for offset in range((last - first).days + 1):
        yield first + datetime.timedelta(days=offset)


def build_history_url(day, oid=OID):
    """Return the historical-danmaku URL for *day* (a ``datetime.date``)."""
    return HISTORY_URL.format(oid=oid, day=day.isoformat())


def extract_chinese(text):
    """Return all runs of Chinese characters found in *text*, in order."""
    return CHINESE_RUN.findall(text)


def serialize(fragments):
    """Return *fragments* as text with one newline-terminated line each.

    Every line, including the last, ends with a newline so that appending the
    result to an existing file never glues two fragments onto one line.
    """
    return ''.join(f'{fragment}\n' for fragment in fragments)


def main():
    """Crawl each day in ``begin..end``, print progress, and save the result."""
    for day in iter_days(begin, end):
        try:
            response = requests.get(
                url=build_history_url(day),
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            # Fail on HTTP errors rather than scanning an error page for text.
            response.raise_for_status()
        except requests.RequestException as err:
            # One bad day should not discard what was already collected.
            print("爬取", day, "日弹幕失败,已跳过:", err)
            continue
        # The body is decoded as UTF-8 before the fragments are extracted.
        response.encoding = 'utf-8'
        temp_list = extract_chinese(response.text)
        content_list.extend(temp_list)
        print("爬取", day, "日弹幕,获取到:", len(temp_list), "条弹幕,已经增加到总列表。总列表共有", len(content_list),
              "条弹幕。")
    print(content_list)
    # Save the data.
    with open(OUTPUT_PATH, mode='a', encoding='utf-8') as f:
        f.write(serialize(content_list))
    print("保存完成")


if __name__ == "__main__":
    main()