"""Scrape Bilibili danmaku (bullet comments) for a date range and save them.

For every day between ``begin`` and ``end`` (inclusive) the script requests
the danmaku payload for one video, extracts every run of Chinese characters
from the response text, and appends the results to a text file, one run per
line.
"""
import datetime
import os
import re
import sys

import requests

# Inclusive date range to scrape.
begin = datetime.date(2024, 8, 30)
end = datetime.date(2024, 9, 6)

# NOTE(review): the original request URL (the `wbi/web/seg.so` segment
# endpoint) did not contain the date at all, so every loop iteration fetched
# the same data and the result list was filled with duplicates. The per-date
# history endpoint below is the one that takes a `date` parameter -- confirm
# against the API documentation before relying on it.
HISTORY_URL = 'https://api.bilibili.com/x/v2/dm/web/history/seg.so'
# Same `oid` value the original URL used.
OID = '1515208599'

# NOTE(security): a session cookie is hard-coded below. It is kept only as a
# fallback so existing runs keep working; prefer supplying it through the
# BILIBILI_COOKIE environment variable and rotate the committed credential.
_DEFAULT_COOKIE = (
    'SESSDATA=cb9496bb%2C1741095634%2C97c03%2A91CjCtPq1vVd9lWddQl2H5M3rNPiY4lDcI_BVf7-ju2N1QhHNW1xAwJc1Eaw4YYPuuizISVjhfTUNlWTZzLTNGYlhzN0pzdHk5X1BwVlhobGtiX001bUpsdHdhZkthRDVOSy1nWkZ5aXcwOVk4WndHcXd1OENwbmhNZm5mNTNjeGEyb1QwMGJPTHRRIIEC; bili_jct=ab64ec210bbe14d6e4f44d0fd479c0b9;'
)

HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'cookie': os.environ.get('BILIBILI_COOKIE', _DEFAULT_COOKIE),
}

# Matches each maximal run of characters in the range U+4E00..U+9FA5.
CHINESE_RUN = re.compile(r'[\u4e00-\u9fa5]+')

OUTPUT_PATH = '../弹幕1.txt'

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10


def _fetch_day(session, day):
    """Return the Chinese-character runs found in the response for *day*.

    Raises ``requests.RequestException`` on connection problems, timeouts,
    or a non-2xx HTTP status.
    """
    response = session.get(
        HISTORY_URL,
        params={'type': 1, 'oid': OID, 'date': day.isoformat()},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # Decode as UTF-8 explicitly; the pattern only picks out Chinese text.
    response.encoding = 'utf-8'
    return CHINESE_RUN.findall(response.text)


def main():
    """Scrape every day in the configured range and append results to disk."""
    # content_list holds every danmaku collected so far.
    content_list = []

    # One session reuses the connection and carries the headers for all days.
    with requests.Session() as session:
        session.headers.update(HEADERS)
        for offset in range((end - begin).days + 1):
            day = begin + datetime.timedelta(days=offset)
            try:
                temp_list = _fetch_day(session, day)
            except requests.RequestException as exc:
                # Skip the failed day so the data already collected is kept.
                print(f'request for {day} failed: {exc}', file=sys.stderr)
                continue
            content_list.extend(temp_list)
            print("爬取", day, "日弹幕,获取到:", len(temp_list), "条弹幕,已经增加到总列表。总列表共有", len(content_list), "条弹幕。")

    print(content_list)

    # Save the data. Every entry is newline-terminated so that repeated runs
    # (the file is opened in append mode) never glue two entries together.
    with open(OUTPUT_PATH, mode='a', encoding='utf-8') as f:
        f.writelines(f'{line}\n' for line in content_list)
    print("保存完成")


if __name__ == '__main__':
    main()