"""Scrape Bilibili danmaku for a range of dates and append them to a text file.

Originally added as ``b站弹幕爬取.py``. For every date between ``BEGIN`` and
``END`` (inclusive) the script requests that day's danmaku for the video part
``OID``, extracts the runs of Chinese characters from the response, and finally
appends everything that was collected to ``OUTPUT_PATH``, one entry per line.

The login cookie is read from the ``BILIBILI_COOKIE`` environment variable so
that no session credential is stored in the source tree.
"""
import datetime
import os
import re

# First and last day (both inclusive) whose danmaku are fetched.
BEGIN = datetime.date(2024, 8, 30)
END = datetime.date(2024, 9, 6)

# ``oid`` query parameter identifying the video part to fetch danmaku for.
OID = 1515208599

# Dated danmaku endpoint; it takes the day as a ``date=YYYY-MM-DD`` parameter.
# NOTE(review): this replaces the signed ``wbi/web/seg.so`` URL, which carried
# no date and therefore returned the same segment on every loop iteration.
# Confirm the endpoint against the Bilibili API notes before relying on it.
HISTORY_URL = 'https://api.bilibili.com/x/v2/dm/web/history/seg.so'

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
)

# Name of the environment variable holding the full ``cookie`` header value
# (for example ``SESSDATA=...; bili_jct=...;``).
COOKIE_ENV_VAR = 'BILIBILI_COOKIE'

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

OUTPUT_PATH = '../弹幕1.txt'

# One or more consecutive characters in the range U+4E00..U+9FA5.
_CHINESE_RUN = re.compile(r'[\u4e00-\u9fa5]+')


def iter_days(begin, end):
    """Yield every date from *begin* to *end*, both inclusive.

    Yields nothing when *end* is earlier than *begin*.
    """
    for offset in range((end - begin).days + 1):
        yield begin + datetime.timedelta(days=offset)


def build_params(day):
    """Return the query parameters that select the danmaku of *day*."""
    return {'type': 1, 'oid': OID, 'date': day.isoformat()}


def build_headers():
    """Return the request headers, taking the cookie from the environment.

    Raises:
        RuntimeError: if ``BILIBILI_COOKIE`` is unset or empty.
    """
    cookie = os.environ.get(COOKIE_ENV_VAR)
    if not cookie:
        raise RuntimeError(
            f'Set the {COOKIE_ENV_VAR} environment variable to your '
            'Bilibili cookie header before running this script.'
        )
    return {'user-agent': USER_AGENT, 'cookie': cookie}


def extract_chinese(text):
    """Return every run of Chinese characters found in *text*, in order.

    NOTE(review): the response body is presumably not plain text, so this only
    recovers the Chinese portions of each comment; digits, Latin letters and
    punctuation split a comment into several entries or are dropped.
    """
    return _CHINESE_RUN.findall(text)


def fetch_day(session, day):
    """Download the danmaku of *day* through *session* and return them as a list.

    Raises whatever ``session.get`` raises on network failure or timeout, and
    an HTTP error when the server answers with a 4xx/5xx status.
    """
    response = session.get(
        HISTORY_URL, params=build_params(day), timeout=REQUEST_TIMEOUT
    )
    # Fail loudly instead of regex-scanning an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return extract_chinese(response.text)


def format_output(content_list):
    """Join *content_list* into newline-terminated text ('' when empty).

    The trailing newline matters because the output file is opened in append
    mode: without it the first entry of the next run would be glued onto the
    last line written by this run.
    """
    if not content_list:
        return ''
    return '\n'.join(content_list) + '\n'


def save(content_list, path=OUTPUT_PATH):
    """Append *content_list* to the UTF-8 text file at *path*."""
    with open(path, mode='a', encoding='utf-8') as f:
        f.write(format_output(content_list))


def main():
    """Fetch the danmaku of every configured day, print them and save them."""
    # Imported here so the pure helpers above stay importable without the
    # third-party dependency installed.
    import requests

    headers = build_headers()
    # content_list accumulates the danmaku of all days.
    content_list = []
    with requests.Session() as session:
        session.headers.update(headers)
        for day in iter_days(BEGIN, END):
            temp_list = fetch_day(session, day)
            content_list.extend(temp_list)
            print("爬取", day, "日弹幕,获取到:", len(temp_list), "条弹幕,已经增加到总列表。总列表共有", len(content_list),
                  "条弹幕。")
    print(content_list)
    # Persist the collected data.
    save(content_list)
    print("保存完成")


if __name__ == '__main__':
    main()