"""Collect Bilibili danmaku (bullet comments) for videos found via a keyword search.

The script walks ten pages of search results, resolves every video on those
pages to its ``cid``, downloads the danmaku for that ``cid`` and appends the
comment texts to ``总弹幕.txt``.
"""
import re

import requests

# Browser-like request headers sent with every request in this script.
HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}

# Seconds to wait on a single HTTP request before giving up, so that one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10

# File that every extracted danmaku line is appended to.
OUTPUT_FILE = '总弹幕.txt'

# First page of the search results; later pages append "&page=N" to this URL.
SEARCH_URL = "https://search.bilibili.com/all?vt=93020172&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=5"


# Part 1: download the danmaku of one video.
def huoqudanmu(cid):
    """Download the danmaku for ``cid`` and append them to ``总弹幕.txt``.

    Args:
        cid: Identifier interpolated into the ``oid`` query parameter of the
            danmaku endpoint.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
    resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    resp.encoding = "utf-8"  # decode the Chinese comment text as UTF-8
    # NOTE(review): the endpoint is expected to return XML in which each
    # comment is a `<d p="...">text</d>` element (see the Bilibili danmaku
    # API notes) -- confirm against a live response. The previous pattern,
    # '(.*?)', could only ever match the empty string.
    context = re.findall('<d p=".*?">(.*?)</d>', resp.text)
    print(context)
    if not context:
        # Nothing matched: leave the output file untouched.
        return
    # Open the output once per call instead of once per comment.
    with open(OUTPUT_FILE, mode='a', encoding='utf-8') as f:
        f.writelines(f'{text}\n' for text in context)


def _fetch_text(url):
    """Return the response body of ``url`` as text, using the shared headers."""
    return requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT).text


# Part 2: find the videos whose danmaku should be collected.
def main():
    """Crawl search pages 1-10 and collect the danmaku of every video found."""
    # Ten search pages; the original author notes 30 videos per page.
    for page0 in range(1, 11):
        url = SEARCH_URL if page0 == 1 else f"{SEARCH_URL}&page={page0}"
        try:
            search_html = _fetch_text(url)
        except requests.RequestException as exc:
            # A failed page should not abort the remaining pages.
            print(f"Skipping search page {page0}: {exc}")
            continue
        # Pull each video's bvid out of the search page source.
        for bvid1 in re.findall('bvid:"(.*?)",title:', search_html):
            video_url = f"https://www.bilibili.com/video/{bvid1.strip()}/?spm_id_from=333.337.search-card.all.click&vd_source=516714ff716c382225c801afa2c87d8d"
            try:
                video_html = _fetch_text(video_url)
                # The pattern deliberately contains no extra spaces; the
                # original author notes it does not match otherwise.
                oid = re.findall('"embedPlayer":{"p":.*?,"aid":.*?,"bvid":".*?","cid":(.*?),', video_html)
                for oid1 in oid:
                    huoqudanmu(oid1)
            except requests.RequestException as exc:
                # A failed video should not abort the remaining videos.
                print(f"Skipping video {bvid1}: {exc}")


if __name__ == "__main__":
    main()