parent
a6b77c16d3
commit
07d18870a2
@ -0,0 +1,54 @@
|
||||
import requests
|
||||
import re
|
||||
# Part 1: define the huoqudanmu function, which fetches the danmaku (bullet comments)
|
||||
def huoqudanmu(cid):
    """Fetch the danmaku (bullet comments) for one cid and append them to a file.

    Requests the Bilibili ``dm/list.so`` endpoint with ``oid=<cid>``, extracts
    the text found between each ``<d p=...>`` and ``</d>`` pair, prints the
    resulting list, and appends every entry on its own line to ``总弹幕.txt``
    (UTF-8). When nothing is matched the output file is left untouched.

    Args:
        cid: Value interpolated into the ``oid`` query parameter of the
            danmaku endpoint.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'  # danmaku endpoint for this cid
    request_headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }
    # A timeout keeps a single stalled connection from hanging the whole crawl.
    resp = requests.get(url, headers=request_headers, timeout=10)
    resp.encoding = "utf-8"  # decode the body as UTF-8 so Chinese text is read correctly
    danmaku_texts = re.findall(r'<d p=.*?>(.*?)</d>', resp.text)  # text of every danmaku
    print(danmaku_texts)
    if not danmaku_texts:
        # Nothing matched: do not create or touch the output file.
        return
    # Open the output file once per call instead of once per danmaku.
    with open('总弹幕.txt', mode='a', encoding='utf-8') as f:
        f.writelines(f'{text}\n' for text in danmaku_texts)
|
||||
|
||||
# Part 2: collect the bvid of every video whose danmaku should be fetched
headers0 = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}  # request headers; also read by the later video-page requests

# Search URL for the first result page; later pages append "&page=<n>".
search_url = "https://search.bilibili.com/all?vt=93020172&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=5"

seen_bvids = set()  # bvids already written, so duplicates are skipped in O(1)
# Opening with 'w' (re)creates an empty bvid.txt; the file is opened once for
# the whole crawl instead of being reopened and re-read for every bvid.
with open('bvid.txt', mode='w', encoding='utf-8') as bvid_file:
    for page0 in range(1, 11):  # walk the first ten search result pages
        print(page0)
        if page0 == 1:
            page_url = search_url
        else:
            page_url = f"{search_url}&page={page0}"
        # A timeout keeps a single stalled connection from hanging the crawl.
        page_text = requests.get(page_url, headers=headers0, timeout=10).text
        for found_bvid in re.findall(r'bvid:"(.*?)",title:', page_text):
            # Empty captures are skipped, and each bvid is stored only once.
            if found_bvid and found_bvid not in seen_bvids:
                seen_bvids.add(found_bvid)
                bvid_file.write(found_bvid + '\n')
|
||||
# Part 3: resolve each video's cid (passed as the oid to huoqudanmu) and fetch its danmaku
with open('bvid.txt', mode='r', encoding='utf-8') as f1:  # closed automatically after reading
    bvid_text = f1.read().splitlines()  # one bvid per line

# Only the first 300 videos are processed.
for bvid0 in bvid_text[:300]:
    url = f"https://www.bilibili.com/video/{bvid0}/?spm_id_from=333.337.search-card.all.click&vd_source=516714ff716c382225c801afa2c87d8d"
    # A timeout keeps a single stalled connection from hanging the crawl.
    res0 = requests.get(url, headers=headers0, timeout=10)  # fetch the video page
    Text0 = res0.text
    # The pattern deliberately contains no extra spaces; with them it would not match.
    oid = re.findall(r'"embedPlayer":{"p":.*?,"aid":.*?,"bvid":".*?","cid":(.*?),', Text0)
    for oid1 in oid:
        huoqudanmu(oid1)  # download and store the danmaku for this cid
|
||||
|
||||
|
||||
|
Loading…
Reference in new issue