You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import html  # unescape XML entities in the extracted danmaku text
import re  # match danmaku entries in the downloaded document

import requests  # fetch the page source over HTTP
|
|
|
|
|
|
|
|
|
|
# Endpoint the danmaku document is downloaded from.
# NOTE(review): the hard-coded `oid` query parameter presumably selects the
# video whose danmaku are fetched -- confirm before reusing for other videos.
url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=1663877514'

# Send a desktop-browser User-Agent so the request looks like it comes from
# a regular browser.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
}

# Seconds to wait for the server; without a timeout `requests` may block
# forever on an unresponsive connection.
REQUEST_TIMEOUT = 10

# Captures the text between each `<d p="...">` opening tag and its `</d>`.
DANMAKU_PATTERN = re.compile(r'<d p=".*?">(.*?)</d>')


def fetch_danmaku_xml(source_url=url, request_headers=headers,
                      timeout=REQUEST_TIMEOUT):
    """Download the danmaku document and return its decoded text.

    Args:
        source_url: Address to send the GET request to.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as a string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url=source_url, headers=request_headers,
                            timeout=timeout)
    # Fail loudly instead of running the regex over an error page.
    response.raise_for_status()
    # Decode using the encoding detected from the body itself.
    response.encoding = response.apparent_encoding
    return response.text


def extract_danmaku(xml_text):
    """Return the text of every `<d p="...">...</d>` entry in *xml_text*.

    Entries are returned in document order. XML character entities such as
    `&amp;` and `&lt;` are converted back to the characters they stand for.
    """
    return [html.unescape(text) for text in DANMAKU_PATTERN.findall(xml_text)]


def save_danmaku(danmaku, path='弹幕.txt'):
    """Append each danmaku to *path*, one per line, and echo it to stdout.

    Nothing is written (and no file is created) when *danmaku* is empty.
    """
    if not danmaku:
        return
    # Open the file once for the whole batch rather than once per line.
    with open(path, mode='a', encoding='utf-8') as f:
        for text in danmaku:
            f.write(text)
            f.write('\n')
            print(text)


def main():
    """Fetch the danmaku document, print it, then extract and save the danmaku."""
    xml_text = fetch_danmaku_xml()
    print(xml_text)  # show the raw downloaded data
    data_list = extract_danmaku(xml_text)
    save_danmaku(data_list)


if __name__ == '__main__':
    main()
|