parent
225621dd68
commit
4ac3b12815
@ -0,0 +1,26 @@
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import re
|
||||
import json
|
||||
|
||||
import html  # stdlib; used below to decode XML entities in the captured text

# Re-wrap stdout as UTF-8 so that non-ASCII text is not garbled when printed.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# URL of the endpoint to scrape (put the target URL between the quotes).
url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=1634922221'

# Send a browser-like User-Agent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
}

# requests has no default timeout; without one a stalled connection would
# block the script forever.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx response instead of silently parsing an error page
# and writing an empty result.
response.raise_for_status()
# Force UTF-8 decoding of the body rather than relying on requests' guess.
response.encoding = 'utf-8'

# Capture the text inside every <d p="...">...</d> element; the part to be
# extracted is written as the non-greedy group (.*?).
content_list = re.findall(r'<d p=".*?">(.*?)</d>', response.text)
# The regex returns the raw markup text, so entities such as &amp; / &lt; are
# still escaped; decode them to get the text as it was originally written.
content_list = [html.unescape(text) for text in content_list]
# Serialize as a pretty-printed JSON array, keeping non-ASCII characters
# readable instead of \uXXXX escapes. Note: content_list is a str from here on.
content_list = json.dumps(content_list, ensure_ascii=False, indent=2)

# Write the scraped data to a text file.
with open('output.txt', 'w', encoding='utf-8') as file:
    file.write(content_list)
|
Loading…
Reference in new issue