You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
# bullet screen为弹幕
|
|
|
|
|
"""
|
|
|
|
|
说明:爬取b站弹幕的模块
|
|
|
|
|
"""
|
|
|
|
|
import re
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
from bv_maker import get_bv, BV_NUM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"https://api.bilibili.com/x/v1/dm/list.so?oid=" # 获取b站弹幕api,需要提供bv号获取cid
|
|
|
|
|
"https://comment.bilibili.com/{cid}.xml" # 提供bv号获取cid
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
header = {
|
|
|
|
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
|
|
|
|
|
"referer": "https://www.bilibili.com/"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# print(bv_list)
|
|
|
|
|
def get_bullet_screen(bv_list):
|
|
|
|
|
bullet_screen_list = [] #存放弹幕
|
|
|
|
|
for bv in bv_list:
|
|
|
|
|
cid_url = f"https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp"
|
|
|
|
|
resp1 = requests.get(cid_url, headers=header)
|
|
|
|
|
# print(resp1.json())
|
|
|
|
|
dict = resp1.json()
|
|
|
|
|
cid = dict["data"][0]["cid"] #获取字典中的cid
|
|
|
|
|
api_url = f"https://comment.bilibili.com/{cid}.xml"
|
|
|
|
|
resp2 = requests.get(api_url, headers=header)
|
|
|
|
|
resp2.encoding = "utf-8"
|
|
|
|
|
# print(resp2.text)
|
|
|
|
|
xml = BeautifulSoup(resp2.text, "xml")
|
|
|
|
|
ds = xml.find_all("d")
|
|
|
|
|
for d in ds:
|
|
|
|
|
bullet_screen_list.append(d.text)
|
|
|
|
|
print(f"爬取{bv}的弹幕成功")
|
|
|
|
|
return bullet_screen_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
bv_list = get_bv(BV_NUM)
|
|
|
|
|
print(get_bullet_screen(bv_list))
|