'B站评论爬取'

4 years ago · 316d47f22b
parent 43a1d5152d
commit 316d47f22b
1 changed files with 59 additions and 0 deletions
--- a/Crawl/BiliBili/demo.py
+++ b/Crawl/BiliBili/demo.py
@@ -0,0 +1,59 @@
+import requests
+from bs4 import BeautifulSoup
+import json
+import math
+
+"""
+    Simple Bilibili comment scraper
+"""
+# `next` controls which page of comments is requested; next=0 is the first page.
+# The `{}` placeholder after `oid=` is filled in with str.format() by the
+# __main__ block, which passes the value returned by getAid().
+url = 'https://api.bilibili.com/x/v2/reply/main?mode=3&next=0&oid={}&plat=1&seek_rpid=&type=1'
+
+# HTTP headers sent with the comment request issued in the __main__ block.
+# NOTE(review): the cookie below carries SESSDATA and bili_jct fields, which
+# look like the credentials of a logged-in session — confirm, and consider
+# loading the cookie from the environment instead of committing it.
+headers = {
+    'accept': 'application/json, text/plain, */*',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.42',
+    'cookie': "buvid3=1BE9BF5E-174E-E36F-638F-A83EBFE3923B73812infoc; buvid_fp=1BE9BF5E-174E-E36F-638F-A83EBFE3923B73812infoc; blackside_state=0; rpdid=|(u)Rm|uR|)l0J'uYRukmJ~RJ; LIVE_BUVID=AUTO1016419942077939; DedeUserID=1128547717; DedeUserID__ckMd5=6e2c06fc4d4a080a; b_ut=5; fingerprint_s=a1a842bc7f2ecb138fcd0ee9ebd6c264; i-wanna-go-back=-1; nostalgia_conf=-1; is-2022-channel=1; fingerprint3=da72afc7e244beb108e5842d6c8c8ccb; go_old_video=-1; i-wanna-go-feeds=-1; CURRENT_BLACKGAP=0; CURRENT_QUALITY=80; buvid4=null; fingerprint=aae73dad5de59138dcd2680c4d451960; buvid_fp_plain=undefined; b_nut=1666187479; CURRENT_FNVAL=4048; bp_video_offset_1128547717=719454129464803300; SESSDATA=4df18d94%2C1683255514%2C2be5f%2Ab1; bili_jct=a2c88699d75f8024401a4deae6d82e76; sid=8vjk7l6x",
+    'Connection': 'keep-alive',
+	'TE': 'Trailers'    
+}
+
+# Not populated anywhere in this file; presumably meant to collect the videos
+# found by getVideo() — TODO confirm.
+videoList = []
+
+def getVideo():
+		search = input("输入搜索关键字：")
+		pass
+
+def getReplyPageNum(oid):
+
+    url="https://api.bilibili.com/x/v2/reply?&jsonp=jsonp&pn=1"+"&type=1&oid="+str(oid)+"&sort=2"
+
+    respond=requests.get(url)
+
+    res_dirct=json.loads(respond.text)
+
+    replyNum=int(res_dirct['data']['page']['acount'])
+
+    replyPageCount=int(res_dirct['data']['page']['count'])
+
+    replyPageSize=int(res_dirct['data']['page']['size'])
+
+    replyPageNum=math.ceil(replyPageCount/replyPageSize)
+
+    return  replyPageNum
+
+def getAid(bvid):
+    url = "http://api.bilibili.com/x/web-interface/view?bvid="+str(bvid)
+    response = requests.get(url)
+    dirt=json.loads(response.text)
+    aid=dirt['data']['aid']
+    return aid
+
+if __name__ == "__main__":
+    url = url.format(getAid('BV1AW41117bT'))
+    # print(getReplyPageNum(getAid('BV1AW41117bT')))
+    # print(url)
+    resp = requests.get(url, headers=headers)
+    # print(resp.json()['data']['replies'][0]['content']['message'])
+    # print(resp.text)
+    # getVideo()
+    # print(videoList)