parent
7e11ca6978
commit
d1c266b777
@ -0,0 +1,62 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import jieba
|
||||
import wordcloud
|
||||
from collections import Counter
|
||||
import imageio
|
||||
from openpyxl import Workbook,load_workbook
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
from pyinstrument import Profiler
|
||||
# Profile the whole run; the matching stop/print calls are at the end of the script.
t = Profiler()
t.start()
|
||||
# Request headers sent with the search and video page requests.
headers = {
    # The browser identification string must be stored under the "User-Agent"
    # key; a bare string inside a dict literal is a syntax error.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.5211 SLBChan/105",
    # A URL must not contain spaces, otherwise the header carries a malformed value.
    "Referer": "https://www.bilibili.com/",
    "Origin": "https://search.bilibili.com",
}
|
||||
# Page counter and result offset used to walk through the search result pages.
page = 0
o = 0
# Collected video BV ids (may still contain duplicates at this point).
list_ = []
# Walk through the result pages and extract the BV ids from the raw HTML.
while page < 10:
    # The page number and offset have to be part of the query string; without
    # placeholders str.format() silently ignores its arguments and the very
    # same page is downloaded on every iteration.
    # NOTE(review): assumes the site numbers result pages from 1 and uses
    # 36 results per page -- confirm against the live search URL.
    url = (
        'https://search.bilibili.com/all?vt=94833807&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=3'
        '&page={}&o={}'
    ).format(page + 1, o)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException as err:
        # Best effort: report the failed page and carry on with the next one.
        print("skipping search page {}: {}".format(page + 1, err))
    else:
        text1 = response.text
        # A BV id is "BV" followed by 10 alphanumeric characters; matching any
        # character (".") would also capture quotes and other markup.
        html = re.findall(r'(BV[0-9A-Za-z]{10})', text1)
        list_.extend(html)
    page = page + 1
    o = o + 36
|
||||
# Drop duplicate BV ids while keeping the order of first appearance.
# dict.fromkeys() does this in linear time, whereas calling list.count()
# for every element is quadratic.
ll = list(dict.fromkeys(list_))
|
||||
|
||||
|
||||
# Download the danmaku (bullet comments) of the first 300 unique videos.
# all_danmu collects the parsed <d> elements of every video.
all_danmu = []
for j in ll[:300]:
    link = "https://www.bilibili.com/video/{}/".format(j)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url=link, headers=headers, timeout=10)
    except requests.RequestException as err:
        print("skipping video {}: {}".format(j, err))
        continue
    response.encoding = 'utf-8'
    html1 = response.text
    # The cid found in the video page is used to build the danmaku address.
    # Require at least one digit and skip pages without a cid instead of
    # crashing on a None match.
    cid_match = re.search(r'"cid":(\d+),', html1)
    if cid_match is None:
        print("no cid found for video {}".format(j))
        continue
    cid = cid_match.group(1)
    link = "https://comment.bilibili.com/{}.xml".format(cid)
    try:
        res = requests.get(link, timeout=10)
    except requests.RequestException as err:
        print("skipping danmaku of video {}: {}".format(j, err))
        continue
    res.encoding = 'utf-8'
    soup2 = BeautifulSoup(res.text, 'xml')
    all_barrage = soup2.find_all("d")
    # Keep the elements of this video; without this step all_danmu stays
    # empty and nothing is ever written to the output file.
    all_danmu.extend(all_barrage)
|
||||
# Append every collected danmaku to the output file, one per line.
# The file is opened once instead of once per danmaku.
with open('弹幕.txt', 'a', newline='', encoding='utf-8-sig') as file:
    for danmu in all_danmu:
        text = danmu.string
        # .string is None for an element without a single text child;
        # writing None would raise TypeError, so such entries are skipped.
        if text is None:
            continue
        file.write(text)
        file.write("\n")
|
||||
# End the profiling session started at the top of the script and print its report.
t.stop()
t.print()
|
Loading…
Reference in new issue