|
|
|
@ -12,47 +12,56 @@ dic_cookies = {}
|
|
|
|
|
# Turn the raw "name=value; name2=value2" cookie header into a dict.
for cookie_pair in cookies.split("; "):
    # Split on the first "=" only: a cookie value may itself contain "=",
    # and must not be truncated at that point.
    cookie_name, _, cookie_value = cookie_pair.partition("=")
    if not cookie_name:
        # An empty fragment (e.g. an empty cookie string) carries no cookie.
        continue
    dic_cookies[cookie_name] = cookie_value
|
|
|
|
|
|
|
|
|
|
def get_web_url(keyword, pages):
    """Build the Bilibili search-result page URLs for a keyword.

    This only assembles URL strings; it performs no network request.

    Args:
        keyword: Search term. It is percent-encoded with ``quote``; a falsy
            value (``None`` or ``""``) produces an empty ``keyword=`` field.
        pages: Number of result pages wanted; any value ``int()`` accepts.

    Returns:
        A list with one URL per page, in page order. The list is empty when
        ``pages`` is zero or negative.

    Raises:
        ValueError, TypeError: If ``pages`` cannot be converted by ``int()``.
    """
    keyword = quote(keyword) if keyword else ""
    pages = int(pages)
    if pages <= 0:
        return []

    url = f"https://search.bilibili.com/video?keyword={keyword}&from_source=webtop_search&spm_id_from=333.1007&search_source=5"
    # The first page is the bare search URL; every later page adds a 1-based
    # ``page`` number and an ``o`` offset that grows by 30 per page.
    return [
        url if i == 0 else f"{url}&page={i + 1}&o={i * 30}"
        for i in range(pages)
    ]
|
|
|
|
|
|
|
|
|
|
def get_videos_url(web_url):
    """Collect the video links found on one search-result page.

    Args:
        web_url: URL of the page to download.

    Returns:
        A list of link strings, each prefixed with ``"https:"``, in the order
        the XPath query returns them. Anchors without an ``href`` are skipped.
    """
    response = requests.get(web_url, headers=headers, cookies=dic_cookies)
    e = etree.HTML(response.text)
    # Select the first anchor of each entry in the "video-list row" container,
    # ignoring entries whose class contains "to_hide_md".
    anchors = e.xpath('//div[@class="video-list row"]/div[not(contains(@class, "to_hide_md"))]/div[1]/div[2]/a[1]')

    videos_url = []
    for element in anchors:
        href = element.get('href')
        if not href:
            # Without this guard a missing href would be stored as the
            # bogus link "https:None".
            continue
        videos_url.append(f"https:{href}")
    return videos_url
|
|
|
|
|
|
|
|
|
|
def get_danmu(video_id):
    """Download the danmaku (bullet comments) of one video.

    Args:
        video_id: Address of the video page; it is passed unchanged to
            ``requests.get``.

    Returns:
        A list of comment strings. The list is empty when no cid can be found
        in the page, or when the comment request does not answer with
        status 200.
    """
    # Open the video page and extract the video's cid from its source.
    response = requests.get(video_id, headers=headers)
    cid_match = re.search(r'"cid":(\d+),', response.text)
    if cid_match is None:
        # No cid in the page: report a failure instead of crashing on
        # ``None.group(1)``.
        print("请求弹幕失败")
        return []
    video_cid = cid_match.group(1)

    # Fetch the comment XML that belongs to this cid.
    url_dm = f"https://comment.bilibili.com/{video_cid}.xml"
    response_dm = requests.get(url_dm, headers=headers, cookies=dic_cookies)
    if response_dm.status_code != 200:
        print("请求弹幕失败")
        return []

    response_dm.encoding = 'utf-8'
    # Each comment is the text content of one <d p="..."> element.
    content = re.findall(r'<d p=".*?">(.*?)</d>', response_dm.text)
    print("请求弹幕成功")
    return content
|
|
|
|
|
|
|
|
|
|
def writeintxt(list, file_path):
    """Write every item to a UTF-8 text file, one item per line.

    An existing file at ``file_path`` is overwritten.

    Args:
        list: Iterable of items; each one is converted with ``str()``.
            (The name shadows the builtin but is kept so that keyword
            callers keep working.)
        file_path: Path of the file to create or overwrite.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        # Exactly one line per item, terminated by a bare newline so that no
        # stray space ends up at the start of the following line.
        file.writelines(f"{item}\n" for item in list)
|
|
|
|
|
|
|
|
|
|