From d06df68b5b3331a24afcf5e168f2627a38e143ba Mon Sep 17 00:00:00 2001 From: p6fxi93qh <1240380517@qq.com> Date: Wed, 18 Sep 2024 19:33:53 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E8=8E=B7=E5=BE=97=E5=BC=B9=E5=B9=95?= =?UTF-8?q?=E6=89=80=E9=9C=80=E8=A6=81=E7=9A=84=E5=87=BD=E6=95=B0=E6=9B=B4?= =?UTF-8?q?=E4=BB=94=E7=BB=86=E7=9A=84=E5=88=92=E5=88=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- getdm.py | 53 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/getdm.py b/getdm.py index c7749bc..9d3ea52 100644 --- a/getdm.py +++ b/getdm.py @@ -12,47 +12,56 @@ dic_cookies = {} for i in cookies.split("; "): dic_cookies[i.split("=")[0]] = i.split("=")[1] -#获取整页视频的url -def get_videos_url(keyword,pages=1): +def get_web_url(keyword, pages):#页面url + # 对关键词进行URL编码 + keyword = quote(keyword) if keyword else "" # 确保keyword不为None + res = [] + pages = int(pages) + # 当请求页数为0时,直接返回空列表 + if pages <= 0: + return res + else: + # 构建基础URL url = f"https://search.bilibili.com/video?keyword={keyword}&from_source=webtop_search&spm_id_from=333.1007&search_source=5" - result = [] - for i in range(0, pages): - if i > 0 : - url1 = f"{url}&page={i + 1}&o={i * 30}" + # 根据页数构建完整的URL列表 + for i in range(pages): + if i > 0: + url1 = f"{url}&page={i + 1}&o={i * 30}" else: url1 = url - response = requests.get(url1, headers=headers,cookies=dic_cookies) - response.encoding = 'utf-8' - e = etree.HTML(response.text) - # xpath定位 - a = e.xpath('//div[@class="video-list row"]/div[not(contains(@class, "to_hide_md"))]/div[1]/div[2]/a[1]') - for element in a: - href = element.get('href') # 获取href属性 - href1 = f"https:{href}" - result.append(href1) - print(href1) - time.sleep(5) - return result + res.append(url1) + return res +def get_videos_url(web_url):#获取当前页面的所有视频的url + videos_url = [] + response = requests.get(web_url,headers=headers,cookies=dic_cookies) + e = etree.HTML(response.text) + # xpath定位 + a = e.xpath('//div[@class="video-list row"]/div[not(contains(@class, "to_hide_md"))]/div[1]/div[2]/a[1]') + for element in a: + href = element.get('href') # 获取href属性 + href1 = f"https:{href}" + videos_url.append(href1) + return videos_url def get_danmu(video_id): #打开视频页面,获取视频cid response = requests.get(video_id, headers=headers) video_cid = re.search(r'"cid":(\d*),', response.text).group(1) - #用现有链接获取弹幕 url_dm = f"https://comment.bilibili.com/{video_cid}.xml" response_dm = requests.get(url_dm,headers=headers,cookies=dic_cookies) - if response.status_code == 200: + if response_dm.status_code == 200: response_dm.encoding = 'utf-8' content = re.findall('(.*?)', response_dm.text) print("请求弹幕成功") return content else: print("请求弹幕失败") - return + return [] def writeintxt (list,file_path): with open(file_path, 'w', encoding='utf-8') as file: for item in list: - file.write("%s\n" % item) \ No newline at end of file + file.write(f"{item}\n ") +