将获得弹幕所需要的函数更仔细地划分

1 year ago · d06df68b5b
parent 6a161daaec
commit d06df68b5b
1 changed file with 31 additions and 22 deletions
--- a/getdm.py
+++ b/getdm.py
@@ -12,47 +12,56 @@ dic_cookies = {}
 for i in cookies.split("; "):
    dic_cookies[i.split("=")[0]] = i.split("=")[1]

-#获取整页视频的url
-def get_videos_url(keyword,pages=1):
+def get_web_url(keyword, pages):#页面url
+    # 对关键词进行URL编码
+    keyword = quote(keyword) if keyword else ""  # 确保keyword不为None
+    res = []
+    pages = int(pages)
+    # 当请求页数为0时，直接返回空列表
+    if pages <= 0:
+        return res
+    else:
+        # 构建基础URL
        url = f"https://search.bilibili.com/video?keyword={keyword}&from_source=webtop_search&spm_id_from=333.1007&search_source=5"
-        result = []
-        for i in range(0, pages):
-            if i > 0 :
+        # 根据页数构建完整的URL列表
+        for i in range(pages):
+            if i > 0:
                url1 = f"{url}&page={i + 1}&o={i * 30}"
            else:
                url1 = url
-            response = requests.get(url1, headers=headers,cookies=dic_cookies)
-            response.encoding = 'utf-8'
+            res.append(url1)
+    return res
+
+def get_videos_url(web_url):#获取当前页面的所有视频的url
+    videos_url = []
+    response = requests.get(web_url,headers=headers,cookies=dic_cookies)
    e = etree.HTML(response.text)
    # xpath定位
    a = e.xpath('//div[@class="video-list row"]/div[not(contains(@class, "to_hide_md"))]/div[1]/div[2]/a[1]')
    for element in a:
        href = element.get('href')  # 获取href属性
        href1 = f"https:{href}"
-                result.append(href1)
-                print(href1)
-            time.sleep(5)
-        return result
-
+        videos_url.append(href1)
+    return videos_url

 def get_danmu(video_id):
    #打开视频页面，获取视频cid
    response = requests.get(video_id, headers=headers)
    video_cid = re.search(r'"cid":(\d*),', response.text).group(1)
-
    #用现有链接获取弹幕
    url_dm = f"https://comment.bilibili.com/{video_cid}.xml"
    response_dm = requests.get(url_dm,headers=headers,cookies=dic_cookies)
-    if response.status_code == 200:
+    if response_dm.status_code == 200:
        response_dm.encoding = 'utf-8'
        content = re.findall('<d p=".*?">(.*?)</d>', response_dm.text)
        print("请求弹幕成功")
        return content
    else:
        print("请求弹幕失败")
-        return 
+        return []
    
 def writeintxt (list,file_path):
    with open(file_path, 'w', encoding='utf-8') as file:
        for item in list:
-            file.write("%s\n" % item)
+            file.write(f"{item}\n ")
+