"""Collect Bilibili search-result video URLs and download their danmaku.

The module exposes three helpers:

* ``get_videos_url`` - scrape video page URLs from the search result pages.
* ``get_danmu``      - download the danmaku (bullet comments) of one video.
* ``writeintxt``     - dump a sequence of items to a UTF-8 text file.
"""

import re
import time
from urllib.parse import quote

import requests
from lxml import etree

# Upper bound, in seconds, for any single HTTP request.  Without a timeout
# ``requests`` waits forever on a stalled connection.
REQUEST_TIMEOUT = 10

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"}

# SECURITY(review): this is a logged-in browser session (SESSDATA, bili_jct)
# committed to source control.  Treat it as leaked: rotate the session and
# load the cookie string from the environment or an untracked config file.
cookies = " buvid3=8231EFA3-C943-B46F-C28D-10CA6EB40DD180949infoc; CURRENT_FNVAL=4048; rpdid=|(JYYJ|u)luu0J'uYmkl~mY)k; buvid4=CA2A9538-3AB7-5FF0-1AD2-830935987DFB07953-024012212-75GkjfNGeaGAWMbBOvqWtQ%3D%3D; buvid_fp=f29e4e3ef42f7eb5eeb379115e870ad9; LIVE_BUVID=AUTO7217086875647310; PVID=2; b_nut=100; _uuid=7FDC1091C-5D99-24A8-4133-F89DFB510EA6531786infoc; header_theme_version=CLOSE; enable_web_push=DISABLE; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY2NDk2NTcsImlhdCI6MTcyNjM5MDM5NywicGx0IjotMX0.3Z1vsF3Xzu2nPlPrZBCesXytvgQb_37-XNcBtrRlMzE; bili_ticket_expires=1726649597; bsource=search_bing; bmg_af_switch=1; bmg_src_def_domain=i1.hdslb.com; home_feed_column=4; SESSDATA=188e2907%2C1742055275%2C569b4%2A92CjDCOHUaxUkq_L5c6PfUab0-sYNi5nOwFET1z4H9q14EuO2HVsnxLmx1932gpvUQBgESVnFIY3lzUFJhZWxtY1N1d21POEc5bDRKRldVTlJSTDNWdG8ybkJSYmdGN3g3S2hjRk1uYzhSVEJUUkVOMThzb2Zfb0txZlUwbDNVdnZEM1h6UjJ6amhBIIEC; bili_jct=b1a79048046b927643ac32fe2ce260a0; DedeUserID=1986103749; DedeUserID__ckMd5=be6d80b5f24d01e0; browser_resolution=1220-150; sid=6jgymz8o; bp_t_offset_1986103749=977826403892330496; b_lsid=EF11D7E6_191FBE2F27F"

# Pulls the numeric cid out of the JSON embedded in a video page.
_CID_RE = re.compile(r'"cid":(\d+),')

# Captures the text of every <d p="...">text</d> element in the danmaku XML.
# NOTE(review): the reviewed text carried the pattern '(.*?)', which can only
# ever match empty strings; the <d ...> markup looks to have been stripped
# from the literal.  Confirm this pattern against the repository copy.
_DANMU_RE = re.compile(r'<d p="[^"]*">(.*?)</d>')

# Anchor of each visible result card on a search page.
_RESULT_LINK_XPATH = '//div[@class="video-list row"]/div[not(contains(@class, "to_hide_md"))]/div[1]/div[2]/a[1]'


def _parse_cookie_header(raw):
    """Turn a ``name=value; name=value`` cookie string into a dict."""
    parsed = {}
    for pair in raw.split(";"):
        # strip() drops the padding around each pair (the raw string starts
        # with a space); partition() keeps any '=' that belongs to the value.
        name, sep, value = pair.strip().partition("=")
        if sep and name:
            parsed[name] = value
    return parsed


dic_cookies = _parse_cookie_header(cookies)


# Collect the URLs of all videos on the requested search pages.
def get_videos_url(keyword, pages=1):
    """Return the video page URLs found on the first ``pages`` search pages.

    Args:
        keyword: Search term.  It is percent-encoded before being placed in
            the query string; text that is already percent-encoded is left
            untouched because '%' is treated as a safe character.
        pages: Number of result pages to fetch, starting from the first.

    Returns:
        A list of ``https:``-prefixed video URLs in page order.  Each URL is
        also printed as it is found.

    Raises:
        requests.exceptions.RequestException: If a page cannot be fetched
            (including a timeout after ``REQUEST_TIMEOUT`` seconds).
    """
    base_url = (
        "https://search.bilibili.com/video"
        f"?keyword={quote(str(keyword), safe='%')}"
        "&from_source=webtop_search&spm_id_from=333.1007&search_source=5"
    )
    result = []
    for page_index in range(pages):
        if page_index > 0:
            # Later pages carry a 1-based page number and a result offset
            # that advances by 30 per page.
            page_url = f"{base_url}&page={page_index + 1}&o={page_index * 30}"
        else:
            page_url = base_url

        response = requests.get(
            page_url,
            headers=headers,
            cookies=dic_cookies,
            timeout=REQUEST_TIMEOUT,
        )
        response.encoding = 'utf-8'
        tree = etree.HTML(response.text)
        # Locate the result links with XPath; an unparsable page yields none.
        anchors = tree.xpath(_RESULT_LINK_XPATH) if tree is not None else []
        for anchor in anchors:
            href = anchor.get('href')  # read the href attribute
            if not href:
                # An anchor without href would otherwise become "https:None".
                continue
            video_url = f"https:{href}"
            result.append(video_url)
            print(video_url)

        # Pause after each page so the site is not hit in rapid succession.
        time.sleep(5)
    return result


def get_danmu(video_id):
    """Download the danmaku (bullet comments) of a single video.

    Args:
        video_id: Despite the name, this is the full URL of the video page;
            it is passed directly to ``requests.get``.

    Returns:
        A list with the text of each danmaku entry, or ``None`` when the cid
        cannot be found on the page or the danmaku request does not return
        HTTP 200.

    Raises:
        requests.exceptions.RequestException: If either request fails at the
            network level (including a timeout).
    """
    # Open the video page and read the video's cid from it.
    page = requests.get(video_id, headers=headers, timeout=REQUEST_TIMEOUT)
    cid_match = _CID_RE.search(page.text)
    if cid_match is None:
        # No cid on the page: report failure instead of raising
        # AttributeError on a missing match.
        print("请求弹幕失败")
        return None
    video_cid = cid_match.group(1)

    # Fetch the danmaku XML through the cid-based endpoint.
    url_dm = f"https://comment.bilibili.com/{video_cid}.xml"
    response_dm = requests.get(
        url_dm,
        headers=headers,
        cookies=dic_cookies,
        timeout=REQUEST_TIMEOUT,
    )
    # Check the danmaku response itself, not the earlier page response.
    if response_dm.status_code != 200:
        print("请求弹幕失败")
        return None

    response_dm.encoding = 'utf-8'
    content = _DANMU_RE.findall(response_dm.text)
    print("请求弹幕成功")
    return content


def writeintxt(list, file_path):
    """Write every item of ``list`` to ``file_path``, one item per line.

    Args:
        list: Iterable of items to write; each is converted with ``str``.
            ``None`` (what ``get_danmu`` returns on failure) is treated as an
            empty sequence.  The name shadows the builtin but is kept so that
            keyword callers continue to work.
        file_path: Destination path.  The file is created or truncated and
            written as UTF-8.
    """
    items = list if list is not None else ()
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(f"{item}\n" for item in items)