From 3663c7e09d7ae6b57b4c0d5db6a3415f11d778bd Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Thu, 5 Sep 2024 19:20:26 +0800 Subject: [PATCH 1/9] Initial commit --- README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..6fdd916 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# 1022014411 + From 61438d2274b5bdc20683447b04a4314679b0e5f3 Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Mon, 9 Sep 2024 19:57:45 +0800 Subject: [PATCH 2/9] ADD file via upload --- code | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 code diff --git a/code b/code new file mode 100644 index 0000000..e69de29 From 0465494851648882362c663e15f48b06010e28c1 Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Mon, 9 Sep 2024 19:58:56 +0800 Subject: [PATCH 3/9] Delete 'code' --- code | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 code diff --git a/code b/code deleted file mode 100644 index e69de29..0000000 From a85387eb04205439c4d676ea5924e01c8dc73344 Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Wed, 11 Sep 2024 16:26:15 +0800 Subject: [PATCH 4/9] ADD file via upload --- get_urls.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 get_urls.py diff --git a/get_urls.py b/get_urls.py new file mode 100644 index 0000000..d152f6c --- /dev/null +++ b/get_urls.py @@ -0,0 +1,39 @@ +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By + +def get_urls(query, number): + chrome_driver_path = 'D:\chromedriver-win64\chromedriver.exe' + service = Service(chrome_driver_path) + options = Options() + options.add_argument('--headless') #后台运行,不显示浏览器窗口 + + driver = webdriver.Chrome(service=service, options=options) # 初始化 WebDriver + url_list = set() + page = 1 + while (len(url_list) < number): + search_url = f'https://search.bilibili.com/video?keyword={query}&page={page}' + driver.get(search_url) # 打开网页 + + # 查找符合选择器的所有 标签 + elements = driver.find_elements(By.CSS_SELECTOR, ".video-list.row div.bili-video-card > div > a") + + # 将每个 标签的 href 属性(即网址)加入list + for element in elements: + url_list.add(element.get_attribute('href')) + if (len(url_list) >= number): break + # print(f"page = {page}, cnt = {len(url_list)}") + page = page + 1 + + driver.quit() # 关闭浏览器 + return url_list + +if __name__ == '__main__': + + query = '2024巴黎奥运会' + number = 300 + url_list = get_urls(query=query, number=number) + for url in url_list : + print(url) + \ No newline at end of file From cf12ce96b8638ba5093416a05a18f91fe238bdd7 Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Sat, 14 Sep 2024 21:19:02 +0800 Subject: [PATCH 5/9] Delete 'get_urls.py' --- get_urls.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 get_urls.py diff --git a/get_urls.py b/get_urls.py deleted file mode 100644 index d152f6c..0000000 --- a/get_urls.py +++ /dev/null @@ -1,39 +0,0 @@ -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.common.by import By - -def get_urls(query, number): - chrome_driver_path = 'D:\chromedriver-win64\chromedriver.exe' - service = Service(chrome_driver_path) - options = Options() - options.add_argument('--headless') #后台运行,不显示浏览器窗口 - - driver = webdriver.Chrome(service=service, options=options) # 初始化 WebDriver - url_list = set() - page = 1 - while (len(url_list) < number): - search_url = f'https://search.bilibili.com/video?keyword={query}&page={page}' - driver.get(search_url) # 打开网页 - - # 查找符合选择器的所有 标签 - elements = driver.find_elements(By.CSS_SELECTOR, ".video-list.row div.bili-video-card > div > a") - - # 将每个 标签的 href 属性(即网址)加入list - for element in elements: - url_list.add(element.get_attribute('href')) - if (len(url_list) >= number): break - # print(f"page = {page}, cnt = {len(url_list)}") - page = page + 1 - - driver.quit() # 关闭浏览器 - return url_list - -if __name__ == '__main__': - - query = '2024巴黎奥运会' - number = 300 - url_list = get_urls(query=query, number=number) - for url in url_list : - print(url) - \ No newline at end of file From 4408ad74ac4bf5c42b2d094fba935f7966484466 Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Sat, 14 Sep 2024 21:19:21 +0800 Subject: [PATCH 6/9] ADD file via upload --- get_urls.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 get_urls.py diff --git a/get_urls.py b/get_urls.py new file mode 100644 index 0000000..0656cfc --- /dev/null +++ b/get_urls.py @@ -0,0 +1,44 @@ +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By + +def get_urls(query, number): + chrome_driver_path = 'D:\chromedriver-win64\chromedriver.exe' + service = Service(chrome_driver_path) + options = Options() + options.add_argument('--headless') #后台运行,不显示浏览器窗口 + + driver = webdriver.Chrome(service=service, options=options) # 初始化 WebDriver + url_list = set() + page = 1 + while (len(url_list) < number): + search_url = f'https://search.bilibili.com/video?keyword={query}&page={page}' + driver.get(search_url) # 打开网页 + + # 查找符合选择器的所有 标签 + elements = driver.find_elements(By.CSS_SELECTOR, ".video-list.row div.bili-video-card > div > a") + + # 将每个 标签的 href 属性(即网址)加入list + for element in elements: + url_list.add(element.get_attribute('href')) + if (len(url_list) >= number): break + # print(f"page = {page}, cnt = {len(url_list)}") + page = page + 1 + + driver.quit() # 关闭浏览器 + return url_list + +if __name__ == '__main__': + + query = '2024巴黎奥运会' + number = 300 + cnt = 0 + url_list = get_urls(query=query, number=number) + for url in url_list : + with open('./urls.txt', mode='a', encoding='utf-8') as f: + f.write(url + "\n") + cnt = cnt + 1 + print(f"url : {cnt}/{number}") + + \ No newline at end of file From 784d3b7fab81e20948427469c06d6419c0ad0674 Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Sat, 14 Sep 2024 21:19:31 +0800 Subject: [PATCH 7/9] ADD file via upload --- get_cid.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 get_cid.py diff --git a/get_cid.py b/get_cid.py new file mode 100644 index 0000000..2346ce7 --- /dev/null +++ b/get_cid.py @@ -0,0 +1,34 @@ +import json +import requests +import random +import time +import re + +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36", + "Cookie": "i-wanna-go-back=-1; buvid_fp_plain=undefined; CURRENT_BLACKGAP=0; blackside_state=0; LIVE_BUVID=AUTO5216539051785441; buvid4=BF640363-932C-9859-2DEB-9D5332BED8BA14521-022050118-RBQaCti2N%2FgbXXvSImVESA%3D%3D; buvid3=EA6B6EE5-CF42-44F0-8BF1-0E035F5182C9167646infoc; DedeUserID=506881997; DedeUserID__ckMd5=6816981dbd4223e9; CURRENT_FNVAL=4048; rpdid=|(u))kRlJJ)u0J'uYY)l~u)~J; CURRENT_QUALITY=80; hit-new-style-dyn=1; CURRENT_PID=150df130-cdea-11ed-9e61-390f799e5bb1; _uuid=68159E9C-3BA8-49EE-A1C6-D7E510610D865E40530infoc; nostalgia_conf=-1; b_ut=5; FEED_LIVE_VERSION=V8; hit-dyn-v2=1; home_feed_column=5; browser_resolution=1530-712; header_theme_version=CLOSE; fingerprint=93340026c1ba350713aeadf8766000e1; SESSDATA=5c25a608%2C1709466512%2Cc7e4b%2A92gDhsEFKTVzRobJkJtk9Sk1ph71ufczEtnhZVk3UyXcKE4ChKGDta46HuRUO_g-u_Rbl2OgAAYQA; bili_jct=37f8d40c6076352e8e44a85bbbeb65a4; sid=7px9659x; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2OTQxODU3MzksImlhdCI6MTY5MzkyNjUzOSwicGx0IjotMX0.ek0FRkjhs25UswbCHtI0R25Otecvf_5FppkCkYoDMCE; bili_ticket_expires=1694185739; PVID=3; b_nut=100; buvid_fp=93340026c1ba350713aeadf8766000e1; b_lsid=109D53E510_18A6AC7C071; bp_video_offset_506881997=838254238965432390", +} + +def video_cid(bvid): + sleep_time = random.randint(0, 3) + random.random() + time.sleep(sleep_time) + url = "https://api.bilibili.com/x/player/pagelist?bvid=" + str(bvid) + "&jsonp=jsonp" + video_logo = requests.get(url=url, headers=headers) + video_name = video_logo.text + name = json.loads(video_name) + cid = name['data'][0]['cid'] + return cid + +if __name__ == '__main__': + + cnt = 0 + with open('./urls.txt', 'r') as file: + for line in file: + url = line.strip() + bvid = re.findall('https://www.bilibili.com/video/(.*?)/', url)[0] + cid = video_cid(bvid) + with open('./cid.txt', mode='a') as f: + f.write(f"{cid}\n") + cnt = cnt + 1 + print(f"cid : {cnt}/300") + \ No newline at end of file From f6fca8617fd252ea8477e6882bb72c4e6d64c26b Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Sat, 14 Sep 2024 21:19:39 +0800 Subject: [PATCH 8/9] ADD file via upload --- get_danmu.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 get_danmu.py diff --git a/get_danmu.py b/get_danmu.py new file mode 100644 index 0000000..4dfd537 --- /dev/null +++ b/get_danmu.py @@ -0,0 +1,38 @@ +import requests +import re +import random +import time + +# headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Mobile Safari/537.36 Edg/128.0.0.0'} +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36", + "Cookie": "i-wanna-go-back=-1; buvid_fp_plain=undefined; CURRENT_BLACKGAP=0; blackside_state=0; LIVE_BUVID=AUTO5216539051785441; buvid4=BF640363-932C-9859-2DEB-9D5332BED8BA14521-022050118-RBQaCti2N%2FgbXXvSImVESA%3D%3D; buvid3=EA6B6EE5-CF42-44F0-8BF1-0E035F5182C9167646infoc; DedeUserID=506881997; DedeUserID__ckMd5=6816981dbd4223e9; CURRENT_FNVAL=4048; rpdid=|(u))kRlJJ)u0J'uYY)l~u)~J; CURRENT_QUALITY=80; hit-new-style-dyn=1; CURRENT_PID=150df130-cdea-11ed-9e61-390f799e5bb1; _uuid=68159E9C-3BA8-49EE-A1C6-D7E510610D865E40530infoc; nostalgia_conf=-1; b_ut=5; FEED_LIVE_VERSION=V8; hit-dyn-v2=1; home_feed_column=5; browser_resolution=1530-712; header_theme_version=CLOSE; fingerprint=93340026c1ba350713aeadf8766000e1; SESSDATA=5c25a608%2C1709466512%2Cc7e4b%2A92gDhsEFKTVzRobJkJtk9Sk1ph71ufczEtnhZVk3UyXcKE4ChKGDta46HuRUO_g-u_Rbl2OgAAYQA; bili_jct=37f8d40c6076352e8e44a85bbbeb65a4; sid=7px9659x; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2OTQxODU3MzksImlhdCI6MTY5MzkyNjUzOSwicGx0IjotMX0.ek0FRkjhs25UswbCHtI0R25Otecvf_5FppkCkYoDMCE; bili_ticket_expires=1694185739; PVID=3; b_nut=100; buvid_fp=93340026c1ba350713aeadf8766000e1; b_lsid=109D53E510_18A6AC7C071; bp_video_offset_506881997=838254238965432390", +} +def get_info(url): + sleep_time = random.randint(0, 3) + random.random() + time.sleep(sleep_time) + response = requests.get(url = url, headers = headers) + response.encoding = response.apparent_encoding #改变编码 + + # 解析数据(re正则表达式 + data_list = re.findall('(.*?)', response.text) + + #输出到文件 + tot = 0 + for index in data_list: + with open('./danmu.txt', mode='a', encoding='utf-8') as f: + f.write(index) + f.write('\n') + tot = tot + 1 + return tot + +if __name__ == '__main__': + + cnt = 0 + tot = 0 + with open('./cid.txt', 'r') as file: + for line in file: + cid = line.strip() + tot += get_info('https://api.bilibili.com/x/v1/dm/list.so?oid=' + cid) + cnt = cnt + 1 + print(f"danmu : {cnt}/300, {tot}") \ No newline at end of file From a46f7c65f4102f2708b0095d3c86b72a1e664c2c Mon Sep 17 00:00:00 2001 From: pg3fbpv9r Date: Sat, 14 Sep 2024 21:19:49 +0800 Subject: [PATCH 9/9] ADD file via upload --- run_all.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 run_all.py diff --git a/run_all.py b/run_all.py new file mode 100644 index 0000000..a858cac --- /dev/null +++ b/run_all.py @@ -0,0 +1,6 @@ +import subprocess + +files = ['get_urls.py', 'get_cid.py', 'get_danmu.py'] + +for file in files: + subprocess.run(['python', file])