From 64a206f34720b1703bac0417698c922053332889 Mon Sep 17 00:00:00 2001 From: CustomerAcquisition <2797027626@qq.com> Date: Fri, 31 May 2024 09:28:44 +0800 Subject: [PATCH] commit1 --- py1/a.py | 165 ++++++++++++++++++++++++++++++++++++ py1/pinglun/zidongcaozuo.py | 15 ++-- py1/test.py | 54 ++++++------ 3 files changed, 204 insertions(+), 30 deletions(-) create mode 100644 py1/a.py diff --git a/py1/a.py b/py1/a.py new file mode 100644 index 0000000..34d3c20 --- /dev/null +++ b/py1/a.py @@ -0,0 +1,165 @@ +import shutil +import requests +import os +import sys +import re +import time + +""" +## 使用说明: + +1. 保存文件名为 "名称.机房.m3u8" , 可以保存多个文件,脚本会批量处理 。sz 表示深圳,bj 表示北京,注意修改 +2. 执行脚本 "python3 a.py" + +格式: "python3 a.py [-mac-crf] [-nv]" 在 mac 下 ,可以增加参数执行命令 -crf 压缩成 720p分辨率 ,-nv 提取音频 + +## 常用脚本 + +合并视频 Terminal 执行,已经包含在自动化脚本里面 +ffmpeg -f concat -safe 0 -i file.txt -c copy a.mp4 + +压缩当前文件下的所有视频的 +find ./ -name '*.mp4' -and ! -name '*264].mp4' -exec sh -c 'ffmpeg -i "$0" -c:v libx264 -crf 30 -c:a aac "${0%%.mp4}[x264].mp4"' {} \; + +""" + +# sz 表示深圳,bj 表示北京,注意修改 + +host_room = "bj" # 会重新赋值 +cache_dir_base = "[dingtalk-playback]-cache-" +# base_url = f"https://dtliving-{jifang}.dingtalk.com/live_hp/" + + +def get_m3u8_list(): + file_list = [] + path = os.listdir('./') + for i in path: + if re.match(r".*\.m3u8$", i) and os.path.isfile(i): + print(i) + file_list.append(i) + return file_list + + +def get_url(fileName, host_room="bj"): + base_url = f"https://dtliving-{host_room}.dingtalk.com/live_hp/8a0a4b6f-cdc2-4935-9bee-50a492079522/" + url_list = [] + with open(fileName, "r") as f: + s = f.readlines() + for i in s: + if re.match(r".*?ts.*?", i): + url_list.append(base_url + i) + return url_list + + +def download(fileName, host_room, cache_dir): + urls = get_url(fileName, host_room) + sum = len(urls) + size = 0 # 单位 B + scale = 50 # 进度条长度 + print(f"一共{sum}个ts文件下载") + print("执行开始,祈祷不报错".center(scale // 2, "-")) + start = time.perf_counter() + + finished_i = 0 + if os.path.exists(cache_dir): + print("检测到已下载的文件,继续下载。。。") + finished_i = max(len(os.listdir(cache_dir)) - 1, 0) + else: + os.mkdir(cache_dir) + + for i, url in enumerate(urls): + + if i < finished_i: + # 已下载 + continue + # 为了展示进度条 + a = "*" * round(i / sum * scale) + b = "." * round((sum - i)/sum * scale) + c = (i / sum) * 100 + dur = time.perf_counter() - start + speed = float(size / 1024 / dur) + db = "KB/s" + # 核心代码 start + with open(f"{cache_dir}/{i + 1}.ts", "wb") as f: + response = requests.get(url[:-1]) # 去掉换行符 + + if response.headers["Content-Type"] == "video/MP2T": # 判断是否响应成功 + size += int(response.headers["Content-Length"]) + f.write(response.content) + else: + print(f"执行到 {i} 发生错误") + print( + f"\n\nerror: response.Content-Type not 'video/MP2T' \nMaybe {fileName}'s roomID 'bj' or 'sz' miss") + raise + # end + if speed > 1024: + speed = float(speed / 1024) + db = "MB/s" + + print( + "\r[下载进度] {}/{} {:^3.0f}% [{}->{}] {:.2f}{} {:.2f}s ".format(i+1, sum, c, a, b, speed, db, dur), end="") + # print(f"{i}/{sum} 已下载:{round(i/sum*100)}%", "ok") + # time.sleep(1) + return len(urls) + + +# 整合文件名, 方便FFmpeg合并 +def parse_filename(cache_dir, len): + base_path = os.getcwd() + with open(f"{cache_dir}/file.txt", "w+") as f: + for i in range(1, 1 + len): + path = f"file '{base_path}/{cache_dir}/{i}.ts'\n" + f.write(path) + + +def downloadAndConcat(fileName): + cache_dir = cache_dir_base+fileName + + name = fileName.split('.', 2)[0] + host_room = fileName.split('.', 2)[1] + if get_url(fileName, host_room) == 0: + print("文件内容为空!!") + return + print(f"\n\n{fileName},准备下载...") + for i in range(3): + print("倒计时:", 3-i, "s") + time.sleep(1) + parse_filename(cache_dir, download(fileName, host_room, cache_dir)) + print("\ndownload finished,准备合并视频...") + time.sleep(3) # 等待喵 + os.system( + f'ffmpeg -hide_banner -f concat -safe 0 -i {cache_dir}/file.txt -c copy {name}.mp4') + os.rename(fileName, fileName+'.ok') + + # 清除缓存 + if os.path.exists(cache_dir): + shutil.rmtree(cache_dir) + print(f"{fileName} finished") + + return fileName + + +def extraFFmpeg(fileNames, argv): + for fileName in fileNames: + name = fileName.split('.', 2)[0] + # 压缩视频 + if "-mac-crf" in argv: + os.system( + f"ffmpeg -hide_banner -y -i {name}.mp4 -vf scale=-1:720 -c:v libx264 -crf 30 -c:a aac '{name}[x264].mp4'") + # 提取音频 + if "-vn" in argv: + os.system( + f"ffmpeg -hide_banner -y -i {name}.mp4 -vn -c:a copy '{name}.aac'") + + +if __name__ == "__main__": + list = get_m3u8_list() + finished = [] + + print("检测到可下载文件: ", list) + + for fileName in list: + finished.append(downloadAndConcat(fileName)) + + # 对已下载完成的视频进行额外操作 + extraFFmpeg(finished, sys.argv) diff --git a/py1/pinglun/zidongcaozuo.py b/py1/pinglun/zidongcaozuo.py index 117e544..b0ab3c9 100644 --- a/py1/pinglun/zidongcaozuo.py +++ b/py1/pinglun/zidongcaozuo.py @@ -19,17 +19,22 @@ driver_edge = webdriver.Edge(options=options) my_url = "https://www.douyin.com/" driver_edge.get(my_url) -sleep(1) +sleep(5) -x=driver_edge.find_element(By.XPATH,'//*[@id="login-pannel"]/div[2]') -x.click() -sleep(1) +#x=driver_edge.find_element(By.XPATH,'//*[@id="login-pannel"]/div[2]') +#x.click() +#sleep(1) # 模拟点击 search_box = driver_edge.find_element(By.XPATH,'//*[@id="douyin-header"]/div[1]/header/div/div/div[1]/div/div[2]/div/div/input').send_keys('zy2752629612',Keys.ENTER) -sleep(100) +sleep(10) +s=driver_edge.find_element(By.XPATH,'//*[@id="search-content-area"]/div/div[1]/div[2]/div[1]/ul/li[1]/div/div/div/div/div/a/div/img') +x.click() +sleep(5) +h=driver_edge.find_element(By.XPATH,'//*[@id="douyin-right-container"]/div[2]/div/div/div[2]/div[3]/div[3]/div[1]/button[2]') +x.click() # 获取cookie cookies = driver_edge.get_cookies() for cookie in cookies: diff --git a/py1/test.py b/py1/test.py index 5873db2..6a4c579 100644 --- a/py1/test.py +++ b/py1/test.py @@ -1,34 +1,38 @@ -# import requests -# import json -# from pprint import pprint -# import csv -# import os -# import glob -# from time import sleep +import requests +import json +from pprint import pprint +import csv +import os +import glob +from time import sleep -# #评论接口 +# #评论接 +url = 'https://dtliving-bj.dingtalk.com/live_hp/94ec1da9-8aa3-48cc-baa3-d0705a56cef8' -# url = 'https://www.educoder.net//users/liuq1016/classrooms' +headers={ +'Host':'dtliving-bj.dingtalk.com', +'Connection':'keep-alive', +'sec-ch-ua':'"Chromium";v="91"', +'sec-ch-ua-mobile':'?0', +'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36 dingtalk-win/1.0.0 nw(0.14.7) DingTalk(7.5.30-Release.5179102) Mojo/1.0.0 Native AppType(release) Channel/201200 Architecture/x86_64', +'Accept':'*/*', +'Origin':'https://n.dingtalk.com', +'Cookie':'wolai_client_id=e3nhEuFqtWgkD2Yt8v14qg; xlly_s=1; wld_ptoken=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjIwMzI1ODMwMTAsInVzZXJJZCI6Ik9nK0dPWDJCbE9OcGJreG5Yd3B3c0E9PSIsImlhdCI6MTcxNzA1MDIxMH0.xEUHp4TiCxwX1V_mKwVWajEnl0Ot-Hdt6LFRLMIc708; doc_atoken=Njc1NDYzODYziWZBenJdOcuBVXpMgRChXaUibXCRNuWp; account=oauth_k1%3AKDtVLmrPbniRb0xfVmca8q91R7vgGLKhOgr0TAcfUzJ5SyINzZHi7SCwXQeEkO24A6Q8W3W%2F3V722I2qpbT5a5kEF2cEsnEOJQjT0DcXzAE%3D; deviceid=YjgxN2IxOTIyMzMyYmI4NWI0_unified; pub_uid=%2FdNdJzpYI64jnAK%2FxCH6gQ%3D%3D; token=dtspace_u-2842c2b7-8fc93dbeda-2136647e-1ba738-542a31cf-2cf9f444-0737-4f85-bf07-275cebc9e90e; dt_s=u-2842c2b7-8fc93dbeda-2136647e-1ba738-542a31cf-2cf9f444-0737-4f85-bf07-275cebc9e90e; XSRF-TOKEN=d6ba2d0e-8a4a-410f-b200-84ecf5ef0af4; up_ab=y; preview_ab=y; login_pc_deviceid=b817b1922332bb85b432b4e9dbc5405a; umidtoken=P1gA89NYBPr1YueuyHE_zQMnnf9l9NxVPN6dP0oxvE8TqQp0xMK-su2Feya3sAmjo96McaefCOsQRk8fUadoj9SR; cna=YQnfHi3N610CAd+UhyXyqDv+; dd_sid=k0_daf1213dc07258664867_213ddaf1665872c059910b17cd834c2825663df71617; tfstk=f7f-HaVHwoqlNUyhNawcY5rqpqU0m_Qylg7stHxodiIADnfhrwfkRBIV4p1kZajvJgjcq6Xl-DIA0gHhEa9hvMID0av3YzuCHg76EBfd8XLCRMzyxzyczaReOlqG95bPzlWclPqDOy_Xsd7TZ5VGzagkPPbbsyucdn-HAB9WOETX0FMBOptWGrLB-0MINM_bke-XRUTBdZMXWFiSAjAIwH3WbXC01Rh7kW8-OX1bTLKOPvcIOsLJe6_WDxMfMUpJjKbtUG55mw1laKyjHB7cBMBCfoDkVOLODdX8XbIO4e_pRgFZWn1RJspPn5ivut_ld_S-JXTPZ1vpbKUITpAfsd6122zNlQT6IsRE-XjeN3p1IdgK_NpCn_JNZoGJ1O7kZ9s_mYx5BesPD1fTTfoMuIH7krHELLTDMfMAU1TnhtYvjrSqLvJvuEKgkUDELL9Hkh4fBvkeh81..; isg=BFxc7975VjdMRSJr_S9ab2LMLXoO1QD_spt0RTZdvMcqgf0LXuXIjh6l5-l5CThX', +'Sec-Fetch-Site':'same-site', +'Sec-Fetch-Mode':'cors', +'Sec-Fetch-Dest':'empty', +'Referer':'https://n.dingtalk.com/', +'Accept-Encoding':'gzip, deflate, br', +'Accept-Language':'zh-CN,zh;q=0.9', -# headers={ -# 'Referer':'https://www.educoder.net/', -# 'cookie':'autologin_trustie=3b5b3cee750491d7125ffad4983c8921d9514fe4; _educoder_session=78309f003e92566c24fe5b1090c0a80e', -# 'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0' -# } -# response = requests.get(url=url,headers=headers,) + +} + +response = requests.get(url=url,headers=headers,) # #response_text = response.json()['comments'] -# print(response.text) -try: - n=0 - n=input('jnn') - - def p(n): - return n**10 - p(n) -except: - print("bjbugbub") \ No newline at end of file +print(response)