main
CustomerAcquisition 6 months ago
parent 00cc755102
commit 64a206f347

@ -0,0 +1,165 @@
import shutil
import requests
import os
import sys
import re
import time
"""
## 使用说明:
1. 保存文件名为 "名称.机房.m3u8",可以保存多个文件,脚本会批量处理。机房:sz 表示深圳,bj 表示北京,注意修改
2. 执行脚本 "python3 a.py"
格式: "python3 a.py [-mac-crf] [-vn]",可以增加参数执行额外命令:-mac-crf 压缩成 720p 分辨率,-vn 提取音频
## 常用脚本
合并视频 Terminal 执行已经包含在自动化脚本里面
ffmpeg -f concat -safe 0 -i file.txt -c copy a.mp4
压缩当前文件下的所有视频的
find ./ -name '*.mp4' -and ! -name '*264].mp4' -exec sh -c 'ffmpeg -i "$0" -c:v libx264 -crf 30 -c:a aac "${0%%.mp4}[x264].mp4"' {} \;
"""
# Data-center code used in the download host name: "sz" = Shenzhen, "bj" = Beijing. Adjust as needed.
host_room = "bj" # default only; downloadAndConcat derives its own value from each file name
# Prefix of the per-playlist cache directory (the playlist file name is appended to it).
cache_dir_base = "[dingtalk-playback]-cache-"
# base_url = f"https://dtliving-{jifang}.dingtalk.com/live_hp/"
def get_m3u8_list():
    """Return the ``.m3u8`` playlist files found in the current directory.

    Every matching name is echoed to stdout. Names are returned sorted so a
    batch run processes the playlists in a stable order.

    Returns:
        list[str]: Bare file names (no directory part) ending in ``.m3u8``.
    """
    file_list = []
    for entry in sorted(os.listdir('./')):
        # The isfile() check skips directories whose name ends in ".m3u8",
        # such as a leftover cache directory (cache prefix + playlist name).
        if entry.endswith(".m3u8") and os.path.isfile(entry):
            print(entry)
            file_list.append(entry)
    return file_list
def get_url(fileName, host_room="bj"):
    """Build the download URL of every ``.ts`` segment listed in a playlist.

    Args:
        fileName: Path of the saved ``.m3u8`` playlist.
        host_room: Data-center code inserted into the host name
            (the script's usage notes mention "bj" and "sz").

    Returns:
        list[str]: One URL per segment line, in playlist order. Each URL is
        terminated by exactly one newline character, because ``download``
        strips the final character of every URL before requesting it.
    """
    # NOTE(review): the live-stream id in this base URL is hard-coded;
    # presumably it has to match the playlist being downloaded - confirm.
    base_url = f"https://dtliving-{host_room}.dingtalk.com/live_hp/8a0a4b6f-cdc2-4935-9bee-50a492079522/"
    url_list = []
    # Undecodable bytes can only matter in tag/comment lines, which are ignored.
    with open(fileName, "r", encoding="utf-8", errors="replace") as f:
        for raw_line in f:
            line = raw_line.strip()
            # Playlist tags/comments start with "#"; they are never segment
            # URIs even when they happen to contain the letters "ts".
            if not line or line.startswith("#"):
                continue
            if "ts" in line:
                # Re-append a single newline so the last line of a file that
                # lacks a trailing newline is not truncated by the caller.
                url_list.append(base_url + line + "\n")
    return url_list
def download(fileName, host_room, cache_dir):
    """Download every segment of a playlist into ``cache_dir``.

    Segments are saved as ``1.ts``, ``2.ts``, ... in playlist order while a
    one-line progress bar is redrawn on stdout. When ``cache_dir`` already
    exists the download resumes: the number of ``.ts`` files present, minus
    one, is skipped, so the most recent (possibly truncated) file is fetched
    again.

    Args:
        fileName: Path of the ``.m3u8`` playlist.
        host_room: Data-center code passed on to ``get_url``.
        cache_dir: Directory that receives the segment files; created if
            it does not exist.

    Returns:
        int: Total number of segments listed in the playlist.

    Raises:
        RuntimeError: A response was not served as ``video/MP2T``.
        requests.RequestException: The HTTP request itself failed or timed out.
    """
    urls = get_url(fileName, host_room)
    total = len(urls)
    size = 0  # bytes downloaded so far in this run
    scale = 50  # progress bar width in characters
    print(f"一共{total}个ts文件下载")
    print("执行开始,祈祷不报错".center(scale // 2, "-"))
    start = time.perf_counter()
    finished_i = 0
    if os.path.exists(cache_dir):
        print("检测到已下载的文件,继续下载。。。")
        # Count only segment files so a stray file.txt does not shift the
        # resume position; redo the last one in case it was cut short.
        ts_count = sum(1 for entry in os.listdir(cache_dir) if entry.endswith(".ts"))
        finished_i = max(ts_count - 1, 0)
    else:
        os.mkdir(cache_dir)
    for i, url in enumerate(urls):
        if i < finished_i:
            # Already downloaded in an earlier run.
            continue
        # Values for the progress bar.
        done_bar = "*" * round(i / total * scale)
        todo_bar = "." * round((total - i) / total * scale)
        percent = (i / total) * 100
        dur = time.perf_counter() - start
        speed = float(size / 1024 / dur) if dur > 0 else 0.0
        unit = "KB/s"
        # Fetch first and write afterwards, so a failed request never leaves
        # an empty segment file behind that a later resume would count.
        # strip() drops the newline kept by get_url without eating a real
        # character when the newline is missing.
        response = requests.get(url.strip(), timeout=30)
        if response.headers.get("Content-Type") != "video/MP2T":
            print(f"执行到 {i} 发生错误")
            print(
                f"\n\nerror: response.Content-Type not 'video/MP2T' \nMaybe {fileName}'s roomID 'bj' or 'sz' miss")
            raise RuntimeError(
                f"segment {i + 1} of {fileName} was not served as video/MP2T")
        # Content-Length may be absent, so measure the body instead.
        size += len(response.content)
        with open(f"{cache_dir}/{i + 1}.ts", "wb") as f:
            f.write(response.content)
        if speed > 1024:
            speed = float(speed / 1024)
            unit = "MB/s"
        print(
            "\r[下载进度] {}/{} {:^3.0f}% [{}->{}] {:.2f}{} {:.2f}s ".format(i+1, total, percent, done_bar, todo_bar, speed, unit, dur), end="")
    return len(urls)
# Write the segment list that FFmpeg's concat demuxer merges from.
def parse_filename(cache_dir, len):
    """Write ``<cache_dir>/file.txt`` listing segments ``1.ts`` .. ``<len>.ts``.

    Each line has the form ``file '<cwd>/<cache_dir>/<i>.ts'``, the input
    format passed to ``ffmpeg -f concat``.

    Args:
        cache_dir: Directory (relative to the current working directory)
            holding the downloaded segments; must already exist.
        len: Number of segments to list. The name shadows the builtin but is
            kept because it is part of the function's signature.
    """
    base_path = os.getcwd()
    with open(f"{cache_dir}/file.txt", "w", encoding="utf-8") as f:
        for i in range(1, 1 + len):
            # Inside a single-quoted concat entry a literal quote must be
            # written as '\'' or the list file cannot be parsed.
            path = f"{base_path}/{cache_dir}/{i}.ts".replace("'", "'\\''")
            f.write(f"file '{path}'\n")
def _split_playlist_name(fileName):
    """Split ``<name>.<room>.m3u8`` into ``(name, room)``.

    Splitting from the right keeps dots inside the name part intact. A file
    name without a room part falls back to the module-level ``host_room``.
    """
    parts = fileName.rsplit('.', 2)
    if len(parts) == 3:
        return parts[0], parts[1]
    return parts[0], host_room


def _run_ffmpeg(args):
    """Run ``ffmpeg -hide_banner <args>`` without a shell; True on exit status 0."""
    import subprocess  # local import: not among the file's top-level imports

    try:
        # An argument list (no shell) keeps spaces, brackets and quotes in
        # file names from being interpreted by the shell.
        return subprocess.run(["ffmpeg", "-hide_banner", *args], check=False).returncode == 0
    except FileNotFoundError:
        print("error: ffmpeg executable not found on PATH")
        return False


def downloadAndConcat(fileName):
    """Download one playlist's segments and merge them into ``<name>.mp4``.

    After a successful merge the playlist is renamed to ``<fileName>.ok`` and
    its cache directory is removed. If the merge fails, both are kept so the
    next run can resume instead of downloading everything again.

    Args:
        fileName: Playlist file named ``<name>.<room>.m3u8``.

    Returns:
        str | None: ``fileName`` on success; ``None`` when the playlist
        lists no segments or ffmpeg fails.
    """
    cache_dir = cache_dir_base + fileName
    name, room = _split_playlist_name(fileName)
    # get_url returns a list, so test for emptiness rather than comparing to 0.
    if not get_url(fileName, room):
        print("文件内容为空!!")
        return None
    print(f"\n\n{fileName},准备下载...")
    for i in range(3):
        print("倒计时:", 3-i, "s")
        time.sleep(1)
    parse_filename(cache_dir, download(fileName, room, cache_dir))
    print("\ndownload finished,准备合并视频...")
    time.sleep(3)  # short pause before merging
    merged = _run_ffmpeg(
        ["-f", "concat", "-safe", "0", "-i", f"{cache_dir}/file.txt", "-c", "copy", f"{name}.mp4"])
    if not merged:
        print(f"error: ffmpeg failed to merge {fileName}; cache kept in {cache_dir}")
        return None
    os.rename(fileName, fileName+'.ok')
    # Remove the cached segments.
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)
    print(f"{fileName} finished")
    return fileName


def extraFFmpeg(fileNames, argv):
    """Run optional ffmpeg post-processing on each finished video.

    Args:
        fileNames: Playlist file names as returned by ``downloadAndConcat``;
            falsy entries (skipped or failed downloads) are ignored.
        argv: Command-line arguments. ``-mac-crf`` re-encodes to 720p x264 as
            ``<name>[x264].mp4``; ``-vn`` copies the audio track to
            ``<name>.aac``. ``-nv`` is accepted as well because the usage
            notes at the top of the script spell the flag that way.
    """
    for fileName in fileNames:
        if not fileName:
            continue
        name, _ = _split_playlist_name(fileName)
        # Compress the video.
        if "-mac-crf" in argv:
            _run_ffmpeg(
                ["-y", "-i", f"{name}.mp4", "-vf", "scale=-1:720", "-c:v", "libx264", "-crf", "30", "-c:a", "aac", f"{name}[x264].mp4"])
        # Extract the audio.
        if "-vn" in argv or "-nv" in argv:
            _run_ffmpeg(
                ["-y", "-i", f"{name}.mp4", "-vn", "-c:a", "copy", f"{name}.aac"])


if __name__ == "__main__":
    playlists = get_m3u8_list()
    finished = []
    print("检测到可下载文件: ", playlists)
    for fileName in playlists:
        finished.append(downloadAndConcat(fileName))
    # Post-process only the videos that were actually produced.
    extraFFmpeg([done for done in finished if done], sys.argv)

@ -19,17 +19,22 @@ driver_edge = webdriver.Edge(options=options)
my_url = "https://www.douyin.com/" my_url = "https://www.douyin.com/"
driver_edge.get(my_url) driver_edge.get(my_url)
sleep(1) sleep(5)
x=driver_edge.find_element(By.XPATH,'//*[@id="login-pannel"]/div[2]') #x=driver_edge.find_element(By.XPATH,'//*[@id="login-pannel"]/div[2]')
x.click() #x.click()
sleep(1) #sleep(1)
# 模拟点击 # 模拟点击
search_box = driver_edge.find_element(By.XPATH,'//*[@id="douyin-header"]/div[1]/header/div/div/div[1]/div/div[2]/div/div/input').send_keys('zy2752629612',Keys.ENTER) search_box = driver_edge.find_element(By.XPATH,'//*[@id="douyin-header"]/div[1]/header/div/div/div[1]/div/div[2]/div/div/input').send_keys('zy2752629612',Keys.ENTER)
sleep(100) sleep(10)
s=driver_edge.find_element(By.XPATH,'//*[@id="search-content-area"]/div/div[1]/div[2]/div[1]/ul/li[1]/div/div/div/div/div/a/div/img')
x.click()
sleep(5)
h=driver_edge.find_element(By.XPATH,'//*[@id="douyin-right-container"]/div[2]/div/div/div[2]/div[3]/div[3]/div[1]/button[2]')
x.click()
# 获取cookie # 获取cookie
cookies = driver_edge.get_cookies() cookies = driver_edge.get_cookies()
for cookie in cookies: for cookie in cookies:

@ -1,34 +1,38 @@
# import requests import requests
# import json import json
# from pprint import pprint from pprint import pprint
# import csv import csv
# import os import os
# import glob import glob
# from time import sleep from time import sleep
# #评论接口 # #评论接
url = 'https://dtliving-bj.dingtalk.com/live_hp/94ec1da9-8aa3-48cc-baa3-d0705a56cef8'
# url = 'https://www.educoder.net//users/liuq1016/classrooms'
headers={
'Host':'dtliving-bj.dingtalk.com',
'Connection':'keep-alive',
'sec-ch-ua':'"Chromium";v="91"',
'sec-ch-ua-mobile':'?0',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36 dingtalk-win/1.0.0 nw(0.14.7) DingTalk(7.5.30-Release.5179102) Mojo/1.0.0 Native AppType(release) Channel/201200 Architecture/x86_64',
'Accept':'*/*',
'Origin':'https://n.dingtalk.com',
'Cookie':'wolai_client_id=e3nhEuFqtWgkD2Yt8v14qg; xlly_s=1; wld_ptoken=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjIwMzI1ODMwMTAsInVzZXJJZCI6Ik9nK0dPWDJCbE9OcGJreG5Yd3B3c0E9PSIsImlhdCI6MTcxNzA1MDIxMH0.xEUHp4TiCxwX1V_mKwVWajEnl0Ot-Hdt6LFRLMIc708; doc_atoken=Njc1NDYzODYziWZBenJdOcuBVXpMgRChXaUibXCRNuWp; account=oauth_k1%3AKDtVLmrPbniRb0xfVmca8q91R7vgGLKhOgr0TAcfUzJ5SyINzZHi7SCwXQeEkO24A6Q8W3W%2F3V722I2qpbT5a5kEF2cEsnEOJQjT0DcXzAE%3D; deviceid=YjgxN2IxOTIyMzMyYmI4NWI0_unified; pub_uid=%2FdNdJzpYI64jnAK%2FxCH6gQ%3D%3D; token=dtspace_u-2842c2b7-8fc93dbeda-2136647e-1ba738-542a31cf-2cf9f444-0737-4f85-bf07-275cebc9e90e; dt_s=u-2842c2b7-8fc93dbeda-2136647e-1ba738-542a31cf-2cf9f444-0737-4f85-bf07-275cebc9e90e; XSRF-TOKEN=d6ba2d0e-8a4a-410f-b200-84ecf5ef0af4; up_ab=y; preview_ab=y; login_pc_deviceid=b817b1922332bb85b432b4e9dbc5405a; umidtoken=P1gA89NYBPr1YueuyHE_zQMnnf9l9NxVPN6dP0oxvE8TqQp0xMK-su2Feya3sAmjo96McaefCOsQRk8fUadoj9SR; cna=YQnfHi3N610CAd+UhyXyqDv+; dd_sid=k0_daf1213dc07258664867_213ddaf1665872c059910b17cd834c2825663df71617; tfstk=f7f-HaVHwoqlNUyhNawcY5rqpqU0m_Qylg7stHxodiIADnfhrwfkRBIV4p1kZajvJgjcq6Xl-DIA0gHhEa9hvMID0av3YzuCHg76EBfd8XLCRMzyxzyczaReOlqG95bPzlWclPqDOy_Xsd7TZ5VGzagkPPbbsyucdn-HAB9WOETX0FMBOptWGrLB-0MINM_bke-XRUTBdZMXWFiSAjAIwH3WbXC01Rh7kW8-OX1bTLKOPvcIOsLJe6_WDxMfMUpJjKbtUG55mw1laKyjHB7cBMBCfoDkVOLODdX8XbIO4e_pRgFZWn1RJspPn5ivut_ld_S-JXTPZ1vpbKUITpAfsd6122zNlQT6IsRE-XjeN3p1IdgK_NpCn_JNZoGJ1O7kZ9s_mYx5BesPD1fTTfoMuIH7krHELLTDMfMAU1TnhtYvjrSqLvJvuEKgkUDELL9Hkh4fBvkeh81..; isg=BFxc7975VjdMRSJr_S9ab2LMLXoO1QD_spt0RTZdvMcqgf0LXuXIjh6l5-l5CThX',
'Sec-Fetch-Site':'same-site',
'Sec-Fetch-Mode':'cors',
'Sec-Fetch-Dest':'empty',
'Referer':'https://n.dingtalk.com/',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9',
# headers={
# 'Referer':'https://www.educoder.net/',
# 'cookie':'autologin_trustie=3b5b3cee750491d7125ffad4983c8921d9514fe4; _educoder_session=78309f003e92566c24fe5b1090c0a80e',
# 'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'
# }
# response = requests.get(url=url,headers=headers,)
}
response = requests.get(url=url,headers=headers,)
# #response_text = response.json()['comments'] # #response_text = response.json()['comments']
# print(response.text) print(response)
def p(n):
    """Return ``n`` raised to the tenth power."""
    return n**10


n = 0
try:
    # input() returns a string; without the int() conversion ``n**10``
    # raises TypeError for every input and the error branch always runs.
    n = int(input('jnn'))
    p(n)
except (ValueError, EOFError):
    # Non-numeric input, or stdin closed before anything was entered.
    print("bjbugbub")

Loading…
Cancel
Save