ADD file via upload

master
pw9kqx2if 3 years ago
parent ee87778d0f
commit ccb6425245

@ -0,0 +1,119 @@
import json # json.loads 用于解码 JSON 数据。该函数返回 Python 字段的数据类型
import os
import pprint
import requests
# from bs4 import BeautifulSoup # 网页解析,获取数据
import re # 正则表达式,进行文字匹配`
import xlwt # 进行excel操作
from concurrent.futures import ThreadPoolExecutor
# API endpoint for Bilibili's "precious" popular list; a single page of up to
# 100 entries is requested.
search_url = 'https://api.bilibili.com/x/web-interface/popular/precious?page_size=100&page=1'

# Headers sent with every request in this script so it looks like a browser.
head = {
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36",
    'referer': 'https://www.bilibili.com/',  # referer for the site's hotlink protection
}

# Fetch the list once at import time; the functions below read `result_list`.
# The timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() turns an HTTP error into an immediate, explicit failure
# instead of a confusing JSON/KeyError further down.
response = requests.get(search_url, headers=head, timeout=30)
response.raise_for_status()
result_list = response.json()['data']['list']  # one dict per listed video
def download_mp3mp4(
    url,
    ffmpeg_path=r'D:\pycharm\ffmpeg\ffmpeg-2022-04-18-git-d5687236ab-essentials_build\bin\ffmpeg.exe',
    timeout=30,
):
    """Download a video page's audio and video streams and mux them with ffmpeg.

    The page at ``url`` is fetched, its title and the embedded
    ``window.__playinfo__`` JSON are extracted, and the first audio and first
    video stream listed there are saved as ``<title>.mp3`` and ``<title>.mp4``
    in the current directory. ffmpeg then copies both streams into
    ``new<title>.mp4`` and the two intermediate files are removed.

    Args:
        url: Address of the video page.
        ffmpeg_path: Path of the ffmpeg executable to run.
        timeout: Timeout in seconds passed to every HTTP request.

    Raises:
        ValueError: If the title or the playinfo JSON is not found in the page.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status;
            the intermediate files are kept in that case.
    """
    import subprocess  # local import: not part of the file's import block

    page = requests.get(url, headers=head, timeout=timeout).text

    # NOTE(review): the title pattern ends with a literal space after
    # </title>; confirm this matches the markup the site actually serves.
    title_match = re.search('<title data-vue-meta="true">(.*?)_哔哩哔哩_bilibili</title> ', page)
    playinfo_match = re.search('<script>window.__playinfo__=(.*?)</script>', page)
    if title_match is None or playinfo_match is None:
        raise ValueError(f'could not find title/playinfo in page: {url}')

    # Strip every character Windows forbids in file names (including the
    # backslash and question mark) so the title is always a plain file name.
    title = re.sub(r'[\\/:*?"<>|]', '', title_match.group(1))
    print(title)

    playinfo = json.loads(playinfo_match.group(1))
    dash = playinfo['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']

    audio_path = title + '.mp3'
    video_path = title + '.mp4'
    merged_path = 'new' + title + '.mp4'

    audio_content = requests.get(audio_url, headers=head, timeout=timeout).content
    video_content = requests.get(video_url, headers=head, timeout=timeout).content
    print('音频下载中')
    with open(audio_path, 'wb') as fp:
        fp.write(audio_content)
    print('视频下载中')
    with open(video_path, 'wb') as fp:
        fp.write(video_content)

    # Pass the arguments as a list (no shell) so characters in the scraped
    # title can never be interpreted as shell syntax.
    command = [
        ffmpeg_path,
        '-i', audio_path,
        '-i', video_path,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merged_path,
    ]
    print(' '.join(command))
    # check=True: if muxing fails, raise before the inputs are deleted.
    subprocess.run(command, check=True)
    print('视频合成完成')

    os.remove(audio_path)
    os.remove(video_path)
#获取网页数据
def getData(datalist):
    """Turn every entry of the module-level ``result_list`` into a flat row.

    Each row holds, in order: uploader name, title, achievement, description,
    then the view, danmaku, reply, favorite, coin and share counts.

    Args:
        datalist: Ignored. Kept only so existing callers, which pass an
            argument here, keep working; the rows are always built fresh.

    Returns:
        A list with one 10-element list per entry in ``result_list``.
    """
    stat_keys = ('view', 'danmaku', 'reply', 'favorite', 'coin', 'share')
    rows = []
    # Iterate over whatever the API returned instead of assuming a fixed
    # number of entries.
    for entry in result_list:
        # NOTE(review): this character class removes whitespace AND the
        # characters ( ) + ? from the description; kept as-is, but confirm
        # whether only whitespace was meant to be stripped.
        desc = re.sub(r'[\s()+?]', '', entry['desc'])
        desc = desc.replace('/', ' ')
        stats = entry['stat']
        rows.append([
            entry['owner']['name'],
            entry['title'],
            entry['achievement'],
            desc,
            *(stats[key] for key in stat_keys),
        ])
    return rows
# 3.保存数据
def saveDATA(datalist, savepath):
    """Write the collected rows to an .xls workbook.

    A header row is written first, followed by one spreadsheet row per item
    of ``datalist``.

    Args:
        datalist: Sequence of rows; each row is a sequence whose first ten
            values line up with the header columns below.
        savepath: Path of the workbook file to write.
    """
    print('excel saving.....')
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    worksheet = workbook.add_sheet('入站必看', cell_overwrite_ok=True)

    headers = ('UP主昵称', '视频标题', '视频成就', '视频简介', '播放量',
               '弹幕数', '评论数', '收藏数', '投币数', '分享数')
    for col_idx, header in enumerate(headers):
        worksheet.write(0, col_idx, header)

    # Write however many rows were actually collected; row 0 is the header.
    for row_idx, row in enumerate(datalist, start=1):
        # Only the columns that have a header are written.
        for col_idx, value in enumerate(row[:len(headers)]):
            worksheet.write(row_idx, col_idx, value)

    workbook.save(savepath)
    print('excel saved')
if __name__ == '__main__':
    # 1. Collect the rows for the list shown at
    #    https://www.bilibili.com/v/popular/history
    datalist = getData(search_url)

    # 2. Save them to a spreadsheet.
    savepath = '入B站必看.xls'
    saveDATA(datalist, savepath)

    # 3. Download every listed video, three at a time. The futures are kept so
    #    that exceptions raised inside worker threads are not silently lost.
    with ThreadPoolExecutor(3) as pool:
        jobs = [
            (entry['bvid'],
             pool.submit(download_mp3mp4, f'https://www.bilibili.com/video/{entry["bvid"]}'))
            for entry in result_list
        ]

    # Leaving the `with` block waits for all submitted jobs to finish.
    for bvid, job in jobs:
        error = job.exception()
        if error is not None:
            print(f'download failed for {bvid}: {error!r}')
    print('全部下载完毕')
Loading…
Cancel
Save