You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

120 lines
5.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json # json.loads 用于解码 JSON 数据。该函数返回 Python 字段的数据类型
import os
import pprint
import requests
# from bs4 import BeautifulSoup # 网页解析,获取数据
import re # 正则表达式,进行文字匹配`
import xlwt # 进行excel操作
from concurrent.futures import ThreadPoolExecutor
# Bilibili API endpoint queried for the video listing (first page, page size 100).
search_url = 'https://api.bilibili.com/x/web-interface/popular/precious?page_size=100&page=1'

# Browser-like request headers sent with every HTTP request in this script.
head = {
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36",
    'referer': 'https://www.bilibili.com/',  # sent to get past anti-hotlinking checks
}

# Fetch the listing once, at module load. A timeout keeps a stalled connection
# from hanging the script forever, and raise_for_status() turns an HTTP error
# into an explicit exception instead of an opaque JSON/KeyError further down.
response = requests.get(search_url, headers=head, timeout=30)
response.raise_for_status()

# The entries of interest live under data -> list in the JSON payload.
result_list = response.json()['data']['list']
def download_mp3mp4(url, ffmpeg_path=None):
    """Download a video page's audio and video streams and merge them with ffmpeg.

    The page at *url* is fetched with the module-level ``head`` headers. The
    title and the ``window.__playinfo__`` JSON are extracted from the HTML,
    the first audio and first video ``baseUrl`` are downloaded to
    ``<title>.mp3`` and ``<title>.mp4`` in the current directory, and ffmpeg
    copies both streams into ``new<title>.mp4``. The two intermediate files
    are removed afterwards, whether or not ffmpeg succeeded.

    Args:
        url: Address of the video page to download.
        ffmpeg_path: Path of the ffmpeg executable. Defaults to the
            installation path this script was originally written against.

    Raises:
        IndexError: If the title or the play-info script is not found in the page.
        KeyError: If the play-info JSON lacks the expected dash/audio/video keys.
        OSError: If a file cannot be written or the ffmpeg executable cannot
            be started.
    """
    import subprocess  # local import so this block does not depend on the file header

    if ffmpeg_path is None:
        ffmpeg_path = ('D:\\pycharm\\ffmpeg\\'
                       'ffmpeg-2022-04-18-git-d5687236ab-essentials_build'
                       '\\bin\\ffmpeg.exe')

    res = requests.get(url, headers=head, timeout=30).text
    # NOTE(review): this pattern requires a space right after </title>;
    # confirm the served HTML really contains it, otherwise [0] raises IndexError.
    title = re.findall('<title data-vue-meta="true">(.*?)_哔哩哔哩_bilibili</title> ', res)[0]
    # Drop every character Windows forbids in file names. The previous
    # character class missed the backslash and the question mark.
    title = re.sub(r'[\\/:*?"<>|]', '', title)
    html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', res)[0]
    print(title)

    # Parse the embedded JSON text into a dict and pick the first stream of each kind.
    dash = json.loads(html_data)['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']

    audio_path = title + '.mp3'
    video_path = title + '.mp4'
    # The title no longer contains path separators, so the merged file is
    # simply the title prefixed with "new".
    output_path = 'new' + title + '.mp4'

    audio_content = requests.get(audio_url, headers=head, timeout=30).content
    video_content = requests.get(video_url, headers=head, timeout=30).content
    print('音频下载中')
    with open(audio_path, 'wb') as fp:
        fp.write(audio_content)
    print('视频下载中')
    with open(video_path, 'wb') as fp:
        fp.write(video_content)

    # Pass ffmpeg an argument list instead of a shell string: the title comes
    # from a web page and must never be interpreted by a shell.
    com = [
        ffmpeg_path,
        '-i', audio_path,
        '-i', video_path,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        output_path,
    ]
    print(' '.join(com))
    try:
        completed = subprocess.run(com)
    finally:
        # Always clean up the intermediate downloads, as the original did.
        os.remove(audio_path)
        os.remove(video_path)

    # Only claim success when ffmpeg actually exited cleanly.
    if completed.returncode == 0:
        print('视频合成完成')
    else:
        print(f'视频合成失败: ffmpeg exit code {completed.returncode}')
#获取网页数据
def getData(datalist, limit=85):
    """Turn the entries of the module-level ``result_list`` into spreadsheet rows.

    Each row holds, in order: uploader name, video title, achievement text,
    cleaned description, view count, danmaku count, reply count, favorite
    count, coin count and share count.

    Args:
        datalist: Ignored. Kept only so existing callers keep working; a new
            list is always built and returned.
        limit: Maximum number of entries to convert (85 by default, the
            previously hard-coded count). Fewer entries are handled without
            error; pass ``None`` to convert every entry.

    Returns:
        A list of 10-element lists, one per converted entry.

    Raises:
        KeyError: If an entry lacks one of the expected keys.
    """
    # Slicing (rather than indexing 0..84) avoids an IndexError when the API
    # returns fewer entries than the limit.
    entries = result_list if limit is None else result_list[:limit]

    rows = []
    for entry in entries:
        stat = entry['stat']

        # NOTE(review): this character class strips whitespace *and* the
        # literal characters "(", ")", "+" and "?". It was probably meant to
        # strip whitespace only; behaviour is kept as-is, only made a raw
        # string so "\s" is no longer an invalid string escape.
        desc = re.sub(r'[(\s+)?(\s+)?]', "", entry['desc'])
        desc = desc.replace('/', " ")

        rows.append([
            entry['owner']['name'],   # uploader nickname
            entry['title'],           # video title
            entry['achievement'],     # video achievement
            desc,                     # video description
            stat['view'],             # views
            stat['danmaku'],          # danmaku (bullet comments)
            stat['reply'],            # replies
            stat['favorite'],         # favorites
            stat['coin'],             # coins
            stat['share'],            # shares
        ])
    return rows
# 3.保存数据
def saveDATA(datalist, savepath):
    """Write the rows in *datalist* to an Excel workbook saved at *savepath*.

    The sheet is named '入站必看'. Row 0 holds the ten column headers and each
    following row holds one entry of *datalist*.

    Args:
        datalist: Iterable of rows; each row is an iterable of cell values in
            header order. Values beyond the tenth column are not written.
        savepath: Destination path handed to ``workbook.save``.
    """
    print('excel saving.....')
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # cell_overwrite_ok=True lets a cell be written more than once.
    worksheet = workbook.add_sheet('入站必看', cell_overwrite_ok=True)
    col = ('UP主昵称', '视频标题', '视频成就', '视频简介', '播放量',
           '弹幕数', '评论数', '收藏数', '投币数', '分享数')

    # Header row.
    for j, header in enumerate(col):
        worksheet.write(0, j, header)

    # Data rows. Iterating over what is actually there (instead of a fixed
    # 85 x 10 grid) avoids IndexError on short input and silent truncation
    # on long input.
    for i, data in enumerate(datalist, start=1):
        for j, value in zip(range(len(col)), data):
            worksheet.write(i, j, value)

    workbook.save(savepath)
    print('excel saved')
if __name__ == '__main__':
    # 1. Scrape the listing (page: https://www.bilibili.com/v/popular/history).
    #    getData builds its own list, so the argument value is not used.
    datalist = getData(search_url)

    # 2. Save the rows to a spreadsheet.
    savepath = '入B站必看.xls'
    saveDATA(datalist, savepath)

    # 3. Download every listed video using a pool of three worker threads.
    futures = {}
    with ThreadPoolExecutor(3) as t:
        for index in result_list:
            video_page = f'https://www.bilibili.com/video/{index["bvid"]}'
            futures[t.submit(download_mp3mp4, video_page)] = video_page

    # Leaving the with-block waits for all tasks. An exception raised inside
    # a worker is stored on its future and would otherwise vanish silently,
    # so report each failure explicitly.
    for future, video_page in futures.items():
        error = future.exception()
        if error is not None:
            print(f'下载失败 {video_page}: {error!r}')
    print('全部下载完毕')