# Provenance: extracted from git patch ccb6425245be2c2fbf377f18e125f3e4b8c9657a
# ("[PATCH] ADD file via upload", pw9kqx2if <1563604327@qq.com>,
# Tue, 10 May 2022 22:11:48 +0800), which added this script as pythonbilbil.txt.
"""Scrape Bilibili's "must-watch" popular list, export it to Excel, and download the videos.

Running the script:

1. fetches the popular/precious list from the Bilibili web API,
2. writes per-video statistics to an ``.xls`` workbook,
3. downloads the audio and video streams of every listed video in a small
   thread pool and merges each pair with ffmpeg.
"""
import json  # json.loads decodes the JSON play-info blob embedded in the video page
import os
import pprint
import re  # regular expressions for text matching
import shutil
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
import xlwt  # Excel (.xls) output

search_url = 'https://api.bilibili.com/x/web-interface/popular/precious?page_size=100&page=1'
head = {  # browser-like request headers
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36",
    'referer': 'https://www.bilibili.com/'  # sent to get past hotlink protection
}

# Seconds to wait on any single HTTP request before giving up instead of
# hanging forever.
REQUEST_TIMEOUT = 30

# Preferred ffmpeg binary (the location the script was originally written
# for).  If it does not exist, an ``ffmpeg`` found on PATH is used instead.
FFMPEG_PATH = 'D:\\pycharm\\ffmpeg\\ffmpeg-2022-04-18-git-d5687236ab-essentials_build\\bin\\ffmpeg.exe'

# Fetch the list once at import time; ``result_list`` is the module-level
# data source shared by getData() and the download loop.
response = requests.get(search_url, headers=head, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
result_list = response.json()['data']['list']


def _ffmpeg_executable():
    """Return the ffmpeg binary to run: FFMPEG_PATH if present, else one on PATH."""
    if os.path.isfile(FFMPEG_PATH):
        return FFMPEG_PATH
    return shutil.which('ffmpeg') or FFMPEG_PATH


def _download_to_file(url, path):
    """Stream ``url`` into ``path`` in chunks so large media never sits fully in memory."""
    with requests.get(url, headers=head, stream=True, timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        with open(path, 'wb') as fp:
            for chunk in resp.iter_content(chunk_size=1 << 20):
                fp.write(chunk)


def download_mp3mp4(url):
    """Download the audio and video streams of one video page and merge them.

    The page at ``url`` is fetched, its title and embedded play-info JSON are
    extracted, the first audio and first video DASH streams are saved as
    ``<title>.mp3`` / ``<title>.mp4``, and ffmpeg copies both into
    ``new<title>.mp4``.  The two intermediate files are removed only after a
    successful merge.

    Args:
        url: URL of a Bilibili video page.

    Raises:
        ValueError: the title or the play-info JSON cannot be found in the page.
        RuntimeError: ffmpeg exits with a non-zero status (the downloaded
            streams are left on disk so the merge can be retried).
        requests.RequestException: any HTTP request fails.
    """
    res = requests.get(url, headers=head, timeout=REQUEST_TIMEOUT).text

    # NOTE(review): the patterns in the uploaded script had lost their HTML
    # tags (the play-info pattern was an empty string, so json.loads('')
    # always failed).  The tags below are reconstructed -- confirm against a
    # live page.
    title_match = re.search(r'<title[^>]*>(.*?)_哔哩哔哩_bilibili', res)
    if title_match is None:
        raise ValueError(f'video title not found in page: {url}')
    # Drop every character that is illegal in a Windows file name, including
    # the backslash (the original character class missed it).
    title = re.sub(r'[\\/:*?"<>|]', '', title_match.group(1))

    playinfo_match = re.search(r'<script>window\.__playinfo__=(.*?)</script>', res)
    if playinfo_match is None:
        raise ValueError(f'play info not found in page: {url}')
    print(title)
    json_data = json.loads(playinfo_match.group(1))  # str -> dict

    audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
    video_url = json_data['data']['dash']['video'][0]['baseUrl']

    audio_path = title + '.mp3'
    video_path = title + '.mp4'
    # The title no longer contains path separators, so the merged file is
    # simply the title with a "new" prefix.
    merged_path = 'new' + title + '.mp4'

    print('音频下载中')
    _download_to_file(audio_url, audio_path)
    print('视频下载中')
    _download_to_file(video_url, video_path)

    ffmpeg = _ffmpeg_executable()
    # Human-readable form of the command, printed for reference only.
    com = f'{ffmpeg} -i "{audio_path}" -i "{video_path}" ' \
          f'-acodec copy -vcodec copy "{merged_path}"'
    print(com)
    # Run with an argument list (no shell) so characters in a scraped title
    # can never be interpreted by a shell.
    completed = subprocess.run([
        ffmpeg,
        '-i', audio_path,
        '-i', video_path,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merged_path,
    ])
    if completed.returncode != 0:
        raise RuntimeError(
            f'ffmpeg exited with status {completed.returncode} for {title!r}'
        )
    print('视频合成完成')
    os.remove(audio_path)
    os.remove(video_path)


def getData(datalist, entries=None, limit=85):
    """Build one row of statistics per video from the popular list.

    Args:
        datalist: Ignored.  The original implementation overwrote this
            argument immediately; it is kept so existing calls still work.
        entries: Sequence of video dicts as returned by the API.  Defaults to
            the module-level ``result_list``.
        limit: Maximum number of entries to convert (default 85, the count
            the script was written for).  ``None`` converts all of them.

    Returns:
        A list of rows, each ``[owner name, title, achievement, description,
        views, danmaku, replies, favorites, coins, shares]``.
    """
    source = result_list if entries is None else entries
    rows = []
    # Slicing (rather than indexing 0..84) tolerates a shorter list.
    for item in source[:limit]:
        stat = item['stat']
        # Same character set the original pattern '[(\s+)?(\s+)?]' matched:
        # whitespace plus the literal characters ( ) + ?
        # NOTE(review): stripping ( ) + ? may be unintended -- confirm.
        desc = re.sub(r'[\s()+?]', '', item['desc'])
        desc = desc.replace('/', ' ')
        rows.append([
            item['owner']['name'],  # uploader nickname
            item['title'],          # video title
            item['achievement'],    # video achievement text
            desc,                   # cleaned description
            stat['view'],           # play count
            stat['danmaku'],        # danmaku (bullet comment) count
            stat['reply'],          # comment count
            stat['favorite'],       # favorite count
            stat['coin'],           # coin count
            stat['share'],          # share count
        ])
    return rows


def saveDATA(datalist, savepath):
    """Write the rows produced by getData() to an ``.xls`` workbook.

    Args:
        datalist: Iterable of rows; each row supplies up to ten values in the
            column order of the header written below.
        savepath: Destination path of the workbook.
    """
    print('excel saving.....')
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # cell_overwrite_ok=True allows a cell to be written more than once.
    worksheet = workbook.add_sheet('入站必看', cell_overwrite_ok=True)
    col = ('UP主昵称', '视频标题', '视频成就', '视频简介', '播放量', '弹幕数', '评论数', '收藏数', '投币数', '分享数')
    for j, header in enumerate(col):
        worksheet.write(0, j, header)
    # Iterate the rows actually present instead of assuming exactly 85.
    for i, row in enumerate(datalist, start=1):
        for j, value in enumerate(row[:len(col)]):
            worksheet.write(i, j, value)
    workbook.save(savepath)
    print('excel saved')


if __name__ == '__main__':
    # 1. Collect the list data (source page: https://www.bilibili.com/v/popular/history)
    datalist = getData(search_url)
    savepath = '入B站必看.xls'
    # 2. Save it to Excel
    saveDATA(datalist, savepath)

    # 3. Download every listed video, three at a time.
    with ThreadPoolExecutor(3) as t:
        futures = {
            t.submit(download_mp3mp4, f'https://www.bilibili.com/video/{index["bvid"]}'): index['bvid']
            for index in result_list
        }
        # Surface worker failures; an uninspected future would swallow them.
        for future in as_completed(futures):
            error = future.exception()
            if error is not None:
                print(f'{futures[future]} download failed: {error!r}')
    print('全部下载完毕')