From ccb6425245be2c2fbf377f18e125f3e4b8c9657a Mon Sep 17 00:00:00 2001
From: pw9kqx2if <1563604327@qq.com>
Date: Tue, 10 May 2022 22:11:48 +0800
Subject: [PATCH] ADD file via upload
---
pythonbilbil.txt | 119 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 119 insertions(+)
create mode 100644 pythonbilbil.txt
diff --git a/pythonbilbil.txt b/pythonbilbil.txt
new file mode 100644
index 0000000..08ccbf4
--- /dev/null
+++ b/pythonbilbil.txt
@@ -0,0 +1,119 @@
+import json # json.loads 用于解码 JSON 数据。该函数返回 Python 字段的数据类型
+import os
+import pprint
+import requests
+# from bs4 import BeautifulSoup # 网页解析,获取数据
+import re # 正则表达式,进行文字匹配`
+import xlwt # 进行excel操作
+from concurrent.futures import ThreadPoolExecutor
+
+# Listing endpoint queried once when the module is loaded.
+search_url = 'https://api.bilibili.com/x/web-interface/popular/precious?page_size=100&page=1'
+# Browser-like headers sent with every HTTP request made by this script.
+head = {
+    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36",
+    'referer': 'https://www.bilibili.com/',  # sent to get past hotlink protection
+}
+# The functions below read result_list, so the listing is fetched up front.
+# The timeout keeps a stalled connection from hanging the script forever, and
+# raise_for_status() reports an HTTP error directly instead of letting it
+# surface later as a confusing JSON/KeyError failure.
+response = requests.get(search_url, headers=head, timeout=30)
+response.raise_for_status()
+result_list = response.json()['data']['list']  # one dict per listed video
+
+# Default location of the ffmpeg executable used to merge the two tracks.
+DEFAULT_FFMPEG_PATH = (
+    'D:\\pycharm\\ffmpeg\\ffmpeg-2022-04-18-git-d5687236ab-essentials_build'
+    '\\bin\\ffmpeg.exe'
+)
+
+
+def _parse_video_page(html):
+    """Return ``(title, play_info)`` extracted from a video page's HTML.
+
+    Raises:
+        ValueError: If the title or the embedded play info cannot be found.
+    """
+    # NOTE(review): both patterns assume the page markup - a <title> ending in
+    # "_哔哩哔哩_bilibili" and an inline "window.__playinfo__=" script.
+    # Confirm against a live page before relying on them.
+    title_match = re.search(r'<title[^>]*>(.*?)_哔哩哔哩_bilibili', html)
+    info_match = re.search(
+        r'<script>window\.__playinfo__=(.*?)</script>', html)
+    if title_match is None or info_match is None:
+        raise ValueError('video title or play info not found in page')
+    # The title becomes a file name, so drop the characters Windows does not
+    # allow in file names. The backslash must be listed explicitly: inside a
+    # character class "\/" only matches "/".
+    title = re.sub(r'[\\/:*?"<>|]', '', title_match.group(1))
+    return title, json.loads(info_match.group(1))
+
+
+def _download_to_file(url, path):
+    """Stream the resource at *url* into the file at *path*."""
+    # stream=True avoids holding a whole media file in memory at once.
+    with requests.get(url, headers=head, stream=True, timeout=30) as resp:
+        resp.raise_for_status()
+        with open(path, 'wb') as fp:
+            for chunk in resp.iter_content(chunk_size=1024 * 1024):
+                fp.write(chunk)
+
+
+def _remove_if_present(path):
+    """Delete *path*, ignoring the case where it was never created."""
+    try:
+        os.remove(path)
+    except FileNotFoundError:
+        pass
+
+
+def download_mp3mp4(url, ffmpeg_path=DEFAULT_FFMPEG_PATH):
+    """Download a video page's audio and video tracks and merge them.
+
+    The tracks are saved as ``<title>.mp3`` and ``<title>.mp4`` in the current
+    directory, merged by ffmpeg into ``new<title>.mp4`` without re-encoding,
+    and the two intermediate files are then deleted.
+
+    Args:
+        url: Address of the video page to download from.
+        ffmpeg_path: Path of the ffmpeg executable to run.
+
+    Raises:
+        ValueError: If the page does not contain the expected title or
+            play info.
+        RuntimeError: If ffmpeg exits with a non-zero status.
+    """
+    import subprocess  # local import: only this function starts a process
+
+    page = requests.get(url, headers=head, timeout=30)
+    page.raise_for_status()
+    title, play_info = _parse_video_page(page.text)
+    print(title)
+
+    dash = play_info['data']['dash']
+    audio_path = title + '.mp3'
+    video_path = title + '.mp4'
+    merged_path = 'new' + title + '.mp4'
+    try:
+        print('音频下载中')
+        _download_to_file(dash['audio'][0]['baseUrl'], audio_path)
+        print('视频下载中')
+        _download_to_file(dash['video'][0]['baseUrl'], video_path)
+        # An argument list (no shell) means characters in the page title can
+        # never be interpreted as shell syntax.
+        command = [
+            ffmpeg_path,
+            '-i', audio_path,
+            '-i', video_path,
+            '-acodec', 'copy',
+            '-vcodec', 'copy',
+            merged_path,
+        ]
+        print(command)
+        result = subprocess.run(command)
+    finally:
+        # The intermediate tracks are removed whether or not the merge worked.
+        _remove_if_present(audio_path)
+        _remove_if_present(video_path)
+    if result.returncode != 0:
+        raise RuntimeError(
+            f'ffmpeg exited with status {result.returncode} for {url}')
+    print('视频合成完成')
+
+def getData(datalist):
+    """Build one table row for every entry of the module-level ``result_list``.
+
+    Args:
+        datalist: Ignored. It is kept only so existing callers keep working;
+            the rows are always built from ``result_list``.
+
+    Returns:
+        A list with one 10-item list per entry, in this order: uploader
+        name, title, achievement, cleaned description, then the view,
+        danmaku, reply, favorite, coin and share counts.
+    """
+    stat_keys = ('view', 'danmaku', 'reply', 'favorite', 'coin', 'share')
+    rows = []
+    # Iterate over whatever the listing returned instead of a fixed count, so
+    # a shorter listing does not raise IndexError and a longer one is not cut.
+    for entry in result_list:
+        # Remove parentheses, whitespace, '+' and '?' from the description,
+        # then turn every '/' into a space.
+        desc = re.sub(r'[()\s+?]', '', entry['desc'])
+        desc = desc.replace('/', ' ')
+        stat = entry['stat']
+        rows.append([
+            entry['owner']['name'],
+            entry['title'],
+            entry['achievement'],
+            desc,
+            *(stat[key] for key in stat_keys),
+        ])
+    return rows
+
+def saveDATA(datalist, savepath):
+    """Write the rows to a single-sheet .xls workbook.
+
+    Row 0 of the sheet holds the column headings; each item of *datalist*
+    fills the next row.
+
+    Args:
+        datalist: Sequence of rows; each row is a sequence of cell values in
+            heading order. Values beyond the last heading are not written.
+        savepath: Path of the workbook file to write.
+    """
+    print('excel saving.....')
+    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
+    # cell_overwrite_ok=True lets a cell be written more than once without
+    # xlwt raising an error.
+    worksheet = workbook.add_sheet('入站必看', cell_overwrite_ok=True)
+    col = ('UP主昵称', '视频标题', '视频成就', '视频简介', '播放量',
+           '弹幕数', '评论数', '收藏数', '投币数', '分享数')
+    for column, heading in enumerate(col):
+        worksheet.write(0, column, heading)
+    # Write however many rows were supplied rather than a fixed count, so a
+    # short list does not raise IndexError and a long one is not truncated.
+    for row_number, row in enumerate(datalist, start=1):
+        for column, value in enumerate(row[:len(col)]):
+            worksheet.write(row_number, column, value)
+    workbook.save(savepath)
+    print('excel saved')
+
+if __name__ == '__main__':
+    # Step 1: turn the fetched listing into table rows. getData does not use
+    # the value passed to it; the argument is kept to match its signature.
+    datalist = getData(search_url)
+    # Step 2: store the rows in a spreadsheet.
+    savepath = '入B站必看.xls'
+    saveDATA(datalist, savepath)
+
+    # Step 3: download every listed video using three worker threads. The
+    # futures are kept so that an exception raised inside a worker is
+    # reported instead of being silently discarded.
+    submitted = []
+    with ThreadPoolExecutor(3) as t:
+        for video in result_list:
+            bvid = video["bvid"]
+            future = t.submit(
+                download_mp3mp4, f'https://www.bilibili.com/video/{bvid}')
+            submitted.append((bvid, future))
+    # Leaving the with-block waits for all submitted tasks to finish.
+    failed = 0
+    for bvid, future in submitted:
+        error = future.exception()
+        if error is not None:
+            failed += 1
+            print(f'下载失败 {bvid}: {error!r}')
+    if failed:
+        print(f'{failed} 个视频下载失败')
+    else:
+        print('全部下载完毕')
+