"""Export Bilibili's "precious" popular list to Excel and download each video.

At import time the module requests ``search_url`` and stores the returned
``data.list`` entries in ``result_list``.  When run as a script it:

1. flattens the entries into rows (``getData``),
2. writes the rows to an ``.xls`` workbook (``saveDATA``),
3. downloads every video's audio and video streams and merges them with
   ffmpeg (``download_mp3mp4``), using a pool of three worker threads.
"""
import json  # json.loads decodes the play-info JSON embedded in the video page
import os
import pprint
import re  # regular expressions for text matching
import subprocess
from concurrent.futures import ThreadPoolExecutor

import requests
# from bs4 import BeautifulSoup  # HTML parsing (not used at the moment)
import xlwt  # writes the .xls workbook

search_url = 'https://api.bilibili.com/x/web-interface/popular/precious?page_size=100&page=1'
head = {  # browser-like request headers sent with every request
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36",
    'referer': 'https://www.bilibili.com/'  # hotlink-protection header
}

# Seconds to wait on any single HTTP request before giving up; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

# Upper bound on the number of list entries exported to the spreadsheet.
# The original script hard-coded this count; it is applied as a cap so a
# shorter API response no longer raises IndexError.
MAX_ROWS = 85

# Location of the ffmpeg executable used to merge the audio and video streams.
FFMPEG_PATH = 'D:\\pycharm\\ffmpeg\\ffmpeg-2022-04-18-git-d5687236ab-essentials_build\\bin\\ffmpeg.exe'

# NOTE(review): the two page-scraping patterns were garbled in the original
# source (one was an empty string, which can never yield JSON).  They are
# restored here on the assumption that the page carries the title in a
# <title> tag and the stream info in a `window.__playinfo__=` script --
# confirm against a live video page.
_TITLE_RE = re.compile(r'<title[^>]*>(.*?)_哔哩哔哩_bilibili')
_PLAYINFO_RE = re.compile(r'<script>window\.__playinfo__=(.*?)</script>', re.S)

# Characters that are not allowed in Windows file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

response = requests.get(search_url, headers=head, timeout=REQUEST_TIMEOUT)  # fetch the list
# pprint.pprint(response.json())  # pretty-print the raw payload when debugging
result_list = response.json()['data']['list']  # the entries this script works on
# print(result_list)


def _stream_to_file(url, path):
    """Download ``url`` to ``path`` in chunks instead of buffering it in memory."""
    with requests.get(url, headers=head, stream=True, timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        with open(path, 'wb') as fp:
            for chunk in resp.iter_content(chunk_size=1 << 16):
                fp.write(chunk)


def download_mp3mp4(url):
    """Download one video's audio and video streams and merge them with ffmpeg.

    The page at ``url`` is fetched, the title and the embedded play-info JSON
    are extracted, the first audio and first video stream are saved as
    ``<title>.mp3`` / ``<title>.mp4`` in the current directory, and ffmpeg
    copies both into ``new<title>.mp4``.  The two intermediate files are
    removed only after ffmpeg succeeds.

    Args:
        url: Address of the video page.

    Raises:
        ValueError: The title or play-info block was not found in the page.
        requests.RequestException: A download failed.
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    res = requests.get(url, headers=head, timeout=REQUEST_TIMEOUT).text
    title_match = _TITLE_RE.search(res)
    playinfo_match = _PLAYINFO_RE.search(res)
    if title_match is None or playinfo_match is None:
        raise ValueError(f'could not locate title/play info in page: {url}')

    # Strip characters that cannot appear in a file name (including the
    # backslash, which would otherwise be read as a path separator).
    title = _UNSAFE_FILENAME_CHARS.sub('', title_match.group(1))
    print(title)

    json_data = json.loads(playinfo_match.group(1))  # str -> dict
    # pprint.pprint(json_data)
    dash = json_data['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']

    audio_path = title + '.mp3'
    video_path = title + '.mp4'
    merged_path = 'new' + title + '.mp4'

    print('音频下载中')
    _stream_to_file(audio_url, audio_path)
    print('视频下载中')
    _stream_to_file(video_url, video_path)

    # An argument list (no shell) keeps characters in the title from being
    # interpreted by the command interpreter.
    cmd = [
        FFMPEG_PATH,
        '-i', audio_path,
        '-i', video_path,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merged_path,
    ]
    print(subprocess.list2cmdline(cmd))
    # check=True: if the merge fails, keep the downloaded parts for inspection
    # instead of deleting them.
    subprocess.run(cmd, check=True)
    print('视频合成完成')
    os.remove(audio_path)
    os.remove(video_path)


def getData(datalist):
    """Flatten the fetched list entries into spreadsheet rows.

    Args:
        datalist: Ignored; kept only so existing calls keep working.  The
            rows are always built from the module-level ``result_list``.

    Returns:
        A list with one 10-item row per entry (at most ``MAX_ROWS`` rows):
        uploader name, title, achievement, description, views, danmaku,
        replies, favorites, coins, shares.
    """
    datalist = []  # collected rows
    for lis in result_list[:MAX_ROWS]:
        print(lis)
        stat = lis['stat']
        # Removes whitespace and the characters ( ) + ? from the description,
        # then turns slashes into spaces.
        # NOTE(review): the character class looks like a garbled tag-stripping
        # pattern -- confirm the intended cleanup.
        desc = re.sub(r'[(\s+)?(\s+)?]', "", lis['desc'])
        desc = re.sub('/', " ", desc)
        # print(desc)
        # aid = stat['aid']
        datalist.append([
            lis['owner']['name'],  # uploader nickname
            lis['title'],          # video title
            lis['achievement'],    # video achievement
            desc,                  # video description
            stat['view'],          # view count
            stat['danmaku'],       # danmaku count
            stat['reply'],         # comment count
            stat['favorite'],      # favorite count
            stat['coin'],          # coin count
            stat['share'],         # share count
        ])
    # print(datalist)
    return datalist


def saveDATA(datalist, savepath):
    """Write the rows to an ``.xls`` workbook with a header row.

    Args:
        datalist: Rows as produced by ``getData``; at most ``MAX_ROWS`` rows
            and ten cells per row are written.
        savepath: Path of the workbook file to create.
    """
    print('excel saving.....')
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # cell_overwrite_ok=True allows a cell to be written more than once.
    worksheet = workbook.add_sheet('入站必看', cell_overwrite_ok=True)
    col = ('UP主昵称', '视频标题', '视频成就', '视频简介', '播放量', '弹幕数', '评论数', '收藏数', '投币数', '分享数')
    for j, header in enumerate(col):
        worksheet.write(0, j, header)  # header row
    # Iterate over what is actually present instead of fixed index ranges so
    # a short list or a short row cannot raise IndexError.
    for i, data in enumerate(datalist[:MAX_ROWS], start=1):
        for j, value in enumerate(data[:len(col)]):
            worksheet.write(i, j, value)
    workbook.save(savepath)
    print('excel saved')


if __name__ == '__main__':
    # 1. Build the rows from the fetched list
    #    (web view: https://www.bilibili.com/v/popular/history).
    datalist = getData(search_url)
    # print(datalist)
    savepath = '入B站必看.xls'
    # 2. Save the spreadsheet.
    saveDATA(datalist, savepath)
    # 3. Download every video with a small thread pool.
    with ThreadPoolExecutor(3) as t:
        futures = []
        for index in result_list:
            video_page = f'https://www.bilibili.com/video/{index["bvid"]}'
            futures.append((video_page, t.submit(download_mp3mp4, video_page)))
    # The pool has been joined here; surface worker errors that would
    # otherwise be silently discarded with the futures.
    for video_page, future in futures:
        error = future.exception()
        if error is not None:
            print(f'下载失败 {video_page}: {error!r}')
    print('全部下载完毕')