import ast
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree  # unused in this file's visible code; kept so the import surface is unchanged

# Request headers sent to the Kugou endpoints.
# NOTE(review): the Cookie below is a hard-coded browser session value;
# consider loading it from configuration instead of committing it.
headers = {
    "User-agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400",
    "Cookie":
        "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD; KuGooRandom=66751668051014213; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; ACK_SERVER_10015=%7B%22list%22%3A%5B%5B%22bjlogin-user.kugou.com%22%5D%5D%7D; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668330462,1669556862,1669856911,1669877618; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1669878413; ACK_SERVER_10016=%7B%22list%22%3A%5B%5B%22bjreg-user.kugou.com%22%5D%5D%7D; ACK_SERVER_10017=%7B%22list%22%3A%5B%5B%22bjverifycode.service.kugou.com%22%5D%5D%7D",
}

MUSIC_DIR = "./music"
REQUEST_TIMEOUT = 15  # seconds; without a timeout a stalled server blocks forever
SEARCH_PAGE_SIZE = 20  # page size the original page-count arithmetic assumed
RANK_URL = "https://www.kugou.com/yy/html/rank.html"
SEARCH_URL = "http://mobilecdn.kugou.com/api/v3/search/song"
PLAY_DATA_URL = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata"


def _ensure_music_dir():
    """Create MUSIC_DIR when it is missing and report which case applied."""
    print("为你创建文件夹music")
    if not os.path.exists(MUSIC_DIR):
        os.makedirs(MUSIC_DIR)
        print("music文件夹已经创建完成")
    else:
        print("music文件夹已经存在")


def _safe_filename(name):
    """Replace characters that are not allowed in file names with ``_``."""
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(name)).strip()
    return cleaned or "unknown"


def _loads_from_first_brace(text):
    """Decode the JSON object that starts at the first ``{`` in *text*.

    Anything before the brace or after the decoded object is ignored.

    Raises:
        ValueError: if *text* contains no ``{`` or the object is malformed.
    """
    start = text.find("{")
    if start == -1:
        raise ValueError("response does not contain a JSON object")
    payload, _ = json.JSONDecoder().raw_decode(text[start:])
    return payload


def _parse_song_entries(fragment):
    """Turn a comma-separated run of flat ``{...}`` objects into dicts.

    The text comes from a remote page, so it is parsed as data (JSON first,
    then Python literal syntax) and is never passed to ``eval``.
    """
    entries = []
    pos = 0
    while pos < len(fragment):
        end = fragment.find("}", pos)
        if end == -1:
            break  # no complete object left in the remaining text
        chunk = fragment[pos:end + 1]
        try:
            entry = json.loads(chunk)
        except ValueError:
            entry = ast.literal_eval(chunk)
        entries.append(entry)
        pos = end + 2  # step over the closing brace and the separating comma
    return entries


def _download_song(file_hash, album_id, audio_headers=None):
    """Look up one track's play URL and save its audio into MUSIC_DIR.

    Args:
        file_hash: value sent as the ``hash`` query parameter.
        album_id: value sent as the ``album_id`` query parameter.
        audio_headers: optional headers for the audio download request.

    Returns:
        True when the file was written, False when no play URL was returned.
    """
    # str() mirrors the original string concatenation for album_id.
    params = {"hash": file_hash, "album_id": str(album_id)}
    with requests.get(PLAY_DATA_URL, params=params, headers=headers,
                      timeout=REQUEST_TIMEOUT) as info_resp:
        info = json.loads(info_resp.text)
    data = info.get("data") if isinstance(info, dict) else None
    play_url = data.get("play_url") if isinstance(data, dict) else None
    if not play_url:
        print(f"无法获取播放地址,已跳过: {file_hash}")
        return False
    target = os.path.join(
        MUSIC_DIR, _safe_filename(data.get("audio_name")) + ".mp3")
    with requests.get(play_url, headers=audio_headers,
                      timeout=REQUEST_TIMEOUT) as audio_resp:
        audio = audio_resp.content
    with open(target, "wb") as f:
        f.write(audio)
    return True


def the_hot_list():
    """Download every track found in the Kugou rank page into MUSIC_DIR.

    The track list is read from the ``global.features`` script embedded in
    the page. A track that fails to download is reported and skipped so the
    rest of the list is still processed.
    """
    _ensure_music_dir()
    print("开始爬取内容")
    print("请稍等一会")
    with requests.get(RANK_URL, headers=headers,
                      timeout=REQUEST_TIMEOUT) as page:
        soup = BeautifulSoup(page.content, "lxml")
    # Locates the <script> element whose text assigns global.features.
    pattern = re.compile(r"global.features =(.*?);$", re.MULTILINE | re.DOTALL)
    script = soup.find('script', string=pattern)
    if script is None:
        print("未能在页面中找到榜单数据")
        return
    script_text = script.text
    # Keep the text from the opening brace of the first object (two
    # characters before "Hash") up to five characters before the end.
    fragment = script_text[(script_text.find('Hash') - 2):-5]
    for entry in _parse_song_entries(fragment):
        try:
            _download_song(entry["Hash"], entry['album_id'])
        except (requests.RequestException, OSError, ValueError, KeyError) as err:
            print(f"下载失败,已跳过: {err}")
    print("爬取完成")


def the_Any_music():
    """Search Kugou for a song typed by the user and download one result.

    Prompts for a keyword, a result page and an entry number, then saves the
    chosen track into MUSIC_DIR. An invalid entry number is reported and
    nothing is downloaded.
    """
    _ensure_music_dir()
    song_name = input("输入你想获得歌曲名字:")
    # Passing params lets requests percent-encode the user's text.
    search_params = {"format": "json", "keyword": song_name}
    with requests.get(SEARCH_URL, params=search_params, headers=headers,
                      timeout=REQUEST_TIMEOUT) as resp:
        first_page = _loads_from_first_brace(resp.text)
    total = first_page['data']['total']
    # Ceiling division: a partially filled last page still counts as a page.
    page_count = -(-total // SEARCH_PAGE_SIZE)
    print(f"页数总数为{page_count}")
    num = input("输入你想查看得页数:")
    search_params["page"] = num
    with requests.get(SEARCH_URL, params=search_params, headers=headers,
                      timeout=REQUEST_TIMEOUT) as resp:
        listing = _loads_from_first_brace(resp.text)
    songs = listing['data']['info']
    for index, song in enumerate(songs, start=1):
        print(f"{index}.{song.get('songname')} {song.get('singername')}")
    song_num = input("输入你想获取第几首歌曲:")
    try:
        choice = int(song_num)
    except ValueError:
        choice = 0
    if not 1 <= choice <= len(songs):
        print("输入的歌曲序号无效")
        return
    print("开始爬取内容")
    print("请稍等一会")
    picked = songs[choice - 1]
    if _download_song(picked.get("hash"), picked.get('album_id'),
                      audio_headers=headers):
        print("爬取完成")
# Menu text shown to the user, one entry per line.
menubar = ["功能榜单", "1.爬取酷狗热门榜", "2.爬取任意音乐", "3.退出系统"]


def parse_menu_choice(raw):
    """Return the menu option in *raw* as an int, or None if it is not a number."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        return None


def main():
    """Print the menu, read one option and run the matching feature.

    Option 1 downloads the hot list and option 2 downloads a searched song.
    Any other number exits. Non-numeric input prints a message and exits
    with status 1 instead of raising an unhandled ValueError.
    """
    for line in menubar:
        print(line)
    feature_option = parse_menu_choice(input("输入你想进行的功能:"))
    if feature_option is None:
        print("输入无效,请输入数字 1、2 或 3")
        raise SystemExit(1)
    if feature_option == 1:
        the_hot_list()
    elif feature_option == 2:
        the_Any_music()
    else:
        # SystemExit is what exit() raises; raising it directly does not
        # depend on the site module having installed exit().
        raise SystemExit


if __name__ == "__main__":
    main()
url1 = url +list_json['data']['lists'][0].get('FileHash')+"&album_id="+str(list_json['data']['lists'][0].get('AlbumID')) -# req = requests.get(url=url1,headers=headers) -# req_json = json.loads(req.text) -# song_url = req_json['data'].get('play_url') -# req1 = requests.get(url=song_url) -# with open(f'{req_json["data"].get("audio_name")}.mp3','wb') as f: -# f.write(req1.content) -#https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery19107826705776976817_1668256148578&hash=D80A9FB3362FB91C2D06D70E321F4BC5&dfid=0jICDR1Z9i6c3Xwh3n02F2uD&appid=1014&mid=14e57fa8f8bd5163efe72e9847a80e64&platid=4&album_id=1944022&_=1668256148598 -#https://www.kugou.com/mixsong/vjjt936.html?fromsearch=只因你太美#hash=D80A9FB3362FB91C2D06D70E321F4BC5&album_id=1944022&album_audio_id=52980237 -#https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191038106222049194827_1668261761236&hash=D80A9FB3362FB91C2D06D70E321F4BC5&dfid=0jICDR1Z9i6c3Xwh3n02F2uD&appid=1014&mid=14e57fa8f8bd5163efe72e9847a80e64&platid=4&album_id=1944022&_=1668261761246 -# url = 'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash=' -# for i in songlist: -# print(url+i.get("FileHash")) - # req = requests.get(url=url) - # with open(f'{i.get("SongName")}.mp3','wb') as f: -# # # f.write(req.content) -# url='https://voice.baidu.com/api/newpneumonia?from=page&callback=jsonp_1668237727742_73497' -# headers = { -# 'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400' -# } -# req = requests.get(url=url,headers=headers) -# reqjson = json.loads(req.text[26:-2]) -# with open('疫情.csv','w',encoding='UTF-8') as f: -# f.write(f'{"省市地区"}, {"新增确诊"}, {"新增无症状"}, {"累计确诊"}, {"风险地区"}\n') -# # for i in reqjson['data']['resumes']['chinaProvince']: -# # for j in reqjson['data']['resumes']['chinaProvince'][f'{i}']['list']: -# # 
f.write(f'{j.get("area")},{j.get("asymptomaticLocalRelative")},{j.get("asymptomaticRelative")},{j.get("confirmed")}\n') -# for i in reqjson['data']['resumes']['china']['list']: -# f.write(f'{i.get("province")},{i.get("nativeRelative")},{i.get("asymptomaticLocalRelative")},{i.get("confirmed")},{i.get("asymptomaticRelative")}\n') - -# #导入csv文件模块: -# import csv # 该模块为内置模块不用安装! -# list3 = [["姓名","爱好","年龄","性别"],["特斯拉",'喜欢核辐射','未知','雄性'],["迪迦",'打怪兽','未知','雄性'] ] -# -# for i in list3: -# print(i) -# fp =open('adi.csv',mode='a+',newline='') # ps:此处记得用“a+” 不然首行会变成空行! -# dp = csv.writer(fp) -# dp.writerow(i) -# fp.close() diff --git a/text2.py b/text2.py deleted file mode 100644 index dcb1b5f..0000000 --- a/text2.py +++ /dev/null @@ -1,50 +0,0 @@ -import requests -from bs4 import BeautifulSoup -from lxml import etree -from bs4 import BeautifulSoup -import re -import json -import os -''' -爬热搜榜 -''' - - - -isExists = os.path.exists("./music") -if not isExists: - os.makedirs("./music") - -url = "https://www.kugou.com/yy/html/rank.html" -headers = { - "User-agent": - "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400", - "Cookie": - "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD;" - " Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668051000,1668243927; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; kg_mid_temp=14e57fa8f8bd5163efe72e9847a80e64; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1668245258", -} -req = requests.get(url=url,headers=headers) -soup = BeautifulSoup(req.content,"html.parser") -text = re.compile(r"global.features =(.*?);$",re.MULTILINE|re.DOTALL) -script = soup.find('script',text=text) -list_text = script.text[(script.text.find('Hash')-2):-5] -m = 0 -list = [] -while(m