"""Small command-line downloader for Kugou music.

Offers two features from a text menu: downloading every song of the Kugou
hot chart, and searching for a song by name and downloading one result.
Downloaded audio is written to the ``./music`` directory.
"""
import ast
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree  # noqa: F401  (not referenced in the visible code; kept from the original)

# Request headers sent with the page/API calls.
# NOTE(review): the Cookie below is a hard-coded logged-in session (account id,
# password hash and tokens). It should not live in source control; consider
# loading it from an environment variable or a local, untracked config file.
headers = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400",
    "Cookie": "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1669556862,1669856911,1669877618,1669889562; kg_mid_temp=14e57fa8f8bd5163efe72e9847a80e64; KuGoo=KugooID=650152187&KugooPwd=3C564C11588FE543C170C1DE2FE12CBE&NickName=%u0035%u0035%u0035%u0035%u0034%u0034&Pic=http://imge.kugou.com/kugouicon/165/20100101/20100101192931478054.jpg&RegState=1&RegFrom=&t=8ea6a545bf550f5845a622cab06619f65cf61eb2ad5f17b725c792751ad4732d&t_ts=1669893548&t_key=&a_id=1014&ct=1669893548&UserName=%u006b%u0067%u006f%u0070%u0065%u006e%u0036%u0035%u0030%u0031%u0035%u0032%u0031%u0038%u0037; KugooID=650152187; t=8ea6a545bf550f5845a622cab06619f65cf61eb2ad5f17b725c792751ad4732d; a_id=1014; UserName=%u006b%u0067%u006f%u0070%u0065%u006e%u0036%u0035%u0030%u0031%u0035%u0032%u0031%u0038%u0037; mid=14e57fa8f8bd5163efe72e9847a80e64; dfid=0jICDR1Z9i6c3Xwh3n02F2uD; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1669893568",
}

MUSIC_DIR = "./music"
RANK_URL = "https://www.kugou.com/yy/html/rank.html"
SEARCH_URL = "http://mobilecdn.kugou.com/api/v3/search/song"
PLAY_DATA_URL = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata"
# The search API is assumed to return 20 results per page (the original code
# divided the total by 20) -- TODO confirm against the API.
PAGE_SIZE = 20
# Seconds to wait for a server to respond before giving up on a request.
REQUEST_TIMEOUT = 30

# Locates the inline <script> that assigns the chart data to global.features.
_FEATURES_PATTERN = re.compile(r"global.features =(.*?);$", re.MULTILINE | re.DOTALL)
# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _ensure_music_dir():
    """Create the download directory if needed and report what happened."""
    print("为你创建文件夹music")
    if os.path.exists(MUSIC_DIR):
        print("music文件夹已经存在")
    else:
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(MUSIC_DIR, exist_ok=True)
        print("music文件夹已经创建完成")


def _load_json_payload(text):
    """Decode the first JSON object found in *text*.

    Anything before the first ``{`` and after the end of the object is
    ignored, so responses wrapped in extra markup still parse.

    Raises:
        ValueError: if *text* holds no ``{`` or the object is malformed.
    """
    start = text.find("{")
    if start < 0:
        raise ValueError("response does not contain a JSON object")
    payload, _end = json.JSONDecoder().raw_decode(text, start)
    return payload


def _fetch_json(url, params=None):
    """GET *url* with the shared headers and return the decoded JSON object."""
    with requests.get(url, params=params, headers=headers,
                      timeout=REQUEST_TIMEOUT) as response:
        return _load_json_payload(response.text)


def _parse_feature_list(script_text):
    """Return the list of song dicts embedded in the chart page script.

    The slice mirrors the original extraction: it starts at the ``{`` that
    opens the first object (two characters before the first ``Hash`` key) and
    drops the last five characters of the script.
    NOTE(review): the ``-5`` presumably strips a trailing ``];`` plus
    whitespace -- confirm against the live page.

    Raises:
        ValueError: if the extracted text cannot be parsed as a list.
    """
    start = script_text.find('Hash')
    if start < 2:
        return []
    raw = "[" + script_text[start - 2:-5] + "]"
    try:
        return json.loads(raw)
    except ValueError:
        # SECURITY: the original ran eval() on this scraped text, which would
        # execute arbitrary code served by the remote page. literal_eval only
        # accepts plain literals, so it is a safe fallback for data that is
        # not strict JSON (for example single-quoted strings).
        try:
            return list(ast.literal_eval(raw))
        except (ValueError, SyntaxError) as err:
            raise ValueError("cannot parse the chart data") from err


def _fetch_song_data(song_hash, album_id):
    """Return the ``data`` dict of the play-info API, or None if unavailable."""
    if not song_hash:
        return None
    payload = _fetch_json(
        PLAY_DATA_URL,
        params={"hash": song_hash, "album_id": str(album_id)},
    )
    data = payload.get("data")
    # On failure the field may hold something other than a dict, so
    # anything that is not a dict is treated as "no data".
    return data if isinstance(data, dict) else None


def _save_song(song_data, request_headers=None):
    """Download the audio described by *song_data* into the music directory.

    Args:
        song_data: the ``data`` dict returned by the play-info API.
        request_headers: optional headers for the download request.

    Returns:
        The path of the written file, or None when no play URL is available.

    Raises:
        requests.RequestException: if the download fails.
        OSError: if the file cannot be written.
    """
    play_url = song_data.get("play_url")
    if not play_url:
        return None
    # Song titles may contain characters such as "/" that are invalid in
    # file names; replace them instead of failing in open().
    file_name = _UNSAFE_FILENAME_CHARS.sub(
        "_", str(song_data.get("audio_name") or "unknown"))
    path = os.path.join(MUSIC_DIR, f"{file_name}.mp3")
    with requests.get(play_url, headers=request_headers,
                      timeout=REQUEST_TIMEOUT) as response:
        # Avoid saving an HTML error page as an .mp3 file.
        response.raise_for_status()
        audio = response.content
    with open(path, "wb") as f:
        f.write(audio)
    return path


def the_hot_list():
    """Download every song listed on the Kugou hot chart page.

    Songs that cannot be fetched are reported and skipped so that one
    failure does not abort the rest of the chart.

    Raises:
        requests.RequestException: if the chart page itself cannot be fetched.
        ValueError: if the chart data embedded in the page cannot be parsed.
    """
    _ensure_music_dir()
    print("开始爬取内容")
    print("请稍等一会")
    # Request the hot chart page.
    with requests.get(RANK_URL, headers=headers,
                      timeout=REQUEST_TIMEOUT) as response:
        soup = BeautifulSoup(response.content, "lxml")
    # `string=` is the current name of the deprecated `text=` argument.
    script = soup.find('script', string=_FEATURES_PATTERN)
    if script is None:
        print("未能在页面中找到榜单数据")
        return
    songs = _parse_feature_list(script.text)
    for song in songs:
        try:
            song_data = _fetch_song_data(song["Hash"], song["album_id"])
            saved_path = _save_song(song_data) if song_data else None
        except (requests.RequestException, ValueError, KeyError, OSError) as err:
            print(f"跳过一首歌曲(下载失败): {err}")
            continue
        if saved_path is None:
            print("跳过一首歌曲(没有可用的下载地址)")
    print("爬取完成")


def _search_data(payload):
    """Return the ``data`` dict of a search response (empty dict if absent)."""
    data = payload.get("data")
    return data if isinstance(data, dict) else {}


def _clamp_page(raw_page, page_count):
    """Convert user input to a page number within ``1..page_count``.

    Non-numeric input falls back to the first page.
    """
    try:
        page = int(raw_page)
    except ValueError:
        return 1
    return min(max(page, 1), page_count)


def _pick_entry(entries, choice):
    """Return the entry at 1-based position *choice*, or None if invalid."""
    try:
        position = int(choice)
    except ValueError:
        return None
    if 1 <= position <= len(entries):
        return entries[position - 1]
    return None


def the_Any_music():
    """Search for a song by name and download the result the user selects.

    The user enters a keyword, chooses a result page, and then either picks a
    song by its number or types ``next`` to move to the following page.

    Raises:
        requests.RequestException: if a request fails.
        ValueError: if a response cannot be decoded as JSON.
        OSError: if the audio file cannot be written.
    """
    _ensure_music_dir()
    song_name = input("输入你想获得歌曲名字:")
    # Passing the keyword through `params` URL-encodes it, so names with
    # spaces, "&" or "#" no longer corrupt the query string.
    search_params = {"format": "json", "keyword": song_name}
    first_page = _fetch_json(SEARCH_URL, params=search_params)
    total = _search_data(first_page).get("total") or 0
    if total <= 0:
        print("没有找到相关歌曲")
        return
    # Ceiling division: a partially filled last page still counts as a page.
    page_count = max(1, -(-total // PAGE_SIZE))
    print(f"页数总数为{page_count}")
    page = _clamp_page(input("输入你想查看得页数:"), page_count)

    while True:
        payload = _fetch_json(SEARCH_URL, params={**search_params, "page": page})
        entries = _search_data(payload).get("info") or []
        for position, entry in enumerate(entries, start=1):
            print(f"{position}.{entry.get('songname')} {entry.get('singername')}")
        choice = input("输入你想获取第几首歌曲(或者输入next查看下一页):")
        if choice == 'next':
            page += 1
            if page > page_count:
                print('已经到达最后1页')
                page = page_count
            continue
        selected = _pick_entry(entries, choice)
        if selected is not None:
            break
        # An out-of-range or non-numeric choice previously produced a request
        # without a song hash; ask again instead.
        print("输入无效，请重新输入")

    print("开始爬取内容")
    print("请稍等一会")
    song_data = _fetch_song_data(selected.get("hash"), selected.get("album_id"))
    saved_path = _save_song(song_data, request_headers=headers) if song_data else None
    if saved_path is None:
        print("无法获取该歌曲的下载地址")
        return
    print("爬取完成")


def main():
    """Show the menu and run the feature the user selects."""
    menubar = ["功能榜单", "1.爬取酷狗热门榜", "2.爬取任意音乐", "3.退出系统"]
    for item in menubar:
        print(item)
    try:
        feature_option = int(input("输入你想进行的功能:"))
    except ValueError:
        # Non-numeric input is treated like any other unknown option: quit.
        return
    if feature_option == 1:
        the_hot_list()
    elif feature_option == 2:
        the_Any_music()
    # Any other value quits.


if __name__ == "__main__":
    main()