parent
44827051b0
commit
71863de427
@ -0,0 +1,114 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import etree
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
|
||||
# Request headers sent with every KuGou request.
# NOTE(review): the Cookie is a hard-coded value that looks like a captured
# browser session; it presumably expires and will need refreshing — confirm.
headers = {
    "User-agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400",
    "Cookie":
        "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; "
        "kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD; "
        "KuGooRandom=66751668051014213; "
        "kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; "
        "ACK_SERVER_10015=%7B%22list%22%3A%5B%5B%22bjlogin-user.kugou.com%22%5D%5D%7D; "
        "Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668330462,1669556862,1669856911,1669877618; "
        "Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1669878413; "
        "ACK_SERVER_10016=%7B%22list%22%3A%5B%5B%22bjreg-user.kugou.com%22%5D%5D%7D; "
        "ACK_SERVER_10017=%7B%22list%22%3A%5B%5B%22bjverifycode.service.kugou.com%22%5D%5D%7D",
}
|
||||
|
||||
|
||||
def the_hot_list():
    """Download every song listed on the KuGou hot-rank page into ``./music``.

    Fetches the rank page, extracts the track list embedded in the page's
    ``global.features`` script, asks the play-data API for each track's
    ``play_url`` and writes the audio bytes to ``./music/<audio_name>.mp3``.
    Progress messages are printed to stdout; nothing is returned.

    Raises:
        RuntimeError: if the rank page contains no ``global.features`` script.
    """
    # Local import: only this function needs it, and the file-level import
    # block is left untouched.
    import ast

    print("为你创建文件夹music")
    if not os.path.exists("./music"):
        os.makedirs("./music")
        print("music文件夹已经创建完成")
    else:
        print("music文件夹已经存在")
    print("开始爬取内容")
    print("请稍等一会")

    list_url = "https://www.kugou.com/yy/html/rank.html"
    # Matches the inline script that assigns the track list.
    pattern = re.compile(r"global.features =(.*?);$", re.MULTILINE | re.DOTALL)

    # The context manager releases the connection even if parsing fails.
    with requests.get(url=list_url, headers=headers) as list_req:
        soup = BeautifulSoup(list_req.content, "lxml")

    # `string=` is the current name of the deprecated `text=` argument.
    script = soup.find('script', string=pattern)
    if script is None:
        raise RuntimeError("global.features script not found on the rank page")

    # Keep the text from two characters before the first 'Hash' up to five
    # characters before the end of the script.
    # NOTE(review): presumably this isolates the "{...},{...}" run inside the
    # array literal — confirm against the live page.
    list_text = script.text[(script.text.find('Hash') - 2):-5]

    # Split the text into one "{...}" chunk per track.
    tracks = []
    pos = 0
    while pos < len(list_text):
        end = list_text.find('}', pos)
        if end == -1:
            break  # no further complete record
        # SECURITY: this text comes from a remote page. The original ran it
        # through eval(); literal_eval accepts the same dict literals without
        # executing arbitrary code.
        tracks.append(ast.literal_eval(list_text[pos:end + 1]))
        pos = end + 2  # skip the "}" and the separator that follows it

    for track in tracks:
        url = ('https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash='
               + track["Hash"] + "&album_id=" + str(track['album_id']))
        with requests.get(url=url, headers=headers) as req:
            data = json.loads(req.text)['data']

        song_url = data.get('play_url') if isinstance(data, dict) else None
        if not song_url:
            # Without a play URL there is nothing to download; report and
            # move on instead of crashing the whole run.
            print(f"跳过无法下载的歌曲: {track['Hash']}")
            continue

        # Replace characters that cannot appear in a file name so that a
        # title containing e.g. "/" does not break open().
        file_name = re.sub(r'[\\/:*?"<>|]', '_', str(data.get("audio_name")))
        with requests.get(url=song_url) as song_req:
            with open(f'./music/{file_name}.mp3', 'wb') as f:
                f.write(song_req.content)

    print("爬取完成")
|
||||
|
||||
|
||||
def the_Any_music():
    """Search KuGou for a song, let the user pick one result, and save it.

    Prompts on stdin for a keyword, a result page number and the index of the
    song on that page, then downloads the chosen song to
    ``./music/<audio_name>.mp3``. Progress messages are printed to stdout;
    nothing is returned. An invalid song index or a missing play URL is
    reported and the function returns without downloading.

    Raises:
        ValueError: if a search response contains no JSON object.
    """

    def _load_json(text):
        # The original skipped everything before the first "{"; also drop
        # anything after the last "}" so trailing non-JSON text is tolerated.
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            raise ValueError("response does not contain a JSON object")
        return json.loads(text[start:end + 1])

    print("为你创建文件夹music")
    if not os.path.exists("./music"):
        os.makedirs("./music")
        print("music文件夹已经创建完成")
    else:
        print("music文件夹已经存在")

    song_name = input("输入你想获得歌曲名字:")
    search_url = "http://mobilecdn.kugou.com/api/v3/search/song"
    # Passing the keyword via `params` lets requests URL-encode it, so
    # keywords containing "&", "#" or spaces no longer corrupt the query.
    query = {"format": "json", "keyword": song_name}

    with requests.get(url=search_url, params=query, headers=headers) as req:
        total = _load_json(req.text)['data']['total']

    # Round up so a final partial page is counted.
    # NOTE(review): 20 results per page is carried over from the original
    # code — confirm it matches the API's default page size.
    list_num = (total + 19) // 20
    print(f"页数总数为{list_num}")

    num = input("输入你想查看得页数:")
    page_query = dict(query, page=num)
    with requests.get(url=search_url, params=page_query, headers=headers) as list_req:
        # Each response is sliced on its own "{" position rather than reusing
        # the offset found in the first response.
        results = _load_json(list_req.text)['data']['info']

    for idx, item in enumerate(results, start=1):
        print(f"{idx}.{item.get('songname')} {item.get('singername')}")

    song_num = input("输入你想获取第几首歌曲:")
    try:
        choice = int(song_num)
    except ValueError:
        choice = 0  # falls through to the range check below
    if not 1 <= choice <= len(results):
        # Previously an out-of-range number sent a request with no hash.
        print("输入的序号无效")
        return

    print("开始爬取内容")
    print("请稍等一会")

    picked = results[choice - 1]
    song_url = ('https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash='
                + picked.get("hash") + "&album_id=" + str(picked.get('album_id')))
    with requests.get(url=song_url, headers=headers) as song_req:
        data = json.loads(song_req.text)['data']

    play_url = data.get('play_url') if isinstance(data, dict) else None
    if not play_url:
        print("未获取到播放地址")
        return

    # Replace characters that cannot appear in a file name.
    file_name = re.sub(r'[\\/:*?"<>|]', '_', str(data.get("audio_name")))
    with requests.get(url=play_url, headers=headers) as song_req1:
        # Save into the ./music folder created above; the original wrote to a
        # hard-coded d:/PythonProject/music path that may not exist.
        with open(f'./music/{file_name}.mp3', 'wb') as f:
            f.write(song_req1.content)

    print("爬取完成")
|
||||
|
||||
|
||||
# Interactive entry point: show the menu, read one choice, run it.
menubar = ["功能榜单", "1.爬取酷狗热门榜", "2.爬取任意音乐", "3.退出系统"]
for x in menubar:
    print(x)

try:
    feature_option = int(input("输入你想进行的功能:"))
except ValueError:
    # Non-numeric input used to end in a traceback; treat it like any other
    # unrecognised option and exit.
    feature_option = 3

if feature_option == 1:
    the_hot_list()
elif feature_option == 2:
    the_Any_music()
else:
    # Same effect as exit(), without relying on the helper that the `site`
    # module injects for interactive use.
    raise SystemExit
|
@ -1,50 +0,0 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import etree
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
# Scrape the KuGou hot-rank list and download every track into ./music.

# Local to this script section so the file-level import block stays untouched.
import ast

if not os.path.exists("./music"):
    os.makedirs("./music")

url = "https://www.kugou.com/yy/html/rank.html"
headers = {
    "User-agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400",
    "Cookie":
        "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD;"
        " Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668051000,1668243927; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; kg_mid_temp=14e57fa8f8bd5163efe72e9847a80e64; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1668245258",
}

# The context manager releases the connection even if parsing fails.
with requests.get(url=url, headers=headers) as req:
    soup = BeautifulSoup(req.content, "html.parser")

# Matches the inline script that assigns the track list.
text = re.compile(r"global.features =(.*?);$", re.MULTILINE | re.DOTALL)
# `string=` is the current name of the deprecated `text=` argument.
script = soup.find('script', string=text)
if script is None:
    raise RuntimeError("global.features script not found on the rank page")

# Keep the text from two characters before the first 'Hash' up to five
# characters before the end of the script.
# NOTE(review): presumably this isolates the "{...},{...}" run — confirm
# against the live page.
list_text = script.text[(script.text.find('Hash') - 2):-5]

# Split the text into one "{...}" chunk per track.
tracks = []
m = 0
while m < len(list_text):
    end = list_text.find('}', m)
    if end == -1:
        break  # no further complete record
    # SECURITY: this text comes from a remote page. The original ran it
    # through eval(); literal_eval accepts the same dict literals without
    # executing arbitrary code.
    tracks.append(ast.literal_eval(list_text[m:end + 1]))
    m = end + 2  # skip the "}" and the separator that follows it

for x in tracks:
    url = ('https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash='
           + x["Hash"] + "&album_id=" + str(x['album_id']))
    with requests.get(url=url, headers=headers) as req:
        req_json = json.loads(req.text)

    data = req_json['data']
    song_url = data.get('play_url') if isinstance(data, dict) else None
    if not song_url:
        # Nothing to download for this track; skip it instead of crashing.
        continue

    # Replace characters that cannot appear in a file name.
    file_name = re.sub(r'[\\/:*?"<>|]', '_', str(data.get("audio_name")))
    with requests.get(url=song_url) as req1:
        with open(f'./music/{file_name}.mp3', 'wb') as f:
            f.write(req1.content)
|
Loading…
Reference in new issue