爬热搜榜

master
WKJYES 2 years ago
parent 5c68ee17d7
commit 44827051b0

@ -1,4 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (Spider)" project-jdk-type="Python SDK" />
</project>

@ -0,0 +1,50 @@
import requests
from bs4 import BeautifulSoup
from lxml import etree
from bs4 import BeautifulSoup
import re
import json
import os
# Scrape the Kugou chart page and download every track it lists into ./music.

RANK_URL = "https://www.kugou.com/yy/html/rank.html"
PLAY_API_URL = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash="
MUSIC_DIR = "./music"
# Seconds to wait on any single HTTP request before giving up, so one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# NOTE(review): the cookie values look like they were captured from a browser
# session; presumably they expire and need refreshing - confirm.
HEADERS = {
    "User-agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400",
    "Cookie":
        "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD;"
        " Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668051000,1668243927; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; kg_mid_temp=14e57fa8f8bd5163efe72e9847a80e64; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1668245258",
}


def extract_features(script_text: str) -> list:
    """Return the track dicts embedded in the chart page's inline script.

    Decoding starts at the ``{`` that opens the first object containing a
    ``Hash`` key and continues over each following comma-separated object.
    It stops at the first character that does not open another object.

    The objects are decoded as JSON rather than passed to ``eval``: the text
    comes from a remote server, so evaluating it would execute whatever that
    server sends. A real decoder also copes with ``}`` inside string values,
    which a naive "split on the next ``}``" scan does not.
    NOTE(review): this assumes the page emits strict JSON objects - confirm
    against a live response.

    Args:
        script_text: Full text of the ``<script>`` element.

    Returns:
        A list of decoded objects; empty when no ``Hash`` entry is present.

    Raises:
        json.JSONDecodeError: If an object in the sequence is not valid JSON.
    """
    anchor = script_text.find("Hash")
    if anchor == -1:
        return []
    pos = script_text.rfind("{", 0, anchor)
    if pos == -1:
        return []

    decoder = json.JSONDecoder()
    end = len(script_text)
    tracks = []
    while pos < end and script_text[pos] == "{":
        track, pos = decoder.raw_decode(script_text, pos)
        tracks.append(track)
        # Step over the separator (comma and any whitespace) between objects.
        while pos < end and script_text[pos] in ", \t\r\n":
            pos += 1
    return tracks


def safe_filename(name: str) -> str:
    """Return *name* with characters that are illegal in file names replaced.

    Path separators and the characters Windows rejects become ``_``;
    surrounding spaces and dots are trimmed.
    """
    return re.sub(r'[\\/:*?"<>|]', "_", name).strip(" .")


def download_tracks(tracks, headers=HEADERS, out_dir=MUSIC_DIR) -> None:
    """Resolve each track's play URL and save the audio under *out_dir*.

    Tracks that cannot be resolved or downloaded are reported and skipped so
    one bad entry does not abort the rest of the chart.

    Args:
        tracks: Iterable of dicts carrying ``Hash`` and ``album_id`` keys.
        headers: HTTP headers sent with the play-data API request.
        out_dir: Existing directory that receives the ``.mp3`` files.
    """
    for track in tracks:
        track_hash = track.get("Hash")
        if not track_hash:
            print(f"skip: entry without a Hash: {track!r}")
            continue

        api_url = PLAY_API_URL + track_hash + "&album_id=" + str(track.get("album_id", ""))
        with requests.get(url=api_url, headers=headers, timeout=REQUEST_TIMEOUT) as info_resp:
            payload = json.loads(info_resp.text)

        data = payload.get("data") if isinstance(payload, dict) else None
        song_url = data.get("play_url") if isinstance(data, dict) else None
        if not song_url:
            print(f"skip: no play_url returned for hash {track_hash}")
            continue

        # Fall back to the hash when the API gives no usable display name.
        file_stem = safe_filename(str(data.get("audio_name") or "")) or track_hash
        target = os.path.join(out_dir, f"{file_stem}.mp3")

        with requests.get(url=song_url, timeout=REQUEST_TIMEOUT) as audio_resp:
            if not audio_resp.ok:
                # Do not save an HTTP error page as if it were audio.
                print(f"skip: HTTP {audio_resp.status_code} downloading {file_stem}")
                continue
            with open(target, "wb") as f:
                f.write(audio_resp.content)


def main() -> None:
    """Fetch the chart page, extract its track list and download every track."""
    os.makedirs(MUSIC_DIR, exist_ok=True)

    with requests.get(url=RANK_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT) as page:
        page.raise_for_status()
        soup = BeautifulSoup(page.content, "html.parser")

    pattern = re.compile(r"global.features =(.*?);$", re.MULTILINE | re.DOTALL)
    script = soup.find("script", string=pattern)
    if script is None:
        raise SystemExit("chart data script not found; the page layout may have changed")

    download_tracks(extract_features(script.text))


if __name__ == "__main__":
    main()
Loading…
Cancel
Save