爬热搜榜

3 years ago · 44827051b0
parent 5c68ee17d7
commit 44827051b0
2 changed files with 50 additions and 4 deletions
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (Spider)" project-jdk-type="Python SDK" />
-</project>
--- a/text2.py
+++ b/text2.py
@ -0,0 +1,50 @@
+import requests
+from bs4 import BeautifulSoup
+from lxml import etree
+from bs4 import BeautifulSoup
+import re
+import json
+import os
+# Scrape the Kugou hot-song ranking page and download each listed track
+# into ./music as "<audio_name>.mp3".
+import ast  # only needed here: parses scraped records without eval()
+
+os.makedirs("./music", exist_ok=True)
+
+url = "https://www.kugou.com/yy/html/rank.html"
+headers = {
+    "User-agent":
+    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400",
+    "Cookie":
+    "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD;"
+    " Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668051000,1668243927; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; kg_mid_temp=14e57fa8f8bd5163efe72e9847a80e64; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1668245258",
+}
+req = requests.get(url=url,headers=headers)
+soup = BeautifulSoup(req.content,"html.parser")
+# The chart entries are read from the inline <script> whose text matches the
+# `global.features = ...;` assignment.
+text = re.compile(r"global.features =(.*?);$",re.MULTILINE|re.DOTALL)
+script = soup.find('script',text=text)
+# Slice from two characters before the first 'Hash' (presumably the record's
+# opening '{"') and drop the last five characters of the script text.
+list_text = script.text[(script.text.find('Hash')-2):-5]
+m = 0
+songs = []
+while m < len(list_text):
+    n = list_text[m:].find('}')
+    if n < 0:  # find() returned -1: no complete record left to parse
+        break
+    # literal_eval accepts the same dict literals eval() did, but cannot
+    # execute code injected through the scraped page.
+    songs.append(ast.literal_eval(list_text[m:m+n+1]))
+    m = m+n+2  # step past the closing '}' and the one character after it
+for x in songs:
+    url = 'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash='
+    url = url + x["Hash"]+"&album_id="+str(x['album_id'])
+    data = json.loads(requests.get(url=url, headers=headers).text).get('data') or {}
+    song_url = data.get('play_url')
+    if not song_url:  # no playable URL in the response; skip instead of crashing
+        continue
+    name = re.sub(r'[\\/:*?"<>|]', '_', str(data.get("audio_name")))  # keep the file name path-safe
+    with open(f'./music/{name}.mp3', 'wb') as f:
+        f.write(requests.get(url=song_url).content)