From 44827051b0e0ffb607f6e845f99a0e3b159e474d Mon Sep 17 00:00:00 2001 From: WKJYES Date: Thu, 1 Dec 2022 13:33:21 +0800 Subject: [PATCH] =?UTF-8?q?=E7=88=AC=E7=83=AD=E6=90=9C=E6=A6=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/misc.xml | 4 ---- text2.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) delete mode 100644 .idea/misc.xml create mode 100644 text2.py diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index da7d34f..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/text2.py b/text2.py new file mode 100644 index 0000000..dcb1b5f --- /dev/null +++ b/text2.py @@ -0,0 +1,50 @@ +import requests +from bs4 import BeautifulSoup +from lxml import etree +from bs4 import BeautifulSoup +import re +import json +import os +''' +爬热搜榜 +''' + + + +isExists = os.path.exists("./music") +if not isExists: + os.makedirs("./music") + +url = "https://www.kugou.com/yy/html/rank.html" +headers = { + "User-agent": + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.169.400 QQBrowser/11.0.5130.400", + "Cookie": + "kg_mid=14e57fa8f8bd5163efe72e9847a80e64; kg_dfid=0jICDR1Z9i6c3Xwh3n02F2uD;" + " Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1668051000,1668243927; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; kg_mid_temp=14e57fa8f8bd5163efe72e9847a80e64; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1668245258", +} +req = requests.get(url=url,headers=headers) +soup = BeautifulSoup(req.content,"html.parser") +text = re.compile(r"global.features =(.*?);$",re.MULTILINE|re.DOTALL) +script = soup.find('script',text=text) +list_text = script.text[(script.text.find('Hash')-2):-5] +m = 0 +list = [] +while(m