# Scraper for the Kugou music chart (kugou.com rank pages); writes song data to a CSV file.
import csv
import time

import requests
from bs4 import BeautifulSoup
# HTTP headers sent with every request.
# NOTE(review): presumably the browser-like User-Agent is set so the site does
# not reject the scraper as a bot — confirm against kugou.com's behavior.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
}
def get_info(url):
    """Scrape one Kugou chart page and return its song entries.

    Parameters
    ----------
    url : str
        URL of a single chart page (a kugou.com rank page).

    Returns
    -------
    list[dict]
        One dict per song with keys "rank", "song", "singer", "time".

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status (via raise_for_status).
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # timeout prevents the scraper from hanging forever on a stalled server
    # (the original call had no timeout at all).
    web_data = requests.get(url, headers=headers, timeout=10)
    web_data.raise_for_status()
    soup = BeautifulSoup(web_data.text, 'lxml')

    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    # Renamed from `times`/loop var `time`: the original loop variable
    # shadowed the imported `time` module inside this function.
    durations = soup.select('span.pc_temp_tips_r > span')

    songs_data = []
    for rank, title, duration in zip(ranks, titles, durations):
        text = title.get_text().replace("\n", "").replace("\t", "")
        # Guard against entries without a '-' separator, which previously
        # raised IndexError on parts[1].
        # NOTE(review): the page text looks like "<artist> - <song>", so the
        # "song"/"singer" labels below may be swapped — confirm before relying
        # on them; kept as-is to preserve existing CSV column semantics.
        parts = text.split('-')
        data = {
            "rank": rank.get_text().strip(),
            "song": parts[0],
            "singer": parts[1] if len(parts) > 1 else "",
            "time": duration.get_text().strip()
        }
        songs_data.append(data)

    return songs_data
if __name__ == '__main__':
    # The chart spans 23 pages: .../home/1-8888.html through .../home/23-8888.html.
    page_urls = [f"https://www.kugou.com/yy/rank/home/{page}-8888.html" for page in range(1, 24)]

    with open('qqmusic_top_songs.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["rank", "song", "singer", "time"])
        writer.writeheader()

        for page_url in page_urls:
            for entry in get_info(page_url):
                writer.writerow(entry)
            # Brief pause between pages so requests are not fired back-to-back.
            time.sleep(1)

    print("所有歌曲数据已保存到CSV文件中")