"""Scrape the Kugou ranking pages and save songs whose rank contains an 8.

Recovered from a git patch (commit a7b5119, "ADD file via upload") that
created ``xiangmu1.py``.  Running this module fetches ranking pages 1-23,
keeps the entries whose rank number contains the digit 8, and writes them
to a CSV file in the current working directory.
"""

import csv

from bs4 import BeautifulSoup
import requests

# Browser-style User-Agent sent with every request (presumably so the site
# serves the regular HTML page -- confirm against the site's behaviour).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# Seconds to wait for the server; without a timeout requests can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def match_songs(page):
    """Return the songs on one ranking page whose rank contains the digit 8.

    Args:
        page: Page number substituted into the ranking URL.

    Returns:
        A list of dicts with the keys "排名" (rank), "歌曲" (song) and
        "歌手" (artist), in page order.  Entries without a rank element, or
        whose title is not of the form "artist - song", are skipped.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request exceeds ``REQUEST_TIMEOUT``.
    """
    url = f'https://www.kugou.com/yy/rank/home/{page}-8888.html'
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')

    page_songs = []
    for li in soup.find_all('li', {'title': True}):
        rank_tag = li.find('span', class_='pc_temp_num')
        if rank_tag is None:
            # A titled <li> that is not a ranking row; skip it instead of
            # crashing the whole scrape with an AttributeError.
            continue
        rank = rank_tag.get_text(strip=True).strip('.')
        if '8' not in rank:
            continue

        # The title reads "artist - song".  Split only at the first
        # separator so a song name that itself contains " - " stays intact.
        artist, separator, song = li.get('title').partition(' - ')
        if not separator:
            continue
        page_songs.append({"排名": rank, "歌曲": song, "歌手": artist})
    return page_songs


# Collect the matching songs from ranking pages 1 through 23.
songs = []
for page in range(1, 24):
    songs.extend(match_songs(page))

csv_file = '酷狗排行榜含8的歌曲.csv'
# 'utf-8-sig' writes a BOM at the start of the file; newline='' lets the csv
# module control line endings itself.
with open(csv_file, 'w', newline='', encoding='utf-8-sig') as file:
    writer = csv.DictWriter(file, fieldnames=["排名", "歌曲", "歌手"])
    writer.writeheader()  # First row holds the column names.
    writer.writerows(songs)