|
|
@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
import csv
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def match_songs(page, timeout=10):
    """Scrape one Kugou rank page and return the entries whose rank contains '8'.

    The request is sent with the module-level ``headers`` dict. Each
    ``<li>`` element carrying a ``title`` attribute is treated as one chart
    entry; its rank is read from the nested ``span.pc_temp_num`` element and
    its title is split on the first ``' - '`` into artist and song.

    Args:
        page: Page number substituted into the rank-list URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys "排名" (rank), "歌曲" (song) and
        "歌手" (artist), one per matching entry, in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    url = f'https://www.kugou.com/yy/rank/home/{page}-8888.html'
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    page_songs = []
    for li in soup.find_all('li', {'title': True}):
        rank_tag = li.find('span', class_='pc_temp_num')
        if rank_tag is None:
            # A titled <li> without a rank badge is not a chart entry.
            continue
        rank = rank_tag.get_text(strip=True).strip('.')
        if '8' not in rank:
            continue
        # Split on the first ' - ' only: the left part is taken as the
        # artist, and everything after it (including any further ' - ')
        # is kept as the song name.
        artist, separator, song = li.get('title').partition(' - ')
        if separator:
            page_songs.append({"排名": rank, "歌曲": song, "歌手": artist})
    return page_songs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# HTTP request headers sent with every page fetch; the User-Agent is a
# desktop Chrome-on-Windows browser string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Walk rank pages 1 through 23 and gather every matching song returned by
# match_songs() into one flat list.
songs = []
for page in range(1, 24):
    songs += match_songs(page)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Write the collected songs to a CSV file in the current working directory.
csv_file = '酷狗排行榜含8的歌曲.csv'
# newline='' lets the csv module control line endings; 'utf-8-sig' prefixes
# the file with a UTF-8 byte-order mark.
with open(csv_file, 'w', newline='', encoding='utf-8-sig') as file:
    writer = csv.DictWriter(file, fieldnames=["排名", "歌曲", "歌手"])
    # The first row holds the column names.
    writer.writeheader()
    # One row per collected song, in collection order.
    writer.writerows(songs)
|