You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
import csv
from bs4 import BeautifulSoup
import requests
def match_songs(page):
    """Scrape one Kugou ranking page and return the songs whose rank contains an 8.

    Args:
        page: Page number substituted into the ranking URL (the script below
            requests pages 1 through 23).

    Returns:
        A list of dicts with the keys "排名" (rank), "歌曲" (song) and
        "歌手" (artist), one per matching entry, in document order.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request exceeds the timeout.
    """
    url = f'https://www.kugou.com/yy/rank/home/{page}-8888.html'
    # ``headers`` is the module-level dict defined below; it is resolved at
    # call time, so it only has to exist before the first call.
    # The timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    page_songs = []
    # Only <li> elements that carry a ``title`` attribute are considered.
    for li in soup.find_all('li', {'title': True}):
        rank_tag = li.find('span', class_='pc_temp_num')
        if rank_tag is None:
            # No rank element to read; skip instead of raising AttributeError.
            continue
        rank = rank_tag.get_text(strip=True).strip('.')
        if '8' not in rank:
            continue
        # The title is split at the first " - ": the part before it is the
        # artist, and everything after it (including any further " - ")
        # is kept as the song name.
        # NOTE(review): assumes titles look like "artist - song" - confirm
        # against the live page markup.
        artist, separator, song = li.get('title').partition(' - ')
        if not separator:
            # Title has no artist/song separator; nothing to record.
            continue
        page_songs.append({"排名": rank, "歌曲": song, "歌手": artist})
    return page_songs


# Request headers sent with every page fetch (a desktop-browser User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

# Visit pages 1 through 23 and collect the matching songs from each one.
songs = []
for page in range(1, 24):
    songs.extend(match_songs(page))

csv_file = '酷狗排行榜含8的歌曲.csv'
# newline='' lets the csv module control line endings itself; utf-8-sig
# prepends a BOM (presumably so spreadsheet tools detect the encoding).
with open(csv_file, 'w', newline='', encoding='utf-8-sig') as file:
    writer = csv.DictWriter(file, fieldnames=["排名", "歌曲", "歌手"])
    writer.writeheader()  # first row holds the field names
    writer.writerows(songs)