import csv
import html
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree
# Chart page to scrape (NetEase Cloud Music toplist id 3778678).
url = 'https://music.163.com/discover/toplist?id=3778678'
# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
# Prefix of the direct media URL; not used by this script, kept for callers.
down_url = 'https://music.163.com/song/media/outer/url?id='

# Endpoint queried once per song for its lyrics.
LYRICS_API = 'https://music.163.com/api/song/lyric'
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Output file and the placeholder written when a song has no usable lyrics.
CSV_PATH = '网易云音乐热歌榜.csv'
NO_LYRICS = '暂无歌词'

# One <li><a href="/song?id=ID">NAME</a></li> entry of the chart page.
SONG_LINK_RE = re.compile(r'<li><a href="/song\?id=(.*?)">(.*?)</a></li>')
# LRC time tags: [mm:ss], [mm:ss.xx] and [mm:ss:xx].
LRC_TIMESTAMP_RE = re.compile(r'\[\d+:\d+(?:[.:]\d+)?\]')


def parse_song_list(page_html: str) -> list:
    """Return ``(song_id, song_name)`` tuples found in the chart page HTML.

    Song names are taken from raw markup, so HTML entities such as ``&amp;``
    are decoded before being returned. An empty list is returned when the
    page contains no matching entries.
    """
    return [
        (song_id, html.unescape(song_name))
        for song_id, song_name in SONG_LINK_RE.findall(page_html)
    ]


def extract_lyrics(lyrics_data) -> str:
    """Return the lyric text of a lyric-API payload with time tags removed.

    ``NO_LYRICS`` is returned when the payload has no ``lrc.lyric`` string,
    including the cases where ``lrc`` or ``lyric`` is present but null.
    """
    lrc = lyrics_data.get('lrc') if isinstance(lyrics_data, dict) else None
    raw_lyric = lrc.get('lyric') if isinstance(lrc, dict) else None
    if not isinstance(raw_lyric, str):
        return NO_LYRICS
    return LRC_TIMESTAMP_RE.sub('', raw_lyric)


def fetch_lyrics(session, song_id) -> str:
    """Download and clean the lyrics of one song.

    A network failure, HTTP error status or non-JSON response is reported
    on stdout and mapped to ``NO_LYRICS`` so that one bad song does not
    abort the run and discard the rows collected so far.
    """
    query = {'id': song_id, 'lv': -1, 'kv': -1, 'tv': -1}
    try:
        lyrics_response = session.get(LYRICS_API, params=query, timeout=REQUEST_TIMEOUT)
        lyrics_response.raise_for_status()
        lyrics_data = lyrics_response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding errors raised by older requests versions.
        print(f'获取歌词失败(ID:{song_id}):{exc}')
        return NO_LYRICS
    return extract_lyrics(lyrics_data)


def save_to_csv(rows, path=CSV_PATH) -> None:
    """Write the header row followed by *rows* to *path* as UTF-8 CSV."""
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['歌名', 'ID', 'URL', '歌词'])
        writer.writerows(rows)


def main() -> None:
    """Scrape the chart, print each song with its lyrics, and save a CSV.

    Raises:
        requests.RequestException: if the chart page itself cannot be
            fetched; without it there is nothing to process.
    """
    song_data = []

    # A single session reuses the connection across the per-song requests.
    with requests.Session() as session:
        session.headers.update(headers)

        # Fetch the chart page.
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        # Extract song ids and names from the page.
        song_list = parse_song_list(response.text)
        if not song_list:
            print('未在页面中找到任何歌曲')

        for song_id, song_name in song_list:
            song_url = f'https://music.163.com/song?id={song_id}'

            # Fetch the lyrics for this song.
            lyrics = fetch_lyrics(session, song_id)

            print(f'歌名:{song_name}\nID:{song_id}\nURL:{song_url}\n歌词:{lyrics}')

            # Collect the row for the CSV export.
            song_data.append([song_name, song_id, song_url, lyrics])

    # Persist everything that was collected.
    save_to_csv(song_data)


if __name__ == '__main__':
    main()