"""Scrape the NetEase Cloud Music hot-song chart and export it to CSV.

The script downloads the chart page, extracts each song's ID and title with a
regular expression, fetches the lyrics of every song from the lyric API,
prints each record, and finally writes all records to
``网易云音乐热歌榜.csv``.

Run it as a script (``python music.py``); importing the module performs no
network or file I/O.
"""
import csv
import re
import sys

import requests
from lxml import etree
from bs4 import BeautifulSoup

url = 'https://music.163.com/discover/toplist?id=3778678'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}
down_url = 'https://music.163.com/song/media/outer/url?id='

# Seconds to wait for any single HTTP request before giving up, so that one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10

# Matches one chart entry; group 1 is the song ID, group 2 is the song title.
# NOTE(review): this pattern was reconstructed from a copy of the script in
# which the HTML tags inside the literal had been stripped; confirm it against
# the markup of the live chart page.
SONG_LINK_RE = re.compile(r'<li><a href="/song\?id=(\d+)">(.*?)</a></li>')

# Matches LRC time tags such as "[01:23.45]" that prefix each lyric line.
TIMESTAMP_RE = re.compile(r'\[\d+:\d+\.\d+\]')

# Placeholder stored when a song has no lyrics or they cannot be retrieved.
NO_LYRICS = '暂无歌词'

CSV_PATH = '网易云音乐热歌榜.csv'


def fetch_chart_html() -> str:
    """Download the chart page and return its HTML text.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def extract_songs(html_data: str) -> list:
    """Return a list of ``(song_id, song_name)`` tuples found in *html_data*."""
    return SONG_LINK_RE.findall(html_data)


def strip_timestamps(lyric_text: str) -> str:
    """Return *lyric_text* with every LRC time tag removed."""
    return TIMESTAMP_RE.sub('', lyric_text)


def fetch_lyrics(song_id: str) -> str:
    """Return the lyrics of *song_id* without time tags.

    Falls back to the ``NO_LYRICS`` placeholder when the response carries no
    lyric text, or when the request or JSON decoding fails; failures are
    reported on stderr so that one bad song does not abort the whole export.
    """
    lyrics_url = f'https://music.163.com/api/song/lyric?id={song_id}&lv=-1&kv=-1&tv=-1'
    try:
        lyrics_response = requests.get(
            lyrics_url, headers=headers, timeout=REQUEST_TIMEOUT)
        lyrics_response.raise_for_status()
        lyrics_data = lyrics_response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f'warning: could not fetch lyrics for song {song_id}: {exc}',
              file=sys.stderr)
        return NO_LYRICS

    # Guard every level: the payload may omit "lrc" or set it to null.
    lrc = lyrics_data.get('lrc') if isinstance(lyrics_data, dict) else None
    raw_lyric = lrc.get('lyric') if isinstance(lrc, dict) else None
    if not isinstance(raw_lyric, str):
        return NO_LYRICS
    return strip_timestamps(raw_lyric)


def collect_song_data(html_data: str) -> list:
    """Build ``[song_name, song_id, song_url, lyrics]`` rows for every song.

    Each row is also printed as it is collected.
    """
    song_data = []
    for song_id, song_name in extract_songs(html_data):
        song_url = f'https://music.163.com/song?id={song_id}'
        lyrics = fetch_lyrics(song_id)

        print(f'歌名:{song_name}\nID:{song_id}\nURL:{song_url}\n歌词:{lyrics}')

        song_data.append([song_name, song_id, song_url, lyrics])
    return song_data


def save_to_csv(song_data: list, path: str = CSV_PATH) -> None:
    """Write the header row followed by *song_data* to the CSV file at *path*."""
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['歌名', 'ID', 'URL', '歌词'])
        writer.writerows(song_data)


def main() -> None:
    """Scrape the chart, print every song, and save the result as CSV."""
    html_data = fetch_chart_html()
    song_data = collect_song_data(html_data)
    if not song_data:
        # An empty result usually means the page layout changed or the
        # request was served a different page; the CSV will hold only the header.
        print('warning: no songs were found on the chart page', file=sys.stderr)
    save_to_csv(song_data)


if __name__ == '__main__':
    main()