You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

190 lines
7.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

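# 1. Find the unencrypted request parameters.
# 2. On the site they are encrypted by the JavaScript function window.asrsea().
# 3. Reproduce that encryption here: params ---> encText, encSecKey ---> encSecKey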
import requests
from Crypto.Cipher import AES
from base64 import b64encode
import json
import re
# Scrape target: NetEase Cloud Music (music.163.com).
def get_wyy():
    """Fetch https://music.163.com/ and return the text of its <title> element.

    Returns:
        The text captured from the last ``<title>...</title>`` match in the
        response body, or an empty string when the page has no such element.

    Errors raised by ``requests.get`` are not caught here and propagate to
    the caller.
    """
    url = "https://music.163.com/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"}
    resp = requests.get(url=url, headers=headers)
    # re.S lets a title that spans several lines still match.
    title_pattern = re.compile(r"<title>(?P<name>.*?)</title>", re.S)
    # Start from "" so a page without a <title> yields an empty name instead
    # of leaving `name` unbound when the loop body never runs.
    name = ""
    for match in title_pattern.finditer(resp.text):
        name = match.group('name')
    return name
# Lyrics
def get_lyric(song_id):
    """Download the lyric text of a song from the music.163.com lyric API.

    Args:
        song_id: Identifier of the song; any non-string value is converted
            with ``str()`` before it is placed in the URL.

    Returns:
        The value stored under ``["lrc"]["lyric"]`` in the JSON response, or
        the literal string "访问异常" when the request fails, the HTTP status
        is an error, the body is not valid JSON, or the expected keys are
        absent.
    """
    headers = {
        "user-agent": "Mozilla/5.0",
        "Referer": "http://music.163.com",
        "Host": "music.163.com",
    }
    song_id = str(song_id)
    # The id is followed directly by "&"; a "+" here would be decoded as a
    # trailing space inside the id value.
    url = f"http://music.163.com/api/song/lyric?id={song_id}&lv=1&tv=-1"
    try:
        resp = requests.get(url, headers=headers)
        # Turn a non-success HTTP status into an exception handled below.
        resp.raise_for_status()
        # Decode the body with the encoding detected from its content.
        resp.encoding = resp.apparent_encoding
        json_obj = json.loads(resp.text)
        return json_obj["lrc"]["lyric"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort: report the failure as text, but let KeyboardInterrupt
        # and SystemExit through instead of swallowing them.
        return "访问异常"
# Values mirroring the arguments of the site's JavaScript encryption routine
# d(d, e, f, g) that is quoted further down in this file.

# `e` and `f` are the two values that routine passes to RSAKeyPair
# (presumably the RSA public exponent and modulus -- TODO confirm).
e = '010001'
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'

# AES key used for the first encryption pass in get_params().
g = '0CoJUm6Qyw8W8jud'

# AES key used for the second pass; the JavaScript generates it randomly,
# here it is pinned to one value (presumably the value that the constant
# returned by get_encSecKey() was produced from -- TODO confirm).
i = '0hyFaCNAVzOIdoht'

# Endpoint that get_comment() posts the encrypted form to.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="

# Request headers sent with the comment request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/84.0.4147.105 Safari/537.36"
    ),
}
def get_encSecKey():
    """Return the fixed ``encSecKey`` form value posted with each request.

    The value is a hard-coded constant; nothing is computed at call time.
    """
    # NOTE(review): presumably the RSA output the site's JavaScript produces
    # for the pinned key `i` -- confirm before changing `i`.
    enc_sec_key = "4022359ea3110bcd034e0160c3b89e5e172fd0110a3cf765d9f366d9fd09840a1f4a4705ac43719fdb8bfeb44d3b92334733061ad10942131184a4dfba0ac9d2cf867b8b6236523c1ca5f44c0d2d82c1c2665a3137a9241c7373539c1aa8e5e9bb9d33dafc764b5d76c2ab34fc94df85e27a934c8a603fa713f2cf38c2b7bbae"
    return enc_sec_key
# Encrypt the request payload
def get_params(data):
    """Build the ``params`` form value from a JSON string.

    Args:
        data: The request payload, already serialised as a JSON string.

    Returns:
        The result of encrypting *data* with key ``g`` and then encrypting
        that result again with key ``i``.
    """
    return enc_params(enc_params(data, g), i)
def to_16(data):
    """Pad *data* so that its UTF-8 encoding fills whole 16-byte blocks.

    Between 1 and 16 copies of ``chr(pad)`` are appended, where ``pad`` is
    the number of bytes missing to reach the next multiple of 16; input that
    is already aligned receives a full extra block of 16.

    The amount is derived from the encoded byte length rather than the
    character count, because the caller encrypts ``data.encode('utf-8')``;
    counting characters would leave text with multi-byte characters
    misaligned. For pure-ASCII input both counts are equal.

    Args:
        data: Text to pad.

    Returns:
        A new string consisting of *data* followed by the padding characters.
    """
    pad = 16 - len(data.encode('utf-8')) % 16
    # chr(1)..chr(16) each encode to a single byte, so `pad` characters add
    # exactly `pad` bytes.
    return data + chr(pad) * pad
# Encryption step
def enc_params(data, key):
    """Encrypt *data* with AES in CBC mode and return the result as base64 text.

    Args:
        data: Text to encrypt; it is padded with to_16() first.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The ciphertext, base64-encoded and decoded to a ``str``.
    """
    init_vector = "0102030405060708".encode('utf-8')
    # The plaintext handed to encrypt() must be a multiple of 16 bytes long.
    plaintext = to_16(data).encode('utf-8')
    cipher = AES.new(key=key.encode('utf-8'), IV=init_vector, mode=AES.MODE_CBC)
    ciphertext = cipher.encrypt(plaintext)
    return b64encode(ciphertext).decode("utf-8")
# How the encryption is performed.
# The bare string below is never assigned or used by the program; it serves as
# a block comment. It holds annotated JavaScript (apparently taken from the
# site) for the four functions a/b/c/d that the Python helpers in this file
# imitate: b() corresponds to enc_params(), and the body of d() corresponds to
# get_params() plus get_encSecKey().
'''
function a(a = 16) {
var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
for (d = 0; a > d; d += 1) #循环16次
e = Math.random() * b.length, #取随机数 3.15154
e = Math.floor(e), #取整 3
c += b.charAt(e); #取字符串中的第e个字符 3
return c #循环16次取b中的字符返回一个值c
}
function b(a, b) { #a为要加密的内容
var c = CryptoJS.enc.Utf8.parse(b) #由下几行可知c为密钥则b为密钥
, d = CryptoJS.enc.Utf8.parse("0102030405060708")
, e = CryptoJS.enc.Utf8.parse(a) #将数据a用utf-8转换
, f = CryptoJS.AES.encrypt(e, c, { #缺少加密密钥从而得知c密钥
iv: d, #AES算法中的偏移量
mode: CryptoJS.mode.CBC #模式CBC
});
return f.toString() #把f转换成字符串返回
}
function c(a, b, c) { #c不产生随机数random
var d, e;
return setMaxDigits(131),
d = new RSAKeyPair(b,"",c),
e = encryptedString(d, a)
}
function d(d, e, f, g) { d:data , e:'010001' , f:'一堆破烂玩意' , g:'0CoJUm6Qyw8W8jud'
var h = {} #空对象
, i = a(16); #i设置成定值
/*
return h.encText = b(d, g),
h.encText = b(h.encText, i),
h.encSecKey = c(i, e, f),
h
/*
#上式逻辑与下式相同
h.encText = b(d, g), #g为密钥
h.encText = b(h.encText, i), #返回的就是params i为密钥
h.encSecKey = c(i, e, f), #得到的就是encSecKey ef为默认值参数i为随机数,将i固定得到一个固定的encSecKey
return h
}
'''
def get_comment():
    """Interactively fetch comment pages and append them to ./the_song.txt.

    Prompts for the number of pages to fetch. For every page an encrypted
    form is posted to the module-level ``url``; the raw response is printed,
    and the hot comments and up to 20 regular comments it contains are
    appended to ``./the_song.txt``.

    Relies on the module-level names ``song_id``, ``url`` and ``headers``.

    Raises:
        ValueError: if the page count typed by the user is not an integer.
        KeyError: if the response JSON has no ``data`` entry or a comment
            lacks one of the fields written out.
    """
    page = int(input('请输入需要爬取的评论页数:'))
    print('开始爬!!!!!')
    for j in range(page):
        page_num = str(j * 20)
        data = {
            'csrf_token': "",
            'cursor': "-1",
            'offset': page_num,
            'orderType': "1",
            'pageNo': "1",
            'pageSize': "20",
            # To read another song's comments only the song id has to change
            'rid': "R_SO_4_" + song_id,
            'threadId': "R_SO_4_" + song_id,  # ... and here as well
        }
        response = requests.post(url, data={
            "params": get_params(json.dumps(data)),
            "encSecKey": get_encSecKey()
        }, headers=headers)
        print(response.text)
        result = json.loads(response.content.decode('utf-8'))
        payload = result['data']
        # A missing or null list is treated as empty rather than crashing.
        hot_comments = payload.get('hotComments') or []
        # Never write more than 20 per page, but accept a shorter page.
        comments = (payload.get('comments') or [])[:20]
        # `with` guarantees the file is closed even if a write fails.
        with open('./the_song.txt', 'a', encoding='utf-8') as fp:
            # hotComments
            fp.write('hotComments' + '\n')
            for hot in hot_comments:
                user = hot['user']
                fp.write('账号:' + str(user['userId']) + '\n')
                fp.write('昵称:' + user['nickname'] + '\n')
                fp.write('评论:' + hot['content'] + '\n')
                # No vipRights entry means "not a VIP", matching the regular
                # comment section below.
                if user.get('vipRights') is None:
                    fp.write('vip:no' + '\n')
                else:
                    fp.write('vip:yes' + '\n')
                fp.write('点赞数' + str(hot['likedCount']) + '\n')
                fp.write('-------------------------------------' + '\n')
            # comments
            fp.write('comments' + '\n')
            for comment in comments:
                user = comment['user']
                fp.write('昵称:' + user['nickname'] + '\n')
                fp.write('评论:' + comment['content'] + '\n')
                if user.get('vipRights') is None:
                    fp.write('vip: No' + '\n')
                else:
                    fp.write('vip: Yes' + '\n')
                fp.write('点赞数:' + str(comment['likedCount']) + '\n')
                fp.write('-------------------------------------' + '\n')
    print('爬取完毕!!!')
if __name__ == '__main__':
    # Script entry point: ask for a song id, write the site title and the
    # song's lyric to ./the_song.txt, then append the comments.
    # `song_id` is deliberately module-level: get_comment() reads it.
    song_id = input('请输入想看的歌曲ID呢在歌曲网址的最后面有一串id=......:')
    lyric = get_lyric(song_id)
    name = get_wyy()
    # "w" starts the output file from scratch on every run.
    with open("./the_song.txt", "w", encoding='utf-8') as file:
        # write() emits each string in one call; writelines() on a plain
        # str would iterate it character by character.
        file.write(name + '\n')
        file.write(lyric + '\n')
    # Runs only after the handle above is closed, so the comments that
    # get_comment() appends land after the title and lyric.
    get_comment()