|
|
#1.找到未加密的参数
|
|
|
#2.通过函数window.asrsea()进行加密
|
|
|
#3.想办法把参数进行加密,params--->encText encSecKey--->encSecKey
|
|
|
|
|
|
import requests
|
|
|
from Crypto.Cipher import AES
|
|
|
from base64 import b64encode
|
|
|
import json
|
|
|
import re
|
|
|
|
|
|
#爬取软件名称:网易云
|
|
|
def get_wyy():
    """Fetch the NetEase Cloud Music home page and return its ``<title>`` text.

    Returns:
        The text between ``<title>`` and ``</title>`` in the response body
        of https://music.163.com/, or an empty string when the page
        contains no title element.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    url = "https://music.163.com/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"}
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    # re.S lets "." span newlines in case the title is split across lines.
    match = re.search(r"<title>(?P<name>.*?)</title>", resp.text, re.S)
    # The result used to be bound only inside a loop over the matches, so a
    # page without <title> failed; fall back to "" so callers can still
    # concatenate the result with other strings.
    return match.group("name") if match else ""
|
|
|
|
|
|
#歌词信息
|
|
|
def get_lyric(song_id):
    """Download the lyric text of a song from the NetEase lyric API.

    Args:
        song_id: Song identifier; converted with ``str()`` before use, so
            both strings and integers are accepted.

    Returns:
        The value stored at ``["lrc"]["lyric"]`` in the JSON response, or
        the literal string "访问异常" when the request fails, the status is
        an error, the body is not valid JSON, or that field is missing.
    """
    headers = {
        "user-agent": "Mozilla/5.0",
        "Referer": "http://music.163.com",
        "Host": "music.163.com",
    }
    # Let requests build the query string; the hand-built URL used to carry a
    # stray "+" right after the id ("id=<id>+&lv=1"), which decodes as a space.
    query = {"id": str(song_id), "lv": 1, "tv": -1}
    try:
        resp = requests.get(
            "http://music.163.com/api/song/lyric",
            params=query,
            headers=headers,
            timeout=10,
        )
        # Turn 4xx/5xx responses into an HTTPError handled below.
        resp.raise_for_status()
        # Decode the body with the encoding detected from its content.
        resp.encoding = resp.apparent_encoding
        return json.loads(resp.text)["lrc"]["lyric"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # ValueError covers malformed JSON; KeyError/TypeError cover a
        # response that lacks the lrc/lyric structure.
        return "访问异常"
|
|
|
|
|
|
|
|
|
# Values captured from the site's JavaScript encryption routine (see the JS
# notes kept further down in this file).
# `e` and `f` are the values the JS passes on to its RSA step
# (presumably the public exponent and modulus, both hex -- TODO confirm).
e = '010001'
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# Key for the first AES pass in get_params().
g = '0CoJUm6Qyw8W8jud'
# Key for the second AES pass; the JS picks it at random, here it is pinned
# so that a constant encSecKey can be reused.
i = '0hyFaCNAVzOIdoht'

# Endpoint that get_comment() posts the encrypted form to.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
# Request headers sent with the comment request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
}
|
|
|
|
|
|
def get_encSecKey():
    """Return the fixed ``encSecKey`` form field value.

    Nothing is computed here: the value is a hard-coded hex string.
    """
    enc_sec_key = "4022359ea3110bcd034e0160c3b89e5e172fd0110a3cf765d9f366d9fd09840a1f4a4705ac43719fdb8bfeb44d3b92334733061ad10942131184a4dfba0ac9d2cf867b8b6236523c1ca5f44c0d2d82c1c2665a3137a9241c7373539c1aa8e5e9bb9d33dafc764b5d76c2ab34fc94df85e27a934c8a603fa713f2cf38c2b7bbae"
    return enc_sec_key
|
|
|
|
|
|
#对数据进行加密
|
|
|
def get_params(data):
    """Build the ``params`` form field by encrypting *data* twice.

    Args:
        data: The request payload, already serialised as a JSON string.

    Returns:
        The output of ``enc_params`` applied first with the module-level
        key ``g`` and then, on that result, with the module-level key ``i``.
    """
    return enc_params(enc_params(data, g), i)
|
|
|
|
|
|
def to_16(data):
    """Pad *data* so that its UTF-8 encoding is a multiple of 16 bytes.

    ``pad`` copies of ``chr(pad)`` are appended, with ``pad`` between 1 and
    16; input that is already aligned receives a full extra block of 16.

    Args:
        data: Text to pad.

    Returns:
        The padded text.
    """
    # Measure the encoded length, not the character count: the caller
    # encrypts the UTF-8 bytes, and non-ASCII characters take more than one
    # byte each. For pure-ASCII input both lengths are the same.
    pad = 16 - len(data.encode('utf-8')) % 16
    return data + chr(pad) * pad
|
|
|
|
|
|
#加密过程
|
|
|
def enc_params(data, key):
    """Encrypt *data* with AES in CBC mode and return Base64 text.

    Args:
        data: Text to encrypt; it is padded with ``to_16`` first.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The ciphertext, Base64-encoded and decoded to ``str``.
    """
    iv_bytes = "0102030405060708".encode('utf-8')
    # The cipher only accepts input whose length is a multiple of 16 bytes.
    plaintext = to_16(data).encode('utf-8')
    cipher = AES.new(key=key.encode('utf-8'), mode=AES.MODE_CBC, IV=iv_bytes)
    encrypted = cipher.encrypt(plaintext)
    return b64encode(encrypted).decode("utf-8")
|
|
|
|
|
|
#处理加密过程
|
|
|
'''
|
|
|
function a(a = 16) { ↓
|
|
|
var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
|
|
|
for (d = 0; a > d; d += 1) #循环16次
|
|
|
e = Math.random() * b.length, #取随机数 3.15154
|
|
|
e = Math.floor(e), #取整 3
|
|
|
c += b.charAt(e); #取字符串中的第e个字符 3
|
|
|
return c #循环16次取b中的字符返回一个值c
|
|
|
}
|
|
|
function b(a, b) { #a为要加密的内容
|
|
|
var c = CryptoJS.enc.Utf8.parse(b) #由下几行可知c为密钥,则b为密钥
|
|
|
, d = CryptoJS.enc.Utf8.parse("0102030405060708")
|
|
|
, e = CryptoJS.enc.Utf8.parse(a) #将数据a用utf-8转换
|
|
|
, f = CryptoJS.AES.encrypt(e, c, { #缺少加密密钥,从而得知c密钥
|
|
|
iv: d, #AES算法中的偏移量
|
|
|
mode: CryptoJS.mode.CBC #模式:CBC
|
|
|
});
|
|
|
return f.toString() #把f转换成字符串返回
|
|
|
}
|
|
|
function c(a, b, c) { #c不产生随机数random
|
|
|
var d, e;
|
|
|
return setMaxDigits(131),
|
|
|
d = new RSAKeyPair(b,"",c),
|
|
|
e = encryptedString(d, a)
|
|
|
}
|
|
|
function d(d, e, f, g) {        #d:data , e:'010001' , f:'一堆破烂玩意' , g:'0CoJUm6Qyw8W8jud'
|
|
|
var h = {} #空对象
|
|
|
, i = a(16); #i设置成定值
|
|
|
/*
|
|
|
return h.encText = b(d, g),
|
|
|
h.encText = b(h.encText, i),
|
|
|
h.encSecKey = c(i, e, f),
|
|
|
h
|
|
|
*/
|
|
|
#上式逻辑与下式相同
|
|
|
h.encText = b(d, g), #g为密钥
|
|
|
h.encText = b(h.encText, i), #返回的就是params i为密钥
|
|
|
h.encSecKey = c(i, e, f), #得到的就是encSecKey e,f为默认值参数,i为随机数,将i固定得到一个固定的encSecKey
|
|
|
return h
|
|
|
}
|
|
|
'''
|
|
|
|
|
|
def get_comment():
    """Interactively scrape comment pages of the current song into a file.

    Asks on stdin how many pages to fetch. For each page it POSTs the
    encrypted query to the module-level ``url``, prints the raw response,
    and appends the hot comments followed by the regular comments to
    ``./the_song.txt``.

    Relies on module-level names: ``song_id`` (set by the script entry
    point), ``url``, ``headers``, ``get_params`` and ``get_encSecKey``.

    Raises:
        ValueError: if the page count typed by the user is not an integer,
            or a response body is not valid JSON.
        KeyError: if a response lacks the fields read below.
        requests.RequestException: if a request fails or times out.
    """
    page_size = 20
    page = int(input('请输入需要爬取的评论页数:'))
    print('开始爬!!!!!')
    for j in range(page):
        page_num = str(j * page_size)
        data = {
            'csrf_token': "",
            'cursor': "-1",
            'offset': page_num,
            'orderType': "1",
            'pageNo': "1",
            'pageSize': str(page_size),
            # The song is selected purely by its id in these two fields.
            'rid': "R_SO_4_" + song_id,
            'threadId': "R_SO_4_" + song_id,
        }

        response = requests.post(
            url,
            data={
                "params": get_params(json.dumps(data)),
                "encSecKey": get_encSecKey(),
            },
            headers=headers,
            timeout=10,
        )
        print(response.text)

        result = json.loads(response.content.decode('utf-8'))
        payload = result['data']
        # Treat a missing or null list as empty instead of crashing on it.
        hot_comments = payload.get('hotComments') or []
        # Write what the page actually holds (at most one page worth); a
        # fixed range(20) raised IndexError on a short last page.
        comments = (payload.get('comments') or [])[:page_size]

        # The context manager closes the file even if a record is malformed.
        with open('./the_song.txt', 'a', encoding='utf-8') as fp:
            fp.write('hotComments' + '\n')
            for hot in hot_comments:
                user = hot['user']
                fp.write('账号:' + str(user['userId']) + '\n')
                fp.write('昵称:' + user['nickname'] + '\n')
                fp.write('评论:' + hot['content'] + '\n')
                # No vipRights means not a VIP; the labels used to be
                # swapped here, contradicting the regular-comment section.
                if user['vipRights'] is None:
                    fp.write('vip:no' + '\n')
                else:
                    fp.write('vip:yes' + '\n')
                fp.write('点赞数' + str(hot['likedCount']) + '\n')
                fp.write('-------------------------------------' + '\n')

            fp.write('comments' + '\n')
            for comment in comments:
                user = comment['user']
                fp.write('昵称:' + user['nickname'] + '\n')
                fp.write('评论:' + comment['content'] + '\n')
                if user['vipRights'] is None:
                    fp.write('vip: No' + '\n')
                else:
                    fp.write('vip: Yes' + '\n')
                fp.write('点赞数:' + str(comment['likedCount']) + '\n')
                fp.write('-------------------------------------' + '\n')
    print('爬取完毕!!!')
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # song_id must stay a module-level name: get_comment() reads it.
    song_id = input('请输入想看的歌曲ID呢(在歌曲网址的最后面有一串id=......):')
    lyric = get_lyric(song_id)
    name = get_wyy()
    # Mode "w" starts the output file afresh; get_comment() appends to it.
    with open("./the_song.txt", mode="w", encoding='utf-8') as out:
        out.write(name + '\n')
        out.write(lyric + '\n')
    get_comment()
|
|
|
|
|
|
|
|
|
|