# Recovered from a git patch that deleted this module:
#   diff --git a/fin.py b/fin.py  (deleted file mode 100644, index 3add0a6..0000000)
#
# Approach:
# 1. Find the unencrypted request parameters.
# 2. The web client encrypts them with window.asrsea().
# 3. Reproduce that encryption here: params ---> encText, encSecKey ---> encSecKey.

import json
import re
from base64 import b64encode

import requests
from Crypto.Cipher import AES

# Seconds to wait for any HTTP response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Line written after every comment record in the output file.
SEPARATOR = '-------------------------------------'


# Scrape the product name: NetEase Cloud Music.
def get_wyy():
    """Fetch the NetEase Cloud Music home page and return the scraped name.

    Returns:
        The ``name`` group of the first regex match, or ``""`` when the
        pattern does not match at all.

    Raises:
        requests.RequestException: if the home page cannot be fetched.
    """
    home_url = "https://music.163.com/"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"}
    resp = requests.get(url=home_url, headers=request_headers, timeout=REQUEST_TIMEOUT)
    # NOTE(review): as written, this lazy pattern has no delimiters, so it can
    # only ever match the empty string and this function always returns "".
    # The surrounding markup (presumably HTML tags) appears to have been lost
    # from the recovered source -- restore it before relying on the result.
    # Indentation was also lost, so it is unclear whether the original returned
    # the first or the last match; with this pattern both are "".
    pattern = re.compile(r"(?P<name>.*?)", re.S)
    match = pattern.search(resp.text)
    return match.group('name') if match else ""


# Lyrics.
def get_lyric(song_id):
    """Return the lyric text for *song_id*.

    Args:
        song_id: The song id, as ``str`` or anything ``str()`` can convert.

    Returns:
        The value of ``["lrc"]["lyric"]`` in the API's JSON response, or the
        literal string "访问异常" when the request fails or the response does
        not have the expected shape.
    """
    request_headers = {
        "user-agent": "Mozilla/5.0",
        "Referer": "http://music.163.com",
        "Host": "music.163.com"
    }
    song_id = str(song_id)
    # The original URL had a stray "+" after the id (a leftover from string
    # concatenation); in a query string "+" decodes to a space, so it is removed.
    lyric_url = f"http://music.163.com/api/song/lyric?id={song_id}&lv=1&tv=-1"
    try:
        resp = requests.get(lyric_url, headers=request_headers, timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()  # raises requests.HTTPError on a 4xx/5xx status
        resp.encoding = resp.apparent_encoding  # guess the encoding from the body
        json_obj = json.loads(resp.text)  # str -> dict
        return json_obj["lrc"]["lyric"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network/HTTP failure, invalid JSON, or an unexpected response shape.
        return "访问异常"


# Values passed to the site's JavaScript d(d, e, f, g) function (see the notes
# further down). ``e`` and ``f`` are only consumed by the JavaScript c()
# routine; they are not used by this module because encSecKey is hard-coded.
e = "010001"
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
g = "0CoJUm6Qyw8W8jud"  # key for the first AES pass
i = "0hyFaCNAVzOIdoht"  # key for the second AES pass; random in the browser, pinned here

url = 'https://music.163.com/weapi/comment/resource/comments/get?csrf_token='
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
}


def get_encSecKey():
    """Return the fixed ``encSecKey`` form field.

    Per the notes below, the browser derives this value from ``i``, ``e`` and
    ``f``; because ``i`` is pinned to a constant, the result is a constant too.
    """
    return "4022359ea3110bcd034e0160c3b89e5e172fd0110a3cf765d9f366d9fd09840a1f4a4705ac43719fdb8bfeb44d3b92334733061ad10942131184a4dfba0ac9d2cf867b8b6236523c1ca5f44c0d2d82c1c2665a3137a9241c7373539c1aa8e5e9bb9d33dafc764b5d76c2ab34fc94df85e27a934c8a603fa713f2cf38c2b7bbae"


# Encrypt the request data.
def get_params(data):
    """Return the ``params`` form field for *data*.

    Args:
        data: The request payload as a JSON string.

    Returns:
        *data* encrypted with key ``g``, then that result encrypted again
        with key ``i`` (both passes via :func:`enc_params`).
    """
    first = enc_params(data, g)
    second = enc_params(first, i)
    return second


def to_16(data):
    """Pad *data* so that its UTF-8 encoding is a multiple of 16 bytes long.

    Appends ``pad`` copies of ``chr(pad)`` where ``pad`` is in 1..16. The pad
    length is computed from the encoded byte length (not the character count)
    so that non-ASCII text is also padded to a full AES block.

    Args:
        data: The text to pad.

    Returns:
        The padded text.
    """
    pad = 16 - len(data.encode('utf-8')) % 16
    return data + chr(pad) * pad


# The encryption step.
def enc_params(data, key):
    """AES-CBC encrypt *data* with *key* and return the result as Base64 text.

    Args:
        data: Plain text to encrypt; it is padded with :func:`to_16` first.
        key: The AES key as text (encoded to UTF-8 before use).

    Returns:
        The Base64-encoded ciphertext as ``str``.
    """
    iv = "0102030405060708"
    padded = to_16(data)
    # ``IV`` (upper case) is the keyword spelling the original code used.
    aes = AES.new(key=key.encode('utf-8'), IV=iv.encode('utf-8'), mode=AES.MODE_CBC)
    encrypted = aes.encrypt(padded.encode('utf-8'))  # input length must be a multiple of 16
    return b64encode(encrypted).decode('utf-8')


# Notes on the site's JavaScript encryption routine (annotated excerpt):
#
# function a(a = 16) {
#     var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
#     for (d = 0; a > d; d += 1)            // loop 16 times
#         e = Math.random() * b.length,     // pick a random number, e.g. 3.15154
#         e = Math.floor(e),                // round down, e.g. 3
#         c += b.charAt(e);                 // take the e-th character of b
#     return c                              // c: 16 characters drawn at random from b
# }
# function b(a, b) {                        // a is the content to encrypt
#     var c = CryptoJS.enc.Utf8.parse(b)    // c is used as the key below, so b is the key
#       , d = CryptoJS.enc.Utf8.parse("0102030405060708")
#       , e = CryptoJS.enc.Utf8.parse(a)    // the data a, parsed as UTF-8
#       , f = CryptoJS.AES.encrypt(e, c, {  // encrypt needs a key, which shows c is the key
#         iv: d,                            // the AES initialisation vector
#         mode: CryptoJS.mode.CBC           // mode: CBC
#     });
#     return f.toString()                   // return f converted to a string
# }
# function c(a, b, c) {                     // c() involves no randomness
#     var d, e;
#     return setMaxDigits(131),
#     d = new RSAKeyPair(b,"",c),
#     e = encryptedString(d, a)
# }
# function d(d, e, f, g) {   // d: data, e: '010001', f: the long hex string, g: '0CoJUm6Qyw8W8jud'
#     var h = {}                            // empty object
#       , i = a(16);                        // i is pinned to a fixed value in this script
#
#     // Original form:
#     //     return h.encText = b(d, g),
#     //     h.encText = b(h.encText, i),
#     //     h.encSecKey = c(i, e, f),
#     //     h
#     // which is equivalent to:
#     h.encText = b(d, g),                  // g is the key
#     h.encText = b(h.encText, i),          // this result is `params`; i is the key
#     h.encSecKey = c(i, e, f),             // this is `encSecKey`; e and f are constants and i is
#                                           // random, so fixing i yields a fixed encSecKey
#     return h
# }


def _write_hot_comments(fp, hot_comments):
    """Write the 'hotComments' section for one page to the open file *fp*."""
    fp.write('hotComments' + '\n')
    for comment in hot_comments:
        user = comment['user']
        fp.write(f"账号:{user['userId']}\n")
        fp.write(f"昵称:{user['nickname']}\n")
        fp.write(f"评论:{comment['content']}\n")
        # A missing vipRights entry means "not VIP". The original had these two
        # branches swapped here, contradicting the regular-comments section.
        if user['vipRights'] is None:
            fp.write('vip:no' + '\n')
        else:
            fp.write('vip:yes' + '\n')
        fp.write(f"点赞数{comment['likedCount']}\n")
        fp.write(SEPARATOR + '\n')


def _write_comments(fp, comments):
    """Write the 'comments' section (at most 20 entries) to the open file *fp*."""
    fp.write('comments' + '\n')
    # Slice instead of indexing 0..19 so a short page does not raise IndexError.
    for comment in comments[:20]:
        user = comment['user']
        fp.write(f"昵称:{user['nickname']}\n")
        fp.write(f"评论:{comment['content']}\n")
        if user['vipRights'] is None:
            fp.write('vip: No' + '\n')
        else:
            fp.write('vip: Yes' + '\n')
        fp.write(f"点赞数:{comment['likedCount']}\n")
        fp.write(SEPARATOR + '\n')


def get_comment(song_id=None):
    """Prompt for a page count, fetch that many comment pages and append them
    to ``./歌曲信息.txt``.

    Args:
        song_id: Id of the song whose comments are fetched. When omitted, the
            module-level ``song_id`` variable is used, which is how the
            original script passed it.

    Raises:
        ValueError: if no song id is available, or the page count entered is
            not an integer.
        requests.RequestException: if a request fails or times out.
        KeyError: if a response lacks the expected ``data`` structure.
    """
    if song_id is None:
        song_id = globals().get('song_id')
    if song_id is None:
        raise ValueError("song_id is required")
    song_id = str(song_id)

    page = int(input('请输入需要爬取的评论页数:'))
    print('开始爬!!!!!')
    for page_index in range(page):
        data = {
            'csrf_token': "",
            'cursor': "-1",
            'offset': str(page_index * 20),
            'orderType': "1",
            'pageNo': "1",
            'pageSize': "20",
            'rid': "R_SO_4_" + song_id,  # use another song's id to read that song's comments
            'threadId': "R_SO_4_" + song_id  # ...and here as well
        }

        response = requests.post(url, data={
            "params": get_params(json.dumps(data)),
            "encSecKey": get_encSecKey()
        }, headers=headers, timeout=REQUEST_TIMEOUT)
        print(response.text)

        result = json.loads(response.content.decode('utf-8'))
        payload = result['data']
        # ``with`` guarantees the file is closed even if a record is malformed.
        with open('./歌曲信息.txt', 'a', encoding='utf-8') as fp:
            # Treat a null/missing list as empty rather than crashing on it.
            _write_hot_comments(fp, payload.get('hotComments') or [])
            _write_comments(fp, payload.get('comments') or [])

    print('爬取完毕!!!')


if __name__ == '__main__':
    song_id = input('请输入想看的歌曲ID呢(在歌曲网址的最后面有一串id=......):')
    lyric = get_lyric(song_id)
    name = get_wyy()
    with open("./歌曲信息.txt", "w", encoding='utf-8') as file:
        file.write(name + '\n')
        file.write(lyric + '\n')
    get_comment(song_id)