ADD file via upload

4 years ago · 233d2e1be6
parent 97ef9406b7
commit 233d2e1be6
1 changed files with 189 additions and 0 deletions
--- a/fin.py
+++ b/fin.py
@ -0,0 +1,189 @@
+#1.找到未加密的参数
+#2.通过函数window.asrsea()进行加密
+#3.想办法把参数进行加密，params--->encText  encSecKey--->encSecKey
+
+import requests
+from Crypto.Cipher import AES
+from base64 import b64encode
+import json
+import re
+
+#爬取软件名称：网易云
+def get_wyy():
+    """Fetch the NetEase Cloud Music home page and return its <title> text.
+
+    Returns:
+        The text of the last ``<title>`` element found in the response body,
+        or an empty string when the body contains no ``<title>`` element
+        (previously that case raised UnboundLocalError).
+    """
+    url = "https://music.163.com/"
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"}
+    resp = requests.get(url=url, headers=headers)
+    # re.S lets "." match newlines, so a title wrapped over several lines
+    # is still captured.
+    titles = re.findall(r"<title>(.*?)</title>", resp.text, re.S)
+    # Keep the "last match wins" behaviour of the former loop, but fall back
+    # to "" instead of reading a variable that was never assigned.
+    return titles[-1] if titles else ""
+
+#歌词信息
+def get_lyric(song_id):
+    """Fetch the lyric text of a song from the NetEase lyric API.
+
+    Args:
+        song_id: Song identifier; it is converted with ``str()`` before use.
+
+    Returns:
+        The ``["lrc"]["lyric"]`` value of the JSON response, or the literal
+        string "访问异常" when the request fails, the status is not OK, or
+        the response does not have the expected JSON shape.
+    """
+    headers = {
+        "user-agent" : "Mozilla/5.0",
+        "Referer" : "http://music.163.com",
+        "Host" : "music.163.com"
+    }
+    # Let requests build the query string. The hand-written URL carried a
+    # stray "+" after the id, which a server decodes as a trailing space.
+    query = {"id": str(song_id), "lv": "1", "tv": "-1"}
+    try:
+        resp = requests.get(
+            "http://music.163.com/api/song/lyric",
+            params=query,
+            headers=headers,
+            timeout=10,  # do not hang forever on an unresponsive server
+        )
+        resp.raise_for_status()                   # non-2xx status -> HTTPError
+        resp.encoding = resp.apparent_encoding    # decode using the detected charset
+        return json.loads(resp.text)["lrc"]["lyric"]
+    except (requests.RequestException, ValueError, KeyError, TypeError):
+        # Network/HTTP failure, invalid JSON, or a payload without lrc/lyric.
+        # Unlike a bare "except:", this lets Ctrl-C and SystemExit propagate.
+        return "访问异常"
+
+
+# The names e, f, g and i mirror the parameters of the site's JS function
+# d(d, e, f, g) quoted in the notes string further down this file.
+# e and f are the two values that JS hands to RSAKeyPair; the Python code
+# visible in this file never reads them.
+e = "010001"
+f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
+# g and i are the AES keys used, in that order, by get_params().
+g = "0CoJUm6Qyw8W8jud"
+i = "0hyFaCNAVzOIdoht"
+
+# Endpoint and request headers used by get_comment() for its POST request.
+url = 'https://music.163.com/weapi/comment/resource/comments/get?csrf_token='
+headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+    }
+
+def get_encSecKey():
+    """Return the hard-coded ``encSecKey`` form value.
+
+    The value is a constant hex string. Per the notes in this file it is
+    presumably the key obtained when the random string ``i`` is pinned to a
+    fixed value -- confirm against the site before changing ``i``.
+    """
+    enc_sec_key = "4022359ea3110bcd034e0160c3b89e5e172fd0110a3cf765d9f366d9fd09840a1f4a4705ac43719fdb8bfeb44d3b92334733061ad10942131184a4dfba0ac9d2cf867b8b6236523c1ca5f44c0d2d82c1c2665a3137a9241c7373539c1aa8e5e9bb9d33dafc764b5d76c2ab34fc94df85e27a934c8a603fa713f2cf38c2b7bbae"
+    return enc_sec_key
+
+#对数据进行加密
+def get_params(data):
+    """Return the ``params`` form value for *data*.
+
+    *data* is expected to be a JSON string. It is encrypted with the key
+    ``g``, and the Base64 text of that result is encrypted again with the
+    key ``i``.
+    """
+    return enc_params(enc_params(data, g), i)
+
+def to_16(data):
+    """Pad *data* so that its UTF-8 encoding is a multiple of 16 bytes.
+
+    Appends ``pad`` copies of ``chr(pad)`` where ``pad`` is between 1 and 16;
+    a full block of 16 is added when the input is already aligned.
+
+    Args:
+        data: Text that the caller will encode as UTF-8 and encrypt.
+
+    Returns:
+        The padded text.
+    """
+    # Measure the encoded length, not the character count: the caller
+    # encrypts the UTF-8 bytes, and non-ASCII characters take more than one
+    # byte each. For pure-ASCII input both measures agree.
+    pad = 16 - len(data.encode('utf-8')) % 16
+    return data + chr(pad) * pad
+
+#加密过程
+def enc_params(data,key):
+    """Encrypt *data* with AES in CBC mode and return it as Base64 text.
+
+    Args:
+        data: Text to encrypt; it is padded with to_16() first.
+        key: AES key as text; it is encoded as UTF-8.
+
+    Returns:
+        The Base64 encoding of the ciphertext, as a ``str``.
+    """
+    padded = to_16(data)
+    init_vector = "0102030405060708".encode('utf-8')
+    cipher = AES.new(key=key.encode('utf-8'), IV=init_vector, mode=AES.MODE_CBC)
+    # The plaintext length must be a multiple of 16, hence the padding above.
+    encrypted = cipher.encrypt(padded.encode('utf-8'))
+    return b64encode(encrypted).decode("utf-8")
+
+#处理加密过程
+'''
+function a(a = 16) { ↓
+    var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
+    for (d = 0; a > d; d += 1)               #循环16次
+        e = Math.random() * b.length,        #取随机数   3.15154
+        e = Math.floor(e),                   #取整      3
+        c += b.charAt(e);                    #取字符串中的第e个字符    3
+    return c                                 #循环16次取b中的字符返回一个值c
+}
+function b(a, b) {  #a为要加密的内容
+    var c = CryptoJS.enc.Utf8.parse(b)      #由下几行可知c为密钥，则b为密钥
+      , d = CryptoJS.enc.Utf8.parse("0102030405060708")
+      , e = CryptoJS.enc.Utf8.parse(a)      #将数据a用utf-8转换
+      , f = CryptoJS.AES.encrypt(e, c, {    #缺少加密密钥，从而得知c密钥
+        iv: d,                              #AES算法中的偏移量
+        mode: CryptoJS.mode.CBC             #模式：CBC
+    });
+    return f.toString()                     #把f转换成字符串返回
+}
+function c(a, b, c) {      #c不产生随机数random
+    var d, e;
+    return setMaxDigits(131),
+    d = new RSAKeyPair(b,"",c),
+    e = encryptedString(d, a)
+}
+function d(d, e, f, g) {        d:data , e:'010001' , f:'一堆破烂玩意' , g:'0CoJUm6Qyw8W8jud'
+    var h = {}                 #空对象
+      , i = a(16);             #i设置成定值
+/*
+    return h.encText = b(d, g),
+    h.encText = b(h.encText, i),
+    h.encSecKey = c(i, e, f),
+    h
+*/
+    #上式逻辑与下式相同
+    h.encText = b(d, g),            #g为密钥          
+    h.encText = b(h.encText, i),    #返回的就是params      i为密钥
+    h.encSecKey = c(i, e, f),       #得到的就是encSecKey      e，f为默认值参数，i为随机数,将i固定得到一个固定的encSecKey
+    return h
+}
+'''
+
+def get_comment():
+    """Prompt for a page count, then download comments and append them to a file.
+
+    For each page, the paging parameters are encrypted with get_params() and
+    posted to the module-level ``url``. The hot comments and regular comments
+    of the JSON response are appended to ``./歌曲信息.txt``. The song is
+    taken from the module-level ``song_id``.
+    """
+    page = int(input('请输入需要爬取的评论页数：'))
+    print('开始爬！！！！！')
+    # Open the output once for the whole run. Reopening it on every page
+    # leaked all handles but the last, and a page count of 0 left the handle
+    # undefined at close time.
+    with open('./歌曲信息.txt', 'a', encoding='utf-8') as fp:
+        for j in range(page):
+            data = {
+                'csrf_token': "",
+                'cursor': "-1",
+                'offset': str(j * 20),
+                'orderType': "1",
+                'pageNo': "1",
+                'pageSize': "20",
+                'rid': "R_SO_4_" + song_id,          # the song id selects whose comments are fetched
+                'threadId': "R_SO_4_" + song_id
+            }
+
+            response = requests.post(url, data={
+                "params": get_params(json.dumps(data)),
+                "encSecKey": get_encSecKey()
+            }, headers=headers)
+            print(response.text)
+
+            result = json.loads(response.content.decode('utf-8'))
+            payload = result['data']
+
+            # Hot comments. "or []" tolerates a missing or null list.
+            fp.write('hotComments' + '\n')
+            for hot in payload.get('hotComments') or []:
+                user = hot['user']
+                fp.write('账号：' + str(user['userId']) + '\n')
+                fp.write('昵称：' + user['nickname'] + '\n')
+                fp.write('评论：' + hot['content'] + '\n')
+                # No vipRights means "not a VIP"; this branch used to be
+                # inverted relative to the regular-comment branch below.
+                if user.get('vipRights') is None:
+                    fp.write('vip:no' + '\n')
+                else:
+                    fp.write('vip:yes' + '\n')
+                fp.write('点赞数' + str(hot['likedCount']) + '\n')
+                fp.write('-------------------------------------' + '\n')
+
+            # Regular comments. Iterate over what was actually returned
+            # instead of assuming every page holds exactly 20 entries.
+            fp.write('comments' + '\n')
+            for comment in payload.get('comments') or []:
+                user = comment['user']
+                fp.write('昵称：' + user['nickname'] + '\n')
+                fp.write('评论：' + comment['content'] + '\n')
+                if user.get('vipRights') is None:
+                    fp.write('vip: No' + '\n')
+                else:
+                    fp.write('vip: Yes' + '\n')
+                fp.write('点赞数：' + str(comment['likedCount']) + '\n')
+                fp.write('-------------------------------------' + '\n')
+
+    print('爬取完毕！！！')
+
+
+if __name__ == '__main__':
+    # song_id has to stay a module-level name: get_comment() reads it as a global.
+    song_id = input('请输入想看的歌曲ID呢（在歌曲网址的最后面有一串id=......）:')
+    lyric = get_lyric(song_id)
+    name = get_wyy()
+    # Start the output file afresh with the page title and the lyric;
+    # get_comment() appends to the same file afterwards.
+    with open("./歌曲信息.txt", "w", encoding='utf-8') as file:
+        file.write(f"{name}\n")
+        file.write(f"{lyric}\n")
+    get_comment()
+
+
+