You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
3.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import re
import requests
from fontTools.ttLib import TTFont
from lxml import etree
# Scrape Qidian's VIP-collection ranking page and append each listed book's
# category, collection count and author name to three text files, one value
# per line.
#
# Numbers in the page are emitted as numeric character references (&#NNNN;)
# that only read as digits through a web font referenced by the page. The
# font is therefore downloaded and its character map is used to substitute
# the plain digits back into the HTML before it is parsed.

# Seconds to wait for each HTTP request; without an explicit timeout,
# requests will wait indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10

# Glyph name found in the font's character map -> the character it draws.
GLYPH_TO_CHAR = {
    'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
    'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'zero': '0',
    'period': '.',
}

# Captures the .woff URL from the page's font declaration. Written as a raw
# string because "\(" and "\)" are invalid escapes in a normal string literal.
FONT_URL_RE = re.compile(r"\('eot'\); src: url\('(.*?)'\) format\('woff'\)")

# NOTE(review): the ['j'] predicate in the three XPaths below is a non-empty
# string literal, which XPath evaluates as true, so it filters nothing and
# every <li> is matched. It looks like a leftover from an indexed loop;
# confirm before simplifying.
CATEGORY_XPATH = '''//*[@id="book-img-text"]/ul/li['j']/div[2]/p[1]/a[2]/text()'''
COLLECT_COUNT_XPATH = '''//*[@id="book-img-text"]/ul/li['j']/div[3]/div/p/span/span/text()'''
AUTHOR_XPATH = '''//*[@id="book-img-text"]/ul/li['j']/div[2]/p[1]/a[1]/text()'''

# The output files are opened in append mode, so repeated runs accumulate
# lines; the with-statement guarantees they are flushed and closed even when
# a request or the parsing fails part-way through.
with open("类型.txt", "a", encoding="utf-8") as category_file, \
        open("收藏数.txt", "a", encoding="utf-8") as count_file, \
        open("作者名.txt", "a", encoding="utf-8") as author_file:
    for page in range(1, 2):
        # Only the first ranking page is fetched, so `page` is not used in
        # the URL. The paginated form of the address is
        # https://www.qidian.com/rank/vipcollect?page=<page>.
        url = 'https://www.qidian.com/rank/vipcollect/'
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        html = response.text

        # Keep a snapshot of the page as downloaded (numbers still encoded).
        with open('解密前.html', mode='w', encoding='utf-8') as f:
            f.write(html)

        # Locate the font file the page declares.
        font_match = FONT_URL_RE.search(html)
        if font_match is None:
            raise RuntimeError(
                'no woff font URL found in the page; '
                'the page layout may have changed or the request was blocked'
            )
        font_url = font_match.group(1)

        # Download the font and store it under the file name taken from the
        # last path segment of its URL.
        font_response = requests.get(font_url, timeout=REQUEST_TIMEOUT)
        font_response.raise_for_status()
        font_path = font_url.split('/')[-1]
        with open(font_path, mode='wb') as f:
            f.write(font_response.content)

        # Dump the font as XML and read its code point -> glyph name table.
        font = TTFont(font_path)
        try:
            font.saveXML('font.xml')
            code_point_to_glyph = font['cmap'].getBestCmap()
        finally:
            font.close()

        # Replace every encoded entity with the character its glyph draws.
        for code_point, glyph_name in code_point_to_glyph.items():
            plain_char = GLYPH_TO_CHAR.get(glyph_name)
            if plain_char is None:
                # Glyph is not one of the digit/period glyphs: there is no
                # plain-text equivalent, so leave that entity untouched
                # instead of aborting with a KeyError.
                continue
            html = html.replace(f'&#{code_point};', plain_char)

        # Keep a snapshot of the decoded page as well.
        with open('解密后.html', mode='w', encoding='utf_8') as f:
            f.write(html)

        # Parse the decoded HTML directly from memory and extract the fields.
        tree = etree.HTML(html)
        categories = tree.xpath(CATEGORY_XPATH)
        collect_counts = tree.xpath(COLLECT_COUNT_XPATH)
        authors = tree.xpath(AUTHOR_XPATH)

        # One value per line in each output file.
        category_file.writelines(f'{text}\n' for text in categories)
        count_file.writelines(f'{text}\n' for text in collect_counts)
        author_file.writelines(f'{text}\n' for text in authors)