You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
3.0 KiB

3 years ago
import re
import requests
from fontTools.ttLib import TTFont
from lxml import etree
# Ranking page that gets scraped.
RANK_URL = 'https://www.qidian.com/rank/vipcollect/'

# Seconds to wait on each HTTP request before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Captures the download address of the .woff font file from the page source.
FONT_URL_PATTERN = re.compile(
    r"\('eot'\); src: url\('(.*?)'\) format\('woff'\)"
)

# Glyph names found in the font's cmap, mapped to the character they stand for.
GLYPH_TO_CHAR = {
    'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
    'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'zero': '0',
    'period': '.',
}


def build_digit_map(cmap):
    """Convert a font cmap into a code point -> character table.

    Args:
        cmap: Mapping of code point (e.g. 100070) to glyph name (e.g. 'five').

    Returns:
        A new dict mapping each code point to the character in GLYPH_TO_CHAR.
        Glyph names that are not in GLYPH_TO_CHAR are skipped rather than
        raising KeyError, so an unexpected glyph does not abort the run.
    """
    return {
        code: GLYPH_TO_CHAR[glyph]
        for code, glyph in cmap.items()
        if glyph in GLYPH_TO_CHAR
    }


def decode_entities(html, digit_map):
    """Replace every '&#<code>;' entity listed in digit_map with its character.

    Args:
        html: Page source containing the obfuscated numeric entities.
        digit_map: Mapping of code point to replacement character.

    Returns:
        The page source with the known entities replaced; other text is
        left untouched.
    """
    for code, char in digit_map.items():
        html = html.replace('&#' + str(code) + ';', str(char))
    return html


def download_font(font_url):
    """Download the font at font_url into the current directory.

    Returns:
        The local file name, i.e. the last '/'-separated part of the URL.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    font_response = requests.get(font_url, timeout=REQUEST_TIMEOUT)
    font_response.raise_for_status()
    font_path = font_url.split('/')[-1]
    with open(font_path, mode='wb') as f:  # font data is binary
        f.write(font_response.content)
    return font_path


def load_digit_map(font_path):
    """Dump the font to font.xml and return its code point -> character table."""
    font = TTFont(font_path)
    try:
        font.saveXML('font.xml')  # readable dump of the font, kept for inspection
        # getBestCmap() returns None when the font has no Unicode cmap.
        cmap = font['cmap'].getBestCmap() or {}
        return build_digit_map(cmap)
    finally:
        font.close()


def write_lines(out, values):
    """Write each value to the open text file `out` on its own line."""
    out.writelines(str(value) + '\n' for value in values)


def scrape_rank_page(url, genre_out, favorites_out, author_out):
    """Fetch one ranking page, decode its obfuscated numbers and save the data.

    Side effects: writes '解密前.html' (raw page), the downloaded font file,
    'font.xml' and '解密后.html' (decoded page) to the current directory, and
    appends one line per extracted value to the three output files.

    Args:
        url: Address of the ranking page.
        genre_out: Open text file receiving the novel types.
        favorites_out: Open text file receiving the favourite counts.
        author_out: Open text file receiving the author names.

    Raises:
        requests.HTTPError: if the page or the font cannot be downloaded.
        ValueError: if the page contains no font download address.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Keep a copy of the raw page, then continue from the text as read back
    # from disk (text-mode reading normalises line endings).
    with open('解密前.html', mode='w', encoding='utf-8') as f:
        f.write(response.text)
    with open('解密前.html', mode='r', encoding='utf-8') as f:
        html = f.read()

    found = FONT_URL_PATTERN.search(html)
    if found is None:
        raise ValueError('no woff font URL found in ' + url)
    font_path = download_font(found.group(1))

    decoded = decode_entities(html, load_digit_map(font_path))
    with open('解密后.html', mode='w', encoding='utf_8') as f:
        f.write(decoded)
    with open('解密后.html', 'r', encoding='utf-8') as f:
        tree = etree.HTML(f.read())

    # NOTE: the ['j'] predicate is a non-empty string literal, so it is always
    # true and every <li> is selected.
    genres = tree.xpath('''//*[@id="book-img-text"]/ul/li['j']/div[2]/p[1]/a[2]/text()''')
    favorites = tree.xpath('''//*[@id="book-img-text"]/ul/li['j']/div[3]/div/p/span/span/text()''')
    authors = tree.xpath('''//*[@id="book-img-text"]/ul/li['j']/div[2]/p[1]/a[1]/text()''')

    write_lines(genre_out, genres)
    write_lines(favorites_out, favorites)
    write_lines(author_out, authors)


def main():
    """Scrape the ranking page and append the results to the three text files."""
    # The with-statement guarantees the output files are flushed and closed,
    # even when the scrape fails part-way through.
    with open("类型.txt", "a", encoding="utf-8") as genre_out, \
            open("收藏数.txt", "a", encoding="utf-8") as favorites_out, \
            open("作者名.txt", "a", encoding="utf-8") as author_out:
        scrape_rank_page(RANK_URL, genre_out, favorites_out, author_out)


if __name__ == "__main__":
    main()