You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
954 B
30 lines
954 B
6 years ago
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import requests
|
||
|
import re
|
||
|
import os
|
||
|
|
||
|
# URL pieces for a Qidian ranking page: the full URL is built as
# Pre + <category id> + Type.
Pre = 'https://www.qidian.com/rank/fin?chn='
Type = '&dateType=3'

# Maps a category name to the id string appended after ``chn=``.
# The category name is also used as the output file name under ./Books.
# NOTE: this name shadows the builtin ``dict``; it is kept unchanged because
# getBookinfo() looks the table up under this name.
dict = {
    '玄幻': '21',
    '奇幻': '1',
    '武侠': '2',
    '仙侠': '22',
    '都市': '4',
    '现实': '15',
    '军事': '6',
    '历史': '5',
    '游戏': '7',
    '体育': '8',
    '科幻': '9',
    '悬疑灵异': '10',
    '二次元': '12',
}
|
||
|
|
||
|
|
||
|
# Compiled once at import time. Written as raw strings so that ``\s`` reaches
# the regex engine unchanged instead of being an invalid string escape.
# Groups: (1) text between the rank-tag span opening and <cite>,
#         (2) the href of the link inside <h4>,
#         (3) the link text.
_RANK_ITEM_RE = re.compile(
    r'<li\sdata-rid.*?<span class="rank-tag no.*?">(.*?)<cite>'
    r'.*?<h4><a\shref="(.*?)".*?>(.*?)</a></h4>',
    re.S,
)


def parse_one_page(html):
    """Extract the ranked book entries from one ranking page.

    Args:
        html: The page markup as a string.

    Returns:
        A list of ``(rank, href, title)`` string tuples, one per matching
        ``<li data-rid ...>`` entry, in document order. The list is empty
        when nothing matches.
    """
    return _RANK_ITEM_RE.findall(html)
|
||
|
|
||
|
|
||
|
def getBookinfo():
    """Download every category's ranking page and append its books to a file.

    For each entry of the module-level category table, the ranking URL is
    built as ``Pre + <category id> + Type``, printed, fetched, and parsed
    with ``parse_one_page``. One line per book, ``https:<href> <title>``,
    is appended to ``./Books/<category name>`` (UTF-8). No file is touched
    for a category whose page yields no entries.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
        OSError: If the output directory or a file cannot be written.
    """
    # open(..., 'a') does not create missing parent directories, so make
    # sure the output directory exists before the first write.
    os.makedirs('./Books', exist_ok=True)

    for page, chn in dict.items():
        url = Pre + chn + Type
        print(url)
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve existing behavior; confirm it is still needed.
        # The timeout stops a stalled connection from hanging the whole run.
        response = requests.get(url, verify=False, timeout=30)
        path = os.path.join('./Books', page)

        # item is (rank, href, title); "https:" is prepended to the href,
        # presumably because the page uses protocol-relative links.
        lines = ["https:" + item[1] + " " + item[2] + '\n'
                 for item in parse_one_page(response.text)]
        if lines:
            # Open the file once per category rather than once per book.
            with open(path, 'a', encoding='utf-8') as file:
                file.writelines(lines)
|