You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
27 lines
903 B
27 lines
903 B
# -*- coding: utf-8 -*-
|
|
|
|
import requests
|
|
import re
|
|
import os
|
|
|
|
# Pieces of the qidian.com ranking URL. A complete URL is built as
# Pre + <channel id> + Type.
Pre = 'https://www.qidian.com/rank/fin?chn='
# NOTE(review): the meaning of dateType=3 is not visible in this file --
# confirm against the site before changing it.
Type = '&dateType=3'

# Category name -> value substituted for the `chn` query parameter.
# NOTE(review): this name shadows the builtin `dict`. It is kept because the
# rest of the script looks the table up under this name.
dict = {
    '玄幻': '21',
    '奇幻': '1',
    '武侠': '2',
    '仙侠': '22',
    '都市': '4',
    '现实': '15',
    '军事': '6',
    '历史': '5',
    '游戏': '7',
    '体育': '8',
    '科幻': '9',
    '悬疑灵异': '10',
    '二次元': '12',
}
|
|
|
|
# Compiled once at import time instead of on every call. Raw strings keep
# `\s` from being treated as an (invalid) string escape; the pattern text is
# otherwise unchanged. re.S lets `.` span the newlines between tags.
_RANK_ITEM_RE = re.compile(
    r'<li\sdata-rid.*?<span class="rank-tag no.*?">(.*?)<cite>'
    r'.*?<h4><a\shref="(.*?)".*?>(.*?)</a></h4>',
    re.S,
)


def parse_one_page(html):
    """Extract the ranked entries from one ranking page.

    Args:
        html: Page markup as a string.

    Returns:
        A list of 3-tuples of strings, one per ``<li data-rid...>`` entry
        matched, in document order: the text between the ``rank-tag`` span's
        opening tag and ``<cite>``, the ``href`` of the link inside ``<h4>``,
        and that link's text. The list is empty when nothing matches.
    """
    return _RANK_ITEM_RE.findall(html)
|
|
|
|
# Fetch the ranking page of every category and append its entries to a file
# named after the category under ./Books.

# open() does not create missing directories, so make sure the output
# directory exists before the first write.
os.makedirs('./Books', exist_ok=True)

for page in dict:
    url = Pre + dict[page] + Type
    print(url)
    # TLS certificate verification is left at its default (enabled), and a
    # timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, timeout=30)
    items = parse_one_page(response.text)
    if not items:
        # Nothing matched: skip without creating an empty file.
        continue
    path = os.path.join('./Books', page)
    # Open the file once per category rather than once per entry. Append
    # mode means repeated runs add to the existing file.
    with open(path, 'a', encoding='utf-8') as file:
        # item[1] is the matched href, item[2] the link text; "https:" is
        # prepended to the href before it is written.
        file.writelines(
            " " + "https:" + item[1] + " " + item[2] + '\n' for item in items
        )