# hunjianghu/gzy/getBooks.py

# coding = utf-8

import requests
import re
import os

# URL pieces: a ranking-page URL is built as Pre + <channel id> + Type.
Pre = 'https://www.qidian.com/rank/fin?chn='
Type = '&dateType=3'

# Channel name -> id string placed in the `chn` query parameter.
# Insertion order is the order in which the channels are fetched.
# NOTE(review): this name shadows the builtin `dict`; it is kept unchanged
# because other code in this file refers to it by this name.
dict = {
    '玄幻': '21',
    '奇幻': '1',
    '武侠': '2',
    '仙侠': '22',
    '都市': '4',
    '现实': '15',
    '军事': '6',
    '历史': '5',
    '游戏': '7',
    '体育': '8',
    '科幻': '9',
    '悬疑灵异': '10',
    '二次元': '12',
}

# Compiled once at import time. Raw string literals keep `\s` a regex escape;
# in a normal string literal it is an invalid string escape that Python warns
# about. re.S lets `.` span newlines, since one entry covers several lines.
_ITEM_PATTERN = re.compile(
    r'<li\sdata-rid.*?<span class="rank-tag no.*?">(.*?)<cite>'
    r'.*?<h4><a\shref="(.*?)".*?>(.*?)</a></h4>',
    re.S,
)


def parse_one_page(html):
    """Extract the book entries from one ranking page.

    Args:
        html: Page markup as a string.

    Returns:
        A list of 3-tuples of strings, one per matched ``<li data-rid ...>``
        entry, in document order:

        * the text between the ``rank-tag`` span's opening tag and ``<cite>``,
        * the ``href`` value of the link inside ``<h4>``,
        * the text of that link.

        The list is empty when nothing matches.
    """
    return _ITEM_PATTERN.findall(html)

# For every channel: fetch its ranking page, extract the entries, and append
# one line per entry ("   https:<href>   <title>") to ./Books/<channel name>.

# open() does not create missing directories, so make sure the target exists.
os.makedirs('./Books', exist_ok=True)

for category, channel_id in dict.items():
    url = Pre + channel_id + Type
    print(url)
    try:
        # Certificate verification is left at the requests default (enabled);
        # the timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # One unreachable channel should not abort the remaining ones.
        print('failed to fetch ' + url + ': ' + str(exc))
        continue

    items = parse_one_page(response.text)
    if not items:
        # Nothing matched: do not create an empty file for this channel.
        continue

    path = os.path.join('./Books', category)
    # Append mode: entries from earlier runs are kept. The file is opened once
    # per channel rather than once per entry.
    with open(path, 'a', encoding='utf-8') as file:
        file.writelines(
            "   " + "https:" + href + "   " + title + '\n'
            for _rank, href, title in items
        )