hunjianghu/gzy/数据库/getBooks.py

# coding = utf-8

import requests
import re
import os

# URL prefix of the qidian.com rank/fin listing; a channel code is appended
# directly after the trailing ``chn=``.
Pre = 'https://www.qidian.com/rank/fin?chn='

# Query-string suffix appended after the channel code.
Type = '&dateType=3'

# Genre name -> value substituted for the ``chn`` query parameter.
# NOTE: this name shadows the built-in ``dict``; it is kept because
# getBookinfo() looks the table up under this name. Iteration order is the
# order in which the genres are fetched.
dict = {
    '玄幻': '21',
    '奇幻': '1',
    '武侠': '2',
    '仙侠': '22',
    '都市': '4',
    '现实': '15',
    '军事': '6',
    '历史': '5',
    '游戏': '7',
    '体育': '8',
    '科幻': '9',
    '悬疑灵异': '10',
    '二次元': '12',
}


# Compiled once at import time. Raw strings keep ``\s`` as a regex escape
# instead of an (invalid) Python string escape. Each match captures three
# groups: the text between the rank-tag <span> opening tag and <cite>, the
# href of the <h4> link, and the link text.
_RANK_ITEM_RE = re.compile(
    r'<li\sdata-rid.*?<span class="rank-tag no.*?">(.*?)<cite>'
    r'.*?<h4><a\shref="(.*?)".*?>(.*?)</a></h4>',
    re.S,  # let ``.`` cross newlines; one entry spans several lines of markup
)


def parse_one_page(html):
    """Extract the ranked book entries from one page of HTML.

    Args:
        html: Page markup as a string.

    Returns:
        A list of ``(rank_text, href, title)`` string tuples, one per
        ``<li data-rid ...>`` entry that matches, in document order. The
        list is empty when nothing matches.
    """
    return _RANK_ITEM_RE.findall(html)


def getBookinfo():
    """Fetch every genre's ranking page and append its books to a file.

    For each genre in the module-level ``dict`` table the URL is built as
    ``Pre + code + Type``, printed, downloaded, and parsed with
    ``parse_one_page``. One line per parsed entry, in the form
    ``https:<href>   <title>``, is appended to ``./Books/<genre>``.
    Files are opened in append mode, so repeated runs add to existing
    content. No file is created for a genre whose page yields no entries.

    Raises:
        requests.RequestException: if a request fails or times out.
        OSError: if the output directory or a file cannot be written.
    """
    out_dir = './Books'
    # open() does not create missing parent directories, so make sure the
    # output directory exists before the first write.
    os.makedirs(out_dir, exist_ok=True)

    for page, code in dict.items():
        url = Pre + code + Type
        print(url)
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve existing behaviour; confirm it is still
        # required. The timeout stops a stalled connection from blocking
        # the whole run forever.
        response = requests.get(url, verify=False, timeout=30)

        lines = [
            "https:" + item[1] + "   " + item[2] + '\n'
            for item in parse_one_page(response.text)
        ]
        if not lines:
            # Nothing parsed: skip so no empty file is created.
            continue

        # Open the genre file once per page instead of once per entry.
        with open(os.path.join(out_dir, page), 'a', encoding='utf-8') as file:
            file.writelines(lines)
数据库 6 years ago			`# coding = utf-8`

			`import requests`
			`import re`
			`import os`

			`Pre = 'https://www.qidian.com/rank/fin?chn='`
			`Type = '&dateType=3'`
			`dict = {'玄幻': '21', '奇幻': '1', '武侠': '2', '仙侠': '22',`
			`'都市': '4', '现实': '15', '军事': '6', '历史': '5',`
			`'游戏': '7', '体育': '8', '科幻': '9', '悬疑灵异': '10',`
			`'二次元': '12'}`


			`def parse_one_page(html):`
			`pattern = re.compile('<li\sdata-rid.*?<span class="rank-tag no.*?">(.*?)<cite>.*?<h4><a\shref="(.*?)".*?>(.*?)</a></h4>',re.S)`
			`items = re.findall(pattern,html)`
			`return items`


			`def getBookinfo():`
			`for page in dict:`
			`url = Pre + dict[page] + Type`
			`print(url)`
			`response = requests.get(url, verify=False)`
			`path = os.path.join('./Books', page)`
			`for item in parse_one_page(response.text):`
			`with open(path, 'a', encoding='utf-8') as file:`
			`file.write("https:" + item[1] + "   " + item[2] + '\n')`