You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
hunjianghu/gzy/数据库/getBooks.py

30 lines
954 B

6 years ago
# -*- coding: utf-8 -*-
import requests
import re
import os
# Ranking URL pieces: a full URL is Pre + <channel id> + Type.
Pre = 'https://www.qidian.com/rank/fin?chn='
Type = '&dateType=3'

# Category name -> channel id string placed after ``chn=`` in the URL.
# The category name is also used as the output file name under ./Books.
# NOTE(review): this name shadows the builtin ``dict``; it is kept because
# other code in this file looks the table up by this name.
dict = {
    '玄幻': '21',
    '奇幻': '1',
    '武侠': '2',
    '仙侠': '22',
    '都市': '4',
    '现实': '15',
    '军事': '6',
    '历史': '5',
    '游戏': '7',
    '体育': '8',
    '科幻': '9',
    '悬疑灵异': '10',
    '二次元': '12',
}
# Compiled once at import time instead of on every call. Raw strings keep
# ``\s`` as a regex escape; in a plain string literal it is an invalid string
# escape that newer Python versions warn about.
_RANK_ITEM_RE = re.compile(
    r'<li\sdata-rid.*?<span class="rank-tag no.*?">(.*?)<cite>'
    r'.*?<h4><a\shref="(.*?)".*?>(.*?)</a></h4>',
    re.S,  # let ``.`` cross newlines, since one entry spans several lines
)


def parse_one_page(html):
    """Extract the ranking entries from the HTML text of one page.

    Args:
        html: Page markup as a string.

    Returns:
        A list of 3-tuples of strings, one per ``<li data-rid ...>`` entry
        matched: the text between the ``rank-tag`` span's opening tag and
        ``<cite>``, the ``href`` of the link inside ``<h4>``, and the link
        text. The list is empty when nothing matches.
    """
    return _RANK_ITEM_RE.findall(html)
def getBookinfo():
    """Fetch the ranking page of every category and append its entries to disk.

    For each ``name -> channel id`` pair in the module-level ``dict`` table,
    the URL ``Pre + id + Type`` is printed and fetched, the response text is
    parsed with ``parse_one_page``, and one ``"https:<href> <title>"`` line per
    parsed entry is appended to the file ``./Books/<name>``.

    Returns:
        None. The results are the lines appended to the files under ./Books.
    """
    out_dir = './Books'
    # Create the output directory up front; otherwise open() raises
    # FileNotFoundError when ./Books does not exist yet.
    os.makedirs(out_dir, exist_ok=True)

    for category, channel_id in dict.items():
        url = Pre + channel_id + Type
        print(url)
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve existing behaviour; confirm it is still needed.
        # The timeout prevents a stalled connection from blocking forever.
        response = requests.get(url, verify=False, timeout=30)

        items = parse_one_page(response.text)
        if not items:
            # Nothing matched: do not create or touch the category file.
            continue

        path = os.path.join(out_dir, category)
        # Open the file once per category instead of once per entry.
        with open(path, 'a', encoding='utf-8') as file:
            file.writelines(
                "https:" + item[1] + " " + item[2] + '\n' for item in items
            )