parent
3e37b6352c
commit
859b641d7b
@ -0,0 +1,42 @@
|
||||
import requests
from lxml import etree

# Scrape the index page of a novel and dump its title, the leading paragraph
# text, the description and the chapter list (name + link) into dpcq.txt.
url = "https://www.kunnu.com/doupo/"

# Send the request.
# - The result is bound to `response` so the `requests` module is not shadowed.
# - A timeout prevents the script from hanging forever on a stalled connection.
# - raise_for_status() stops us from parsing an HTTP error page as book data.
response = requests.get(url=url, timeout=10)
response.raise_for_status()
response.encoding = response.apparent_encoding  # auto-detect the body encoding
html_data = response.text

# Extract, in order: the book title, the page's <p> text nodes (per the
# original author's note: author, latest chapter, ... -- the exact meaning of
# each entry depends on the page layout) and the description paragraphs.
dom = etree.HTML(html_data)
title_nodes = dom.xpath('//h1/text()')
if not title_nodes:
    # Fail before the output file is opened/truncated, with a readable message.
    raise IndexError("no <h1> title found at " + url)
title = title_nodes[0]
p = dom.xpath('//p/text()')
text = dom.xpath('//div[@class="describe-html"]/p/text()')

with open('dpcq.txt', 'w', encoding="utf8", newline='') as f:
    # The title gets its own line, like every other field.
    f.write(title.strip() + '\n')

    # Write at most the first six <p> text nodes and the first two description
    # paragraphs; slicing tolerates pages that carry fewer of them instead of
    # raising IndexError halfway through the file.
    for line in p[:6]:
        f.write(line.strip() + '\n')
    for line in text[:2]:
        f.write(line.strip() + '\n')
    f.write("------------------------------------------" + '\n')

    # Chapter list: one line for the chapter name, one for its link.
    block = dom.xpath('//div[@class="book-list clearfix"]/ul/li/a')
    for i in block:
        # .text is None when the <a> starts with a child element, and
        # .get() is None when the attribute is missing; fall back to ''
        # so the two-lines-per-chapter layout is kept without a TypeError.
        chapter_name = i.text or ''
        chapter_href = i.get("href") or ''
        print(chapter_name)
        print(chapter_href)
        f.write(chapter_name + '\n')
        f.write(chapter_href + '\n')
|
||||
Loading…
Reference in new issue