You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
1.8 KiB
64 lines
1.8 KiB
import csv
|
|
|
|
import requests
|
|
from lxml import etree
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
# Request headers: a desktop Edge/Chrome-on-Windows User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    ),
}

# URL of the novel's main page.
main_url = "https://www.zzdxss.com/yuanzun/"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _first_text(tree, path):
    """Return the first result of evaluating XPath *path* on *tree*.

    Raises:
        IndexError: if the expression matches nothing. This is the same
            exception type a bare ``[0]`` would raise, but the message
            names the expression that failed.
    """
    matches = tree.xpath(path)
    if not matches:
        raise IndexError(f'XPath matched nothing: {path}')
    return matches[0]


# Fetch the page with a GET request.
main_resp = requests.get(main_url, headers=headers, timeout=REQUEST_TIMEOUT)
# Stop on 4xx/5xx instead of trying to parse an error page.
main_resp.raise_for_status()

# Decode the raw response bytes as GBK text.
main_text = main_resp.content.decode('gbk')

# Parse the text into an element tree that can be queried with XPath.
main_html = etree.HTML(main_text)

# Extract, in order: book title, author, latest-update link text, synopsis.
bookTitle = _first_text(main_html, '/html/body/div[6]/div/div[1]/div[2]/div[1]/h1/text()')
author = _first_text(main_html, '/html/body/div[6]/div/div[1]/div[2]/div[2]/ul/li[1]/a/text()')
update = _first_text(main_html, '/html/body/div[6]/div/div[1]/div[2]/div[3]/a/text()')
introduction = _first_text(main_html, '/html/body/div[6]/div/div[1]/div[2]/div[3]/p/text()[2]')
# All link texts of the chapter list (may legitimately be empty).
block = main_html.xpath('/html/body/div[8]/div[1]/div[2]/ul/li/a/text()')

# Echo the scraped metadata to stdout, one field per line.
print(bookTitle)
for label, value in (
    ('作者:', author),
    ('最新章节:', update),
    ('简介:', introduction),
):
    print(label + value)

# Get the chapter list
# dom = etree.HTML(main_text)
# block = dom.xpath('/html/body/div[8]/div[1]/div[2]/ul/li[1]/a/text()')[0]
# # for i in block:
# #     print(i.text)
# #     print(i.get("href"))

# Build one display line per chapter title and echo the resulting list.
# Bound to `chapter_lines` rather than `list` so the builtin `list` type is
# not shadowed for the rest of the module.
chapter_lines = [f'章节目录:{b}' for b in block]

print(chapter_lines)

# Save the book metadata followed by the chapter list to a UTF-8 text file.
with open('元尊.txt', 'w', encoding="utf8") as f:
    # Terminate every field with a newline so each one lands on its own
    # line in the file instead of running into the next field.
    f.write('标题:' + bookTitle + '\n')
    f.write('作者:' + author + '\n')
    f.write('最新章节:' + update + '\n')
    f.write('简介:' + introduction + '\n')
    # One line per chapter title, in page order.
    f.writelines(f'章节目录:{b}\n' for b in block)