"""Scrape book metadata and the chapter list for the novel "元尊" from
zzdxss.com, print them, and save everything to 元尊.txt."""
import csv  # NOTE(review): unused in this block — kept in case another part of the file needs it
import requests
from lxml import etree
from bs4 import BeautifulSoup  # NOTE(review): unused in this block — kept for the same reason

# Request headers: a desktop Edge/Chrome UA so the site serves the normal page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# Novel homepage URL.
main_url = "https://www.zzdxss.com/yuanzun/"

# Fetch the homepage. timeout prevents an indefinite hang; raise_for_status
# fails fast on an HTTP error instead of silently parsing an error page.
main_resp = requests.get(main_url, headers=headers, timeout=10)
main_resp.raise_for_status()

# Decode as GBK (assumed site encoding — TODO confirm against the page's
# <meta charset>); errors='replace' keeps a single bad byte from aborting
# the whole run with UnicodeDecodeError.
main_text = main_resp.content.decode('gbk', errors='replace')

# Parse the page into an lxml element tree.
main_html = etree.HTML(main_text)

# Title, author, latest-update link text, and introduction, via absolute
# XPaths tied to the site's current layout (fragile if the site changes).
bookTitle = main_html.xpath('/html/body/div[6]/div/div[1]/div[2]/div[1]/h1/text()')[0]
author = main_html.xpath('/html/body/div[6]/div/div[1]/div[2]/div[2]/ul/li[1]/a/text()')[0]
update = main_html.xpath('/html/body/div[6]/div/div[1]/div[2]/div[3]/a/text()')[0]
introduction = main_html.xpath('/html/body/div[6]/div/div[1]/div[2]/div[3]/p/text()[2]')[0]
# Chapter titles from the table-of-contents list.
block = main_html.xpath('/html/body/div[8]/div[1]/div[2]/ul/li/a/text()')

print(bookTitle)
print('作者:' + author)
print('最新章节:' + update)
print('简介:' + introduction)

# Build the chapter-list lines once. (Fix: the original bound this to the
# name `list`, shadowing the builtin.)
chapter_lines = [f'章节目录:{b}' for b in block]
print(chapter_lines)

# Save everything. Fix: the original wrote title/author/update with no
# separators, so they ran together on one line in the output file — each
# field now ends with a newline.
with open('元尊.txt', 'w', encoding="utf8") as f:
    f.write('标题:' + bookTitle + '\n')
    f.write('作者:' + author + '\n')
    f.write('最新章节:' + update + '\n')
    f.write('简介:' + introduction + '\n')
    f.writelines(line + '\n' for line in chapter_lines)