import csv
import os

import requests
from bs4 import BeautifulSoup

# Catalogue page of the novel 《许一个愿忘记你》.
url = 'http://www.lazytxt.shop:8081/xs/112/112040/'
base_url = 'http://www.lazytxt.shop:8081'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}

response = requests.get(url, headers=headers)
response.encoding = "gbk"  # the site serves GBK-encoded pages
soup = BeautifulSoup(response.text, "lxml")

# All chapter links sit inside the book list. Avoid naming this `re`,
# which would shadow the standard-library regex module.
chapter_links = soup.select('div.book_list ul li a')

dir_name = "许一个愿忘记你"
if not os.path.exists(dir_name):
    os.mkdir(dir_name)

# Write every chapter URL into a CSV index file inside the book directory.
urls = []
with open(os.path.join(dir_name, '许一个愿忘记你.csv'), 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['链接']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for link in chapter_links:
        full_url = base_url + link.get('href')
        writer.writerow({'链接': full_url})
        print(full_url)
        urls.append(full_url)
print(len(urls))

# Print the book title, author, and introduction.
for node in soup.select('span.red a, h3.bookinfo_intro'):
    print(node.text)

# Download each chapter page once, extract its title and body text,
# and save it as an individual .txt file.
for full_url in urls:
    response = requests.get(full_url, headers=headers)
    response.encoding = "gbk"
    soup = BeautifulSoup(response.text, "lxml")
    chapter = soup.select_one('div.h1title h1').text
    print()
    print(chapter)
    content = soup.find('div', {'id': 'htmlContent'}).text.strip()
    print(content)
    content = content.replace(" ", "")  # strip space characters left over from the page layout
    file_name = os.path.join(dir_name, f"{chapter}.txt")
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(content)
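
# The chapter title is used verbatim as a file name above; a title containing
# characters such as '/', '?' or '*' would make open() fail on most platforms.
# A minimal sketch of a sanitizer (the regex and the helper name
# `safe_filename` are my own assumptions, not anything the site defines):
import re

def safe_filename(name: str) -> str:
    # Replace characters that are illegal in Windows/Unix file names.
    return re.sub(r'[\\/:*?"<>|]', '_', name).strip()

# Usage inside the download loop:
#     file_name = os.path.join(dir_name, f"{safe_filename(chapter)}.txt")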
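
# Fetching every chapter in a tight loop can strain a small site or get the
# client blocked. A hedged sketch of a gentler fetch helper (the `fetch` name,
# the delay value, and the timeout are my own choices, not from the source):
import time

def fetch(page_url: str, session: requests.Session, delay: float = 0.5) -> BeautifulSoup:
    time.sleep(delay)                       # brief pause between requests
    resp = session.get(page_url, headers=headers, timeout=10)
    resp.raise_for_status()                 # fail loudly on HTTP errors
    resp.encoding = "gbk"
    return BeautifulSoup(resp.text, "lxml")

# A requests.Session reuses the TCP connection across all chapter pages,
# which is both faster and friendlier to the server than one-off gets.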