|  |  |  | @ -0,0 +1,57 @@ | 
			
		
	
		
			
				
import csv
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | urls = ['https://www.tadu.com/book/rank/list/0-hour72-0-0-{}'.format(numbers) for numbers in range(1, 21)] | 
			
		
	
		
			
				
					|  |  |  |  | bookname = [] | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | save_folder = "book_images" | 
			
		
	
		
			
				
					|  |  |  |  | if not os.path.exists(save_folder): | 
			
		
	
		
			
				
					|  |  |  |  |     os.makedirs(save_folder) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | # 循环遍历每个链接 | 
			
		
	
		
			
				
					|  |  |  |  | for i in urls: | 
			
		
	
		
			
				
					|  |  |  |  |     headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' | 
			
		
	
		
			
				
					|  |  |  |  |                              'Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'} | 
			
		
	
		
			
				
					|  |  |  |  |     # 发送GET请求到当前链接,并获取响应内容 | 
			
		
	
		
			
				
					|  |  |  |  |     res = requests.get(i, headers=headers) | 
			
		
	
		
			
				
					|  |  |  |  |     html = res.text  # 将响应内容存储为字符串 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     soup = BeautifulSoup(html, "lxml") | 
			
		
	
		
			
				
					|  |  |  |  |     # 在页面中找到所有包含小说名称的元素 | 
			
		
	
		
			
				
					|  |  |  |  |     y = soup.find_all("span", class_='bookname_name') | 
			
		
	
		
			
				
					|  |  |  |  |     # 在页面中找到所有包含作者信息的元素 | 
			
		
	
		
			
				
					|  |  |  |  |     j = soup.find_all("div", class_='booknick') | 
			
		
	
		
			
				
					|  |  |  |  |     # 在页面中找到所有包含小说描述的元素 | 
			
		
	
		
			
				
					|  |  |  |  |     a = soup.find_all("div", class_='bookdes') | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     # 在页面中找到所有包含小说图片的元素 | 
			
		
	
		
			
				
					|  |  |  |  |     imgs = soup.select("div.bookimg img") | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     for k, v, h, img in zip(y, j, a, imgs): | 
			
		
	
		
			
				
					|  |  |  |  |         l = k.get_text() | 
			
		
	
		
			
				
					|  |  |  |  |         t = v.text.strip().split('\n')[0] | 
			
		
	
		
			
				
					|  |  |  |  |         t2 = v.text.strip().split('\n')[1].split('· ')[1] | 
			
		
	
		
			
				
					|  |  |  |  |         p = h.get_text().strip().replace('\r\n', '').replace('\n', '') | 
			
		
	
		
			
				
					|  |  |  |  |         bookname.append([l, t, t2, p]) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |         # 提取小说图片的URL | 
			
		
	
		
			
				
					|  |  |  |  |         img_url = img.get('src') | 
			
		
	
		
			
				
					|  |  |  |  |         # 发送HTTP请求到小说图片的URL | 
			
		
	
		
			
				
					|  |  |  |  |         response = requests.get(img_url, headers=headers) | 
			
		
	
		
			
				
					|  |  |  |  |         # 使用小说的索引和名称作为文件名 | 
			
		
	
		
			
				
					|  |  |  |  |         file_name = f"book_{len(bookname)}_{l.replace(' ', '_')}.jpg" | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |         # 将图片保存到文件夹中 | 
			
		
	
		
			
				
					|  |  |  |  |         save_path = os.path.join(save_folder, file_name) | 
			
		
	
		
			
				
					|  |  |  |  |         with open(save_path, "wb") as f: | 
			
		
	
		
			
				
					|  |  |  |  |             f.write(response.content) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | # 将小说信息写入CSV文件 | 
			
		
	
		
			
				
					|  |  |  |  | with open("book.csv", "w", encoding="utf-8", newline="") as f: | 
			
		
	
		
			
				
					|  |  |  |  |     writer = csv.writer(f) | 
			
		
	
		
			
				
					|  |  |  |  |     writer.writerow(["书名", "作者", "小说类型", "简介"]) | 
			
		
	
		
			
				
					|  |  |  |  |     # 遍历bookname列表,将每一行数据写入CSV文件 | 
			
		
	
		
			
				
					|  |  |  |  |     for row in bookname: | 
			
		
	
		
			
				
					|  |  |  |  |         writer.writerow(row) |