You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

57 lines
2.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from bs4 import BeautifulSoup
import requests
import os
import csv
# Scrape the tadu.com ranking pages: collect title / author / genre /
# description for every listed book, download each cover image into
# ``save_folder`` and finally dump all rows to ``book.csv``.

# Ranking-list pages to scrape; the trailing number in the URL runs 1..20.
urls = [f'https://www.tadu.com/book/rank/list/0-hour72-0-0-{page}' for page in range(1, 21)]
# One [title, author, genre, description] row per scraped book.
bookname = []
save_folder = "book_images"
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(save_folder, exist_ok=True)

# Browser-like User-Agent sent with every request; built once, not per page.
headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}
# Seconds to wait for a server before giving up; without a timeout
# requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Characters replaced by '_' in cover file names: spaces (as before), plus
# path separators, characters Windows rejects in file names, and line breaks.
_FILE_NAME_TABLE = str.maketrans({ch: '_' for ch in ' /\\:*?"<>|\r\n\t'})


def _split_author_and_genre(nick_text):
    """Return ``(author, genre)`` parsed from the text of a ``booknick`` element.

    The author is the first line of the stripped text. The genre is the
    second '· '-separated field of the second line; it is ``''`` when that
    line or separator is missing, instead of raising ``IndexError``.
    """
    lines = nick_text.strip().split('\n')
    author = lines[0]
    genre = ''
    if len(lines) > 1:
        fields = lines[1].split('· ')
        if len(fields) > 1:
            genre = fields[1]
    return author, genre


# Visit every ranking page in turn.
for page_url in urls:
    try:
        res = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        # One unreachable page must not discard the rows gathered so far.
        print(f"Skipping page {page_url}: {exc}")
        continue
    soup = BeautifulSoup(res.text, "lxml")
    # Elements holding the book titles.
    title_tags = soup.find_all("span", class_='bookname_name')
    # Elements holding the author information.
    nick_tags = soup.find_all("div", class_='booknick')
    # Elements holding the book descriptions.
    desc_tags = soup.find_all("div", class_='bookdes')
    # Cover image elements.
    imgs = soup.select("div.bookimg img")
    # zip pairs the four lists purely by position and stops at the shortest.
    # NOTE(review): if a page ever omits one of these elements for a book,
    # the columns will silently misalign -- confirm against the page markup.
    for title_tag, nick_tag, desc_tag, img in zip(title_tags, nick_tags, desc_tags, imgs):
        title = title_tag.get_text()
        author, genre = _split_author_and_genre(nick_tag.text)
        description = desc_tag.get_text().strip().replace('\r\n', '').replace('\n', '')
        bookname.append([title, author, genre, description])

        # Cover image URL; an <img> without src cannot be downloaded.
        img_url = img.get('src')
        if not img_url:
            continue
        try:
            response = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as exc:
            # Keep the CSV row even when the cover cannot be fetched.
            print(f"Skipping cover {img_url}: {exc}")
            continue
        if not response.ok:
            # Do not store an HTTP error body under a .jpg name.
            print(f"Skipping cover {img_url}: HTTP {response.status_code}")
            continue
        # File name = 1-based running index of the book + sanitized title.
        file_name = f"book_{len(bookname)}_{title.translate(_FILE_NAME_TABLE)}.jpg"
        save_path = os.path.join(save_folder, file_name)
        with open(save_path, "wb") as f:
            f.write(response.content)

# Write the collected book information to the CSV file: header, then all rows.
with open("book.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["书名", "作者", "小说类型", "简介"])
    writer.writerows(bookname)