|
|
|
@ -0,0 +1,54 @@
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
import csv
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
# Listing pages 1-8 of category 1. A list (rather than a set) keeps the pages
# in numeric order, so CSV row order and image numbering are reproducible
# from one run to the next.
urls = ["https://ts.faloo.com/category/1/{}.html".format(number) for number in range(1, 9)]

# Accumulators filled while scraping and consumed by the CSV / image steps.
all_data = []   # scraped text entries, across all pages
all_photo = []  # image URLs, across all pages

# Create the folder that will hold the downloaded images.
save_folder = "zyk_images"
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(save_folder, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
# The request headers are identical for every page, so build them once.
headers = {"User-Agent": 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'}

# Fetch each listing page and collect its text entries and cover-image URLs.
for url in urls:
    try:
        # A timeout prevents one stalled connection from hanging the script.
        response = requests.get(url, headers=headers, timeout=10)
        # Do not parse HTTP error pages as if they were listings.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping page {url}: {exc}")
        continue

    # NOTE(review): response.text relies on the encoding requests infers from
    # the HTTP headers - confirm it matches the site's actual charset.
    soup = BeautifulSoup(response.text, "lxml")

    # Each "div.audio_content" element becomes one comma-joined text entry.
    all_data.extend(entry.get_text(",") for entry in soup.select("div.audio_content"))

    # Collect the src of every image under "div.audio_img" into all_photo.
    all_photo.extend(img.get('src') for img in soup.select("div.audio_img img"))
|
|
|
|
|
|
|
|
|
|
# Write the header row, then one CSV row per collected entry.
with open("zyk.csv", "w", encoding="utf-8", newline="") as csv_file:
    out = csv.writer(csv_file)
    out.writerow(["书名", "作者", "简介", "在听人数", "集数"])
    # Every entry is a comma-joined string; split it back into columns.
    out.writerows(entry.split(",") for entry in all_data)
|
|
|
|
|
|
|
|
|
|
# Download the images in a separate pass, after all pages are scraped, so each
# file gets a unique index-based name and no filename is reused.
# The request headers are identical for every image, so build them once.
headers = {"User-Agent": 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'}

for i, url in enumerate(all_photo):
    # An <img> without a src attribute yields None; there is nothing to fetch.
    # The index is still consumed so the numbering of other images is stable.
    if not url:
        continue

    try:
        # A timeout prevents one stalled connection from hanging the script.
        response = requests.get(url, headers=headers, timeout=10)
        # Avoid saving an HTTP error body as if it were image data.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping image {i} ({url}): {exc}")
        continue

    # The file name is the image's index in all_photo plus a fixed .jpg extension.
    save_path = os.path.join(save_folder, f"zyk_{i}.jpg")
    with open(save_path, "wb") as f:
        f.write(response.content)
|