"""Scrape listing pages from ts.faloo.com into a CSV file and an image folder.

For category pages 1-8 the script collects the text of every
``div.audio_content`` element (one CSV row per element) and the ``src`` of
every ``div.audio_img img`` element.  The rows are written to ``zyk.csv`` and
the images are saved as ``zyk_images/zyk_<index>.jpg``.

Originally added as ``zyk.py`` by commit e8b5700 ("ADD file via upload").
"""

import csv
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

PAGE_URL_TEMPLATE = "https://ts.faloo.com/category/1/{}.html"
HEADERS = {"User-Agent": 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'}
# Seconds to wait for a server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10
CSV_PATH = "zyk.csv"
# Column labels: title, author, synopsis, current listeners, episode count.
CSV_HEADER = ["书名", "作者", "简介", "在听人数", "集数"]

# A list (not a set) so that pages are always visited in order 1..8.
urls = [PAGE_URL_TEMPLATE.format(number) for number in range(1, 9)]
# Folder that receives the downloaded images.
save_folder = "zyk_images"


def fetch(url):
    """Return the HTTP response for *url*, or ``None`` if the request failed.

    Connection errors, timeouts and non-2xx status codes are reported on
    stdout and turned into ``None`` so that one bad URL does not abort the
    whole run.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping {url}: {exc}")
        return None
    return response


def parse_page(html, page_url):
    """Extract CSV rows and image URLs from one listing page.

    Args:
        html: Markup of the page.
        page_url: URL the page was fetched from; used to resolve relative or
            protocol-relative image ``src`` values.

    Returns:
        A ``(rows, image_urls)`` tuple.  ``rows`` holds one list of text
        fragments per ``div.audio_content`` element; ``image_urls`` holds one
        absolute URL per ``div.audio_img img`` element that has a ``src``.
    """
    soup = BeautifulSoup(html, "lxml")

    # Keep the text nodes as separate fields instead of joining them with ","
    # and splitting again later: that round trip broke any field whose text
    # itself contains a comma.
    rows = [list(block.strings) for block in soup.select("div.audio_content")]

    image_urls = []
    for img in soup.select("div.audio_img img"):
        src = img.get("src")
        if not src:
            # An <img> without a usable src cannot be downloaded.
            continue
        image_urls.append(urljoin(page_url, src))

    return rows, image_urls


def write_csv(rows, path=CSV_PATH):
    """Write the header line followed by *rows* to the CSV file at *path*."""
    with open(path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def download_images(image_urls, folder=save_folder):
    """Download every URL in *image_urls* into *folder*.

    Files are named ``zyk_<index>.jpg`` where ``<index>`` is the position of
    the URL in *image_urls*, so names never collide across pages.  URLs that
    cannot be fetched are skipped.
    """
    os.makedirs(folder, exist_ok=True)
    for index, url in enumerate(image_urls):
        response = fetch(url)
        if response is None:
            continue
        save_path = os.path.join(folder, f"zyk_{index}.jpg")
        with open(save_path, "wb") as f:
            f.write(response.content)


def main():
    """Scrape every page in ``urls``, then write the CSV and the images."""
    all_data = []
    all_photo = []

    for url in urls:
        response = fetch(url)
        if response is None:
            continue
        # NOTE(review): response.text relies on the encoding requests infers
        # from the HTTP headers - confirm the site declares its charset there.
        rows, image_urls = parse_page(response.text, url)
        all_data.extend(rows)
        all_photo.extend(image_urls)

    write_csv(all_data)
    # Images are downloaded after all pages were scraped so that the running
    # index used in the file names is unique across pages.
    download_images(all_photo)


if __name__ == "__main__":
    main()