From 37e0d08e4ff20297a073b0237422e839133da995 Mon Sep 17 00:00:00 2001
From: pzpalh4w5 <346553037@qq.com>
Date: Wed, 12 Jun 2024 15:26:16 +0800
Subject: [PATCH] ADD file via upload

---
 许一个愿忘记你.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 许一个愿忘记你.py

diff --git a/许一个愿忘记你.py b/许一个愿忘记你.py
new file mode 100644
index 0000000..e4a482e
--- /dev/null
+++ b/许一个愿忘记你.py
@@ -0,0 +1,51 @@
+import requests
+from bs4 import BeautifulSoup
+import csv
+import os
+
+url = 'http://www.lazytxt.shop:8081/xs/112/112040/'
+base_url = 'http://www.lazytxt.shop:8081'
+
+header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}
+response = requests.get(url, headers=header)
+response.encoding = "gbk"
+soup = BeautifulSoup(response.text, "lxml")
+# Chapter links from the table of contents.
+links = soup.select('div.book_list ul li a')
+
+dir_name = "许一个愿忘记你"
+if not os.path.exists(dir_name):
+    os.mkdir(dir_name)
+
+# Save the full chapter URLs to a CSV file.
+with open(os.path.join(dir_name, '许一个愿忘记你.csv'), 'w', newline='', encoding='utf-8') as csvfile:
+    fieldnames = ['链接']
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+    writer.writeheader()
+    urls = []
+    for link in links:
+        full_url = base_url + link.get('href')
+        writer.writerow({'链接': full_url})
+        print(full_url)
+        urls.append(full_url)
+    print(len(urls))
+
+# Print the book title, author, and introduction.
+info = soup.select('span.red a, h3.bookinfo_intro')
+for item in info:
+    print(item.text)
+
+# Download every chapter and save it as a text file.
+for full_url in urls:
+    response = requests.get(full_url, headers=header)
+    response.encoding = "gbk"
+    soup = BeautifulSoup(response.text, 'lxml')
+    chapter = soup.select_one('div.h1title h1').text
+    print()
+    print(chapter)
+    content = soup.find('div', {'id': 'htmlContent'}).text.strip()
+    print(content)
+    content = content.replace(" ", "")
+    file_name = os.path.join(dir_name, f"{chapter}.txt")
+    with open(file_name, 'w', encoding='utf-8') as f:
+        f.write(content)
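
Note on the script above: it downloads every chapter in a tight loop with no
timeout, no retry, and no delay between requests, so a slow or flaky server
can hang the run or block the client. Below is a minimal sketch of a politer
fetch helper using the same requests stack; make_session and fetch_html are
illustrative names that are not part of the committed file, and the retry,
timeout, and delay values are untuned example settings, not recommendations
from the patch.

    import time
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    def make_session(user_agent):
        # One pooled session for the whole crawl; retries transient errors.
        session = requests.Session()
        session.headers.update({"User-Agent": user_agent})
        retry = Retry(total=3, backoff_factor=1.0,
                      status_forcelist=[429, 500, 502, 503, 504])
        session.mount("http://", HTTPAdapter(max_retries=retry))
        return session

    def fetch_html(session, url, delay=1.0):
        # Fetch one page, fail loudly on HTTP errors, decode as GBK,
        # and pause so consecutive chapter requests do not hammer the site.
        response = session.get(url, timeout=10)
        response.raise_for_status()
        response.encoding = "gbk"
        time.sleep(delay)
        return response.text

The chapter loop in the script could then call fetch_html(session, full_url)
instead of requests.get, which also keeps one pooled connection alive across
the whole download instead of opening a new one per chapter.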