You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
xiaoshuo/许一个愿忘记你.py

73 lines
2.2 KiB

import requests
from bs4 import BeautifulSoup
import csv
import os
# Scrape the novel "许一个愿忘记你" from lazytxt.shop:
#   1. fetch the chapter index and save every chapter URL to a CSV,
#   2. print the book title / author / introduction,
#   3. fetch each chapter ONCE and write its text to <book>/<title>.txt.
# (The original pasted script had its indentation stripped, shadowed the
# stdlib name `re`, and fetched every chapter page twice over the network.)

# Base site URL; chapter hrefs in the index are site-relative paths.
BASE_URL = "http://www.lazytxt.shop:8081"
INDEX_URL = BASE_URL + "/xs/112/112040/"
BOOK_NAME = "许一个愿忘记你"
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}
# Characters Windows forbids in filenames, each mapped to deletion.
_FILENAME_CLEAN = str.maketrans("", "", '\\/:*?"<>|')


def _fetch_soup(page_url):
    """GET *page_url* and return the parsed BeautifulSoup tree.

    The site serves GBK-encoded pages, so the response encoding is forced
    before decoding. A timeout prevents the script from hanging forever.
    """
    response = requests.get(page_url, headers=HEADERS, timeout=30)
    response.encoding = "gbk"
    return BeautifulSoup(response.text, "lxml")


def main():
    """Run the full scrape: index -> CSV -> book info -> chapter files."""
    index = _fetch_soup(INDEX_URL)
    # Anchor tags of the chapter list; each href is relative to BASE_URL.
    chapter_links = index.select('div.book_list ul li a')

    if not os.path.exists(BOOK_NAME):
        os.mkdir(BOOK_NAME)

    # Record every chapter URL in a single-column CSV inside the book dir.
    urls = []
    csv_path = os.path.join(BOOK_NAME, BOOK_NAME + ".csv")
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['链接'])
        writer.writeheader()
        for link in chapter_links:
            full_url = BASE_URL + link.get('href')
            writer.writerow({'链接': full_url})
            print(full_url)
            urls.append(full_url)
    print(len(urls))

    # Book title, author and introduction, taken from the index page.
    for info in index.select('span.red a,h3.bookinfo_intro'):
        print(info.text)

    # Download each chapter exactly once, print it, and save it to disk.
    for full_url in urls:
        page = _fetch_soup(full_url)
        chapter = page.select_one('div.h1title h1').text
        print()
        print(chapter)
        content = page.find('div', {'id': 'htmlContent'}).text.strip()
        print(content)
        content = content.replace(" ", "")
        # Strip filesystem-hostile characters before using the chapter
        # title as a file name (titles come straight from the page).
        safe_title = chapter.translate(_FILENAME_CLEAN)
        file_name = os.path.join(BOOK_NAME, f"{safe_title}.txt")
        with open(file_name, 'w', encoding='utf-8') as f:
            f.write(content)


if __name__ == "__main__":
    main()