# xiaoshuowangzhan/许一个愿忘记你.py

import csv
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Table-of-contents page of the novel; the chapter links are scraped from it.
url = 'http://www.lazytxt.shop:8081/xs/112/112040/'

# Browser-like User-Agent header sent with every request.
header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}

# requests waits indefinitely by default; a timeout keeps the script from
# hanging forever on an unresponsive server.
response = requests.get(url, headers=header, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page into an
# empty chapter list.
response.raise_for_status()
# Decode the body as GBK when reading response.text.
response.encoding = "gbk"
soup = BeautifulSoup(response.text, "lxml")

# Anchors of the chapter list; their href attributes are the chapter URLs.
# NOTE: this name shadows the standard-library ``re`` module. It is kept
# because the rest of the script refers to it.
re = soup.select('div.book_list ul li a')

# Output directory that receives the chapter-link CSV and the chapter text files.
dir_name = "许一个愿忘记你"
# exist_ok=True replaces the racy "check, then create" sequence and raises
# FileExistsError right away if the name is already taken by a regular file.
os.makedirs(dir_name, exist_ok=True)

# Record every chapter URL in a one-column CSV stored inside the output directory.
with open(os.path.join(dir_name, '许一个愿忘记你.csv'), 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['链接']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    urls = []
    for i in re:
        c_url = i.get('href')
        if not c_url:
            # An anchor without an href cannot be turned into a URL.
            continue
        # urljoin resolves root-relative, page-relative and absolute hrefs
        # against the index page URL; plain string concatenation only works
        # for root-relative ones.
        full_url = urljoin(url, c_url)
        writer.writerow({'链接': full_url})
        print(full_url)
        urls.append(full_url)
        # Running count of the links collected so far.
        print(len(urls))

# Print the text of the index-page elements that, per the original author's
# note, hold the book title, the author and the introduction.
re1 = soup.select('span.red a,h3.bookinfo_intro')
for node in re1:
    print(node.get_text())

# Characters that cannot appear in a file name on Windows ("/" is invalid on
# every platform); each one is replaced with "_" before a chapter title is
# used as a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

# Download every chapter exactly once: print its title and text, then save
# the text to "<dir_name>/<chapter title>.txt".
for i in re:
    c_url = i.get('href')
    if not c_url:
        # An anchor without an href has no chapter page to fetch.
        continue
    full_url = urljoin(url, c_url)

    try:
        # The timeout prevents one stalled request from blocking the whole run.
        response = requests.get(full_url, headers=header, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable chapter should not abort the remaining downloads.
        print(f"skipping {full_url}: {exc}")
        continue
    response.encoding = "gbk"

    chapter_soup = BeautifulSoup(response.text, 'lxml')
    title_tag = chapter_soup.select_one('div.h1title h1')
    content_tag = chapter_soup.find('div', {'id': 'htmlContent'})
    if title_tag is None or content_tag is None:
        # Both lookups return None when the page lacks the expected markup.
        print(f"skipping {full_url}: chapter title or content not found")
        continue

    chapter = title_tag.text.strip()
    content = content_tag.text.strip()
    print()
    print(chapter)
    print(content)

    file_name = os.path.join(dir_name, f"{chapter.translate(_UNSAFE_FILENAME_CHARS)}.txt")
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(content)

def remove_leading_spaces_in_txt_files(directory):
    """Strip leading whitespace from every line of each ``.txt`` file under *directory*.

    The directory tree is walked recursively with ``os.walk``. Each file whose
    name ends in ``.txt`` is read as UTF-8 (undecodable bytes are dropped),
    split with ``str.splitlines``, left-stripped line by line with
    ``str.lstrip()`` and written back in place as UTF-8. The lines are rejoined
    with ``"\\n"``, so a trailing newline in the original file is not kept.

    Args:
        directory: Path of the directory tree to process.

    Returns:
        None. The matching files are rewritten in place.
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # Only plain-text files are touched; everything else is skipped.
            if not name.endswith('.txt'):
                continue

            target = os.path.join(folder, name)

            with open(target, 'r', encoding='utf-8', errors='ignore') as src:
                original_lines = src.read().splitlines()

            # lstrip() without arguments removes all leading whitespace,
            # not only the ASCII space character.
            stripped_lines = [text.lstrip() for text in original_lines]

            with open(target, 'w', encoding='utf-8') as dst:
                dst.write('\n'.join(stripped_lines))

# Usage: call the function with your own directory path (see the call below).


# Post-process the output directory: strip the leading whitespace from every
# line of each .txt file stored in it.
remove_leading_spaces_in_txt_files('许一个愿忘记你')
ADD file via upload 5 months ago			`import requests`
			`from bs4 import BeautifulSoup`
			`import csv`
			`import os`

			`url='http://www.lazytxt.shop:8081/xs/112/112040/'`

			`header={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}`
			`response = requests.get(url, headers=header)`
			`response.encoding="gbk"`
			`#print(response.url)`
			`#print(response.text)`
			`soup=BeautifulSoup(response.text,"lxml")`
			`re = soup.select('div.book_list ul li a')`
			`#print(re)`

			`dir_name = "许一个愿忘记你"`
			`if not os.path.exists(dir_name):`
			`os.mkdir(dir_name)`

			`with open('许一个愿忘记你/许一个愿忘记你.csv', 'w', newline='', encoding='utf-8') as csvfile:`
			`fieldnames = ['链接']`
			`writer = csv.DictWriter(csvfile, fieldnames=fieldnames)`
			`writer.writeheader()`
			`urls=[]`
			`for i in re:`
			`c_url = i.get('href')`
			`url1="http://www.lazytxt.shop:8081"`
			`full_url = url1 + c_url`
			`writer.writerow({'链接': full_url})`
			`print(full_url)`
			`urls.append(full_url)`
			`print(len(urls))`

			`re1 = soup.select('span.red a,h3.bookinfo_intro')`
			`for i in re1:#获取书名作者简介`
			`print(i.text)`

			`for i in re:`
			`c_url = i.get('href')`
			`url1 = "http://www.lazytxt.shop:8081"`
			`full_url = url1 + c_url`
			`response = requests.get(full_url, headers=header)`
			`response.encoding = "gbk"`
			`soup = BeautifulSoup(response.text, "lxml")`
			`chapter = soup.select_one('div.h1title h1').text`
			`print(chapter)`



			`for i in re:`
			`c_url = i.get('href')`
			`url1 = "http://www.lazytxt.shop:8081"`
			`full_url = url1 + c_url`
			`response = requests.get(full_url, headers=header)`
			`response.encoding = "gbk"`
			`soup = BeautifulSoup(response.text, 'lxml')`
			`chapter = soup.select_one('div.h1title h1').text`
			`print()`
			`print(chapter)`
			`content = soup.find('div', {'id': 'htmlContent'}).text.strip()`
			`print(content)`
			`#content = content.replace(" ", "")`
			`file_name = os.path.join(dir_name, f"{chapter}.txt")`
			`with open(file_name, 'w', encoding='utf-8') as f:`
			`f.write(content)`

			`def remove_leading_spaces_in_txt_files(directory):`
			`# 遍历指定目录下的所有文件和子目录`
			`for root, dirs, files in os.walk(directory):`
			`for file in files:`
			`# 检查文件是否为.txt文件`
			`if file.endswith('.txt'):`
			`# 构建文件的完整路径`
			`file_path = os.path.join(root, file)`

			`# 读取文件内容`
			`with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:`
			`content = f.read()`

			`# 删除每行前面的空格，并替换文件内容`
			`# 注意：这里假设每行都应该有相同的处理，即删除行首空格`
			`modified_content = '\n'.join(line.lstrip() for line in content.splitlines())`

			`# 写回文件内容`
			`with open(file_path, 'w', encoding='utf-8') as f:`
			`f.write(modified_content)`

			`# 使用函数，替换为你的目录路径`


			`remove_leading_spaces_in_txt_files('许一个愿忘记你')`