ADD file via upload

5 months ago · aba3ab16b6
parent 340bc5134b
commit aba3ab16b6
1 changed files with 96 additions and 0 deletions
--- a/许一个愿忘记你.py
+++ b/许一个愿忘记你.py
@ -0,0 +1,96 @@
+import requests
+from bs4 import BeautifulSoup
+import csv
+import os
+
+# Table-of-contents page of the novel; the chapter links are scraped from it.
+url = 'http://www.lazytxt.shop:8081/xs/112/112040/'
+
+# Browser-like User-Agent header sent with the requests made by this script.
+header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}
+
+# The timeout keeps the script from hanging forever on a stalled connection,
+# and raise_for_status() stops early on an HTTP error instead of parsing an
+# error page as if it were the chapter list.
+response = requests.get(url, headers=header, timeout=30)
+response.raise_for_status()
+# Decode the body as GBK before handing it to the parser.
+response.encoding = "gbk"
+soup = BeautifulSoup(response.text, "lxml")
+
+# Anchor elements of the chapter list. NOTE: the name `re` is read by the rest
+# of the script, so it is kept even though it matches the stdlib module name.
+re = soup.select('div.book_list ul li a')
+
+# Output directory for the chapter-link CSV and the per-chapter text files.
+dir_name = "许一个愿忘记你"
+# exist_ok=True creates the directory only when it is missing, without the
+# check-then-create race of os.path.exists() followed by os.mkdir().
+os.makedirs(dir_name, exist_ok=True)
+
+# Write every chapter URL to a one-column CSV and collect the URLs in `urls`.
+# The CSV path is derived from dir_name so it cannot drift away from the
+# output directory name.
+url1 = "http://www.lazytxt.shop:8081"  # site root; each href is appended as-is
+csv_path = os.path.join(dir_name, dir_name + '.csv')
+with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
+    fieldnames = ['链接']
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+    writer.writeheader()
+    urls = []
+    for i in re:
+        c_url = i.get('href')
+        if not c_url:
+            # An anchor without href would make the concatenation below fail.
+            continue
+        full_url = url1 + c_url
+        writer.writerow({'链接': full_url})
+        print(full_url)
+        urls.append(full_url)
+        # Running count of the links collected so far.
+        print(len(urls))
+
+# Print the text of the book-information elements on the index page
+# (title, author and synopsis, per the original note).
+book_info_nodes = soup.select('span.red a,h3.bookinfo_intro')
+for node in book_info_nodes:
+    print(node.get_text())
+
+# Download every chapter exactly once.
+# Pass 1 fetches each chapter page and prints its title (a table of contents).
+# Pass 2 prints each chapter and saves it as "<title>.txt" inside dir_name.
+# Keeping the extracted text from pass 1 avoids requesting every page twice.
+url1 = "http://www.lazytxt.shop:8081"  # site root; each href is appended as-is
+# '/' can never appear in a file name and the other characters are rejected
+# by Windows, so they are replaced with '_' when building the file name.
+unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
+chapters = []  # (title, text) pairs in the order of the chapter links
+
+for i in re:
+    c_url = i.get('href')
+    if not c_url:
+        continue  # anchor without a target: nothing to download
+    full_url = url1 + c_url
+    # Timeout: do not hang on a stalled connection.
+    # raise_for_status(): stop on an HTTP error instead of parsing an error page.
+    response = requests.get(full_url, headers=header, timeout=30)
+    response.raise_for_status()
+    response.encoding = "gbk"
+    soup = BeautifulSoup(response.text, 'lxml')
+    title_node = soup.select_one('div.h1title h1')
+    content_node = soup.find('div', {'id': 'htmlContent'})
+    if title_node is None or content_node is None:
+        # The page lacks the expected chapter markup; skip it rather than
+        # fail with AttributeError on None.
+        print(f"skipped (unexpected page layout): {full_url}")
+        continue
+    chapter = title_node.text
+    print(chapter)
+    chapters.append((chapter, content_node.text.strip()))
+
+for chapter, content in chapters:
+    print()
+    print(chapter)
+    print(content)
+    safe_name = chapter.translate(unsafe_chars)
+    file_name = os.path.join(dir_name, f"{safe_name}.txt")
+    with open(file_name, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+def remove_leading_spaces_in_txt_files(directory):
+    """Strip leading whitespace from every line of each .txt file under *directory*.
+
+    The tree is walked recursively with os.walk(); files whose names do not
+    end in '.txt' are left untouched. Files are read and rewritten as UTF-8,
+    and bytes that cannot be decoded are dropped on read (errors='ignore').
+
+    Args:
+        directory: Root directory to scan.
+
+    Returns:
+        None. Matching files are rewritten in place.
+    """
+    for root, _dirs, files in os.walk(directory):
+        for file in files:
+            # Only .txt files are processed.
+            if not file.endswith('.txt'):
+                continue
+            file_path = os.path.join(root, file)
+
+            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+
+            # lstrip() without arguments removes all leading whitespace
+            # characters, not only ASCII spaces.
+            modified_content = '\n'.join(
+                line.lstrip() for line in content.splitlines()
+            )
+            # splitlines() + join() drops a final newline; restore it so the
+            # file keeps its trailing line terminator.
+            if content.endswith('\n'):
+                modified_content += '\n'
+
+            with open(file_path, 'w', encoding='utf-8') as f:
+                f.write(modified_content)
+
+
+# Clean up the saved chapters: strip leading whitespace from every line of the
+# .txt files in the output directory. dir_name is used instead of repeating
+# the directory literal so the two cannot get out of sync.
+remove_leading_spaces_in_txt_files(dir_name)
+
+
+
+