ADD file via upload

6 months ago · 37e0d08e4f
parent aca2a4cd5e
commit 37e0d08e4f
1 changed files with 72 additions and 0 deletions
--- a/许一个愿忘记你.py
+++ b/许一个愿忘记你.py
@ -0,0 +1,72 @@
 import csv
 import os
 from urllib.parse import urljoin

 import requests
 from bs4 import BeautifulSoup
 # Table-of-contents page of the book to download.
 url = 'http://www.lazytxt.shop:8081/xs/112/112040/'
 # Browser-like User-Agent sent with every request.
 header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}
 # Output directory; also used as the base name of the link CSV.
 dir_name = "许一个愿忘记你"
 # Seconds to wait for the server before giving up on a request.
 REQUEST_TIMEOUT = 30
 # Characters that are illegal (or dangerous) inside a file name.
 _FORBIDDEN_IN_FILENAME = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})


 def safe_filename(title: str) -> str:
     """Return *title* turned into a name that is safe to use as a file name.

     Surrounding whitespace is dropped, path separators and other reserved
     characters become ``_``, and trailing dots/spaces are removed.  An empty
     result falls back to ``"untitled"``.
     """
     cleaned = title.strip().translate(_FORBIDDEN_IN_FILENAME).rstrip(". ")
     return cleaned or "untitled"


 def clean_text(text: str) -> str:
     """Remove non-breaking spaces from scraped chapter text and trim it.

     The HTML parser has already decoded ``&nbsp;`` into U+00A0, so both the
     literal entity and the decoded character are removed.
     """
     return text.replace("&nbsp;", "").replace("\xa0", "").strip()


 def absolute_links(hrefs, base_url: str) -> list:
     """Resolve every non-empty href in *hrefs* against *base_url*.

     Missing (``None``) or empty hrefs are skipped; the input order is kept.
     """
     return [urljoin(base_url, href) for href in hrefs if href]


 def write_link_csv(links, csv_path: str) -> None:
     """Write *links* to *csv_path*, one per row under the '链接' header.

     Each link and the running count are echoed to stdout as progress output.
     """
     with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
         writer = csv.DictWriter(csvfile, fieldnames=['链接'])
         writer.writeheader()
         for count, link in enumerate(links, start=1):
             writer.writerow({'链接': link})
             print(link)
             print(count)


 def fetch_soup(page_url: str):
     """Download *page_url* and return it parsed with the lxml parser.

     The response is decoded as GBK.  Raises ``requests.RequestException`` on
     connection problems, timeouts, or a non-success HTTP status.
     """
     response = requests.get(page_url, headers=header, timeout=REQUEST_TIMEOUT)
     response.raise_for_status()
     response.encoding = "gbk"
     return BeautifulSoup(response.text, "lxml")


 def download_chapter(chapter_url: str) -> bool:
     """Fetch one chapter page, print it and save it as ``<title>.txt``.

     Returns ``True`` when the chapter was written, ``False`` when the page
     could not be fetched or did not contain the expected title/content nodes.
     """
     try:
         page = fetch_soup(chapter_url)
     except requests.RequestException as exc:
         # One bad chapter should not abort the whole download.
         print(f"skipped {chapter_url}: {exc}")
         return False

     heading = page.select_one('div.h1title h1')
     body = page.find('div', {'id': 'htmlContent'})
     if heading is None or body is None:
         print(f"skipped {chapter_url}: unexpected page layout")
         return False

     chapter = heading.text.strip()
     content = clean_text(body.text)
     print()
     print(chapter)
     print(content)

     file_name = os.path.join(dir_name, f"{safe_filename(chapter)}.txt")
     with open(file_name, 'w', encoding='utf-8') as f:
         f.write(content)
     return True


 def main() -> None:
     """Scrape the table of contents, save the link list, download chapters."""
     index_soup = fetch_soup(url)
     hrefs = [anchor.get('href') for anchor in index_soup.select('div.book_list ul li a')]
     urls = absolute_links(hrefs, url)

     os.makedirs(dir_name, exist_ok=True)
     write_link_csv(urls, os.path.join(dir_name, f"{dir_name}.csv"))

     # Print book title, author and synopsis.
     for node in index_soup.select('span.red a,h3.bookinfo_intro'):
         print(node.text)

     # Each chapter page is fetched exactly once.
     for chapter_url in urls:
         download_chapter(chapter_url)


 if __name__ == "__main__":
     main()