ADD file via upload

1 year ago · 2bec3b8f81
parent 94829abde7
commit 2bec3b8f81
1 changed files with 48 additions and 0 deletions
--- a/xiancheng.py
+++ b/xiancheng.py
@ -0,0 +1,48 @@
+import requests
+import re
+from bs4 import BeautifulSoup
+import parsel
+import os
+import concurrent.futures
+def get_response(html_url, timeout=10):
+    """Fetch a page with a desktop-browser User-Agent header.
+
+    Args:
+        html_url: Address of the page to download.
+        timeout: Seconds to wait for the server (connect and read) before
+            giving up. Passed straight to ``requests.get``.
+
+    Returns:
+        The ``requests.Response`` returned by ``requests.get``. The HTTP
+        status code is not checked here.
+
+    Raises:
+        requests.exceptions.Timeout: If the server does not answer within
+            ``timeout`` seconds.
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
+    }
+    # requests waits indefinitely unless a timeout is given; without one a
+    # single stalled connection would block its worker thread forever.
+    return requests.get(url=html_url, headers=headers, timeout=timeout)
+def get_list_url(html_url):
+    """Read a table-of-contents page and collect the links it lists.
+
+    Args:
+        html_url: Address of the page to download via ``get_response``.
+
+    Returns:
+        A ``(name, url_list)`` tuple: ``name`` is the text of the first
+        ``<h1>`` element, ``url_list`` holds the ``href`` of the first
+        ``<a>`` inside every ``<dd>`` element, in document order.
+
+    Raises:
+        IndexError: If the page contains no ``<h1>...</h1>`` on one line.
+    """
+    page = get_response(html_url).text
+    headings = re.findall('<h1>(.*?)</h1>', page)
+    name = headings[0]
+    document = BeautifulSoup(page, 'html.parser')
+    # One anchor lookup per <dd>; entries without an <a> yield None and are
+    # filtered out below.
+    anchors = [entry.find('a') for entry in document.find_all('dd')]
+    url_list = [anchor['href'] for anchor in anchors if anchor]
+    return name, url_list
+def get_content(html_url):
+    """Download one page and extract its heading and body text.
+
+    Args:
+        html_url: Address of the page to download via ``get_response``.
+
+    Returns:
+        A ``(title, content)`` tuple. ``title`` is the text of the first
+        ``<h1 class="wap_none">`` element; ``content`` is the markup between
+        the ``chaptercontent`` div and the ``readinline`` paragraph, with
+        every ``<br /><br />`` pair turned into a newline.
+
+    Raises:
+        IndexError: If either pattern is not found in the page.
+    """
+    page = get_response(html_url).text
+    title_pattern = r'<h1 class="wap_none">(.*?)</h1>'
+    body_pattern = '<div id="chaptercontent" class="Readarea ReadAjax_content">(.*?)<p class="readinline">'
+    title = re.findall(title_pattern, page)[0]
+    # re.S lets "." cross line breaks, so the capture can span many lines.
+    raw_body = re.findall(body_pattern, page, re.S)[0]
+    content = raw_body.replace('<br /><br />', '\n')
+    return title, content
+def save(name, title, content):
+    """Append one chapter to ``<name>/<title>.txt`` as UTF-8 text.
+
+    The file receives the title, a newline, the content and a trailing
+    newline. It is opened in append mode, so saving the same title twice
+    adds a second copy to the existing file.
+
+    Args:
+        name: Directory to write into; created if missing. An empty value
+            writes into the current working directory.
+        title: Chapter heading, also used as the file name (without ".txt").
+        content: Chapter body text.
+    """
+    # exist_ok=True removes the check-then-create race: several worker
+    # threads may try to create the directory at the same moment, and a
+    # separate exists()/mkdir() pair would make one of them fail.
+    if name:
+        os.makedirs(name, exist_ok=True)
+    # os.path.join uses the platform's separator instead of a hard-coded
+    # backslash, which only forms a directory path on Windows.
+    path = os.path.join(name, title + '.txt')
+    with open(path, mode='a', encoding='utf-8') as f:
+        f.write(f'{title}\n{content}\n')
+    print(title, '已经保存')
+def main(home_url):
+    """Download the page at ``home_url`` and write it to disk.
+
+    The target directory is taken from the module-level ``name`` variable,
+    which this file only assigns inside its ``__main__`` block; that
+    assignment must have happened before this function is called.
+
+    Args:
+        home_url: Address passed to ``get_content``.
+    """
+    chapter = get_content(html_url=home_url)
+    # chapter is the (title, content) pair returned by get_content.
+    save(name, *chapter)
+if __name__ == '__main__':
+    # Imported here because only this entry point needs it.
+    from urllib.parse import urljoin
+    site_root = 'https://www.bqguu.cc/'
+    url = 'https://www.bqguu.cc/book/176453/'
+    # ``name`` has to stay a module-level variable: main() reads it.
+    name, url_list = get_list_url(html_url=url)
+    with concurrent.futures.ThreadPoolExecutor(max_workers=7) as exe:
+        # Map each future to its URL so failures can be reported by address.
+        pending = {}
+        for href in url_list:
+            # urljoin copes with hrefs with or without a leading slash and
+            # leaves absolute URLs alone, unlike plain concatenation.
+            index_url = urljoin(site_root, href)
+            pending[exe.submit(main, index_url)] = index_url
+        # An exception raised inside a submitted task is stored on its
+        # future; without this loop failed downloads would vanish silently.
+        for future in concurrent.futures.as_completed(pending):
+            error = future.exception()
+            if error is not None:
+                print(pending[future], '下载失败:', error)