diff --git a/xiancheng.py b/xiancheng.py new file mode 100644 index 0000000..8777334 --- /dev/null +++ b/xiancheng.py @@ -0,0 +1,48 @@ +import requests +import re +from bs4 import BeautifulSoup +import parsel +import os +import concurrent.futures +def get_response(html_url): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0' + } + response = requests.get(url=html_url, headers=headers) + return response +def get_list_url(html_url): + html_data = get_response(html_url).text + name = re.findall('
', html_data,re.S)[0].replace('
', '\n')
+ return title, content
def save(name, title, content):
    """Append one chapter to the text file ``<name>/<title>.txt``.

    Args:
        name: Directory that collects the chapters; created when missing.
        title: Chapter title. Written as the first line of the entry and
            also used (sanitised) as the file name.
        content: Chapter body, written after the title.

    The file is opened in append mode, so saving the same title twice adds
    a second entry instead of overwriting the first.
    """
    # exist_ok=True replaces the exists()/mkdir() pair: this function is
    # called from several worker threads, and two of them could both see
    # the directory missing and then race on mkdir().
    if name:
        os.makedirs(name, exist_ok=True)
    # Characters that are path separators or are rejected in Windows file
    # names would make open() fail, so they are swapped for underscores in
    # the file name only (the text written to the file keeps the title).
    safe_title = title.translate(
        str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    )
    # os.path.join picks the right separator for the platform instead of
    # the hard-coded backslash.
    path = os.path.join(name, safe_title + '.txt')
    with open(path, mode='a', encoding='utf-8') as f:
        f.write(f'{title}\n{content}\n')
    print(title, '已经保存')
def main(home_url):
    """Fetch one page through ``get_content`` and hand the result to ``save``.

    Args:
        home_url: URL forwarded to ``get_content`` as ``html_url``.

    The target directory is taken from the module-level ``name``, which is
    assigned in the ``__main__`` block; calling this function before that
    assignment raises ``NameError``.
    """
    fetched = get_content(html_url=home_url)
    chapter_title, chapter_text = fetched
    save(name, chapter_title, chapter_text)
if __name__ == '__main__':
    # Starting page handed to get_list_url(); presumably the book's index
    # page listing its chapters -- confirm against get_list_url().
    url = 'https://www.bqguu.cc/book/176453/'
    # `name` has to remain a module-level global: main() reads it when it
    # calls save().
    name, url_list = get_list_url(html_url=url)
    # The context manager waits for all submitted work and shuts the pool
    # down even if building the task list raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=7) as exe:
        pending = {}
        for link in url_list:
            index_url = 'https://www.bqguu.cc/' + link
            pending[exe.submit(main, index_url)] = index_url
        # Without inspecting the futures, any exception raised inside a
        # worker would be dropped silently; report each failed URL instead.
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print(pending[future], '处理失败', error)
\ No newline at end of file