|
|
|
@ -0,0 +1,106 @@
|
|
|
|
|
import requests
|
|
|
|
|
from lxml import html
|
|
|
|
|
from lxml import etree
|
|
|
|
|
import re
|
|
|
|
|
import os
|
|
|
|
|
import tkinter as tk
|
|
|
|
|
import concurrent.futures
|
|
|
|
|
# Last URL confirmed in the input window; the "confirm" callback inside
# window() rebinds this global.
x = ""
|
|
|
|
|
def get_response(html_url, timeout=10):
    """Send a GET request that looks like it comes from a desktop browser.

    Args:
        html_url: URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection,
            which would hang a download worker indefinitely.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked
        here; callers read ``.text`` directly.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the timeout expires.
    """
    # Disguise the client as a regular browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    return requests.get(url=html_url, headers=headers, timeout=timeout)
|
|
|
|
|
def get_list_url(html_url):
    """Read a book's index page and pull out its name and chapter links.

    Args:
        html_url: URL of the book's index page.

    Returns:
        A ``(name, url_list)`` tuple: ``name`` is the text of the first
        ``<h1>`` element, ``url_list`` holds every ``href`` captured from the
        ``<dd><a href ="...">`` anchors, in page order.

    Raises:
        IndexError: if the page contains no ``<h1>`` element.
    """
    page_source = get_response(html_url).text
    heading_matches = re.findall('<h1>(.*?)</h1>', page_source)
    chapter_links = re.findall('<dd><a href ="(.*?)">', page_source)
    # The first <h1> on the index page is used as the book name.
    return heading_matches[0], chapter_links
|
|
|
|
|
def get_content(html_url):
    """Fetch one chapter page and return its title and body text.

    Args:
        html_url: URL of the chapter page.

    Returns:
        A ``(title, content)`` tuple. ``title`` is the text of the first
        ``<h1 class="wap_none">`` element; ``content`` is the direct text
        nodes of the element with id ``chaptercontent`` joined by newlines.

    Raises:
        IndexError: if the page has no ``<h1 class="wap_none">`` element.
    """
    page_source = get_response(html_url).text
    heading_matches = re.findall('<h1 class="wap_none">(.*?)</h1>', page_source)
    chapter_title = heading_matches[0]
    tree = etree.HTML(page_source)
    text_nodes = tree.xpath('//*[@id="chaptercontent"]/text()')
    return chapter_title, '\n'.join(text_nodes)
|
|
|
|
|
def save(name, title, content):
    """Append one chapter to ``<name>/<name>.txt``, creating the folder if needed.

    Args:
        name: Book name; used for both the folder and the text file.
        title: Chapter title, written on its own line.
        content: Chapter body, written after the title and followed by a newline.
    """
    # exist_ok avoids the exists()/mkdir() race when several download
    # threads reach this point for the same book at the same time.
    os.makedirs(name, exist_ok=True)
    # os.path.join picks the right separator; a hard-coded backslash only
    # works on Windows.
    target = os.path.join(name, name + '.txt')
    with open(target, mode='a', encoding='utf-8') as f:
        # One write per chapter so concurrent appends cannot interleave a
        # title from one chapter with the body of another.
        f.write(f'{title}\n{content}\n')
|
|
|
|
|
def main(home_url, name):
    """Download a single chapter and append it to the book's text file.

    Args:
        home_url: URL of the chapter page to fetch.
        name: Book name, forwarded to ``save`` to pick the output file.
    """
    fetched = get_content(html_url=home_url)
    chapter_title, chapter_text = fetched
    save(name, chapter_title, chapter_text)
|
|
|
|
|
def queren():
    """Download every chapter of the book whose index URL is stored in ``wz.txt``.

    Chapter pages are fetched concurrently by a pool of 7 threads, but they
    are written to disk in index order so the resulting text file reads in
    the right sequence. A chapter that fails to download is reported and
    skipped; the remaining chapters are still saved.
    """
    # The context manager closes the file handle, which was previously leaked.
    with open("wz.txt", 'r', encoding='utf8') as f:
        book_url = f.read().strip()
    print(book_url)
    if not book_url:
        print("wz.txt 为空,请先输入网址并点击确认")
        return

    name, url_list = get_list_url(book_url)
    with concurrent.futures.ThreadPoolExecutor(max_workers=7) as exe:
        # Only the network fetch runs in the pool; saving happens below, in
        # submission order, so chapters are not appended in whatever order
        # the worker threads happen to finish.
        pending = [
            (chapter_path, exe.submit(get_content, 'https://www.bqgui.cc' + chapter_path))
            for chapter_path in url_list
        ]
        for chapter_path, future in pending:
            try:
                title, content = future.result()
            except Exception as exc:
                # Best-effort download: report the failure instead of letting
                # the future swallow it silently, then carry on.
                print(f"章节下载失败,已跳过: {chapter_path} ({exc!r})")
                continue
            save(name, title, content)
|
|
|
|
|
|
|
|
|
|
def window():
    """Show the URL input window and run its event loop until it is closed.

    The "确认" button stores the typed URL in the module-level ``x`` and in
    ``wz.txt``; the "下载" button calls ``queren`` to start the download.
    """
    root = tk.Tk()
    root.title("小说爬虫")
    root.geometry("500x500+300+300")

    text_table = tk.Label(root, text="请在https://www.bqgui.cc中找到想下载的书并在下方输入其网址")
    text_table.pack()

    e1 = tk.Entry(root, width=60)
    e1.pack()

    def func():
        """Persist the typed URL and clear the entry field."""
        global x
        # Strip stray whitespace so a pasted URL with a trailing space or
        # newline still forms a valid request.
        x = e1.get().strip()
        with open("wz.txt", 'w', encoding='utf8') as f:
            f.write(x)
        # Clear the whole field; a fixed end index of 50 left the tail of
        # longer URLs behind.
        e1.delete(0, tk.END)

    tk.Button(root, text="确认", command=func).pack()
    tk.Button(root, text="下载", command=queren).pack()
    root.mainloop()
|
|
|
|
|
def huanyin():
    """Show the welcome window and run its event loop.

    The window lists the application title and author details. Pressing
    "下一步" hides it, opens the URL input window, and destroys this window
    once the input window's event loop returns.
    """
    root = tk.Tk()
    root.title("小说爬虫")
    root.geometry("500x500+300+300")

    # Labels are created and packed one after another, top to bottom.
    captions = (
        "欢迎使用小说爬虫",
        "班级:计科2105",
        "姓名:王文杰",
        "学号:21412030511",
    )
    for caption in captions:
        tk.Label(root, text=caption).pack()

    def fun2():
        """Hide the welcome window, run the input window, then clean up."""
        root.withdraw()
        window()
        root.destroy()

    next_button = tk.Button(root, text="下一步", command=fun2)
    next_button.pack()

    root.mainloop()
|
|
|
|
|
# Script entry point: open the welcome window only when run directly.
if __name__ == "__main__":
    huanyin()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|