parent
f239ebab98
commit
725a88b160
@ -0,0 +1,141 @@
|
||||
import requests#引入爬虫模块
|
||||
from selenium import webdriver#驱动器
|
||||
from selenium.webdriver.common.by import By#selenium元素定位
|
||||
from lxml import etree#用于数据分析
|
||||
import os#建立文件夹
|
||||
from time import sleep#
|
||||
|
||||
import threading#多线程
|
||||
import tkinter as tk
|
||||
|
||||
from tkinter import *
|
||||
|
||||
|
||||
# HTTP request headers sent with every requests call: a desktop Firefox
# User-Agent string.
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
headers = {'User-Agent': _USER_AGENT}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def open_browser():
    """Open the novel site in Firefox, enter the query and start the download.

    Reads the search keyword from the module-level ``entry`` widget, loads the
    site's home page in a new Firefox WebDriver session, types the keyword
    into the search box and hands the driver to ``search_nover``.

    The WebDriver session is always closed when the work finishes or fails,
    so repeated searches do not leave browser processes behind.
    """
    xiaoshuo = entry.get()
    url_bate = 'https://www.qu70.cc/'  # base URL of the site
    res = webdriver.Firefox()  # start a Firefox WebDriver session
    try:
        res.maximize_window()
        res.get(url_bate)  # load the site's home page
        # Pause 4 seconds before touching the page (the original note says
        # this is meant to avoid the site's bot detection).
        sleep(4)
        # Locate the search input by its absolute XPath and type the query.
        search_box = res.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/form/input[1]')
        search_box.send_keys(xiaoshuo)
        search_nover(res, headers)
    finally:
        # Release the browser and its driver process even if a step raised.
        res.quit()
|
||||
|
||||
def search_nover(res, headers):
    """Submit the search and download every chapter of every result.

    Clicks the search button on the page currently loaded in ``res``,
    collects the title and link of each search result, and for every result
    creates a folder named after the title in the current working directory.
    Each chapter listed on the result's index page is fetched with
    ``requests`` and written to ``./<title>/<chapter>.text`` as UTF-8.

    Args:
        res: Selenium WebDriver whose current page has the keyword already
            typed into the search box.
        headers: HTTP headers passed to every ``requests.get`` call.
    """
    sousuo = res.find_element(By.CLASS_NAME, 'btn')
    sousuo.click()  # submit the search
    sleep(5)  # wait 5 seconds for the results page to load

    # Elements that carry both the novel title (text) and its URL (href).
    xiaoshuo_list = res.find_elements(By.XPATH, '/html/body/div[5]/div/div/div/div/div[2]/h4/a')
    dicts = {link.text: link.get_attribute('href') for link in xiaoshuo_list}

    print(fr'搜索出{len(xiaoshuo_list)}篇相关小说')  # report the number of hits

    for key1, value1 in dicts.items():
        # One folder per search result; tolerate a folder left by an
        # earlier run instead of crashing with FileExistsError.
        os.makedirs(fr'./{key1}', exist_ok=True)

        index_page = requests.get(url=value1, headers=headers)
        index_page.encoding = 'utf-8'
        tree = etree.HTML(index_page.text)

        # Chapter title -> chapter URL for THIS novel only. Building the
        # mapping inside the loop keeps chapters of previously processed
        # novels from being downloaded again into this novel's folder.
        dictss = {}
        for anchor in tree.xpath('/html/body//dd/a'):
            dictss[anchor.text] = 'https://www.qu70.cc' + anchor.xpath('@href')[0]

        for key2, value2 in dictss.items():
            print(key2, value2)
            try:
                re2 = requests.get(url=value2, headers=headers)
            except requests.exceptions.InvalidURL:
                continue  # skip entries whose href does not form a valid URL
            # Decode chapter pages the same way as the index page.
            re2.encoding = 'utf-8'
            tree2 = etree.HTML(re2.text)
            # xpath() returns a list of text fragments of the chapter body.
            text2 = tree2.xpath('//div/div/div[@id="chaptercontent"]/text()')
            print(text2)
            if not text2:
                continue  # nothing to save for this chapter

            # Open the file once per chapter and write all fragments; 'w'
            # keeps a rerun from appending a second copy of the text.
            with open(fr'./{key1}/{key2}.text', mode='w', encoding='utf-8') as f:
                f.writelines(text2)
            print(fr'{key2}下载完成')
|
||||
|
||||
|
||||
def th1_start():
    """Run ``open_browser`` on a new thread named ``thread1``."""
    worker = threading.Thread(target=open_browser, name='thread1')
    worker.start()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Main window: fixed 600x400 size, centred on the screen.
root = tk.Tk()
window_width, window_height = 600, 400
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()
x_coordinate = (screen_width - window_width) / 2
y_coordinate = (screen_height - window_height) / 2
root.geometry(f"{window_width}x{window_height}+{int(x_coordinate)}+{int(y_coordinate)}")
root.title('小说下载器')

# Container frame for the label, the entry and the search button.
f1 = tk.Frame(root, borderwidth=10)
f1.pack()

# Prompt label followed by the text entry, laid out left to right.
label = tk.Label(f1, text='请输入您要下载的小说:')
label.pack(side=tk.LEFT, padx=5, pady=10)

entry = tk.Entry(f1, width=50, state="normal")
entry.pack(side=tk.LEFT)

# Placeholder text shown in the entry until the user focuses it.
text = '点击输入'
entry.insert(0, text)
|
||||
def enable_edit(event):
    """Make the entry editable when it gains focus.

    Switches the entry back to the ``normal`` state and removes the
    placeholder so the user starts typing in an empty field. Text the user
    has already entered is kept.

    Args:
        event: Tk event object supplied by the binding (unused).
    """
    entry.config(state="normal")
    # Clear only the placeholder; deleting unconditionally would wipe the
    # user's own input every time the entry regains focus.
    if entry.get() == text:
        entry.delete(0, tk.END)
|
||||
|
||||
def disable_edit(event):
    """Put the placeholder back into an empty entry, then lock the entry.

    Args:
        event: Tk event object supplied by the binding (unused).
    """
    typed = entry.get()
    if not typed:
        # Nothing was entered: show the placeholder text again.
        entry.insert(0, text)
    entry.config(state="readonly")
|
||||
|
||||
|
||||
|
||||
# Unlock the entry when it gains focus and lock it again when focus leaves.
entry.bind(sequence="<FocusIn>", func=enable_edit)
entry.bind(sequence="<FocusOut>", func=disable_edit)
# Start in the read-only state with the placeholder showing.
entry.configure(state="readonly")

# Search button: starts the download on a background thread.
search_btn = tk.Button(f1, text='搜索', command=th1_start)
search_btn.pack(side=tk.LEFT, padx=5, pady=10)

# Enter the Tk event loop.
root.mainloop()
|
||||
|
||||
|
Loading…
Reference in new issue