"""Small Tkinter front end that downloads novels from qu70.cc.

The user types a novel name, the script drives Firefox through Selenium to
run the site's search, then fetches every chapter of every search result
with ``requests`` and writes one text file per chapter into a folder named
after the novel (created in the current working directory).
"""
import os  # folder creation and path handling
import threading  # keeps the Tk event loop responsive during downloads
import tkinter as tk
from time import sleep  # fixed waits while the browser loads pages
from tkinter import *  # noqa: F401,F403 - retained from the original script
from urllib.parse import urljoin

import requests  # HTTP client used for the novel / chapter pages
from lxml import etree  # HTML parsing and XPath queries
from selenium import webdriver  # browser automation
from selenium.webdriver.common.by import By  # Selenium locator strategies

# Request headers: present a regular desktop Firefox user agent.
headers = {
    'User-Agent':
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
}

BASE_URL = 'https://www.qu70.cc/'  # site home page hosting the search form
REQUEST_TIMEOUT = 15  # seconds; stops a stalled request from hanging the worker

# Characters that cannot appear in a file name on Windows, plus line breaks/tabs.
_INVALID_NAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})


def _safe_name(name):
    """Return *name* made safe for use as a single file or directory name."""
    cleaned = (name or '').translate(_INVALID_NAME_CHARS).strip().rstrip('. ')
    return cleaned or 'untitled'


def _fetch_tree(url, request_headers):
    """Download *url* and return the parsed lxml tree.

    Returns ``None`` when the response body is empty.  Raises
    ``requests.exceptions.RequestException`` on any request failure.
    """
    response = requests.get(url=url, headers=request_headers, timeout=REQUEST_TIMEOUT)
    # Decode every page as UTF-8, the same way for index and chapter pages.
    response.encoding = 'utf-8'
    if not response.text.strip():
        return None
    return etree.HTML(response.text)


def _collect_chapters(tree, novel_url):
    """Map chapter title -> absolute chapter URL for one novel index page."""
    chapters = {}
    for link in tree.xpath('/html/body//dd/a'):
        hrefs = link.xpath('@href')
        if not link.text or not hrefs:
            continue
        href = hrefs[0]
        # Skip script pseudo-links; they are not downloadable pages.
        if href.lower().startswith('javascript:'):
            continue
        chapters[link.text] = urljoin(novel_url, href)
    return chapters


def _download_chapter(folder, chapter_title, chapter_url, request_headers):
    """Fetch one chapter and write its text to ``<folder>/<chapter_title>.text``."""
    print(chapter_title, chapter_url)
    try:
        tree = _fetch_tree(chapter_url, request_headers)
    except requests.exceptions.RequestException as exc:
        # Best effort: report the failed chapter and carry on with the rest.
        print(chapter_title, exc)
        return
    if tree is None:
        return

    # The XPath yields a list of text fragments (one per text node).
    fragments = tree.xpath('//div/div/div[@id="chaptercontent"]/text()')
    paragraphs = [
        fragment.rstrip().lstrip('\r\n')
        for fragment in fragments
        if fragment.strip()
    ]
    if not paragraphs:
        return

    chapter_path = os.path.join(folder, fr'{_safe_name(chapter_title)}.text')
    # Write mode (not append) so re-running does not duplicate chapter text.
    with open(chapter_path, mode='w', encoding='utf-8') as f:
        f.write('\n'.join(paragraphs))
    print(fr'{chapter_title}下载完成')


def _download_novel(title, novel_url, request_headers):
    """Create the folder for one novel and download all of its chapters."""
    folder = os.path.join('.', _safe_name(title))
    os.makedirs(folder, exist_ok=True)  # tolerate folders left by earlier runs

    try:
        tree = _fetch_tree(novel_url, request_headers)
    except requests.exceptions.RequestException as exc:
        print(title, exc)
        return
    if tree is None:
        return

    # The chapter map is built per novel so chapters never leak between novels.
    for chapter_title, chapter_url in _collect_chapters(tree, novel_url).items():
        _download_chapter(folder, chapter_title, chapter_url, request_headers)


def open_browser(keyword=None):
    """Open the site in Firefox, type the search term and start the download.

    Args:
        keyword: Novel name to search for.  When omitted, the current content
            of the module-level ``entry`` widget is used.
    """
    if keyword is None:
        keyword = entry.get()

    driver = webdriver.Firefox()  # Firefox driver
    try:
        driver.maximize_window()
        driver.get(BASE_URL)  # load the site home page
        sleep(4)  # give the page a few seconds to finish loading
        # Locate the search input box on the home page.
        search_box = driver.find_element(
            By.XPATH, '/html/body/div[4]/div[1]/div[2]/form/input[1]')
        search_box.send_keys(keyword)
        search_nover(driver, headers)
    finally:
        # Always shut the browser down, even when a step above fails.
        driver.quit()


def search_nover(res, headers):
    """Submit the search in the browser and download every novel found.

    Args:
        res: Selenium WebDriver whose current page has the search box filled in.
        headers: Header dict sent with the ``requests`` downloads.
    """
    res.find_element(By.CLASS_NAME, 'btn').click()  # click the search button
    sleep(5)  # wait for the result page before reading it

    # Each matched <a> carries both the novel title and its URL.
    result_links = res.find_elements(
        By.XPATH, '/html/body/div[5]/div/div/div/div/div[2]/h4/a')
    novels = {}
    for link in result_links:
        novels[link.text] = link.get_attribute('href')

    print(fr'搜索出{len(result_links)}篇相关小说')  # number of search results

    for title, novel_url in novels.items():
        if not novel_url:
            continue
        _download_novel(title, novel_url, headers)


def th1_start():
    """Start the search/download in a worker thread.

    The entry widget is read here, on the Tk thread, and the value is handed
    to the worker so the worker never touches Tk widgets itself.
    """
    keyword = entry.get().strip()
    if not keyword or keyword == text:
        # Nothing typed yet (empty or still showing the placeholder).
        print('请先输入小说名称')
        return
    thread1 = threading.Thread(name='thread1', target=open_browser, args=(keyword,))
    thread1.start()


root = tk.Tk()
# Window size, centred on the screen.
window_width = 600
window_height = 400
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()
x_coordinate = (screen_width - window_width) // 2
y_coordinate = (screen_height - window_height) // 2
root.geometry(f"{window_width}x{window_height}+{x_coordinate}+{y_coordinate}")
root.title('小说下载器')

# Container frame.
f1 = tk.Frame(root, borderwidth=10)
f1.pack()

# Prompt label and input box.
label = tk.Label(f1, text='请输入您要下载的小说:')
label.pack(side=tk.LEFT, padx=5, pady=10)

entry = tk.Entry(f1, width=50, state="normal")
entry.pack(side=tk.LEFT)

text = '点击输入'  # placeholder shown while the entry is empty
entry.insert(0, text)


def enable_edit(event):
    """Make the entry editable on focus and remove the placeholder text."""
    entry.config(state="normal")
    # Only clear the placeholder; keep anything the user already typed.
    if entry.get() == text:
        entry.delete(0, tk.END)


def disable_edit(event):
    """Restore the placeholder if the entry is empty and lock the entry."""
    if not entry.get():
        entry.insert(0, text)
    entry.config(state="readonly")


# NOTE(review): the event names were missing (empty strings) in the source;
# <FocusIn>/<FocusOut> match what the two handlers do - confirm.
entry.bind("<FocusIn>", enable_edit)
entry.bind("<FocusOut>", disable_edit)
entry.config(state="readonly")

# Search button.
search_btn = tk.Button(f1, text='搜索', command=th1_start)
search_btn.pack(side=tk.LEFT, padx=5, pady=10)

root.mainloop()