ADD file via upload

1 year ago · 725a88b160
parent f239ebab98
commit 725a88b160
1 changed files with 141 additions and 0 deletions
--- a/小说.py
+++ b/小说.py
@@ -0,0 +1,141 @@
+import requests#引入爬虫模块
+from selenium import webdriver#驱动器
+from selenium.webdriver.common.by import By#selenium元素定位
+from lxml import etree#用于数据分析
+import os#建立文件夹
+from time import sleep#
+
+import threading#多线程
+import tkinter as tk
+
+from tkinter import *
+
+
+headers={
+    'User-Agent':
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
+    }#在请求头输入用户代理
+
+
+
+
+
+def open_browser():
+
+
+    xiaoshuo = entry.get()
+    url_bate='https://www.qu70.cc/'#基础网站url
+    res=webdriver.Firefox()#使用火狐驱动
+    res.maximize_window()#页面最大化
+    res.get(url_bate)#通过url获取网站主页面信息
+    sleep(4)#休息两秒防止系统检测
+    input = res.find_element(By.XPATH,'/html/body/div[4]/div[1]/div[2]/form/input[1]')#通过元素定位找到输入框
+    input.send_keys(xiaoshuo)#在输入框中输入小说的结果
+    search_nover(res,headers)
+
+def search_nover(res,headers):
+
+
+
+    sousuo=res.find_element(By.CLASS_NAME,'btn')
+    sousuo.click()#点击搜索
+    sleep(5)#休息5秒等页面加载完成后再进行后续操作
+    dicts={}
+
+    xiaoshuo_list=res.find_elements(By.XPATH,'/html/body/div[5]/div/div/div/div/div[2]/h4/a')#定位到既有标题也有url的元素
+    for i in xiaoshuo_list:
+        dicts[i.text]=i.get_attribute('href')#将标题和url以键值对形式添加到字典中
+
+    print(fr'搜索出{len(xiaoshuo_list)}篇相关小说')#记录搜索结果的数目
+
+    dictss = {
+    }#建立字典存储小说章节和内容
+    for key1,value1 in dicts.items():
+        os.mkdir(fr'./{key1}')#每一个搜索结果分别建立文件夹
+        re=requests.get(url=value1,headers=headers)
+        re.encoding='utf-8'
+
+        tree=etree.HTML(re.text)
+        title=tree.xpath('/html/body//dd/a')
+        for i in title:
+            dictss[i.text]=str('https://www.qu70.cc'+i.xpath('@href')[0])#将章节和url组成字典
+        for key2 ,value2 in dictss.items():
+
+            try:
+                print(key2,value2)
+                re2=requests.get(url=value2,headers=headers)
+                tree2=etree.HTML(re2.text)
+                text2=tree2.xpath('//div/div/div[@id="chaptercontent"]/text()')#得到的小说文本是字典类型
+                print(text2)
+                for book in text2:
+                #    print(book)
+
+                    with open(fr'./{key1}/{key2}.text',mode='a',encoding='utf-8') as f:
+                        f.write(book)
+                print(fr'{key2}下载完成')
+
+
+            except requests.exceptions.InvalidURL:
+                continue#遇到无效url直接跳过
+
+
+def th1_start():#多线程
+    thread1 = threading.Thread(name='thread1',target=open_browser)
+    thread1.start()
+
+
+
+
+
+
+root =tk.Tk()
+#窗口尺寸
+window_width = 600
+window_height = 400
+screen_width = root.winfo_screenwidth()
+screen_height = root.winfo_screenheight()
+x_coordinate = (screen_width / 2) - (window_width / 2)
+y_coordinate = (screen_height / 2) - (window_height / 2)
+root.geometry("%dx%d+%d+%d" % (window_width, window_height, x_coordinate, y_coordinate))
+root.title('小说下载器')
+#框架
+f1 = tk.Frame(root,borderwidth=10)
+f1.pack()
+
+#输入框
+label = tk.Label(f1,text='请输入您要下载的小说：')
+label.pack(side=LEFT,padx=5,pady=10)
+
+entry = Entry(f1,width=50,state="normal")
+entry.pack(side=LEFT)
+
+text = '点击输入'
+
+
+
+
+entry.insert(0,text)
+def enable_edit(event):
+    entry.config(state="normal")
+    entry.delete(0,tk.END)
+
+def disable_edit(event):
+    if not entry.get():
+        entry.insert(0,text)
+
+    entry.config(state="readonly")
+
+
+
+entry.bind("<FocusIn>",enable_edit)
+
+entry.bind("<FocusOut>",disable_edit)
+entry.config(state="readonly")
+
+#搜索按钮
+search_btn = tk.Button(f1,text='搜索',command=th1_start)
+search_btn.pack(side=LEFT,padx=5,pady=10)
+
+root.mainloop()
+
+