|
|
@ -2,14 +2,15 @@ import requests
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
import tkinter as tk
|
|
|
|
import tkinter as tk
|
|
|
|
from tkinter import Toplevel
|
|
|
|
from tkinter import Toplevel
|
|
|
|
import mysql
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
headers = {
|
|
|
|
headers = {
|
|
|
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/11"
|
|
|
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/11"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def show_data_window():
|
|
|
|
def show_data_window():
|
|
|
|
def fetch_data():
|
|
|
|
def fetch_data():
|
|
|
|
urls = [f"https://www.gushiwen.cn/default_{i}.aspx" for i in range(1, 3)]
|
|
|
|
urls = [f"https://www.gushiwen.cn/default_{i}.aspx" for i in range(1, 5)]
|
|
|
|
gushici = []
|
|
|
|
gushici = []
|
|
|
|
for url in urls:
|
|
|
|
for url in urls:
|
|
|
|
response = requests.get(url, headers=headers)
|
|
|
|
response = requests.get(url, headers=headers)
|
|
|
@ -23,29 +24,26 @@ def show_data_window():
|
|
|
|
for title, author, dynasty, poetic in zip(titles, authors, dynastys, new_poetics):
|
|
|
|
for title, author, dynasty, poetic in zip(titles, authors, dynastys, new_poetics):
|
|
|
|
gushici.append({"title": title, "author": author, "dynasty": dynasty, "poetic": poetic})
|
|
|
|
gushici.append({"title": title, "author": author, "dynasty": dynasty, "poetic": poetic})
|
|
|
|
|
|
|
|
|
|
|
|
# 保存到TXT文件
|
|
|
|
|
|
|
|
with open("gushi.txt", "w", encoding="utf-8") as file:
|
|
|
|
with open("gushi.txt", "w", encoding="utf-8") as file:
|
|
|
|
for item in gushici:
|
|
|
|
for item in gushici:
|
|
|
|
file.write(f"标题: {item['title']}, 作者: {item['author']}, 朝代: {item['dynasty']}, 内容: {item['poetic']}\n")
|
|
|
|
file.write(f"标题: {item['title']}, 作者: {item['author']}, 朝代: {item['dynasty']}, 内容: {item['poetic']}\n")
|
|
|
|
|
|
|
|
|
|
|
|
# 在新窗口显示数据
|
|
|
|
# 在新窗口显示数据
|
|
|
|
show_data_window(gushici)
|
|
|
|
show_data_window(gushici)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def show_data_window(data):
|
|
|
|
def show_data_window(data):
|
|
|
|
window = Toplevel(root)
|
|
|
|
window = Toplevel(root)
|
|
|
|
window.title("古诗词信息")
|
|
|
|
window.title("古诗词信息")
|
|
|
|
text_widget = tk.Text(window)
|
|
|
|
text_widget = tk.Text(window)
|
|
|
|
text_widget.pack(expand=True, fill='both')
|
|
|
|
text_widget.pack(expand=True, fill='both')
|
|
|
|
|
|
|
|
|
|
|
|
for item in data[:20]: # 仅显示前10条数据作为示例
|
|
|
|
for item in data[:10]: # 仅显示前10条数据作为示例
|
|
|
|
text_widget.insert(tk.END, f"标题: {item['title']}\n作者: {item['author']}\n朝代: {item['dynasty']}\n内容: {item['poetic']}\n\n")
|
|
|
|
text_widget.insert(tk.END, f"标题: {item['title']}\n作者: {item['author']}\n朝代: {item['dynasty']}\n内容: {item['poetic']}\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
window.mainloop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
window.mainloop()
|
|
|
|
|
|
|
|
|
|
|
|
def start_mysql():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mysql.start_mysql_process()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
root = tk.Tk()
|
|
|
|
root = tk.Tk()
|
|
|
|
root.title("古诗词爬虫")
|
|
|
|
root.title("古诗词爬虫")
|
|
|
@ -53,6 +51,5 @@ def show_data_window():
|
|
|
|
fetch_button = tk.Button(root, text="开始爬取", command=fetch_data)
|
|
|
|
fetch_button = tk.Button(root, text="开始爬取", command=fetch_data)
|
|
|
|
fetch_button.pack(pady=20)
|
|
|
|
fetch_button.pack(pady=20)
|
|
|
|
|
|
|
|
|
|
|
|
mysql_button = tk.Button(root, text="跳转到mysql.py", command=start_mysql)
|
|
|
|
|
|
|
|
mysql_button.pack(pady=10) # 在"开始爬取"按钮下面添加新按钮
|
|
|
|
|
|
|
|
root.mainloop()
|
|
|
|
root.mainloop()
|
|
|
|
|
|
|
|
|
|
|
|