"""Tkinter front end for a small classical-poetry scraper.

The module exposes ``headers`` (the HTTP request headers) and ``shicigefu``,
which builds the window, wires the two buttons and runs the Tk event loop.
"""
import re
import tkinter as tk
from tkinter import Toplevel, messagebox

import requests

headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36 "
        "SLBrowser/9.0.3.1311 SLBChan/11"
    )
}


def shicigefu():
    """Open the scraper window and block until the user closes it.

    The window offers two buttons:

    * fetch pages ``default_1`` .. ``default_4``, append the poems that are
      not stored yet to ``gushi.txt`` and show the first ten fetched poems
      in a new window;
    * fetch and store only, without opening the result window.

    A new ``tk.Tk`` root is created on every call and ``root.mainloop()``
    is run, so the call returns only after the window is destroyed.
    """
    request_timeout = 10  # seconds; without it a stalled server hangs the GUI
    field_names = ("title", "author", "dynasty", "poetic")

    # NOTE(review): as they appear in this copy of the file, the four
    # extraction patterns below carry no HTML tag anchors (the first one
    # matches the empty string at every position). They look like they lost
    # their markup somewhere upstream -- restore them against the live page
    # HTML before relying on the output. They are kept verbatim here.
    title_pattern = re.compile('(.*?)', re.DOTALL)
    author_pattern = re.compile('\n\n.*?(.*?)', re.DOTALL)
    dynasty_pattern = re.compile('\n\n.*?(.*?)', re.DOTALL)
    poetic_pattern = re.compile('\n\n(.*?)\n', re.DOTALL)

    # "<.*?>" already covers self-closing tags, so no second alternative.
    tag_pattern = re.compile(r'<.*?>')

    # One stored record per line; the last group is greedy so that a poem
    # body containing ", " or ": " is still read back in full.
    record_pattern = re.compile(
        r'标题: (.*?), 作者: (.*?), 朝代: (.*?), 内容: (.*)'
    )

    def fetch_data():
        """Download the four listing pages and return the parsed poems.

        Returns:
            A list of dicts with the keys ``title``, ``author``, ``dynasty``
            and ``poetic`` (the poem body with HTML tags removed).

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-success HTTP status.
        """
        urls = [
            f"https://www.gushiwen.cn/default_{i}.aspx" for i in range(1, 5)
        ]
        gushici = []
        for url in urls:
            response = requests.get(
                url, headers=headers, timeout=request_timeout
            )
            response.raise_for_status()
            content = response.text

            titles = title_pattern.findall(content)
            authors = author_pattern.findall(content)
            dynastys = dynasty_pattern.findall(content)
            new_poetics = [
                tag_pattern.sub('', poetic).strip()
                for poetic in poetic_pattern.findall(content)
            ]

            # zip() stops at the shortest list, so a page on which the four
            # patterns find different numbers of matches is truncated.
            gushici.extend(
                {
                    "title": title,
                    "author": author,
                    "dynasty": dynasty,
                    "poetic": poetic,
                }
                for title, author, dynasty, poetic in zip(
                    titles, authors, dynastys, new_poetics
                )
            )
        return gushici

    def flatten(text):
        """Collapse *text* onto one line so a record never spans lines."""
        pieces = (piece.strip() for piece in text.splitlines())
        return " ".join(piece for piece in pieces if piece)

    def record_key(item):
        """Return the tuple that identifies *item* for de-duplication."""
        return tuple(flatten(item[name]) for name in field_names)

    def read_existing_data_from_file(filename="gushi.txt"):
        """Read the poems already stored in *filename*.

        Lines that do not match the record layout are skipped. A missing
        file is treated as "nothing stored yet".

        Returns:
            A list of dicts with the same keys as ``fetch_data`` produces.
        """
        existing_data = []
        try:
            with open(filename, "r", encoding="utf-8") as file:
                for line in file:
                    # Only the line terminator is removed: an empty poem
                    # body leaves a trailing space that the pattern needs.
                    match = record_pattern.fullmatch(line.rstrip("\r\n"))
                    if match:
                        existing_data.append(
                            dict(zip(field_names, match.groups()))
                        )
        except FileNotFoundError:
            pass  # first run: there is nothing to compare against
        return existing_data

    def save_data_to_file(data, filename="gushi.txt"):
        """Append to *filename* every poem in *data* not stored there yet.

        Each poem is checked individually, so a batch that mixes known and
        unknown poems still gets its unknown ones written. Duplicates
        inside *data* itself are written once.
        """
        seen = {
            record_key(item)
            for item in read_existing_data_from_file(filename)
        }
        fresh = []
        for item in data:
            key = record_key(item)
            if key not in seen:
                seen.add(key)
                fresh.append(key)
        if not fresh:
            return
        with open(filename, "a", encoding="utf-8") as file:
            file.writelines(
                f"标题: {title}, 作者: {author}, "
                f"朝代: {dynasty}, 内容: {poetic}\n"
                for title, author, dynasty, poetic in fresh
            )

    def show_data_window(data):
        """Show the first ten poems of *data* in a new top-level window."""
        window = Toplevel(root)
        window.title("古诗词信息")
        text_widget = tk.Text(window)
        text_widget.pack(expand=True, fill='both')
        for item in data[:10]:
            text_widget.insert(
                tk.END,
                f"标题: {item['title']}\n作者: {item['author']}\n"
                f"朝代: {item['dynasty']}\n内容: {item['poetic']}\n\n",
            )
        # No window.mainloop() here: root.mainloop() is already running and
        # services this window too; a nested loop would keep the button
        # callback from returning.

    def fetch_and_store():
        """Fetch and persist the poems; return them, or None on failure."""
        try:
            gushici_data = fetch_data()
        except requests.RequestException as error:
            messagebox.showerror("古诗词爬虫", f"抓取失败: {error}")
            return None
        save_data_to_file(gushici_data)
        return gushici_data

    def fetch_and_save():
        """Button callback: fetch, store, then display the result."""
        gushici_data = fetch_and_store()
        if gushici_data is not None:
            show_data_window(gushici_data)

    def save_button_action():
        """Button callback: fetch and store without displaying anything."""
        fetch_and_store()

    root = tk.Tk()
    root.title("古诗词爬虫")

    fetch_button = tk.Button(
        root, text="开始爬取并显示", command=fetch_and_save
    )
    fetch_button.pack(pady=20)

    save_button = tk.Button(
        root, text="仅储存新数据", command=save_button_action
    )
    save_button.pack(pady=10)

    root.mainloop()