From 57aa1718241236eda712b79fa3e1af89b81a8862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E8=8D=A3=E6=9D=B0?= <2986694301@qq.com>
Date: Tue, 17 Sep 2024 14:59:22 +0800
Subject: [PATCH 1/3] 1

---
 1.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 1.txt

diff --git a/1.txt b/1.txt
new file mode 100644
index 0000000..e69de29

From bb625c67b8fdc64110d2b5e9e74811f4e0f5315c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E8=8D=A3=E6=9D=B0?= <2986694301@qq.com>
Date: Thu, 31 Oct 2024 11:37:00 +0800
Subject: [PATCH 2/3] =?UTF-8?q?=E8=AF=84=E8=AE=BA=E6=80=BB=E7=BB=93?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 练习二.py | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 练习二.py

diff --git a/练习二.py b/练习二.py
new file mode 100644
index 0000000..79fa3e0
--- /dev/null
+++ b/练习二.py
@@ -0,0 +1,131 @@
+import os
+import re
+import pandas as pd
+from collections import Counter
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import spacy
+from spacy.lang.zh import Chinese
+
+# Input and output file paths
+sample_file = '样本.txt'
+standard_file = '标准.txt'
+output_file = '评论提取结果.txt'
+excel_file = '评论提取结果.xlsx'
+
+# Blank Chinese pipeline; NOTE(review): `nlp` is not referenced anywhere below
+nlp = Chinese()
+
+
+def read_file(file_path):
+    """Return the full contents of the UTF-8 text file at file_path."""
+    with open(file_path, 'r', encoding='utf-8') as f:
+        return f.read()
+
+
+def clean_text(text):
+    """Replace non-word characters with spaces and collapse whitespace."""
+    # Punctuation is removed here, so cleaned text can no longer be split into sentences
+    text = re.sub(r'\W+', ' ', text)  # drop special characters
+    text = re.sub(r'\s+', ' ', text)  # merge repeated whitespace
+    return text.strip()  # trim both ends
+
+
+def extract_keywords_using_tfidf(text, top_n=10):
+    """Return the top_n (term, TF-IDF score) pairs of text, highest score first."""
+    vectorizer = TfidfVectorizer()
+    tfidf_matrix = vectorizer.fit_transform([text])
+    feature_names = vectorizer.get_feature_names_out()
+    dense = tfidf_matrix.todense()
+
+    # Pair every term with its TF-IDF score for the single document
+    tfidf_scores = dense.tolist()[0]
+    keywords = sorted(zip(feature_names, tfidf_scores), key=lambda x: x[1], reverse=True)
+
+    return keywords[:top_n]
+
+
+def get_high_frequency_words(text, num=20):
+    """Return the num most common whitespace-separated words with their counts."""
+    words = text.split()
+    word_counts = Counter(words)
+    return word_counts.most_common(num)
+
+
+def get_related_keywords(high_freq_words, standard_keywords):
+    """Return the words of high_freq_words that also occur in standard_keywords."""
+    return [word for word in high_freq_words if word in standard_keywords]
+
+
+def get_similar_sentences(text, keywords):
+    """Return (sentence, count) pairs for sentences containing any keyword."""
+    sentences = re.split(r'[.!?,。]+', text)
+    sentences = [s.strip() for s in sentences if s.strip()]
+
+    # Keep only sentences that mention at least one keyword
+    relevant_sentences = []
+    for s in sentences:
+        if any(keyword in s for keyword in keywords):
+            relevant_sentences.append(s)
+
+    return Counter(relevant_sentences).most_common()
+
+
+def main():
+    """Extract keywords and matching sentences, then write text and Excel reports."""
+    sample_text = read_file(sample_file)
+    standard_text = read_file(standard_file)
+
+    cleaned_sample_text = clean_text(sample_text)
+    cleaned_standard_text = clean_text(standard_text)
+
+    # Keywords of the standard text
+    standard_keywords_data = extract_keywords_using_tfidf(cleaned_standard_text, top_n=10)
+    standard_keywords = [word for word, score in standard_keywords_data]
+    print(f"标准文本中提取到的关键词: {standard_keywords}")
+
+    # Keywords of the sample text
+    high_freq_words_data = extract_keywords_using_tfidf(cleaned_sample_text, top_n=20)
+    high_freq_words = [word for word, score in high_freq_words_data]
+    print(f"样本文本中提取到的高频词汇: {high_freq_words}")
+
+    # Sample keywords that also appear among the standard keywords
+    related_keywords = get_related_keywords(high_freq_words, standard_keywords)
+    print(f"与标准关键字相关的高频词汇:{related_keywords}")
+
+    # Split the raw sample: clean_text() strips the punctuation that delimits sentences
+    high_freq_sentences = get_similar_sentences(sample_text, related_keywords)
+
+    # Save the results to the text file
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write("标准关键词:\n")
+        for word, score in standard_keywords_data:
+            f.write(f"{word}: {score}\n")
+
+        f.write("\n样本高频词汇:\n")
+        for word, score in high_freq_words_data:
+            f.write(f"{word}: {score}\n")
+
+        f.write("\n相关高频句子:\n")
+        for sentence, freq in high_freq_sentences:
+            f.write(f"{sentence.strip()}: {freq}\n")
+
+    # Build the Excel workbook
+    high_freq_df = pd.DataFrame(high_freq_words_data, columns=['词汇', 'TF-IDF值'])
+
+    try:
+        # Delete the old workbook inside the try so a locked file is reported below
+        if os.path.exists(excel_file):
+            os.remove(excel_file)
+        with pd.ExcelWriter(excel_file) as writer:
+            high_freq_df.to_excel(writer, sheet_name='样本高频词汇', index=False)
+            pd.Series(related_keywords, name='相关高频词汇').to_frame().to_excel(writer, sheet_name='相关高频词汇',
+                                                                           index=False)
+    except PermissionError:
+        print(f"无法写入文件 '{excel_file}',请确保该文件未在其他程序中打开。")
+    except Exception as e:
+        print(f"文件写入过程中发生错误: {e}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From de667a2b17631865cb961c621d00b61c77a812b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E8=8D=A3=E6=9D=B0?= <2986694301@qq.com>
Date: Thu, 31 Oct 2024 12:25:56 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E4=B8=BB=E5=87=BD=E6=95=B0=E4=B8=AD?=
 =?UTF-8?q?=E9=97=B4=E7=89=88=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 练习三.py | 319 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 练习三.py

diff --git a/练习三.py b/练习三.py
new file mode 100644
index 0000000..551c4c9
--- /dev/null
+++ b/练习三.py
@@ -0,0 +1,319 @@
+import tkinter as tk
+from tkinter import messagebox, ttk
+import pymysql
+
+
+class MasterChargeSystem(tk.Tk):
+    """Tkinter application with login, registration, university list and comment views."""
+
+    # Single place for the MySQL connection settings used by every query below.
+    # NOTE(review): credentials are hard-coded; consider loading them from configuration.
+    DB_CONFIG = dict(host='localhost', user='root', password='123456', database='mynewdb')
+
+    def __init__(self):
+        """Create the root window and show the login form."""
+        super().__init__()
+        self.login_frame = None
+        self.register_frame = None
+        self.comment_frame = None
+        # Defined up front so callbacks can test them before the main window is built
+        self.main_frame = None
+        self.tree = None
+        self.data = ()
+        self.create_login_window()
+
+    def _connect(self):
+        """Open a new database connection from DB_CONFIG."""
+        return pymysql.connect(**self.DB_CONFIG)
+
+    def register_database(self, username, password):
+        """Insert a new row into user1 and commit it."""
+        # NOTE(review): the password is stored as plain text; hashing it needs a data migration
+        with self._connect() as connection:
+            cursor = connection.cursor()
+            sql = "INSERT INTO user1(username, password) VALUES (%s, %s)"
+            cursor.execute(sql, (username, password))
+            connection.commit()
+
+    def login(self):
+        """Check the entered credentials against user1 and open the main window on success."""
+        username = self.username_entry.get().strip()
+        password = self.password_entry.get().strip()
+
+        try:
+            with self._connect() as connection:
+                cursor = connection.cursor()
+                sql = "SELECT password FROM user1 WHERE username = %s"
+                cursor.execute(sql, (username,))
+                result = cursor.fetchone()
+        except pymysql.Error as err:
+            # Report database failures in the UI instead of failing silently in the callback
+            messagebox.showerror("数据库错误", str(err))
+            return
+
+        if result:
+            stored_password = result[0]
+            if stored_password == password:
+                messagebox.showinfo("登录", "登陆成功")
+                self.show_main_window()
+            else:
+                messagebox.showerror("登录", "用户名或密码错误")
+        else:
+            messagebox.showerror("登录", "用户名不存在")
+
+    def register(self):
+        """Validate the registration form, create the account and return to the login form."""
+        username = self.username_entry.get().strip()
+        password = self.password_entry.get().strip()
+        confirm_password = self.confirm_password_entry.get().strip()
+
+        if username and password:
+            if password == confirm_password:
+                try:
+                    self.register_database(username, password)
+                except pymysql.Error as err:
+                    # e.g. connection failure or a rejected insert; stay on the form
+                    messagebox.showerror("数据库错误", str(err))
+                    return
+                messagebox.showinfo("注册", "注册成功")
+                self.create_login_window()
+            else:
+                messagebox.showerror("注册", "两次输入的密码不匹配")
+        else:
+            messagebox.showerror("注册", "用户名或密码为空")
+
+    def show_main_window(self):
+        """Switch to the full-screen main view listing universities_beijing rows."""
+        if self.login_frame:
+            self.login_frame.pack_forget()
+        if self.comment_frame:
+            self.comment_frame.pack_forget()
+
+        self.title("考研信息系统")
+        self.attributes('-fullscreen', True)
+
+        self.main_frame = tk.Frame(self)
+        self.main_frame.pack(fill=tk.BOTH, expand=True, padx=50, pady=50)
+
+        main_label = tk.Label(self.main_frame, text="欢迎来到考研信息系统!")
+        main_label.pack(pady=10)
+
+        # Search bar
+        search_frame = tk.Frame(self.main_frame)
+        search_frame.pack(pady=10)
+
+        search_label = tk.Label(search_frame, text="搜索:")
+        search_label.pack(side=tk.LEFT)
+
+        self.search1_entry = tk.Entry(search_frame)
+        self.search1_entry.pack(side=tk.LEFT, padx=5)
+
+        self.search2_entry = ttk.Combobox(search_frame, values=['1', '2'])
+        self.search2_entry.pack(side=tk.LEFT, padx=5)
+        affirm_button = tk.Button(search_frame, text="确认", command=self.search_data)
+        affirm_button.pack(side=tk.RIGHT)
+
+        # Forget any previous table so a failed reload cannot leave search_data on stale widgets
+        self.tree = None
+        try:
+            with self._connect() as connection:
+                cursor = connection.cursor()
+                # One query provides both the column names and the rows
+                cursor.execute("SELECT * FROM universities_beijing")
+                self.columns = [description[0] for description in cursor.description]
+                self.data = cursor.fetchall()
+
+                # Frame holding the Treeview and its scrollbar
+                tree_frame = tk.Frame(self.main_frame)
+                tree_frame.pack(fill=tk.BOTH, expand=True)
+
+                self.tree = ttk.Treeview(tree_frame)
+                self.vertical_scrollbar = ttk.Scrollbar(tree_frame, orient=tk.VERTICAL, command=self.tree.yview)
+                self.tree.configure(yscrollcommand=self.vertical_scrollbar.set)
+
+                # Configure the columns
+                self.tree['columns'] = self.columns
+                self.tree.column("#0", width=0, stretch=tk.NO)  # hide the empty tree column
+
+                # One heading per database column
+                for column in self.columns:
+                    self.tree.column(column, anchor=tk.W, width=100, stretch=tk.YES)
+                    self.tree.heading(column, text=column, anchor=tk.W)
+
+                self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+                self.vertical_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
+
+                # Insert the rows
+                for row in self.data:
+                    self.tree.insert("", tk.END, values=row)
+
+                # An empty event sequence is rejected by Tk; react to row selection instead
+                self.tree.bind("<<TreeviewSelect>>", self.show_university_details)
+
+        except pymysql.Error as err:
+            messagebox.showerror("数据库错误", str(err))
+
+        exit_button = tk.Button(self.main_frame, text="退出", command=self.quit)
+        exit_button.pack(pady=10)
+
+        comment_button = tk.Button(self.main_frame, text="评论", command=self.comment)
+        comment_button.pack(pady=10)
+
+    def show_university_details(self, event):
+        """Open a detail window with a comment box for the selected row."""
+        selection = self.tree.selection()
+        if not selection:
+            # Selection events also fire when the selection is cleared
+            return
+        university_data = self.tree.item(selection[0], 'values')  # values of the selected row
+
+        if university_data:
+            self.detail_window = tk.Toplevel(self)  # new top-level window
+            self.detail_window.title("大学详细信息")
+            self.detail_window.geometry("400x300")
+
+            # Show the university details
+            detail_label = tk.Label(self.detail_window, text=f"大学名称: {university_data[0]}")  # Assuming first column is name
+            detail_label.pack(pady=10)
+
+            additional_info = f"其他信息:\n"  # Here you can add more info as needed.
+            additional_info_label = tk.Label(self.detail_window, text=additional_info)
+            additional_info_label.pack(pady=10)
+
+            # Comment box
+            comment_label = tk.Label(self.detail_window, text="添加评论:")
+            comment_label.pack(pady=10)
+
+            self.comment_entry = tk.Entry(self.detail_window)
+            self.comment_entry.pack(pady=5)
+
+            submit_comment_button = tk.Button(self.detail_window, text="提交评论", command=lambda: self.submit_comment(university_data[0]))
+            submit_comment_button.pack(pady=10)
+
+            self.detail_window.protocol("WM_DELETE_WINDOW", self.detail_window.destroy)
+
+    def submit_comment(self, university_name):
+        """Store the entered comment for university_name in the comments table."""
+        comment = self.comment_entry.get()
+        if comment:
+            try:
+                with self._connect() as connection:
+                    cursor = connection.cursor()
+                    sql = "INSERT INTO comments (university_name, comment) VALUES (%s, %s)"
+                    cursor.execute(sql, (university_name, comment))
+                    connection.commit()
+            except pymysql.Error as err:
+                # Keep the typed comment so the user can retry
+                messagebox.showerror("数据库错误", str(err))
+                return
+
+            messagebox.showinfo("评论", "评论提交成功")
+            self.comment_entry.delete(0, tk.END)  # clear the input box
+        else:
+            messagebox.showerror("评论", "评论不能为空")
+
+    def comment(self):
+        """Switch from the main view to the full-screen comment view."""
+        if self.main_frame:
+            self.main_frame.pack_forget()
+
+        self.title("评论界面")
+        self.attributes('-fullscreen', True)
+
+        self.comment_frame = tk.Frame(self)
+        self.comment_frame.pack(fill=tk.BOTH, expand=True, padx=50, pady=50)
+
+        self.back_button = tk.Button(self.comment_frame, text="返回", command=self.show_main_window)
+        self.back_button.pack(pady=10)
+
+    def search_data(self):
+        """Show only the rows with a cell containing the search text (case-insensitive)."""
+        if self.tree is None:
+            # The table was never built, e.g. the database could not be reached
+            return
+        # Cell values are lower-cased below, so the search text must be lower-cased too
+        search_term = self.search1_entry.get().lower()
+
+        # Clear the current Treeview
+        self.tree.delete(*self.tree.get_children())
+
+        # Re-insert the rows that match
+        for row in self.data:
+            if any(search_term in str(value).lower() for value in row):
+                self.tree.insert("", tk.END, values=row)
+
+    def show_register_window(self):
+        """Replace the login form with a centred 400x300 registration form."""
+        if self.login_frame:
+            self.login_frame.pack_forget()
+        self.title("用户注册界面")
+        sw = self.winfo_screenwidth()
+        sh = self.winfo_screenheight()
+        Width = 400
+        Height = 300
+        cen_x = (sw - Width) / 2
+        cen_y = (sh - Height) / 2
+        self.geometry('%dx%d+%d+%d' % (Width, Height, cen_x, cen_y))
+
+        self.register_frame = tk.Frame(self)
+        self.register_frame.pack(padx=20, pady=20)
+
+        self.username_label = tk.Label(self.register_frame, text="用户名:")
+        self.username_label.grid(row=0, column=0)
+        self.username_entry = tk.Entry(self.register_frame)
+        self.username_entry.grid(row=0, column=1)
+
+        self.password_label = tk.Label(self.register_frame, text="密码:")
+        self.password_label.grid(row=1, column=0)
+        self.password_entry = tk.Entry(self.register_frame, show="*")
+        self.password_entry.grid(row=1, column=1)
+
+        self.confirm_password_label = tk.Label(self.register_frame, text="确认密码:")
+        self.confirm_password_label.grid(row=2, column=0)
+        self.confirm_password_entry = tk.Entry(self.register_frame, show="*")
+        self.confirm_password_entry.grid(row=2, column=1)
+
+        self.register_button = tk.Button(self.register_frame, text="注册", command=self.register)
+        self.register_button.grid(row=3, columnspan=2, pady=10)
+
+        self.back_to_login_button = tk.Button(self.register_frame, text="返回登录", command=self.create_login_window)
+        self.back_to_login_button.grid(row=4, columnspan=2)
+
+    def create_login_window(self):
+        """Hide the register and comment frames and show a centred 400x300 login form."""
+        if self.register_frame:
+            self.register_frame.pack_forget()
+        if self.comment_frame:
+            self.comment_frame.pack_forget()
+
+        self.title("用户登录界面")
+        sw = self.winfo_screenwidth()
+        sh = self.winfo_screenheight()
+        Width = 400
+        Height = 300
+        cen_x = (sw - Width) / 2
+        cen_y = (sh - Height) / 2
+        self.geometry('%dx%d+%d+%d' % (Width, Height, cen_x, cen_y))
+
+        self.login_frame = tk.Frame(self)
+        self.login_frame.pack(padx=20, pady=20)
+
+        self.username_label = tk.Label(self.login_frame, text="用户名:")
+        self.username_label.grid(row=0, column=0)
+        self.username_entry = tk.Entry(self.login_frame)
+        self.username_entry.grid(row=0, column=1)
+
+        self.password_label = tk.Label(self.login_frame, text="密码:")
+        self.password_label.grid(row=1, column=0)
+        self.password_entry = tk.Entry(self.login_frame, show="*")
+        self.password_entry.grid(row=1, column=1)
+
+        self.login_button = tk.Button(self.login_frame, text="登录", command=self.login)
+        self.login_button.grid(row=2, columnspan=2, pady=10)
+
+        self.register_button = tk.Button(self.login_frame, text="注册", command=self.show_register_window)
+        self.register_button.grid(row=3, columnspan=2)
+
+if __name__ == "__main__":
+    app = MasterChargeSystem()
+    app.mainloop()