From c22c921cf83f2b33eb2b93c33606a0eebc0699be Mon Sep 17 00:00:00 2001
From: p26zockiw <1285381170@qq.com>
Date: Sun, 17 Mar 2024 20:53:58 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E8=A7=82=E5=AF=9F=E8=80=85=E6=A8=A1?=
 =?UTF-8?q?=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 基本结构/观察者模式/Observer.py | 77 ++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 基本结构/观察者模式/Observer.py

diff --git a/基本结构/观察者模式/Observer.py b/基本结构/观察者模式/Observer.py
new file mode 100644
index 0000000..7d73002
--- /dev/null
+++ b/基本结构/观察者模式/Observer.py
@@ -0,0 +1,77 @@
+import os,re,string,operator
+from collections import Counter
+
+# TextProcessor 类负责处理文本并计算词频。当文本处理完成后，它会通过 notify 方法通知所有注册的观察者。
+# WordFrequencyObserver 类是一个具体的观察者，它实现了 update 方法来接收词频更新并打印前10个最常见的单词。
+class Subject:
+    """Maintain the list of observers: subscribe, unsubscribe and broadcast."""
+    def __init__(self):
+        self._observers = []  # private by convention; change via attach/detach
+
+    def attach(self, observer):
+        self._observers.append(observer)
+
+    def detach(self, observer):  # raises ValueError if observer is not attached
+        self._observers.remove(observer)
+
+    def notify(self, word_freqs):
+        for observer in tuple(self._observers):  # snapshot: update() may detach
+            observer.update(word_freqs)
+
+class Observer:
+    """Abstract observer; concrete subclasses override ``update``."""
+    def update(self, word_freqs):
+        """Receive the latest word frequencies (no-op in the base class)."""
+class WordFrequencyObserver(Observer):
+    """Concrete observer that prints the ten most frequent words."""
+    def update(self, word_freqs):
+        print("词频已经被更新:")
+        self.print_word_freqs(word_freqs)
+
+    def print_word_freqs(self, word_freqs):
+        ranked = sorted(word_freqs.items(), key=lambda kv: kv[1], reverse=True)
+        for word, count in ranked[:10]:
+            print(f"{word}: {count}")
+
+class TextProcessor:
+    """Count the words of a text file and publish the counts via a subject."""
+    _SEPARATORS = re.compile(r'[\W_]+')  # raw string: '\W' is not a str escape
+
+    def __init__(self, subject: "Subject"):
+        self._subject = subject  # object whose notify() receives the counts
+        self._stop_words: set[str] = set()
+
+    def load_stop_words(self, path_to_file):
+        # Accept one word per line as well as comma-separated words on a line.
+        with open(path_to_file, encoding='utf-8') as f:
+            words = (w.strip().lower() for line in f for w in line.split(','))
+            self._stop_words = set(words)
+
+    def process_text(self, path_to_file):
+        with open(path_to_file, encoding='utf-8') as f:
+            data = f.read()  # read everything so the file closes before notify
+        words = self.filter_words(self.re_split(data))
+        self._subject.notify(self.count_frequencies(words))
+
+    def re_split(self, data):  # lower-cased tokens; splits on non-word chars and '_'
+        return self._SEPARATORS.sub(' ', data).lower().split()
+
+    def filter_words(self, word_list):  # drop stop words and words under 3 chars
+        return [w for w in word_list if len(w) >= 3 and w not in self._stop_words]
+
+    def count_frequencies(self, word_list):  # returns a Counter: word -> occurrences
+        return Counter(word_list)
+
+if __name__ == "__main__":
+    import sys  # local import: only the script entry point reads argv
+    # Defaults are machine-specific; override: python Observer.py STOPWORDS TEXT
+    stopwordfilepath = r'C:\Users\asus\Desktop\cppy余悦批注\cppy\data\stop_words.txt'
+    testfilepath = r'C:\Users\asus\Desktop\cppy余悦批注\cppy\data\pride-and-prejudice.txt'
+    if len(sys.argv) == 3:
+        stopwordfilepath, testfilepath = sys.argv[1:]
+    subject = Subject()
+    observer = WordFrequencyObserver()
+    subject.attach(observer)
+    text_processor = TextProcessor(subject)
+    text_processor.load_stop_words(stopwordfilepath)
+    text_processor.process_text(testfilepath)
\ No newline at end of file
-- 
2.34.1


From a66617dcce53e610a9411fb7f7b8e09be9b51168 Mon Sep 17 00:00:00 2001
From: p26zockiw <1285381170@qq.com>
Date: Thu, 21 Mar 2024 17:25:18 +0800
Subject: [PATCH 2/2] ADD file via upload

---
 一盘大棋/A01修改.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 一盘大棋/A01修改.py

diff --git a/一盘大棋/A01修改.py b/一盘大棋/A01修改.py
new file mode 100644
index 0000000..d19a369
--- /dev/null
+++ b/一盘大棋/A01修改.py
@@ -0,0 +1,33 @@
+import re
+from collections import Counter
+import string
+from cppy.cp_util import stopwordfilepath,testfilepath
+
+# Load the comma-separated stop words into a set for O(1) membership tests.
+# They are lower-cased because the text below is lower-cased before matching.
+with open(stopwordfilepath, encoding='utf-8') as f:
+    stop_words = {word.strip().lower() for line in f for word in line.split(',')}
+
+# Single lowercase letters are stop words too; the \w{2,} pattern below never
+# yields one-character tokens, so this has no effect on the counts below.
+stop_words.update(string.ascii_lowercase)
+
+# Read the whole text once; the file is closed before the counting starts.
+with open(testfilepath, encoding='utf-8') as f:
+    text = f.read().lower()
+
+# \b\w{2,}\b keeps runs of two or more word characters, which drops both
+# punctuation and single characters, so no separate length check is needed.
+words = re.findall(r'\b\w{2,}\b', text)
+word_freqs = Counter(word for word in words if word not in stop_words)
+
+# Take the ten most frequent words.
+most_common_words = word_freqs.most_common(10)
+
+# Print the result, one "word - count" pair per line.
+for word, freq in most_common_words:
+    print(f'{word} - {freq}')
+
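+# Rationale: A01 did not exclude the effect of commas, and it sorted while extracting words, which used a lot of resources.
+# Fix: use re for tokenizing so that commas are dropped, and use Counter for the counting.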
+
-- 
2.34.1