t02

1 year ago · 2bfeabe429
parent e5dc492333
commit 2bfeabe429
27 changed files with 722 additions and 0 deletions
--- a/交互/MVC/tf-33.py
+++ b/交互/MVC/tf-33.py
@ -0,0 +1,45 @@
+import sys, collections
+from cppy.cp_util import *
+
+class WordFrequenciesModel:
+    """Model: holds the word frequencies of the most recently loaded file."""
+
+    def __init__(self, path_to_file):
+        """Load the initial frequencies from `path_to_file`."""
+        self.update(path_to_file)
+
+    def update(self, path_to_file):
+        """Recount the words of `path_to_file` and store them in `self.freqs`.
+
+        If reading raises IOError, a message is printed and `self.freqs`
+        becomes an empty counter.
+        """
+        try:
+            self.freqs = collections.Counter(extract_file_words(path_to_file))
+        except IOError:
+            print("File not found")
+            # Same type as the success path, so consumers can rely on Counter
+            # behaviour even after a failed load.
+            self.freqs = collections.Counter()
+
+
+class WordFrequenciesView:
+    """View: presents the data held by the model."""
+
+    def __init__(self, model):
+        # Keep a reference, so every render reads the model's current state.
+        self._model = model
+
+    def render(self):
+        """Sort the model's current frequencies and print them."""
+        print_word_freqs(sort_dict(self._model.freqs))
+
+
+class WordFrequencyController:
+    """Controller: drives the interaction between user, model and view."""
+
+    def __init__(self, model, view):
+        """Store the collaborators and render the model's initial state."""
+        self._model, self._view = model, view
+        view.render()
+
+    def run(self):
+        """Prompt for file paths until the user enters 'q' or input ends.
+
+        Every entered path is loaded into the model, then the view is
+        rendered again.
+        """
+        while True:
+            print("Enter the file path (or 'q' to quit): ", file=sys.stderr, flush=True)
+            line = sys.stdin.readline()
+            # readline() returns '' only at end of input; without this check
+            # the loop would spin forever once stdin is closed.
+            if not line:
+                break
+            filename = line.strip()
+            if filename.lower() == 'q':
+                break
+            self._model.update(filename)
+            self._view.render()
+
+
+# Script entry: build the model, view and controller, then start the prompt loop.
+m = WordFrequenciesModel( testfilepath )
+v = WordFrequenciesView(m)
+c = WordFrequencyController(m, v)  # the constructor already renders once
+c.run()
--- a/交互/桌面/test.txt
+++ b/交互/桌面/test.txt
@ -0,0 +1,2 @@
+" my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
+know. sure "
--- a/交互/桌面/tf-98.py
+++ b/交互/桌面/tf-98.py
@ -0,0 +1,65 @@
+import sys
+from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QTextEdit, QFileDialog
+import cppy.cp_util as util
+
+# 工具函数
+def extract_words(path_to_file):
+    """Return whatever `util.extract_words` produces for `path_to_file`."""
+    words = util.extract_words(path_to_file)
+    return words
+
+def frequencies(word_list):
+    """Return the result of `util.get_frequencies` for `word_list`."""
+    counts = util.get_frequencies(word_list)
+    return counts
+
+def sort(word_freq):
+    """Return the result of `util.sort_dict` for `word_freq`."""
+    ordered = util.sort_dict(word_freq)
+    return ordered
+
+class MenuApp(QWidget):
+    """Window with buttons to pick a file, clear the output, and exit."""
+
+    def __init__(self):
+        super().__init__()
+        self.initUI()
+
+    def initUI(self):
+        """Create the widgets, stack them vertically, and connect the buttons."""
+        self.setWindowTitle('终端菜单')
+        self.setGeometry(100, 100, 400, 300)
+
+        # Buttons
+        self.openFileBtn = QPushButton('上传并打开文件', self)
+        self.continueBtn = QPushButton('继续', self)
+        self.exitBtn = QPushButton('退出', self)
+
+        # Read-only text area that shows the result
+        self.textEdit = QTextEdit(self)
+        self.textEdit.setReadOnly(True)
+
+        # Layout
+        layout = QVBoxLayout()
+        layout.addWidget(self.openFileBtn)
+        layout.addWidget(self.continueBtn)
+        layout.addWidget(self.exitBtn)
+        layout.addWidget(self.textEdit)
+
+        self.setLayout(layout)
+
+        # Connect signals to slots
+        self.openFileBtn.clicked.connect(self.openFile)
+        self.continueBtn.clicked.connect(self.clearText)
+        self.exitBtn.clicked.connect(self.close)
+
+    def openFile(self):
+        """Ask for a file and show its first ten sorted word counts."""
+        options = QFileDialog.Options()
+        fileName, _ = QFileDialog.getOpenFileName(self, "上传并打开文件", "", "All Files (*);;Text Files (*.txt)", options=options)
+        # The dialog returns an empty path when it is cancelled; there is
+        # nothing to process in that case.
+        if not fileName:
+            return
+        word_freqs = sort( frequencies(extract_words( fileName )) )
+        lines = [w + '-' + str(c) + '\n' for (w, c) in word_freqs[ :10 ]]
+        self.textEdit.setText(''.join(lines))
+
+    def clearText(self):
+        """Empty the result area."""
+        self.textEdit.clear()
+
+
+if __name__ == '__main__':
+    # Create the application and window, then hand the event loop's result to the OS.
+    app = QApplication(sys.argv)
+    ex = MenuApp()
+    ex.show()
+    exit_code = app.exec_()
+    sys.exit(exit_code)
--- a/交互/终端/test.txt
+++ b/交互/终端/test.txt
@ -0,0 +1,2 @@
+" my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
+know. sure "
--- a/交互/终端/tf-97.py
+++ b/交互/终端/tf-97.py
@ -0,0 +1,42 @@
+import os
+import cppy.cp_util as util
+
+# 工具函数
+def extract_words(path_to_file):
+    """Return whatever `util.extract_words` produces for `path_to_file`."""
+    words = util.extract_words(path_to_file)
+    return words
+
+def frequencies(word_list):
+    """Return the result of `util.get_frequencies` for `word_list`."""
+    counts = util.get_frequencies(word_list)
+    return counts
+
+def sort(word_freq):
+    """Return the result of `util.sort_dict` for `word_freq`."""
+    ordered = util.sort_dict(word_freq)
+    return ordered
+
+def print_menu():
+    """Print the menu: a blank line, the heading, and the three options."""
+    menu_lines = (
+        "\n菜单选项：",
+        "1. 上传并处理文件",
+        "2. 继续",
+        "3. 退出",
+    )
+    for line in menu_lines:
+        print(line)
+
+def open_and_print_file():
+    """Ask for a file name and print the word frequencies of that file.
+
+    A file that cannot be read is reported and the function returns
+    normally, so the caller's menu loop keeps running.
+    """
+    filename = input("请输入文件名：")
+    try:
+        word_freqs = sort( frequencies(extract_words( filename )) )
+    except OSError as err:
+        # NOTE(review): assumes util.extract_words surfaces open() failures
+        # as OSError — confirm against cp_util.
+        print("无法读取文件：", err)
+        return
+    util.print_word_freqs(word_freqs)
+
+def main():
+    """Show the menu repeatedly and act on the choice until '3' is entered."""
+    running = True
+    while running:
+        print_menu()
+        choice = input("请选择一个选项（1/2/3）：")
+
+        if choice == '3':
+            print("退出程序。")
+            running = False
+        elif choice == '1':
+            open_and_print_file()
+        elif choice != '2':
+            print("无效的输入，请重新输入。")
+        # choice '2' does nothing: the menu is simply shown again
+
+
+# Start the menu loop only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,49 @@
+import string
+from collections import Counter
+from cppy.cp_util import *
+
+# Shared mutable state: the procedures below read and rewrite these globals.
+data = []        # characters of the input text
+words = []       # words left after normalisation and stop-word removal
+word_freqs = []  # (word, count) pairs
+
+################################
+# procedures
+################################
+def read_file(path_to_file):
+    """Add every character of the file at `path_to_file` to the global `data`."""
+    global data
+    with open(path_to_file, encoding='utf-8') as f:
+        text = f.read()
+    data = data + list(text)
+
+def filter_chars_and_normalize():
+    """Normalise the global `data` and extend the global `words` from it.
+
+    Non-alphanumeric characters in `data` become spaces and the rest is
+    lower-cased. The resulting words are added to `words`, which is then
+    stripped of stop words and of single lower-case ASCII letters.
+    """
+    global data
+    global words
+    # Slice assignment keeps the same list object, as the original index loop did.
+    data[:] = [ch.lower() if ch.isalnum() else ' ' for ch in data]
+
+    words = words + ''.join(data).split()
+
+    # Same explicit encoding as read_file(); the platform default may differ.
+    with open(stopwordfilepath, encoding='utf-8') as f:
+        stop_words = set(f.read().split(','))
+    stop_words.update(string.ascii_lowercase)
+    words = [word for word in words if word not in stop_words]
+
+def frequencies():
+    """Append a (word, 1) pair to the global `word_freqs` for each entry of `words`."""
+    global words
+    global word_freqs
+    for word in words:
+        word_freqs.append((word, 1))
+
+def sort():
+    """Rebind the global `word_freqs` to the counts of `words`, most common first."""
+    global word_freqs
+    counts = Counter(words)
+    word_freqs = counts.most_common()
+
+
+if __name__ == "__main__":
+    # Run the stages in order; each one works on the module-level globals.
+    read_file( testfilepath )
+    for step in (filter_chars_and_normalize, frequencies, sort):
+        step()
+
+    for tf in word_freqs[:10]:
+        print(tf[0], '-', tf[1])
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,32 @@
+import re
+from cppy.cp_util import *
+
+
+def filter_chars_and_normalize(str_data):
+    """Return the lower-cased words of `str_data` that are not stop words.
+
+    Runs of non-word characters and underscores are treated as separators.
+    """
+    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
+    pattern = re.compile(r'[\W_]+')
+    word_list = pattern.sub(' ', str_data).lower().split()
+    stop_words = get_stopwords()
+    return [w for w in word_list if w not in stop_words]
+
+
+def frequencies(word_list):
+    """Return a dict mapping each word of `word_list` to its number of occurrences."""
+    counts = {}
+    for word in word_list:
+        if word in counts:
+            counts[word] += 1
+        else:
+            counts[word] = 1
+    return counts
+
+
+def sort(word_freq):
+    """Return the (word, count) pairs of `word_freq`, highest count first."""
+    pairs = list(word_freq.items())
+    pairs.sort(key=lambda pair: pair[1], reverse=True)
+    return pairs
+
+
+def print_all(word_freqs, n = 10 ):
+    """Print the first `n` (word, count) pairs, one per line, as 'word - count'."""
+    for pair in word_freqs[:n]:
+        word, freq = pair
+        print(word, '-', freq)
+
+
+if __name__ == "__main__":
+    # Same pipeline as a nested call, spelled out one stage per line.
+    text = read_file( testfilepath )
+    counts = frequencies(filter_chars_and_normalize(text))
+    print_all(sort(counts))
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,39 @@
+import re, operator
+from cppy.cp_util import *
+
+def print_text(word_freqs, func):
+    """Print `word_freqs`, then call the continuation `func` with None."""
+    print_word_freqs(word_freqs) 
+    func(None)
+
+def frequencies(word_list, func):
+    """Count `word_list` and pass the counts on to `func`, with print_text as the next step."""
+    func(get_frequencies(word_list), print_text)
+
+def scan(str_data, func):
+    """Split `str_data` on whitespace and pass the words to `func`, with frequencies as the next step."""
+    word_list = str_data.split()
+    func(word_list, frequencies)
+
+def filter_chars(str_data, func):
+    """Replace separator runs in `str_data` with spaces and pass the text to `func`.
+
+    Runs of non-word characters and underscores count as separators; `scan`
+    is handed over as the next step.
+    """
+    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
+    pattern = re.compile(r'[\W_]+')
+    func(pattern.sub(' ', str_data), scan)
+
+def remove_stop_words(word_list, func):
+    """Drop the stop words from `word_list` and pass the rest to `func`, with sort as the next step."""
+    stop_words = get_stopwords()
+    kept = [w for w in word_list if w not in stop_words]
+    func(kept, sort)
+
+def sort(wf, func):
+    """Order the items of `wf` by descending count and pass them to `func`, with no_op as the next step."""
+    ranked = sorted(wf.items(), key=operator.itemgetter(1), reverse=True)
+    func(ranked, no_op)
+
+def no_op(func):
+    """End of the chain: accept the argument and do nothing."""
+    return None
+
+def normalize(str_data, func):
+    """Lower-case `str_data` and pass it to `func`, with remove_stop_words as the next step."""
+    lowered = str_data.lower()
+    func(lowered, remove_stop_words)
+
+def read_file(path_to_file, func):
+    """Read the file as UTF-8 and pass its text to `func`, with normalize as the next step."""
+    with open(path_to_file, encoding='utf-8') as source:
+        text = source.read()
+    # The continuation runs after the file has been closed.
+    func(text, normalize)
+
+
+# Start the chain; each function receives the next step as its continuation.
+if __name__ == "__main__":
+    read_file(testfilepath, filter_chars)
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,25 @@
+import re
+from collections import Counter
+from cppy.cp_util import *
+
+# Read the file
+with open(testfilepath,encoding='utf-8') as f:
+    data = f.read().lower()  # lower-case right away
+
+# Replace every run of non-word characters and underscores with one space
+# (raw string: '\W' inside a plain literal is an invalid escape sequence)
+data = re.sub(r'[\W_]+', ' ', data)
+
+# Split into words
+words = data.split()
+
+# Remove stop words
+stop_words = get_stopwords()
+words = [word for word in words if word not in stop_words]
+
+# Count word frequencies
+word_freqs = Counter(words)
+
+# Order by descending count and print
+sorted_word_freqs = word_freqs.most_common()
+
+print_word_freqs(sorted_word_freqs)
--- a/对象化/tf-11.py
+++ b/对象化/tf-11.py
@ -0,0 +1,53 @@
+from collections import Counter
+from cppy.cp_util import *
+    
+
+class DataStorageManager:
+    """Data model: holds the split content of the input file."""
+
+    def __init__(self, path_to_file):
+        # Read the file, then split it with the shared re_split helper.
+        self._data = re_split(read_file(path_to_file))
+
+    def words(self):
+        """Return the stored result of splitting the file."""
+        return self._data
+
+
+class StopWordManager:
+    """Stop-word model: answers whether a word is a stop word."""
+
+    def __init__(self):
+        # The collection comes from the shared get_stopwords helper.
+        self._stop_words = get_stopwords()
+
+    def is_stop_word(self, word):
+        """Return True when `word` is contained in the stop-word collection."""
+        found = word in self._stop_words
+        return found
+
+
+class WordFrequencyManager:
+    """Word-frequency model: counts words and reports them by frequency."""
+
+    def __init__(self):
+        self._word_freqs = Counter()
+
+    def increment_count(self, word):
+        """Add one occurrence of `word`."""
+        self._word_freqs.update((word,))
+
+    def sorted(self):
+        """Return all (word, count) pairs, most common first."""
+        ranking = self._word_freqs.most_common()
+        return ranking
+
+
+class WordFrequencyController:
+    """Wires the three models together and runs the word count."""
+
+    def __init__(self, path_to_file):
+        self._storage_manager = DataStorageManager(path_to_file)
+        self._stop_word_manager = StopWordManager()
+        self._word_freq_manager = WordFrequencyManager()
+
+    def run(self):
+        """Count every non-stop word of the file and print the sorted result."""
+        for w in self._storage_manager.words():
+            if self._stop_word_manager.is_stop_word(w):
+                continue
+            self._word_freq_manager.increment_count(w)
+
+        print_word_freqs(self._word_freq_manager.sorted())
+
+
+
+# Build the controller for the test file and run it when executed as a script.
+if __name__ == '__main__':    
+    WordFrequencyController(testfilepath).run()
--- a/对象化/tf-13.py
+++ b/对象化/tf-13.py
@ -0,0 +1,41 @@
+from cppy.cp_util import *
+
+def extract_words(obj, path_to_file):
+    """Store the split content of the file under obj['data']."""
+    text = read_file(path_to_file)
+    obj['data'] = re_split(text)
+
+def load_stop_words(obj):
+    """Store the result of get_stopwords() under obj['stop_words']."""
+    stop_words = get_stopwords()
+    obj['stop_words'] = stop_words
+
+def increment_count(obj, w):
+    """Add one to the count of `w` in obj['freqs'], starting at 1 for a new word."""
+    freqs = obj['freqs']
+    if w in freqs:
+        freqs[w] = freqs[w] + 1
+    else:
+        freqs[w] = 1
+
+# "Objects" built from plain dicts: data entries sit next to lambdas that
+# act as methods. Each lambda looks its own dict up by its module-level
+# name when it is called.
+
+# Holds the words of the input file; 'init' fills 'data'.
+data_storage_obj = {
+    'data' : [],
+    'init' : lambda path_to_file : extract_words(data_storage_obj, path_to_file),
+    'words' : lambda : data_storage_obj['data']
+}
+
+# Holds the stop words; 'init' fills 'stop_words'.
+stop_words_obj = {
+    'stop_words' : [],
+    'init' : lambda : load_stop_words(stop_words_obj),
+    'is_stop_word' : lambda word : word in stop_words_obj['stop_words']
+}
+
+# Holds the word counts; 'sorted' passes them through sort_dict.
+word_freqs_obj = {
+    'freqs' : {},
+    'increment_count' : lambda w : increment_count(word_freqs_obj, w),
+    'sorted' : lambda : sort_dict(word_freqs_obj['freqs']) 
+}
+
+
+if __name__ == '__main__':
+    # Fill the two data holders first.
+    data_storage_obj['init']( testfilepath )
+    stop_words_obj['init']()
+
+    # Count every word that is not a stop word.
+    for w in data_storage_obj['words']():
+        if stop_words_obj['is_stop_word'](w):
+            continue
+        word_freqs_obj['increment_count'](w)
+
+    # Print the first ten entries of the sorted result.
+    word_freqs = word_freqs_obj['sorted']()
+    for (w, c) in word_freqs[:10]:
+        print(w, '-', c)
--- a/对象接口/tf-14A.py
+++ b/对象接口/tf-14A.py
@ -0,0 +1,59 @@
+import re
+from collections import Counter
+from cppy.cp_util import *
+
+class DataStorageManager1:
+    """Word source that splits the file content with the re_split helper."""
+
+    def __init__(self, path_to_file):
+        text = read_file(path_to_file)
+        self._data = re_split(text)
+
+    def words(self):
+        """Return the stored words."""
+        return self._data
+    
+
+class DataStorageManager2:
+    """Word source that keeps runs of two or more lower-case ASCII letters."""
+
+    def __init__(self, path_to_file):
+        text = read_file(path_to_file)
+        # NOTE(review): upper-case letters never match this pattern; this
+        # presumably relies on read_file returning lower-cased text — confirm.
+        self._data = re.findall('[a-z]{2,}', text)
+
+    def words(self):
+        """Return the stored words."""
+        return self._data
+
+
+class StopWordManager:
+    """Answers whether a word is a stop word."""
+
+    def __init__(self):
+        # A set gives constant-time membership tests.
+        stop_words = get_stopwords()
+        self._stop_words = set(stop_words)
+
+    def is_stop_word(self, word):
+        """Return True when `word` is in the stop-word set."""
+        found = word in self._stop_words
+        return found
+
+
+class WordFrequencyManager:
+    """Counts words and reports them by frequency."""
+
+    def __init__(self):
+        self.word_freqs = Counter()
+
+    def increment_count(self, word):
+        """Add one occurrence of `word`."""
+        self.word_freqs.update((word,))
+
+    def sorted(self):
+        """Return all (word, count) pairs, most common first."""
+        ranking = self.word_freqs.most_common()
+        return ranking
+
+
+#
+# 应用类
+#
+class WordFrequencyController:
+    """Application class: combines a word source, a stop-word filter and a counter."""
+
+    def __init__(self, path_to_file):
+        # self._storage = DataStorageManager1(path_to_file)
+        self.storage = DataStorageManager2(path_to_file)
+        self.stop_word_manager = StopWordManager()
+        self.word_freq_counter = WordFrequencyManager()
+
+    def run(self):
+        """Count every non-stop word and print the sorted result.
+
+        Only the methods words(), is_stop_word(), increment_count() and
+        sorted() are used, so this can be seen as protocol-oriented code.
+        """
+        for word in self.storage.words():
+            if self.stop_word_manager.is_stop_word(word):
+                continue
+            self.word_freq_counter.increment_count(word)
+
+        ranking = self.word_freq_counter.sorted()
+        print_word_freqs( ranking )
+
+
+# Build the controller for the test file and run it when executed as a script.
+if __name__ == '__main__':
+    WordFrequencyController(testfilepath).run()
--- a/对象接口/tf-14B.py
+++ b/对象接口/tf-14B.py
@ -0,0 +1,92 @@
+import abc, re
+from cppy.cp_util import *
+
+#
+# 接口
+#
+class IDataStorage(metaclass=abc.ABCMeta):
+    """Interface of a word source."""
+
+    @abc.abstractmethod
+    def words(self):
+        """Return the words held by this storage."""
+
+class IStopWordFilter(metaclass=abc.ABCMeta):
+    """Interface of a stop-word filter."""
+
+    @abc.abstractmethod
+    def is_stop_word(self, word):
+        """Tell whether `word` is a stop word."""
+
+class IWordFrequencyCounter(metaclass=abc.ABCMeta):
+    """Interface of a word-frequency counter."""
+
+    @abc.abstractmethod
+    def increment_count(self, word):
+        """Record one occurrence of `word`."""
+
+    @abc.abstractmethod
+    def sorted(self):
+        """Return the recorded counts in sorted order."""
+
+#
+# 类实现
+#
+class DataStorageManager1:
+    """Word source that splits the file content with the re_split helper."""
+
+    def __init__(self, path_to_file):
+        text = read_file(path_to_file)
+        self._data = re_split(text)
+
+    def words(self):
+        """Return the stored words."""
+        return self._data
+
+
+class DataStorageManager2:
+    """Word source that keeps runs of two or more lower-case ASCII letters."""
+
+    def __init__(self, path_to_file):
+        text = read_file(path_to_file)
+        # NOTE(review): upper-case letters never match this pattern; this
+        # presumably relies on read_file returning lower-cased text — confirm.
+        self._data = re.findall('[a-z]{2,}', text)
+
+    def words(self):
+        """Return the stored words."""
+        return self._data
+
+            
+class StopWordManager:
+    """Answers whether a word is a stop word."""
+
+    def __init__(self):
+        # The collection comes from the shared get_stopwords helper.
+        self._stop_words = get_stopwords()
+
+    def is_stop_word(self, word):
+        """Return True when `word` is contained in the stop-word collection."""
+        found = word in self._stop_words
+        return found
+
+
+class WordFrequencyManager:
+    """Counts words in a plain dict and reports them through sort_dict."""
+
+    def __init__(self):
+        self._word_freqs = {}
+
+    def increment_count(self, word):
+        """Add one occurrence of `word`."""
+        if word in self._word_freqs:
+            self._word_freqs[word] += 1
+        else:
+            self._word_freqs[word] = 1
+
+    def sorted(self):
+        """Return the counts as ordered by the shared sort_dict helper."""
+        ranking = sort_dict( self._word_freqs )
+        return ranking
+
+
+#
+# Register the implementations with the abstract interfaces as virtual
+# subclasses. This step is not strictly necessary: the controller below
+# only calls the methods and never checks the types.
+#
+# IDataStorage.register(subclass=DataStorageManager1)
+IDataStorage.register(subclass=DataStorageManager2)
+IStopWordFilter.register(subclass=StopWordManager)
+IWordFrequencyCounter.register(subclass=WordFrequencyManager)
+
+
+#
+# 应用类
+#
+class WordFrequencyController:
+    """Application class: combines a word source, a stop-word filter and a counter."""
+
+    def __init__(self, path_to_file):
+        # self._storage = DataStorageManager1(path_to_file)
+        self.storage = DataStorageManager2(path_to_file)
+        self.stop_word_manager = StopWordManager()
+        self.word_freq_counter = WordFrequencyManager()
+
+    def run(self):
+        """Count every non-stop word and print the sorted result.
+
+        Only the methods words(), is_stop_word(), increment_count() and
+        sorted() are used, so this can be seen as protocol-oriented code.
+        """
+        for word in self.storage.words():
+            if self.stop_word_manager.is_stop_word(word):
+                continue
+            self.word_freq_counter.increment_count(word)
+
+        ranking = self.word_freq_counter.sorted()
+        print_word_freqs( ranking )
+
+
+# Build the controller for the test file and run it when executed as a script.
+if __name__ == '__main__':
+    WordFrequencyController(testfilepath).run()
--- a/插件/pycache/tf-20.cpython-38.pyc
+++ b/插件/pycache/tf-20.cpython-38.pyc
--- a/插件/config.ini
+++ b/插件/config.ini
@ -0,0 +1,5 @@
+[Plugins]
+;; Options: plugins/words1.pyc, plugins/words2.pyc
+words = plugins/words1.pyc
+;; Options: plugins/frequencies1.pyc, plugins/frequencies2.pyc
+frequencies = plugins/frequencies1.pyc            
--- a/插件/plugins-src/compile.sh
+++ b/插件/plugins-src/compile.sh
@ -0,0 +1,2 @@
+# Compile each plugin source to a legacy-named .pyc next to it (-b), so the
+# copied files are called e.g. words1.pyc, the form config.ini refers to.
+# Files taken from __pycache__ would be named words1.cpython-XY.pyc instead.
+python -m compileall -b .
+cp *.pyc ../plugins
--- a/插件/plugins-src/frequencies1.py
+++ b/插件/plugins-src/frequencies1.py
@ -0,0 +1,11 @@
+import operator
+
+def top25(word_list):
+    """Return the most frequent words of `word_list` as (word, count) pairs.
+
+    Despite the name, the result is cut to ten entries.
+    """
+    word_freqs = {}
+    for w in word_list:
+        word_freqs[w] = word_freqs.get(w, 0) + 1
+    ranked = sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
+    return ranked[:10]
+
--- a/插件/plugins-src/frequencies2.py
+++ b/插件/plugins-src/frequencies2.py
@ -0,0 +1,6 @@
+import operator, collections
+
+def top25(word_list):
+    """Return the most frequent words of `word_list` as (word, count) pairs.
+
+    Despite the name, the result is cut to ten entries.
+    """
+    return collections.Counter(word_list).most_common(10)
+
--- a/插件/plugins-src/words1.py
+++ b/插件/plugins-src/words1.py
@ -0,0 +1,14 @@
+import sys, re, string
+from cppy.cp_util import *
+
+def extract_words(path_to_file):
+    """Return the lower-cased words of the file that are not stop words.
+
+    The file is read as UTF-8; runs of non-word characters and underscores
+    are treated as separators.
+    """
+    with open(path_to_file,encoding='utf-8') as f:
+        str_data = f.read()
+    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
+    pattern = re.compile(r'[\W_]+')
+    word_list = pattern.sub(' ', str_data).lower().split()
+
+    stop_words = get_stopwords()
+
+    return [w for w in word_list if w not in stop_words]
+
--- a/插件/plugins-src/words2.py
+++ b/插件/plugins-src/words2.py
@ -0,0 +1,8 @@
+import sys, re, string
+from cppy.cp_util import *
+
+def extract_words(path_to_file):
+    """Return the words of the file that are not stop words.
+
+    The file is read as UTF-8 and lower-cased; a word is a run of two or
+    more ASCII letters.
+    """
+    # The context manager closes the file; the bare open() call left that
+    # to the garbage collector.
+    with open(path_to_file, encoding='utf-8') as f:
+        text = f.read().lower()
+    words = re.findall('[a-z]{2,}', text)
+    stopwords = get_stopwords()
+    return [w for w in words if w not in stopwords]
+
--- a/插件/plugins/frequencies1.pyc
+++ b/插件/plugins/frequencies1.pyc
--- a/插件/plugins/frequencies2.cpython-38.pyc
+++ b/插件/plugins/frequencies2.cpython-38.pyc
--- a/插件/plugins/words1.pyc
+++ b/插件/plugins/words1.pyc
--- a/插件/plugins/words2.cpython-38.pyc
+++ b/插件/plugins/words2.cpython-38.pyc
--- a/基本结构/041
+++ b/基本结构/041
@ -0,0 +1,21 @@
+import configparser, importlib.machinery
+from cppy.cp_util import *
+
+
+def load_plugins():
+    """Load the two plugins named in config.ini into `tfwords` and `tffreqs`.
+
+    The working directory is changed to this script's directory, so
+    config.ini and the plugin paths in it are resolved relative to it.
+    Both plugins are compiled (.pyc) modules without source.
+    """
+    # Imported here so the function does not depend on these names leaking
+    # in through the star import at the top of the file.
+    import importlib.util
+    import os
+    import sys
+
+    def _load_compiled(module_name, pyc_path):
+        # Replacement for the deprecated Loader.load_module().
+        loader = importlib.machinery.SourcelessFileLoader(module_name, pyc_path)
+        spec = importlib.util.spec_from_loader(module_name, loader)
+        module = importlib.util.module_from_spec(spec)
+        # load_module() also registered the module; keep doing so.
+        sys.modules[module_name] = module
+        loader.exec_module(module)
+        return module
+
+    global tfwords, tffreqs
+    config = configparser.ConfigParser()
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    os.chdir(script_dir)
+    config.read("config.ini")
+    words_plugin = config.get("Plugins", "words")
+    frequencies_plugin = config.get("Plugins", "frequencies")
+    tfwords = _load_compiled('tfwords', words_plugin)
+    tffreqs = _load_compiled('tffreqs', frequencies_plugin)
+
+# Load the configured plugins, then run the test file through them.
+load_plugins()
+word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath ))
+
+for pair in word_freqs:
+    print(pair[0], '-', pair[1])
+
--- a/restful/test.txt
+++ b/restful/test.txt
@ -0,0 +1,2 @@
+" my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
+know. sure "
--- a/restful/tf-34.py
+++ b/restful/tf-34.py
@ -0,0 +1,107 @@
+import re, sys
+from cppy.cp_util import *
+
+stops = get_stopwords()  # words left out of the counts
+data = {}                # cache: file path -> (word, count) pairs, highest count first
+
+# Path note: to try this out, enter test.txt; the file is looked up in this script's directory.
+#################################################################################
+# Server side
+def error_state():
+    """Return the (representation, link) pair used when a request fails."""
+    message = "Something wrong"
+    back_to_default = ["get", "default", None]
+    return message, back_to_default
+
+def quit_handler(args):
+    """End the program; `args` is ignored."""
+    raise SystemExit(" ... ")
+
+def upload_get_handler(args):
+    """Return the upload form: a prompt and the link that posts the file name."""
+    prompt = "Name of file to upload?"
+    form_target = ["post", "file"]
+    return prompt, form_target
+
+def default_get_handler(args):
+    """Return the start page: the main menu and the request behind each choice."""
+    rep = "\n".join([
+        "What would you like to do?",
+        "1 - Quit",
+        "2 - Upload file",
+    ])
+    links = {
+        "1": ["post", "execution", None],
+        "2": ["get", "file_form", None],
+    }
+    return rep, links
+
+def upload_post_handler(args):
+    """Count the words of the uploaded file and show its most frequent word.
+
+    `args` holds the file name typed by the user as its first element; the
+    name is resolved relative to this script's directory. Returns the page
+    of the first word, or the error state when `args` is None or the file
+    cannot be processed.
+    """
+    # Imported here so the function does not depend on `os` leaking in
+    # through the star import at the top of the file.
+    import os
+
+    def create_data(fn):
+        # Each file is counted once; later requests reuse the cached result.
+        if fn in data:
+            return
+        word_freqs = {}
+        with open(fn, encoding='utf-8') as f:
+            for x in re.split("[^a-zA-Z]+", f.read()):
+                w = x.lower()
+                if x and w not in stops:
+                    word_freqs[w] = word_freqs.get(w, 0) + 1
+        data[fn] = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
+
+    if args is None:
+        return error_state()
+    filename = args[0]
+    try:
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        filename = os.path.join(script_dir, filename)
+        create_data(filename)
+    except Exception:
+        # Not a bare except: Ctrl-C and sys.exit must still end the program.
+        print("Unexpected error: %s" % sys.exc_info()[0])
+        return error_state()
+    return word_get_handler([filename, 0])
+
+def word_get_handler(args):
+    """Return the page for one ranked word of an uploaded file.
+
+    `args` is [filename, word_index]. Past the end of the ranking the page
+    shows "no more words" with a count of 0.
+    """
+    filename, word_index = args[0], args[1]
+
+    ranking = data[filename]
+    if word_index < len(ranking):
+        word_info = ranking[word_index]
+    else:
+        word_info = ("no more words", 0)
+
+    rep = "".join([
+        '\n#{0}: {1} - {2}'.format(word_index+1, word_info[0], word_info[1]),
+        "\n\nWhat would you like to do next?",
+        "\n1 - Quit",
+        "\n2 - Upload file",
+        "\n3 - See next most-frequently occurring word",
+    ])
+    links = {
+        "1": ["post", "execution", None],
+        "2": ["get", "file_form", None],
+        "3": ["get", "word", [filename, word_index+1]],
+    }
+    return rep, links
+
+# Handler registration: keys have the form "<verb>_<uri>", which is how
+# handle_request() builds its lookup key.
+handlers = {"post_execution" : quit_handler,
+            "get_default" : default_get_handler,
+            "get_file_form" : upload_get_handler,
+            "post_file" : upload_post_handler,
+            "get_word" : word_get_handler }
+
+# The "server" core
+def handle_request(verb, uri, args):
+    """Call the handler registered for `verb` and `uri` with `args`.
+
+    A pair without a registered handler is served by the default GET handler.
+    """
+    key = verb + "_" + uri
+    if key not in handlers:
+        key = "get" + "_" + "default"
+    return handlers[key](args)
+
+#################################################################################
+# 仿真简单的浏览器客户端动作
+def render_and_get_input(state_representation, links):
+    """Show a server response and turn the user's reply into the next request.
+
+    `links` decides how input is read:
+    - dict: one line is read and used as the key; an unknown key leads to
+      the default page.
+    - list starting with "post": one line of form data is read and appended
+      to the list as a one-element list.
+    - any other list: returned as is, without reading input.
+    - anything else: the default page.
+    When input has ended, the quit request is returned.
+    """
+    default_request = ["get", "default", None]
+    quit_request = ["post", "execution", None]
+
+    print(state_representation)
+    sys.stdout.flush()
+    if isinstance(links, dict):
+        line = sys.stdin.readline()
+        # readline() returns '' only at end of input; falling back to the
+        # default page there would print the menu forever.
+        if not line:
+            return quit_request
+        return links.get(line.strip(), default_request)
+    if isinstance(links, list):
+        if links[0] != "post":
+            # A "get" action needs no user input.
+            return links
+        line = sys.stdin.readline()
+        if not line:
+            return quit_request
+        links.append([line.strip()])  # the form data goes at the end
+        return links
+    return default_request
+
+
+if __name__ == "__main__":
+    # Start at the default page, then alternate between the two halves:
+    # the "server" answers a request, the "client" picks the next one.
+    request = ["get", "default", None]
+    while True:
+        response = handle_request(*request)
+        state_representation, links = response
+        request = render_and_get_input(state_representation, links)