t02

2 years ago · 2bfeabe429
parent e5dc492333
commit 2bfeabe429
27 changed files with 722 additions and 0 deletions
--- a/交互/MVC/tf-33.py
+++ b/交互/MVC/tf-33.py
@ -0,0 +1,45 @@
 import sys, collections
 from cppy.cp_util import *
 class WordFrequenciesModel:
    """Model: holds the word frequencies of the most recently loaded file."""
    def __init__(self, path_to_file):
        # Fill the model right away from the initial file.
        self.update(path_to_file)
    def update(self, path_to_file):
        """Recount the words of ``path_to_file`` into ``self.freqs``.

        On ``IOError`` a message is printed and ``self.freqs`` becomes an
        empty dict.
        """
        try:
            counts = collections.Counter(extract_file_words(path_to_file))
        except IOError:
            print("File not found")
            counts = {}
        self.freqs = counts
 class WordFrequenciesView:
    """View: presents the data held by the model."""
    def __init__(self, model):
        self._model = model
    def render(self):
        """Sort the model's current frequencies with ``sort_dict`` and print them."""
        print_word_freqs(sort_dict(self._model.freqs))
 class WordFrequencyController:
    """Controller: reads file paths from stdin and refreshes model and view."""
    def __init__(self, model, view):
        self._model, self._view = model, view
        # Show the model's initial state as soon as the controller exists.
        view.render()
    def run(self):
        """Prompt for file paths until the user enters 'q' or stdin is closed."""
        while True:
            print("Enter the file path (or 'q' to quit): ", file=sys.stderr, flush=True)
            line = sys.stdin.readline()
            # readline() returns '' only at end of input; without this check
            # the loop would never terminate once stdin is exhausted.
            if not line:
                break
            filename = line.strip()
            if filename.lower() == 'q':
                break
            self._model.update(filename)
            self._view.render()
 # Wire model, view and controller together, then start the interactive loop.
 m = WordFrequenciesModel(testfilepath)
 v = WordFrequenciesView(model=m)
 c = WordFrequencyController(model=m, view=v)
 c.run()
--- a/交互/桌面/test.txt
+++ b/交互/桌面/test.txt
@ -0,0 +1,2 @@
 " my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
 know. sure "
--- a/交互/桌面/tf-98.py
+++ b/交互/桌面/tf-98.py
@ -0,0 +1,65 @@
 import sys
 from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QTextEdit, QFileDialog
 import cppy.cp_util as util
 # 工具函数
 def extract_words(path_to_file):
    """Return whatever ``util.extract_words`` produces for ``path_to_file``."""
    words = util.extract_words(path_to_file)
    return words
 def frequencies(word_list):
    """Delegate the counting of ``word_list`` to ``util.get_frequencies``."""
    counts = util.get_frequencies(word_list)
    return counts
 def sort(word_freq):
    """Delegate the ordering of ``word_freq`` to ``util.sort_dict``."""
    ordered = util.sort_dict(word_freq)
    return ordered
 class MenuApp(QWidget):
    """Window with open / continue / exit buttons and a read-only result pane."""
    def __init__(self):
        super().__init__()
        self.initUI()
    def initUI(self):
        """Create the widgets, stack them vertically and connect the button signals."""
        self.setWindowTitle('终端菜单')
        self.setGeometry(100, 100, 400, 300)
        # Buttons
        self.openFileBtn = QPushButton('上传并打开文件', self)
        self.continueBtn = QPushButton('继续', self)
        self.exitBtn = QPushButton('退出', self)
        # Read-only text pane that shows the result
        self.textEdit = QTextEdit(self)
        self.textEdit.setReadOnly(True)
        # Layout
        layout = QVBoxLayout()
        layout.addWidget(self.openFileBtn)
        layout.addWidget(self.continueBtn)
        layout.addWidget(self.exitBtn)
        layout.addWidget(self.textEdit)
        self.setLayout(layout)
        # Connect signals to slots
        self.openFileBtn.clicked.connect(self.openFile)
        self.continueBtn.clicked.connect(self.clearText)
        self.exitBtn.clicked.connect(self.close)
    def openFile(self):
        """Ask for a file and show the first ten entries of its sorted word frequencies."""
        options = QFileDialog.Options()
        fileName, _ = QFileDialog.getOpenFileName(self, "上传并打开文件", "", "All Files (*);;Text Files (*.txt)", options=options)
        # A cancelled dialog yields an empty file name; there is nothing to process then.
        if not fileName:
            return
        word_freqs = sort( frequencies(extract_words( fileName )) )
        lines = [w + '-' + str(c) + '\n' for (w, c) in word_freqs[ :10 ]]
        self.textEdit.setText(''.join(lines))
    def clearText(self):
        """Empty the result pane."""
        self.textEdit.clear()
 if __name__ == '__main__':
    # Start Qt, show the window and leave with the event loop's exit status.
    app = QApplication(sys.argv)
    ex = MenuApp()
    ex.show()
    exit_status = app.exec_()
    sys.exit(exit_status)
--- a/交互/终端/test.txt
+++ b/交互/终端/test.txt
@ -0,0 +1,2 @@
 " my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
 know. sure "
--- a/交互/终端/tf-97.py
+++ b/交互/终端/tf-97.py
@ -0,0 +1,42 @@
 import os
 import cppy.cp_util as util
 # 工具函数
 def extract_words(path_to_file):
    """Return whatever ``util.extract_words`` produces for ``path_to_file``."""
    words = util.extract_words(path_to_file)
    return words
 def frequencies(word_list):
    """Delegate the counting of ``word_list`` to ``util.get_frequencies``."""
    counts = util.get_frequencies(word_list)
    return counts
 def sort(word_freq):
    """Delegate the ordering of ``word_freq`` to ``util.sort_dict``."""
    ordered = util.sort_dict(word_freq)
    return ordered
 def print_menu():
    """Print the menu header (preceded by a blank line) and the three options."""
    entries = ("\n菜单选项：", "1. 上传并处理文件", "2. 继续", "3. 退出")
    for entry in entries:
        print(entry)
 def open_and_print_file():
    """Prompt for a file name and print the sorted word frequencies of that file.

    A file that cannot be read is reported to the user so the menu loop can
    continue instead of ending with a traceback.
    """
    filename = input("请输入文件名：")
    try:
        word_freqs = sort( frequencies(extract_words( filename )) )
    except OSError as err:
        # NOTE(review): assumes util.extract_words lets the OSError raised
        # while opening the file propagate - confirm against cppy.cp_util.
        print("无法读取文件：{}".format(err))
        return
    util.print_word_freqs(word_freqs)
 def main():
    """Show the menu repeatedly until the user selects option 3."""
    while True:
        print_menu()
        choice = input("请选择一个选项（1/2/3）：")
        if choice == '3':
            print("退出程序。")
            break
        if choice == '1':
            open_and_print_file()
        elif choice != '2':
            # Option 2 just shows the menu again; anything else is rejected.
            print("无效的输入，请重新输入。")
 if __name__ == "__main__":
    main()
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,49 @@
 import string
 from collections import Counter
 from cppy.cp_util import *
 # data
 # Shared module state: raw characters, extracted words, (word, count) pairs.
 data, words, word_freqs = [], [], []
 ################################
 # procedures
 ################################
 def read_file(path_to_file):
    """Append every character of the UTF-8 file ``path_to_file`` to the global ``data``."""
    global data
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read()
    # Builds a new list and rebinds the global, as concatenation with '+' does.
    data = [*data, *text]
 def filter_chars_and_normalize():
    """Turn the characters in ``data`` into lower-case words without stop words.

    Non-alphanumeric characters in the global ``data`` are replaced by spaces
    and the rest is lower-cased; the resulting words are appended to the
    global ``words``. Stop words (comma separated in ``stopwordfilepath``)
    and all single ASCII letters are then removed from ``words``.
    """
    global data
    global words
    # Slice assignment rewrites the existing list object in place.
    data[:] = [c.lower() if c.isalnum() else ' ' for c in data]
    words = words + ''.join(data).split()
    # Same explicit encoding as read_file uses for the input text.
    with open(stopwordfilepath, encoding='utf-8') as f:
        stop_words = set(f.read().split(','))
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]
 def frequencies():
    """Append one ``(word, 1)`` pair to the global ``word_freqs`` per entry of ``words``."""
    global words
    global word_freqs
    for word in words:
        word_freqs.append((word, 1))
 def sort():
    """Rebind the global ``word_freqs`` to the counts of ``words``, most common first."""
    global word_freqs
    counts = Counter(words)
    word_freqs = counts.most_common()
 if __name__ == "__main__":
    # Run the procedures in order; each one works on the module-level state.
    read_file(testfilepath)
    filter_chars_and_normalize()
    frequencies()
    sort()
    for tf in word_freqs[:10]:
        word, count = tf[0], tf[1]
        print(word, '-', count)
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,32 @@
 import re
 from cppy.cp_util import *
 def filter_chars_and_normalize(str_data):
    """Split ``str_data`` into lower-case words and drop the stop words.

    Runs of non-word characters and underscores are treated as separators.
    Returns the remaining words as a list, in their original order.
    """
    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]
 def frequencies(word_list):
    """Return a dict mapping each word of ``word_list`` to its number of occurrences."""
    counts = {}
    for token in word_list:
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1
    return counts
 def sort(word_freq):
    """Return the ``(word, count)`` pairs of ``word_freq`` ordered by count, highest first."""
    def count_of(pair):
        return pair[1]
    pairs = list(word_freq.items())
    pairs.sort(key=count_of, reverse=True)
    return pairs
 def print_all(word_freqs, n = 10 ):
    """Print the first ``n`` pairs of ``word_freqs`` as ``word - count`` lines."""
    for pair in word_freqs[:n]:
        word, freq = pair
        print(word, '-', freq)
 if __name__ == "__main__":
    # Same pipeline as one nested call, spelled out step by step.
    text = read_file(testfilepath)
    counts = frequencies(filter_chars_and_normalize(text))
    print_all(sort(counts))
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,39 @@
 import re, operator
 from cppy.cp_util import *
 def print_text(word_freqs, func):
    """Print ``word_freqs`` via ``print_word_freqs``, then call ``func`` with ``None``."""
    print_word_freqs(word_freqs) 
    func(None)
 def frequencies(word_list, func):
    """Count ``word_list`` and hand the result to ``func`` with ``print_text`` as next step."""
    func(get_frequencies(word_list), print_text)
 def scan(str_data, func):
    """Split ``str_data`` on whitespace and pass the tokens to ``func`` with ``frequencies`` as next step."""
    tokens = str_data.split()
    func(tokens, frequencies)
 def filter_chars(str_data, func):
    """Replace runs of non-word characters in ``str_data`` with one space.

    The cleaned text is passed to ``func`` with ``scan`` as the next step.
    """
    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)
 def remove_stop_words(word_list, func):
    """Drop the stop words from ``word_list`` and pass the rest to ``func`` with ``sort`` as next step."""
    stop_words = get_stopwords()
    kept = [w for w in word_list if w not in stop_words]
    func(kept, sort)
 def sort(wf, func):
    """Order the items of ``wf`` by count, highest first, and pass them to ``func`` with ``no_op``."""
    by_count = operator.itemgetter(1)
    ordered = sorted(wf.items(), key=by_count, reverse=True)
    func(ordered, no_op)
 def no_op(func):
    """End of the chain: ignore the argument and do nothing."""
    return None
 def normalize(str_data, func):
    """Lower-case ``str_data`` and pass it to ``func`` with ``remove_stop_words`` as next step."""
    lowered = str_data.lower()
    func(lowered, remove_stop_words)
 def read_file(path_to_file, func):
    """Read the UTF-8 file ``path_to_file`` and pass its text to ``func`` with ``normalize`` as next step."""
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read()
    func(text, normalize)
 if __name__ == "__main__":
    # Kick off the chain; every step names the step after the next one.
    read_file(testfilepath, filter_chars)
--- a/基本结构/021
+++ b/基本结构/021
@ -0,0 +1,25 @@
 import re
 from collections import Counter
 from cppy.cp_util import *
 # Read the file
 with open(testfilepath,encoding='utf-8') as f:
    data = f.read().lower()  # lower-case right away
 # Replace runs of non-word characters and underscores with one space
 # (raw string: '\W' inside a plain literal is an invalid escape sequence)
 data = re.sub(r'[\W_]+', ' ', data)
 # Split into words
 words = data.split()
 # Remove stop words
 stop_words = get_stopwords()
 words = [word for word in words if word not in stop_words]
 # Count word frequencies
 word_freqs = Counter(words)
 # Sort by count, highest first, and print
 sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
 print_word_freqs(sorted_word_freqs)
--- a/对象化/tf-11.py
+++ b/对象化/tf-11.py
@ -0,0 +1,53 @@
 from collections import Counter
 from cppy.cp_util import *
 class DataStorageManager:
    """Data model: the words obtained from the input file."""
    def __init__(self, path_to_file):
        # read_file / re_split come from cppy.cp_util.
        self._data = re_split(read_file(path_to_file))
    def words(self):
        """Return the stored words."""
        return self._data
 class StopWordManager:
    """Stop-word model: answers whether a word is a stop word."""
    def __init__(self):
        # A set keeps the membership test in is_stop_word constant-time
        # regardless of the container get_stopwords() returns.
        self._stop_words = set(get_stopwords())
    def is_stop_word(self, word):
        """Return True when ``word`` is one of the stop words."""
        return word in self._stop_words
 class WordFrequencyManager:
    """Word-frequency model backed by a ``Counter``."""
    def __init__(self):
        self._word_freqs = Counter()
    def increment_count(self, word):
        """Add one occurrence of ``word``."""
        self._word_freqs.update((word,))
    def sorted(self):
        """Return ``(word, count)`` pairs, most common first."""
        counts = self._word_freqs
        return counts.most_common()
 class WordFrequencyController:
    """Ties the three managers together and prints the result."""
    def __init__(self, path_to_file):
        self._storage_manager = DataStorageManager(path_to_file)
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()
    def run(self):
        """Count every non-stop word of the storage and print the sorted frequencies."""
        for w in self._storage_manager.words():
            if self._stop_word_manager.is_stop_word(w):
                continue
            self._word_freq_manager.increment_count(w)
        print_word_freqs(self._word_freq_manager.sorted())
 if __name__ == '__main__':
    controller = WordFrequencyController(testfilepath)
    controller.run()
--- a/对象化/tf-13.py
+++ b/对象化/tf-13.py
@ -0,0 +1,41 @@
 from cppy.cp_util import *
 def extract_words(obj, path_to_file):
    """Store the words of ``path_to_file`` under ``obj['data']``."""
    text = read_file(path_to_file)
    obj['data'] = re_split(text)
 def load_stop_words(obj):
    """Store the result of ``get_stopwords()`` under ``obj['stop_words']``."""
    stop_words = get_stopwords()
    obj['stop_words'] = stop_words
 def increment_count(obj, w):
    """Increase the count of ``w`` in ``obj['freqs']`` by one, starting at 1 for new words."""
    freqs = obj['freqs']
    freqs[w] = freqs.get(w, 0) + 1
 # Dict used as an object: 'init' loads the words, 'words' returns them.
 data_storage_obj = dict(
    data=[],
    init=lambda path_to_file: extract_words(data_storage_obj, path_to_file),
    words=lambda: data_storage_obj['data'],
 )
 # Dict used as an object: 'init' loads the stop words, 'is_stop_word' tests membership.
 stop_words_obj = dict(
    stop_words=[],
    init=lambda: load_stop_words(stop_words_obj),
    is_stop_word=lambda word: word in stop_words_obj['stop_words'],
 )
 # Dict used as an object: 'increment_count' counts a word, 'sorted' returns sort_dict's ordering.
 word_freqs_obj = dict(
    freqs={},
    increment_count=lambda w: increment_count(word_freqs_obj, w),
    sorted=lambda: sort_dict(word_freqs_obj['freqs']),
 )
 if __name__ == '__main__':
    # Initialise both "objects", count the non-stop words, print the first ten entries.
    data_storage_obj['init'](testfilepath)
    stop_words_obj['init']()
    for w in data_storage_obj['words']():
        if stop_words_obj['is_stop_word'](w):
            continue
        word_freqs_obj['increment_count'](w)
    word_freqs = word_freqs_obj['sorted']()
    for (w, c) in word_freqs[:10]:
        print(w, '-', c)
--- a/对象接口/tf-14A.py
+++ b/对象接口/tf-14A.py
@ -0,0 +1,59 @@
 import re
 from collections import Counter
 from cppy.cp_util import *
 class DataStorageManager1:
    """Word source that splits the file content with ``re_split``."""
    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        self._data = re_split(content)
    def words(self):
        """Return the stored words."""
        return self._data
 class DataStorageManager2:
    """Word source that keeps runs of two or more lower-case ASCII letters."""
    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        # NOTE(review): only [a-z] is matched - presumably read_file already
        # lower-cases the text; confirm against cppy.cp_util.
        self._data = re.compile('[a-z]{2,}').findall(content)
    def words(self):
        """Return the stored words."""
        return self._data
 class StopWordManager:
    """Answers whether a word is a stop word."""
    def __init__(self):
        stop_words = get_stopwords()
        self._stop_words = set(stop_words)
    def is_stop_word(self, word):
        """Return True when ``word`` is one of the stop words."""
        known = self._stop_words
        return word in known
 class WordFrequencyManager:
    """Counts words in a ``Counter`` exposed as ``word_freqs``."""
    def __init__(self):
        self.word_freqs = Counter()
    def increment_count(self, word):
        """Add one occurrence of ``word``."""
        self.word_freqs.update((word,))
    def sorted(self):
        """Return ``(word, count)`` pairs, most common first."""
        counts = self.word_freqs
        return counts.most_common()
 #
 # 应用类
 #
 class WordFrequencyController:
    """Application class: counts the non-stop words of a file and prints them."""
    def __init__(self, path_to_file):
        # DataStorageManager1(path_to_file) can be used here instead.
        self.storage = DataStorageManager2(path_to_file)
        self.stop_word_manager = StopWordManager()
        self.word_freq_counter = WordFrequencyManager()
    def run(self):
        """Relies only on the methods the collaborators offer (protocol-style programming)."""
        for word in self.storage.words():
            if self.stop_word_manager.is_stop_word(word):
                continue
            self.word_freq_counter.increment_count(word)
        ranked = self.word_freq_counter.sorted()
        print_word_freqs(ranked)
 if __name__ == '__main__':
    controller = WordFrequencyController(testfilepath)
    controller.run()
--- a/对象接口/tf-14B.py
+++ b/对象接口/tf-14B.py
@ -0,0 +1,92 @@
 import abc, re
 from cppy.cp_util import *
 #
 # 接口
 #
 class IDataStorage(metaclass=abc.ABCMeta):
    """Interface of a word source."""
    @abc.abstractmethod
    def words(self):
        """Implementations provide the words."""
        return None
 class IStopWordFilter(metaclass=abc.ABCMeta):
    """Interface of a stop-word test."""
    @abc.abstractmethod
    def is_stop_word(self, word):
        """Implementations decide whether ``word`` is a stop word."""
        return None
 class IWordFrequencyCounter(metaclass=abc.ABCMeta):
    """Interface of a word counter."""
    @abc.abstractmethod
    def increment_count(self, word):
        """Implementations record one occurrence of ``word``."""
        return None
    @abc.abstractmethod
    def sorted(self):
        """Implementations return the counted words in order."""
        return None
 #
 # 类实现
 #
 class DataStorageManager1:
    """Word source that splits the file content with ``re_split``."""
    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        self._data = re_split(content)
    def words(self):
        """Return the stored words."""
        return self._data
 class DataStorageManager2:
    """Word source that keeps runs of two or more lower-case ASCII letters."""
    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        # NOTE(review): only [a-z] is matched - presumably read_file already
        # lower-cases the text; confirm against cppy.cp_util.
        self._data = re.compile('[a-z]{2,}').findall(content)
    def words(self):
        """Return the stored words."""
        return self._data
 class StopWordManager:
    """Answers whether a word is a stop word."""
    def __init__(self):
        # A set keeps the membership test in is_stop_word constant-time
        # regardless of the container get_stopwords() returns.
        self._stop_words = set(get_stopwords())
    def is_stop_word(self, word):
        """Return True when ``word`` is one of the stop words."""
        return word in self._stop_words
 class WordFrequencyManager:
    """Counts words in a plain dict."""
    def __init__(self):
        self._word_freqs = {}
    def increment_count(self, word):
        """Add one occurrence of ``word``."""
        if word in self._word_freqs:
            self._word_freqs[word] += 1
        else:
            self._word_freqs[word] = 1
    def sorted(self):
        """Return the counts as ordered by ``sort_dict``."""
        return sort_dict(self._word_freqs)
 #
 # 注册到抽象接口：并非必要
 #
 # Register the implementations as virtual subclasses of the interfaces.
 # DataStorageManager1 could be registered with IDataStorage in the same way.
 IDataStorage.register(DataStorageManager2)
 IStopWordFilter.register(StopWordManager)
 IWordFrequencyCounter.register(WordFrequencyManager)
 #
 # 应用类
 #
 class WordFrequencyController:
    """Application class: counts the non-stop words of a file and prints them."""
    def __init__(self, path_to_file):
        # DataStorageManager1(path_to_file) can be used here instead.
        self.storage = DataStorageManager2(path_to_file)
        self.stop_word_manager = StopWordManager()
        self.word_freq_counter = WordFrequencyManager()
    def run(self):
        """Relies only on the methods the collaborators offer (protocol-style programming)."""
        for word in self.storage.words():
            if self.stop_word_manager.is_stop_word(word):
                continue
            self.word_freq_counter.increment_count(word)
        ranked = self.word_freq_counter.sorted()
        print_word_freqs(ranked)
 if __name__ == '__main__':
    controller = WordFrequencyController(testfilepath)
    controller.run()
--- a/插件/pycache/tf-20.cpython-38.pyc
+++ b/插件/pycache/tf-20.cpython-38.pyc
--- a/插件/config.ini
+++ b/插件/config.ini
@ -0,0 +1,5 @@
 [Plugins]
 ;; Options: plugins/words1.pyc, plugins/words2.pyc
 words = plugins/words1.pyc
 ;; Options: plugins/frequencies1.pyc, plugins/frequencies2.pyc
 frequencies = plugins/frequencies1.pyc            
--- a/插件/plugins-src/compile.sh
+++ b/插件/plugins-src/compile.sh
@ -0,0 +1,2 @@
 # Compile the plugin sources to legacy-named byte code next to the sources
 # (words1.pyc rather than __pycache__/words1.cpython-XY.pyc) so the copied
 # files carry the names that config.ini refers to.
 python -m compileall -b .
 cp ./*.pyc ../plugins
--- a/插件/plugins-src/frequencies1.py
+++ b/插件/plugins-src/frequencies1.py
@ -0,0 +1,11 @@
 import operator
 def top25(word_list, n=10):
    """Return the ``n`` most frequent words of ``word_list`` as ``(word, count)`` pairs.

    Despite the function's name, ten pairs are returned by default. Pairs are
    ordered by count, highest first; words with equal counts keep the order
    in which they first appeared.
    """
    word_freqs = {}
    for w in word_list:
        word_freqs[w] = word_freqs.get(w, 0) + 1
    return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:n]
--- a/插件/plugins-src/frequencies2.py
+++ b/插件/plugins-src/frequencies2.py
@ -0,0 +1,6 @@
 import operator, collections
 def top25(word_list, n=10):
    """Return the ``n`` most frequent words of ``word_list`` as ``(word, count)`` pairs.

    Despite the function's name, ten pairs are returned by default.
    """
    # Counter consumes the iterable directly; no generator wrapper is needed.
    counts = collections.Counter(word_list)
    return counts.most_common(n)
--- a/插件/plugins-src/words1.py
+++ b/插件/plugins-src/words1.py
@ -0,0 +1,14 @@
 import sys, re, string
 from cppy.cp_util import *
 def extract_words(path_to_file):
    """Return the lower-case words of the UTF-8 file ``path_to_file`` without stop words.

    Runs of non-word characters and underscores are treated as separators.
    """
    with open(path_to_file,encoding='utf-8') as f:
        str_data = f.read()
    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]
--- a/插件/plugins-src/words2.py
+++ b/插件/plugins-src/words2.py
@ -0,0 +1,8 @@
 import sys, re, string
 from cppy.cp_util import *
 def extract_words(path_to_file):
    """Return the words of the UTF-8 file ``path_to_file`` without stop words.

    A word is a run of two or more ASCII letters of the lower-cased text.
    """
    # The context manager closes the file; the bare open().read() left it open.
    with open(path_to_file, encoding='utf-8') as f:
        text = f.read().lower()
    words = re.findall('[a-z]{2,}', text)
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]
--- a/插件/plugins/frequencies1.pyc
+++ b/插件/plugins/frequencies1.pyc
--- a/插件/plugins/frequencies2.cpython-38.pyc
+++ b/插件/plugins/frequencies2.cpython-38.pyc
--- a/插件/plugins/words1.pyc
+++ b/插件/plugins/words1.pyc
--- a/插件/plugins/words2.cpython-38.pyc
+++ b/插件/plugins/words2.cpython-38.pyc
--- a/基本结构/041
+++ b/基本结构/041
@ -0,0 +1,21 @@
 import configparser, importlib.machinery
 from cppy.cp_util import *
 def load_plugins():
    """Load the two plugins named in the ``[Plugins]`` section of config.ini.

    Changes the working directory to the directory of this script, reads
    ``config.ini`` there and binds the compiled modules configured under
    ``words`` and ``frequencies`` to the module-level names ``tfwords`` and
    ``tffreqs``.
    """
    # Imported locally so the function does not depend on names that leak
    # through the star import at the top of the file.
    import os
    import sys
    from importlib.util import module_from_spec, spec_from_loader
    def _load_pyc(name, path):
        # Replaces the deprecated Loader.load_module(): build the module
        # from a spec and execute the byte code in it.
        loader = importlib.machinery.SourcelessFileLoader(name, path)
        module = module_from_spec(spec_from_loader(name, loader))
        loader.exec_module(module)
        # load_module() also registered the module under its name.
        sys.modules[name] = module
        return module
    config = configparser.ConfigParser()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # The plugin paths in config.ini are relative to the script directory.
    os.chdir(script_dir)
    config.read("config.ini")
    words_plugin = config.get("Plugins", "words")
    frequencies_plugin = config.get("Plugins", "frequencies")
    global tfwords, tffreqs
    tfwords = _load_pyc('tfwords', words_plugin)
    tffreqs = _load_pyc('tffreqs', frequencies_plugin)
 # Load the configured plugins, then let them extract and rank the words.
 load_plugins()
 word_freqs = tffreqs.top25(tfwords.extract_words(testfilepath))
 for w, c in word_freqs:
    print(w, '-', c)
--- a/restful/test.txt
+++ b/restful/test.txt
@ -0,0 +1,2 @@
 " my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
 know. sure "
--- a/restful/tf-34.py
+++ b/restful/tf-34.py
@ -0,0 +1,107 @@
 import re, sys
 from cppy.cp_util import *
 # Stop words are fetched once at import time; `data` maps a file path to
 # its sorted (word, count) list.
 stops = get_stopwords()
 data = dict()
 # 路径问题，测试输入 test.txt ，演示当前目录下这个文件的处理
 #################################################################################
 # 服务端
 def error_state():
    """Return the error message together with a link to the default page."""
    fallback_link = ["get", "default", None]
    return "Something wrong", fallback_link
 def quit_handler(args):
    """Terminate the program with the exit message " ... "; ``args`` is ignored."""
    farewell = " ... "
    sys.exit(farewell)
 def upload_get_handler(args):
    """Return the upload prompt and the "post file" link; ``args`` is ignored."""
    post_link = ["post", "file"]
    return "Name of file to upload?", post_link
 def default_get_handler(args):
    """Return the start menu and the links behind its two choices; ``args`` is ignored."""
    menu_lines = ["What would you like to do?", "1 - Quit", "2 - Upload file"]
    links = {
        "1": ["post", "execution", None],
        "2": ["get", "file_form", None],
    }
    return "\n".join(menu_lines), links
 def upload_post_handler(args):
    """Analyse the uploaded file and answer with its most frequent word.

    ``args`` is ``[filename]``; the name is resolved relative to the directory
    of this script. When ``args`` is None or the file cannot be processed,
    the error state is returned.
    """
    # Imported locally so the function does not depend on names that leak
    # through the star import at the top of the file.
    import os
    def create_data(fn):
        # A file is analysed once; later requests reuse the cached result.
        if fn in data: return
        word_freqs = {}
        with open(fn, encoding='utf-8') as f:
            for x in re.split("[^a-zA-Z]+", f.read()):
                w = x.lower()
                if w and w not in stops:
                    word_freqs[w] = word_freqs.get(w, 0) + 1
        data[fn] = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
    if args is None:  return error_state()
    filename = args[0]
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        filename = os.path.join(script_dir, filename)
        create_data(filename)
    except Exception:
        # Any processing failure leads to the error page, but SystemExit
        # and KeyboardInterrupt are no longer swallowed.
        print("Unexpected error: %s" % sys.exc_info()[0])
        return error_state()
    return word_get_handler([filename, 0])
 def word_get_handler(args):
    """Show the word at position ``args[1]`` of file ``args[0]`` and offer the next steps.

    Past the end of the file's list, ``("no more words", 0)`` is shown.
    """
    filename, word_index = args[0], args[1]
    entries = data[filename]
    if word_index < len(entries):
        word_info = entries[word_index]
    else:
        word_info = ("no more words", 0)
    parts = [
        '\n#{0}: {1} - {2}'.format(word_index+1, word_info[0], word_info[1]),
        "\n\nWhat would you like to do next?",
        "\n1 - Quit",
        "\n2 - Upload file",
        "\n3 - See next most-frequently occurring word",
    ]
    links = {
        "1": ["post", "execution", None],
        "2": ["get", "file_form", None],
        "3": ["get", "word", [filename, word_index+1]],
    }
    return "".join(parts), links
 # Handler registration
 # Maps "<verb>_<uri>" to the function that serves it.
 handlers = dict(
    post_execution=quit_handler,
    get_default=default_get_handler,
    get_file_form=upload_get_handler,
    post_file=upload_post_handler,
    get_word=word_get_handler,
 )
 # The "server" core
 def handle_request(verb, uri, args):
    """Dispatch to the handler registered for ``verb`` and ``uri``.

    Unknown combinations are served by the "get default" handler.
    """
    key = verb + "_" + uri
    if key not in handlers:
        key = "get" + "_" + "default"
    return handlers[key](args)
 #################################################################################
 # 仿真简单的浏览器客户端动作
 def render_and_get_input(state_representation, links):
    """Print the server's reply and turn the user's answer into the next request.

    What is read from stdin depends on ``links``:
    - dict: one line is read and used as a key; an unknown key yields the
      default request.
    - list: for a "post" link one line is read and appended as ``[text]`` to
      the list, which is then returned; any other link is returned as is.
    - anything else: the default request.
    """
    print(state_representation)
    sys.stdout.flush()
    if isinstance(links, dict):
        choice = sys.stdin.readline().strip()
        return links.get(choice, ["get", "default", None])
    if isinstance(links, list):
        if links[0] == "post":
            # A "form": the typed text becomes the request's argument list.
            form_text = sys.stdin.readline().strip()
            links.append([form_text])
        # A "get" link needs no user input.
        return links
    return ["get", "default", None]
 if __name__ == "__main__":
    # Alternate between the "server" (handle_request) and the "client"
    # (render_and_get_input), starting at the default page.
    request = ["get", "default", None]
    while True:
        verb, uri, args = request
        state_representation, links = handle_request(verb, uri, args)
        request = render_and_get_input(state_representation, links)
		`@ -0,0 +1,2 @@`
							`" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not`
							`know. sure "`
		`@ -0,0 +1,2 @@`
							`python -m compileall .`
							`cp __pycache__/*.pyc ../plugins`