# -*- coding: utf-8 -*-
"""Word-frequency counting written as an explicit state machine.

State machines are a foundational model of how programs run. Here the
file-processing pipeline is decomposed into a sequence of state transitions,
each state standing for one stage of the work: reading the file, splitting it
into words, and counting word frequencies. This style is uncommon in Python,
but it shows how a state machine can manage a program's state and flow.
"""
from collections import Counter
from typing import Optional

import cppy.cp_util as util


class WordFrequencyStateMachine:
    """Read a text file, split it into words and count them, one state at a time.

    Values taken by ``self.state``:

    * ``'IDLE'``           -- nothing done yet; the next step reads the file.
    * ``'WORDS_SPLIT'``    -- content is loaded; the next step splits it.
    * ``'CALCULATE_FREQ'`` -- words are available; the next step counts them.
    * ``'DONE'``           -- ``self.word_freq`` holds the result.
    * ``'ERROR'``          -- a step failed; the machine stops.
    """

    def __init__(self, file_path):
        """Create a machine in the ``'IDLE'`` state for the file at *file_path*."""
        self.file_path = file_path
        self.content: Optional[str] = None
        self.words = None
        self.word_freq: Optional[Counter] = None
        self.state = 'IDLE'

    def transition_to_read_file(self):
        """Load the file into ``self.content`` and advance to ``'WORDS_SPLIT'``.

        On any failure a message is printed and the machine moves to
        ``'ERROR'``. Leaving the state at ``'IDLE'`` would make ``run()``
        retry the same failing read forever.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                self.content = file.read()
        except FileNotFoundError:
            print(f"文件 {self.file_path} 未找到。")
            self.state = 'ERROR'
        except Exception as e:
            # Deliberately broad: this is a best-effort script step that
            # reports the problem instead of propagating it.
            print(f"读取文件时发生错误: {e}")
            self.state = 'ERROR'
        else:
            self.state = 'WORDS_SPLIT'

    def transition_to_split_words(self):
        """Split ``self.content`` into ``self.words`` and advance to ``'CALCULATE_FREQ'``.

        If no content has been loaded, a message is printed and the machine
        moves to ``'ERROR'``.
        """
        if self.content is None:
            print("文件内容为空,无法分割单词。")
            self.state = 'ERROR'
            return
        # NOTE(review): extract_str_words is a project helper; presumably it
        # returns an iterable of word strings -- confirm in cppy.cp_util.
        self.words = util.extract_str_words(self.content)
        self.state = 'CALCULATE_FREQ'

    def transition_to_calculate_freq(self):
        """Count ``self.words`` into ``self.word_freq`` and advance to ``'DONE'``.

        If no word list is available, a message is printed and the machine
        moves to ``'ERROR'``.
        """
        if self.words is None:
            print("单词列表为空,无法计算词频。")
            self.state = 'ERROR'
            return
        self.word_freq = Counter(self.words)
        self.state = 'DONE'

    def run(self):
        """Drive the machine until it reaches ``'DONE'`` or ``'ERROR'``.

        Returns:
            The ``Counter`` of word frequencies, or ``None`` when a step
            failed or an unknown state was encountered.
        """
        transitions = {
            'IDLE': self.transition_to_read_file,
            'WORDS_SPLIT': self.transition_to_split_words,
            'CALCULATE_FREQ': self.transition_to_calculate_freq,
        }
        while self.state not in ('DONE', 'ERROR'):
            step = transitions.get(self.state)
            if step is None:
                print(f"未知状态: {self.state}")
                break
            step()
        return self.word_freq


# Count word frequencies with the state machine.
state_machine = WordFrequencyStateMachine(util.testfilepath)
word_frequencies = state_machine.run()

# Print the ten most common words. run() returns None when a step failed;
# the failure has already been reported, so there is nothing to print.
if word_frequencies is not None:
    util.print_word_freqs(word_frequencies.most_common(10))