# -*- coding: utf-8 -*-
import cppy.cp_util as util
from collections import Counter

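'''
State machines are a foundational model of how computer programs execute.
Processing a file and counting word frequencies in a state-machine style means
breaking the whole process down into a series of state transitions.
Each state represents one stage of the processing, such as "read the file",
"split into words", and "count word frequencies".
This approach is uncommon in Python, but it shows how a state machine can be
used to manage a program's state and control flow.
'''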

class WordFrequencyStateMachine:
    """Count the words of a text file by stepping through explicit states.

    ``state`` names the step that ``run()`` performs next:

    * ``'IDLE'``           -> read the file into ``content``
    * ``'WORDS_SPLIT'``    -> split ``content`` into ``words``
    * ``'CALCULATE_FREQ'`` -> count ``words`` into ``word_freq``
    * ``'DONE'``           -> finished successfully
    * ``'ERROR'``          -> a step could not complete; processing stopped

    A step that fails prints a message and moves the machine to ``'ERROR'``.
    Leaving the state unchanged on failure would make ``run()`` retry the
    same failing step forever.
    """

    def __init__(self, file_path):
        """Create an idle machine for the file at ``file_path``.

        Nothing is read until ``run()`` (or the read transition) is called.
        """
        self.file_path = file_path
        self.content = None    # full text of the file once read
        self.words = None      # result of util.extract_str_words(content)
        self.word_freq = None  # Counter of words once computed
        self.state = 'IDLE'

    def transition_to_read_file(self):
        """Read the file as UTF-8 text and advance to ``'WORDS_SPLIT'``.

        Any exception is reported on stdout instead of being raised, and the
        machine moves to ``'ERROR'``.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                self.content = file.read()
        except FileNotFoundError:
            print(f"文件 {self.file_path} 未找到。")
            self.state = 'ERROR'
        except Exception as e:
            # Deliberately broad: this step reports problems, it never raises.
            print(f"读取文件时发生错误: {e}")
            self.state = 'ERROR'
        else:
            self.state = 'WORDS_SPLIT'

    def transition_to_split_words(self):
        """Split ``content`` into ``words`` and advance to ``'CALCULATE_FREQ'``.

        If no content has been loaded, print a message and move to
        ``'ERROR'``.
        """
        if self.content is None:
            print("文件内容为空，无法分割单词。")
            self.state = 'ERROR'
            return
        self.words = util.extract_str_words(self.content)
        self.state = 'CALCULATE_FREQ'

    def transition_to_calculate_freq(self):
        """Count ``words`` into ``word_freq`` and advance to ``'DONE'``.

        If no word list is available, print a message and move to
        ``'ERROR'``.
        """
        if self.words is None:
            print("单词列表为空，无法计算词频。")
            self.state = 'ERROR'
            return
        self.word_freq = Counter(self.words)
        self.state = 'DONE'

    def run(self):
        """Drive the machine until it reaches ``'DONE'`` or ``'ERROR'``.

        Returns:
            The ``Counter`` stored in ``word_freq``, or ``None`` when a step
            failed or an unknown state was encountered.
        """
        # Maps each non-terminal state to the step that leaves it.
        steps = {
            'IDLE': self.transition_to_read_file,
            'WORDS_SPLIT': self.transition_to_split_words,
            'CALCULATE_FREQ': self.transition_to_calculate_freq,
        }
        while self.state not in ('DONE', 'ERROR'):
            step = steps.get(self.state)
            if step is None:
                print(f"未知状态: {self.state}")
                break
            step()

        return self.word_freq

# Compute word frequencies using the state machine


# Process the file at util.testfilepath with the state machine.
state_machine = WordFrequencyStateMachine(util.testfilepath)
word_frequencies = state_machine.run()

# Print the ten most common words. run() returns the machine's word_freq,
# which is still None when processing did not complete, so there is nothing
# to print in that case.
if word_frequencies is not None:
    util.print_word_freqs(word_frequencies.most_common(10))