@ -1,56 +0,0 @@
 | 
				
			||||
import cppy.cp_util as util
 | 
				
			||||
from collections import Counter  
 | 
				
			||||
 | 
				
			||||
class WordFrequencyStateMachine:
    """Compute word frequencies for a text file with an explicit state machine.

    The machine advances IDLE -> WORDS_SPLIT -> CALCULATE_FREQ -> DONE.
    A step that cannot complete moves the machine to ERROR, so ``run``
    stops instead of retrying the same failing step forever.
    """

    def __init__(self, file_path):
        """Remember the input path and start in the IDLE state."""
        self.file_path = file_path
        self.content = None
        self.words = None
        self.word_freq = None
        self.state = 'IDLE'

    def transition_to_read_file(self):
        """Read the file into ``self.content`` and advance to WORDS_SPLIT.

        If the file cannot be read, a message is printed and the state
        becomes ERROR.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                self.content = file.read()
        except FileNotFoundError:
            print(f"文件 {self.file_path} 未找到。")
            self.state = 'ERROR'
        except Exception as e:
            print(f"读取文件时发生错误: {e}")
            self.state = 'ERROR'
        else:
            self.state = 'WORDS_SPLIT'

    def transition_to_split_words(self):
        """Extract words from ``self.content`` and advance to CALCULATE_FREQ.

        If no content has been loaded, a message is printed and the state
        becomes ERROR.
        """
        if self.content is None:
            print("文件内容为空,无法分割单词。")
            self.state = 'ERROR'
            return
        self.words = util.extract_str_words(self.content)
        self.state = 'CALCULATE_FREQ'

    def transition_to_calculate_freq(self):
        """Count ``self.words`` into ``self.word_freq`` and advance to DONE.

        If no word list is available, a message is printed and the state
        becomes ERROR.
        """
        if self.words is None:
            print("单词列表为空,无法计算词频。")
            self.state = 'ERROR'
            return
        self.word_freq = Counter(self.words)
        self.state = 'DONE'

    def run(self):
        """Drive the machine until it reaches DONE or ERROR.

        Returns:
            The ``Counter`` of word frequencies, or whatever
            ``self.word_freq`` currently holds (``None`` on a fresh
            instance) when the machine stopped before reaching DONE.
        """
        handlers = {
            'IDLE': self.transition_to_read_file,
            'WORDS_SPLIT': self.transition_to_split_words,
            'CALCULATE_FREQ': self.transition_to_calculate_freq,
        }
        # ERROR is terminal: without it a failed step would leave the state
        # unchanged and this loop would spin forever.
        while self.state not in ('DONE', 'ERROR'):
            handler = handlers.get(self.state)
            if handler is None:
                print(f"未知状态: {self.state}")
                break
            handler()

        return self.word_freq
 | 
				
			||||
  
 | 
				
			||||
# Use the state machine to compute word frequencies.
state_machine = WordFrequencyStateMachine(util.testfilepath)
word_frequencies = state_machine.run()

# Print the ten most common words. run() returns None when it stops before
# producing a result, so guard against calling most_common on None.
if word_frequencies is not None:
    for word, freq in word_frequencies.most_common(10):
        print(f"{word}: {freq}")
 | 
				
			||||
@ -0,0 +1,60 @@
 | 
				
			||||
# -*- coding: utf-8 -*-
 | 
				
			||||
import cppy.cp_util as util
 | 
				
			||||
from collections import Counter
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class WordFrequencyStateMachine:
    """Compute word frequencies for a text file with an explicit state machine.

    The machine advances IDLE -> WORDS_SPLIT -> CALCULATE_FREQ -> DONE.
    A step that cannot complete moves the machine to ERROR, so ``run``
    stops instead of retrying the same failing step forever.
    """

    def __init__(self, file_path):
        """Remember the input path and start in the IDLE state."""
        self.file_path = file_path
        self.content = None
        self.words = None
        self.word_freq = None
        self.state = 'IDLE'

    def transition_to_read_file(self):
        """Read the file into ``self.content`` and advance to WORDS_SPLIT.

        If the file cannot be read, a message is printed and the state
        becomes ERROR.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                self.content = file.read()
        except FileNotFoundError:
            print(f"文件 {self.file_path} 未找到。")
            self.state = 'ERROR'
        except Exception as e:
            print(f"读取文件时发生错误: {e}")
            self.state = 'ERROR'
        else:
            self.state = 'WORDS_SPLIT'

    def transition_to_split_words(self):
        """Extract words from ``self.content`` and advance to CALCULATE_FREQ.

        If no content has been loaded, a message is printed and the state
        becomes ERROR.
        """
        if self.content is None:
            print("文件内容为空,无法分割单词。")
            self.state = 'ERROR'
            return
        self.words = util.extract_str_words(self.content)
        self.state = 'CALCULATE_FREQ'

    def transition_to_calculate_freq(self):
        """Count ``self.words`` into ``self.word_freq`` and advance to DONE.

        If no word list is available, a message is printed and the state
        becomes ERROR.
        """
        if self.words is None:
            print("单词列表为空,无法计算词频。")
            self.state = 'ERROR'
            return
        self.word_freq = Counter(self.words)
        self.state = 'DONE'

    def run(self):
        """Drive the machine until it reaches DONE or ERROR.

        Returns:
            The ``Counter`` of word frequencies, or whatever
            ``self.word_freq`` currently holds (``None`` on a fresh
            instance) when the machine stopped before reaching DONE.
        """
        handlers = {
            'IDLE': self.transition_to_read_file,
            'WORDS_SPLIT': self.transition_to_split_words,
            'CALCULATE_FREQ': self.transition_to_calculate_freq,
        }
        # ERROR is terminal: without it a failed step would leave the state
        # unchanged and this loop would spin forever.
        while self.state not in ('DONE', 'ERROR'):
            handler = handlers.get(self.state)
            if handler is None:
                print(f"未知状态: {self.state}")
                break
            handler()

        return self.word_freq
 | 
				
			||||
 | 
				
			||||
# Use the state machine to compute word frequencies.
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
state_machine = WordFrequencyStateMachine(util.testfilepath)
word_frequencies = state_machine.run()

# Print the ten most common words. run() returns None when it stops before
# producing a result, so guard against calling most_common on None.
if word_frequencies is not None:
    for word, freq in word_frequencies.most_common(10):
        print(f"{word}: {freq}")
 | 
				
			||||
@ -0,0 +1,33 @@
 | 
				
			||||
# -*- coding: utf-8 -*-
 | 
				
			||||
import cppy.cp_util as util
 | 
				
			||||
 | 
				
			||||
# Each column pairs a data slot with the formula that fills it. The first
# column holds the input data, so it has no formula.
all_words = [(), None]
non_stop_words = [(), util.extract_str_words]
frequencies = [(), util.get_frequencies]
sorted_data = [(), util.sort_dict]

# The whole spreadsheet, in evaluation order.
all_columns = [
    all_words,
    non_stop_words,
    frequencies,
    sorted_data,
]
 | 
				
			||||
 | 
				
			||||
# Call this after every change to the input data.
def update():
    """Recompute each formula column from the column that feeds it.

    Columns are visited in ``all_columns`` order, skipping the first
    (input) column. A column whose formula matches none of the known
    formulas is left untouched.
    """
    # Pairs each known formula with the column whose value is its argument.
    wiring = (
        (util.extract_str_words, all_words),
        (util.get_frequencies, non_stop_words),
        (util.sort_dict, frequencies),
    )
    for column in all_columns[1:]:
        for formula, source in wiring:
            if column[1] == formula:
                column[0] = column[1](source[0])
                break
 | 
				
			||||
 | 
				
			||||
# Load the fixed input data into the first column.
all_words[0] = util.read_file(util.testfilepath)

# Run update() to recompute the remaining columns.
update()

# Print the result.
util.print_word_freqs(sorted_data[0])
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
					Loading…
					
					
				
		Reference in new issue