forked from p46318075/CodePattern
parent
e779762530
commit
df34765748
@ -0,0 +1,56 @@
|
|||||||
|
import cppy.cp_util as util
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
class WordFrequencyStateMachine:
    """Compute word frequencies of a text file via an explicit state machine.

    The machine advances through the states
    ``'IDLE' -> 'WORDS_SPLIT' -> 'CALCULATE_FREQ' -> 'DONE'``.
    Any transition that cannot complete moves the machine to the terminal
    ``'ERROR'`` state so that :meth:`run` always terminates instead of
    retrying the failing step forever.
    """

    # States in which run() must stop looping.
    _TERMINAL_STATES = ('DONE', 'ERROR')

    def __init__(self, file_path):
        """Store the path of the file to analyse and reset all results.

        Args:
            file_path: Path of the text file, opened later as UTF-8.
        """
        self.file_path = file_path
        self.content = None     # raw text of the file, set after reading
        self.words = None       # result of util.extract_str_words(content)
        self.word_freq = None   # Counter of words, set in the last step
        self.state = 'IDLE'

    def transition_to_read_file(self):
        """Read the file into ``self.content`` and advance to 'WORDS_SPLIT'.

        On any failure a message is printed and the state becomes 'ERROR'.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                self.content = file.read()
        except FileNotFoundError:
            print(f"文件 {self.file_path} 未找到。")
            self.state = 'ERROR'
        except Exception as e:
            # Deliberate best-effort: report the problem rather than crash.
            print(f"读取文件时发生错误: {e}")
            self.state = 'ERROR'
        else:
            self.state = 'WORDS_SPLIT'

    def transition_to_split_words(self):
        """Split ``self.content`` into words and advance to 'CALCULATE_FREQ'.

        If no content has been read, a message is printed and the state
        becomes 'ERROR'.
        """
        if self.content is None:
            print("文件内容为空,无法分割单词。")
            self.state = 'ERROR'
            return
        self.words = util.extract_str_words(self.content)
        self.state = 'CALCULATE_FREQ'

    def transition_to_calculate_freq(self):
        """Count ``self.words`` into ``self.word_freq`` and advance to 'DONE'.

        If no word list is available, a message is printed and the state
        becomes 'ERROR'.
        """
        if self.words is None:
            print("单词列表为空,无法计算词频。")
            self.state = 'ERROR'
            return
        self.word_freq = Counter(self.words)
        self.state = 'DONE'

    def run(self):
        """Drive the machine until it reaches a terminal state.

        Returns:
            The ``Counter`` of word frequencies. If the machine ended in the
            'ERROR' state, an empty ``Counter`` is returned so callers can
            still use the result uniformly; inspect ``self.state`` to tell
            failure apart from an empty file.
        """
        handlers = {
            'IDLE': self.transition_to_read_file,
            'WORDS_SPLIT': self.transition_to_split_words,
            'CALCULATE_FREQ': self.transition_to_calculate_freq,
        }
        while self.state not in self._TERMINAL_STATES:
            handler = handlers.get(self.state)
            if handler is None:
                print(f"未知状态: {self.state}")
                break
            handler()

        if self.state == 'ERROR':
            return Counter()
        return self.word_freq
|
||||||
|
|
||||||
|
# Compute word frequencies by running the state machine on the sample file.
state_machine = WordFrequencyStateMachine(util.testfilepath)
word_frequencies = state_machine.run()

# Print the ten most common words with their counts.
for word, freq in word_frequencies.most_common(10):
    print(f"{word}: {freq}")
|
Loading…
Reference in new issue