zj3D 8 months ago
parent c8946209bf
commit ebe28f7670

@ -0,0 +1,36 @@
# NOTE: the star import stays first so the explicit imports below keep taking
# precedence over any same-named symbols it may export.
from cppy.cp_util import *

import re
from collections import Counter
from dataclasses import dataclass
from typing import Optional
@dataclass
class WordFrequency:
    """Count word frequencies in a text, ignoring stop words and short tokens.

    Attributes:
        text: The raw text to analyse.
        stop_words: Words excluded from the count. When left as ``None`` it is
            replaced by the value returned from ``get_stopwords()`` right
            after initialisation.
    """

    text: str
    # ``None`` is a legal value here (it means "use the default list"), so the
    # annotation must say so rather than claim a plain ``set``.
    stop_words: Optional[set] = None

    def __post_init__(self):
        # No stop-word collection was supplied: fall back to get_stopwords().
        if self.stop_words is None:
            self.stop_words = get_stopwords()

    def tokenize(self):
        """Return the lower-cased words of ``text`` that survive filtering.

        A word is kept when it is longer than two characters and is not
        contained in ``stop_words``. Order and duplicates are preserved.
        """
        words = re.findall(r'\b\w+\b', self.text.lower())
        # Cheap length test first, membership test second; the result is the
        # same as testing them in the opposite order.
        return [w for w in words if len(w) > 2 and w not in self.stop_words]

    def get_top_n(self, n=10):
        """Return the ``n`` most common kept words as ``(word, count)`` pairs."""
        return Counter(self.tokenize()).most_common(n)
# Usage example
if __name__ == '__main__':
    # Load the input text via read_file() and build a WordFrequency from it.
    text = read_file()
    word_freq = WordFrequency(text)
    # Fetch the most frequent words (get_top_n defaults to 10) and print them.
    top_words = word_freq.get_top_n()
    print_word_freqs(top_words)

@ -19,9 +19,9 @@ class WordFrequencyControllerFactory():
def get_WordFrequencyController(self, number,testfilepath):
    """Return the controller stored under ``number``, creating it on first use.

    Args:
        number: Key used to look the object up in ``self.types``.
        testfilepath: Passed to ``WordFrequencyController`` only when a new
            object has to be created for ``number``.

    Returns:
        The object held in ``self.types[number]``.
    """
    if number not in self.types:
        # First request for this key: create a new object and remember it.
        self.types[number] = WordFrequencyController(number, testfilepath)
        # Only the newer message format is kept; the superseded print that
        # sat beside it would have logged every call twice.
        print('new obj: ','*'*30,number)
    else:
        print('ref obj: ','*'*30,number)
    return self.types[number]  # reuse the already existing object
@ -29,5 +29,6 @@ if __name__ == "__main__":
factory = WordFrequencyControllerFactory()
for number in [ 1,3,5,3,5,7 ]:
WordFrequency = factory.get_WordFrequencyController(number,testfilepath)
# print(flush=True)
WordFrequency.print_word_freqs()

@ -16,8 +16,6 @@ class WordFrequencyObserver(Observer):
def update(self, word):
    """Record one more occurrence of ``word`` in ``self.word_count``."""
    # NOTE(review): relies on word_count supplying 0 for unseen keys
    # (e.g. a collections.Counter) -- confirm against the class initialiser.
    self.word_count[word] += 1
def get_top_n(self,n):
    """Return the result of ``self.word_count.most_common(n)``."""
    return self.word_count.most_common(n)
# 定义主题类
class WordSubject:
@ -48,7 +46,7 @@ def main(testfilepath, top_n = 10 ):
subject.notify(word) # 触发
# 打印最高的N个词频
top_words = observer.get_top_n(top_n)
top_words = observer.word_count.most_common(top_n)
print_word_freqs(top_words)

Loading…
Cancel
Save