diff --git a/一盘大棋/A01.py b/一盘大棋/A01.py index ec7c88a..18f4d69 100644 --- a/一盘大棋/A01.py +++ b/一盘大棋/A01.py @@ -1,5 +1,5 @@ import string -from cppy.cp_util import * +from cppy.cp_util import stopwordfilepath,testfilepath # 准备词和停用词表 word_freqs = [] diff --git a/一盘大棋/A02.py b/一盘大棋/A02.py index 4870e2a..df26455 100644 --- a/一盘大棋/A02.py +++ b/一盘大棋/A02.py @@ -1,4 +1,5 @@ -from cppy.cp_util import * +from cppy.cp_util import stopwordfilepath,testfilepath +import string from collections import Counter # 准备词和停用词表 diff --git a/一盘大棋/A03.py b/一盘大棋/A03.py index 15b6479..0a3f462 100644 --- a/一盘大棋/A03.py +++ b/一盘大棋/A03.py @@ -1,11 +1,10 @@ -import re, sys, collections -from cppy.cp_util import * +import re, collections +from cppy.cp_util import stopwordfilepath,testfilepath stopwords = set(open( stopwordfilepath,encoding = 'utf8' ).read().split(',')) words = re.findall('[a-z]{2,}', open( testfilepath,encoding = 'utf8').read().lower()) counts = collections.Counter(w for w in words if w not in stopwords) -for (w, c) in counts.most_common(10): - print(w, '-', c) +for (w, c) in counts.most_common(10) : print(w, '-', c) ''' 熟练的软件工程师,会如此简单完成任务 diff --git a/交互/终端/终端命令行/command_line.py b/交互/终端/终端命令行/command_line.py new file mode 100644 index 0000000..9f85b53 --- /dev/null +++ b/交互/终端/终端命令行/command_line.py @@ -0,0 +1,48 @@ +import re +from collections import Counter + +# 清洗文本,移除标点符号并转换为小写 +def clean_text(text): + return re.sub(r'[^\w\s]', '', text).lower() + +# 统计词频 +def count_frequencies(text): + return Counter(word for word in clean_text(text).split()) + +# 交互式提示用户输入文件路径和前n个单词的数量 +def interactive_mode(): + file_path = input("请输入文件路径: ") + try: + n = int(input("请输入你想要输出的前n个最常见单词的数量: ")) + if n <= 0: + raise ValueError("数量必须大于0。") + except ValueError as e: + print(f"输入错误:{e}") + return + + try: + # 打开文件并读取内容 + with open(file_path, 'r', encoding='utf-8') as file: + text = file.read() + + # 统计词频 + frequencies = count_frequencies(text) + + # 获取前n个最常见的单词 + most_common = frequencies.most_common(n) + + # 输出结果 + for word, freq in most_common: + print(f"{word}: {freq}") + except FileNotFoundError: + print(f"文件未找到: {file_path}") + except Exception as e: + print(f"发生错误: {e}") + +# 主函数 +def main(): + print("欢迎使用词频统计工具。") + interactive_mode() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/基本结构/031 对象化/A22.py b/基本结构/031 对象化/A22.py index 43dba89..629cfaf 100644 --- a/基本结构/031 对象化/A22.py +++ b/基本结构/031 对象化/A22.py @@ -1,7 +1,7 @@ from cppy.cp_util import * def extract_words(obj, path_to_file): - obj['data'] = re_split( read_file(path_to_file) ) + obj['data'] = extract_file_words(path_to_file) def load_stop_words(obj): obj['stop_words'] = get_stopwords() diff --git a/基本结构/032 对象接口/tf-14A.py b/工程化/对象接口/tf-14A.py similarity index 100% rename from 基本结构/032 对象接口/tf-14A.py rename to 工程化/对象接口/tf-14A.py diff --git a/基本结构/032 对象接口/tf-14B.py b/工程化/对象接口/tf-14B.py similarity index 100% rename from 基本结构/032 对象接口/tf-14B.py rename to 工程化/对象接口/tf-14B.py diff --git a/计算设备/缓存/84.py b/计算设备/缓存/84.py index 592653d..8c57113 100644 --- a/计算设备/缓存/84.py +++ b/计算设备/缓存/84.py @@ -23,9 +23,9 @@ top_10_words = calculate_word_frequency(testfilepath) print_word_freqs(top_10_words) ''' -python 提供了一种缓存调用函数的机制 -import functools - +Python 提供了一个缓存调用函数的装饰器 +import functools + # 使用 functools.lru_cache 缓存结果 @functools.lru_cache(maxsize=None) def calculate_word_frequency(file_path):