zj3D 9 months ago
parent e5dc492333
commit 2bfeabe429

@ -0,0 +1,45 @@
import sys, collections
from cppy.cp_util import *
class WordFrequenciesModel:
    """Model: holds the word counts of the most recently loaded file."""

    def __init__(self, path_to_file):
        """Create the model and load `path_to_file` immediately."""
        self.update(path_to_file)

    def update(self, path_to_file):
        """Recount the words of `path_to_file` and store them in `self.freqs`.

        When reading fails with an I/O error a notice is printed and
        `self.freqs` becomes an empty dict.
        """
        try:
            counted = collections.Counter(extract_file_words(path_to_file))
        except OSError:  # IOError is an alias of OSError on Python 3
            print("File not found")
            counted = {}
        self.freqs = counted
class WordFrequenciesView:
    """View: presents the data held by a model."""

    def __init__(self, model):
        """Remember the model whose `freqs` attribute will be rendered."""
        self._model = model

    def render(self):
        """Sort the model's current frequencies and print them."""
        print_word_freqs(sort_dict(self._model.freqs))
class WordFrequencyController:
    """Controller: connects a model to a view and drives the prompt loop."""

    def __init__(self, model, view):
        """Store the collaborators and render the model's initial state once."""
        self._model, self._view = model, view
        view.render()

    def run(self):
        """Repeatedly read a file path from stdin, reload the model and re-render.

        The loop ends when the user enters 'q' (any case) or when stdin
        reaches end-of-file.
        """
        while True:
            # The prompt goes to stderr so it does not mix with the rendered output.
            print("Enter the file path (or 'q' to quit): ", file=sys.stderr, flush=True)
            line = sys.stdin.readline()
            if not line:
                # readline() returns '' only at EOF; without this check the
                # loop would spin forever on a closed or piped stdin.
                break
            filename = line.strip()
            if filename.lower() == 'q':
                break
            self._model.update(filename)
            self._view.render()
# Wire up the model, view and controller for the sample file, then start the prompt loop.
# NOTE(review): testfilepath is not defined in this file; presumably it is
# provided by the cppy.cp_util star import - confirm.
m = WordFrequenciesModel( testfilepath )
v = WordFrequenciesView(m)
c = WordFrequencyController(m, v)  # the constructor already renders once
c.run()

@ -0,0 +1,2 @@
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
know. sure "

@ -0,0 +1,65 @@
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QTextEdit, QFileDialog
import cppy.cp_util as util
# 工具函数
def extract_words(path_to_file):
    """Return whatever `util.extract_words` produces for `path_to_file`."""
    words = util.extract_words(path_to_file)
    return words
def frequencies(word_list):
    """Return the frequency data `util.get_frequencies` computes for `word_list`."""
    counts = util.get_frequencies(word_list)
    return counts
def sort(word_freq):
    """Return `word_freq` ordered by `util.sort_dict`."""
    ordered = util.sort_dict(word_freq)
    return ordered
class MenuApp(QWidget):
    """Small Qt window that shows the top word frequencies of a chosen file."""

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Create the widgets, stack them vertically and connect the button signals."""
        self.setWindowTitle('终端菜单')
        self.setGeometry(100, 100, 400, 300)

        # Buttons
        self.openFileBtn = QPushButton('上传并打开文件', self)
        self.continueBtn = QPushButton('继续', self)
        self.exitBtn = QPushButton('退出', self)

        # Read-only output area
        self.textEdit = QTextEdit(self)
        self.textEdit.setReadOnly(True)

        # Layout: three buttons on top, the output area below
        layout = QVBoxLayout()
        layout.addWidget(self.openFileBtn)
        layout.addWidget(self.continueBtn)
        layout.addWidget(self.exitBtn)
        layout.addWidget(self.textEdit)
        self.setLayout(layout)

        # Connect signals to slots
        self.openFileBtn.clicked.connect(self.openFile)
        self.continueBtn.clicked.connect(self.clearText)
        self.exitBtn.clicked.connect(self.close)

    def openFile(self):
        """Ask for a file and show its ten most frequent words as 'word-count' lines."""
        options = QFileDialog.Options()
        fileName, _ = QFileDialog.getOpenFileName(self, "上传并打开文件", "", "All Files (*);;Text Files (*.txt)", options=options)
        if not fileName:
            # The dialog was cancelled: there is no file to process, and
            # passing the empty path on would only raise.
            return
        word_freqs = sort( frequencies(extract_words( fileName )) )
        # Build the text in one pass instead of repeated string concatenation.
        s = ''.join(w + '-' + str(c) + '\n' for (w, c) in word_freqs[ :10 ])
        self.textEdit.setText(s)

    def clearText(self):
        """Empty the output area."""
        self.textEdit.clear()
# Start the Qt application only when this file is run as a script.
if __name__ == '__main__':
    app = QApplication(sys.argv)
    ex = MenuApp()
    ex.show()
    # The value returned by app.exec_() becomes the process exit status.
    sys.exit(app.exec_())

@ -0,0 +1,2 @@
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
know. sure "

@ -0,0 +1,42 @@
import os
import cppy.cp_util as util
# 工具函数
def extract_words(path_to_file):
    """Return whatever `util.extract_words` produces for `path_to_file`."""
    words = util.extract_words(path_to_file)
    return words
def frequencies(word_list):
    """Return the frequency data `util.get_frequencies` computes for `word_list`."""
    counts = util.get_frequencies(word_list)
    return counts
def sort(word_freq):
    """Return `word_freq` ordered by `util.sort_dict`."""
    ordered = util.sort_dict(word_freq)
    return ordered
def print_menu():
    """Print the menu: a blank line, a heading and the three numbered options."""
    menu_lines = (
        "\n菜单选项:",
        "1. 上传并处理文件",
        "2. 继续",
        "3. 退出",
    )
    for line in menu_lines:
        print(line)
def open_and_print_file():
    """Prompt for a file name and print that file's sorted word frequencies.

    A file that cannot be read is reported instead of raising, so a typo in
    the file name does not terminate the surrounding menu loop.
    """
    filename = input("请输入文件名:")
    try:
        word_freqs = sort( frequencies(extract_words( filename )) )
    except OSError as err:
        # Covers a missing file, a directory, missing permissions, ...
        print(f"无法读取文件 {filename}: {err}")
        return
    util.print_word_freqs(word_freqs)
def main():
    """Run the menu loop until the user chooses option 3."""
    running = True
    while running:
        print_menu()
        choice = input("请选择一个选项1/2/3")
        if choice == '3':
            print("退出程序。")
            running = False
        elif choice == '1':
            open_and_print_file()
        elif choice != '2':
            # Option 2 ("continue") simply shows the menu again.
            print("无效的输入,请重新输入。")
# Run the interactive menu only when this file is executed as a script.
if __name__ == "__main__":
    main()

@ -0,0 +1,49 @@
import string
from collections import Counter
from cppy.cp_util import *
# Shared mutable state used by the procedures below
data = []  # characters of the input file, filled by read_file()
words = []  # tokens, filled and filtered by filter_chars_and_normalize()
word_freqs = []  # (word, count) pairs, written by frequencies() and sort()
################################
# procedures
################################
def read_file(path_to_file):
    """Append every character of the UTF-8 file `path_to_file` to the global `data`."""
    global data
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read()
    data = data + list(text)
def filter_chars_and_normalize():
    """Turn the characters in `data` into the filtered word list `words`.

    Non-alphanumeric characters in `data` are replaced in place by spaces and
    the rest is lower-cased; the resulting tokens are appended to `words`,
    which is then stripped of stop words and single lowercase ASCII letters.
    """
    global data, words
    for idx, ch in enumerate(data):
        data[idx] = ch.lower() if ch.isalnum() else ' '
    words = words + ''.join(data).split()
    with open(stopwordfilepath) as handle:
        excluded = set(handle.read().split(','))
    excluded.update(string.ascii_lowercase)
    words = list(filter(lambda token: token not in excluded, words))
def frequencies():
    """Append one (word, 1) pair to the global `word_freqs` for every entry of `words`."""
    global words, word_freqs
    for word in words:
        word_freqs.append((word, 1))
def sort():
    """Rebind the global `word_freqs` to the (word, count) pairs of `words`, most frequent first."""
    global word_freqs
    tally = Counter(words)
    word_freqs = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
# Run the four procedures in order, then print the ten most frequent words.
if __name__ == "__main__":
    read_file(testfilepath)
    filter_chars_and_normalize()
    frequencies()
    sort()
    for word, count in word_freqs[:10]:
        print(word, '-', count)

@ -0,0 +1,32 @@
import re
from cppy.cp_util import *
def filter_chars_and_normalize(str_data):
    """Return the lower-cased words of `str_data` that are not stop words.

    Every run of non-word characters and underscores is treated as a separator.
    """
    # Raw string: '\W' in a normal literal is an invalid escape sequence,
    # which newer Python versions warn about.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]
def frequencies(word_list):
    """Return a dict mapping each word of `word_list` to its number of occurrences.

    Keys appear in the order in which the words are first seen.
    """
    counts = {}
    for token in word_list:
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1
    return counts
def sort(word_freq):
    """Return the (word, count) pairs of `word_freq`, highest count first.

    Pairs with equal counts keep the order they have in the mapping.
    """
    pairs = list(word_freq.items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
def print_all(word_freqs, n = 10 ):
    """Print the first `n` (word, count) pairs of `word_freqs` as 'word - count' lines."""
    for pair in word_freqs[:n]:
        word, freq = pair
        print(word, '-', freq)
# Pipeline: read -> filter/normalize -> count -> sort -> print the top entries.
if __name__ == "__main__":
    text = read_file(testfilepath)
    kept_words = filter_chars_and_normalize(text)
    ranked = sort(frequencies(kept_words))
    print_all(ranked)

@ -0,0 +1,39 @@
import re, operator
from cppy.cp_util import *
def print_text(word_freqs, func):
    """Print `word_freqs`, then call the continuation `func` with None."""
    print_word_freqs(word_freqs)
    nothing = None
    func(nothing)
def frequencies(word_list, func):
    """Count `word_list` and pass the result to `func`, with print_text as the next step."""
    func(get_frequencies(word_list), print_text)
def scan(str_data, func):
    """Split `str_data` on whitespace and pass the tokens to `func`, with frequencies as the next step."""
    tokens = str_data.split()
    func(tokens, frequencies)
def filter_chars(str_data, func):
    """Replace separators in `str_data` by spaces and pass the text to `func`.

    Every run of non-word characters and underscores becomes one space;
    `scan` is handed on as the next step.
    """
    # Raw string: '\W' in a normal literal is an invalid escape sequence,
    # which newer Python versions warn about.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)
def remove_stop_words(word_list, func):
    """Drop the stop words from `word_list` and pass the rest to `func`, with sort as the next step."""
    stop_words = get_stopwords()
    kept = [w for w in word_list if w not in stop_words]
    func(kept, sort)
def sort(wf, func):
    """Order the items of `wf` by count, highest first, and pass them to `func` with no_op as the next step."""
    ranked = list(wf.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    func(ranked, no_op)
def no_op(func):
    """End of the continuation chain: ignore `func` and do nothing."""
    return None
def normalize(str_data, func):
    """Lower-case `str_data` and pass it to `func`, with remove_stop_words as the next step."""
    lowered = str_data.lower()
    func(lowered, remove_stop_words)
def read_file(path_to_file, func):
    """Read the UTF-8 file `path_to_file` and pass its text to `func`, with normalize as the next step."""
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read()
    func(text, normalize)
# Start the continuation chain: read_file hands its result to filter_chars,
# and every step passes the next function along.
if __name__ == "__main__":
    read_file(testfilepath, filter_chars)

@ -0,0 +1,25 @@
import re
from collections import Counter
from cppy.cp_util import *
# Read the file
with open(testfilepath,encoding='utf-8') as f:
    data = f.read().lower()  # lower-case right away

# Replace every run of non-word characters and underscores with one space.
# Raw string: '\W' in a normal literal is an invalid escape sequence.
data = re.sub(r'[\W_]+', ' ', data)

# Tokenize
words = data.split()

# Remove stop words
stop_words = get_stopwords()
words = [word for word in words if word not in stop_words]

# Count the words
word_freqs = Counter(words)

# Sort by count, highest first, and print
sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
print_word_freqs(sorted_word_freqs)

@ -0,0 +1,53 @@
from collections import Counter
from cppy.cp_util import *
class DataStorageManager:
    """Data model: loads a file and keeps the result of splitting its content."""

    def __init__(self, path_to_file):
        """Read `path_to_file` and store what `re_split` returns for its content."""
        self._data = re_split(read_file(path_to_file))

    def words(self):
        """Return the stored split result."""
        return self._data
class StopWordManager:
    """Stop-word model: answers whether a word is a stop word."""

    def __init__(self):
        """Load the stop words once."""
        self._stop_words = get_stopwords()

    def is_stop_word(self, word):
        """Return True when `word` is among the loaded stop words."""
        known = self._stop_words
        return word in known
class WordFrequencyManager:
    """Word-frequency model: counts words and reports them by frequency."""

    def __init__(self):
        """Start with an empty counter."""
        self._word_freqs = Counter()

    def increment_count(self, word):
        """Add one occurrence of `word`."""
        self._word_freqs[word] = self._word_freqs[word] + 1

    def sorted(self):
        """Return (word, count) pairs, most frequent first; ties keep first-seen order."""
        pairs = list(self._word_freqs.items())
        pairs.sort(key=lambda pair: pair[1], reverse=True)
        return pairs
class WordFrequencyController:
    """Application object: counts the non-stop words of a file and prints them."""

    def __init__(self, path_to_file):
        """Create the storage, stop-word and frequency managers for `path_to_file`."""
        self._storage_manager = DataStorageManager(path_to_file)
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()

    def run(self):
        """Count every stored word that is not a stop word, then print the sorted result."""
        kept = (w for w in self._storage_manager.words()
                if not self._stop_word_manager.is_stop_word(w))
        for w in kept:
            self._word_freq_manager.increment_count(w)
        print_word_freqs(self._word_freq_manager.sorted())
# Run the word count for the sample file when executed as a script.
if __name__ == '__main__':
    WordFrequencyController(testfilepath).run()

@ -0,0 +1,41 @@
from cppy.cp_util import *
def extract_words(obj, path_to_file):
    """Store in `obj['data']` what `re_split` returns for the content of `path_to_file`."""
    content = read_file(path_to_file)
    obj['data'] = re_split(content)
def load_stop_words(obj):
    """Store the stop words in `obj['stop_words']`."""
    loaded = get_stopwords()
    obj['stop_words'] = loaded
def increment_count(obj, w):
    """Add one occurrence of `w` to the counts kept in `obj['freqs']`."""
    counts = obj['freqs']
    counts[w] = counts.get(w, 0) + 1
# Dict used as an object: state plus lambdas that act as its methods.
# The lambdas look up data_storage_obj by its global name when they are called.
data_storage_obj = {
    'data' : [],  # replaced by extract_words() when 'init' is called
    'init' : lambda path_to_file : extract_words(data_storage_obj, path_to_file),
    'words' : lambda : data_storage_obj['data']
}
# Dict used as an object: holds the stop words and a membership test.
stop_words_obj = {
    'stop_words' : [],  # replaced by load_stop_words() when 'init' is called
    'init' : lambda : load_stop_words(stop_words_obj),
    'is_stop_word' : lambda word : word in stop_words_obj['stop_words']
}
# Dict used as an object: holds the word counts and exposes counting and sorting.
word_freqs_obj = {
    'freqs' : {},  # word -> count, updated by increment_count()
    'increment_count' : lambda w : increment_count(word_freqs_obj, w),
    'sorted' : lambda : sort_dict(word_freqs_obj['freqs'])
}
# Initialise the dict objects, count the non-stop words and print the top ten.
if __name__ == '__main__':
    data_storage_obj['init'](testfilepath)
    stop_words_obj['init']()

    is_stop_word = stop_words_obj['is_stop_word']
    count_word = word_freqs_obj['increment_count']
    for w in data_storage_obj['words']():
        if is_stop_word(w):
            continue
        count_word(w)

    word_freqs = word_freqs_obj['sorted']()
    for w, c in word_freqs[:10]:
        print(w, '-', c)

@ -0,0 +1,59 @@
import re
from collections import Counter
from cppy.cp_util import *
class DataStorageManager1:
    """Storage variant that splits the file content with `re_split`."""

    def __init__(self, path_to_file):
        """Read `path_to_file` and keep the result of `re_split` on its content."""
        content = read_file(path_to_file)
        self._data = re_split(content)

    def words(self):
        """Return the stored split result."""
        return self._data
class DataStorageManager2:
    """Storage variant that keeps every run of two or more lowercase ASCII letters."""

    def __init__(self, path_to_file):
        """Read `path_to_file` and keep all matches of '[a-z]{2,}' in its content."""
        content = read_file(path_to_file)
        self._data = re.findall('[a-z]{2,}', content)

    def words(self):
        """Return the list of matched words."""
        return self._data
class StopWordManager:
    """Answers whether a word is a stop word."""

    def __init__(self):
        """Load the stop words into a set."""
        loaded = get_stopwords()
        self._stop_words = set(loaded)

    def is_stop_word(self, word):
        """Return True when `word` is in the stop-word set."""
        known = self._stop_words
        return word in known
class WordFrequencyManager:
    """Counts words and reports them by frequency."""

    def __init__(self):
        """Start with an empty counter in `self.word_freqs`."""
        self.word_freqs = Counter()

    def increment_count(self, word):
        """Add one occurrence of `word`."""
        self.word_freqs[word] = self.word_freqs[word] + 1

    def sorted(self):
        """Return (word, count) pairs, most frequent first; ties keep first-seen order."""
        pairs = list(self.word_freqs.items())
        pairs.sort(key=lambda pair: pair[1], reverse=True)
        return pairs
#
# 应用类
#
class WordFrequencyController:
    """Application object: counts the non-stop words of a file and prints them."""

    def __init__(self, path_to_file):
        """Create the collaborators for `path_to_file`.

        DataStorageManager1 offers the same words() method and can be
        swapped in for DataStorageManager2 here.
        """
        self.storage = DataStorageManager2(path_to_file)
        self.stop_word_manager = StopWordManager()
        self.word_freq_counter = WordFrequencyManager()

    def run(self):
        """Count every stored word that is not a stop word, then print the sorted result.

        Only the methods words(), is_stop_word(), increment_count() and
        sorted() of the collaborators are used.
        """
        for word in self.storage.words():
            if self.stop_word_manager.is_stop_word(word):
                continue
            self.word_freq_counter.increment_count(word)
        ranked = self.word_freq_counter.sorted()
        print_word_freqs(ranked)
# Run the word count for the sample file when executed as a script.
if __name__ == '__main__':
    WordFrequencyController(testfilepath).run()

@ -0,0 +1,92 @@
import abc, re
from cppy.cp_util import *
#
# 接口
#
class IDataStorage (metaclass=abc.ABCMeta):
    """Interface for objects that provide the words to analyse."""

    @abc.abstractmethod
    def words(self):
        """Return the stored words."""
class IStopWordFilter (metaclass=abc.ABCMeta):
    """Interface for objects that decide whether a word is a stop word."""

    @abc.abstractmethod
    def is_stop_word(self, word):
        """Tell whether `word` is a stop word."""
class IWordFrequencyCounter(metaclass=abc.ABCMeta):
    """Interface for objects that count words and report them in sorted order."""

    @abc.abstractmethod
    def increment_count(self, word):
        """Record one occurrence of `word`."""

    @abc.abstractmethod
    def sorted(self):
        """Return the recorded counts in sorted order."""
#
# 类实现
#
class DataStorageManager1:
    """Storage variant that splits the file content with `re_split`."""

    def __init__(self, path_to_file):
        """Read `path_to_file` and keep the result of `re_split` on its content."""
        content = read_file(path_to_file)
        self._data = re_split(content)

    def words(self):
        """Return the stored split result."""
        return self._data
class DataStorageManager2:
    """Storage variant that keeps every run of two or more lowercase ASCII letters."""

    def __init__(self, path_to_file):
        """Read `path_to_file` and keep all matches of '[a-z]{2,}' in its content."""
        content = read_file(path_to_file)
        self._data = re.findall('[a-z]{2,}', content)

    def words(self):
        """Return the list of matched words."""
        return self._data
class StopWordManager:
    """Answers whether a word is a stop word."""

    def __init__(self):
        """Load the stop words once."""
        self._stop_words = get_stopwords()

    def is_stop_word(self, word):
        """Return True when `word` is among the loaded stop words."""
        known = self._stop_words
        return word in known
class WordFrequencyManager:
    """Counts words in a plain dict and reports them through `sort_dict`."""

    def __init__(self):
        """Start with an empty word -> count mapping."""
        self._word_freqs = {}

    def increment_count(self, word):
        """Add one occurrence of `word`."""
        counts = self._word_freqs
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1

    def sorted(self):
        """Return what `sort_dict` produces for the current counts."""
        return sort_dict(self._word_freqs)
#
# Register the concrete classes as virtual subclasses of the interfaces.
# This step is optional.
#
# IDataStorage.register(DataStorageManager1)
IDataStorage.register(DataStorageManager2)
IStopWordFilter.register(StopWordManager)
IWordFrequencyCounter.register(WordFrequencyManager)
#
# 应用类
#
class WordFrequencyController:
    """Application object: counts the non-stop words of a file and prints them."""

    def __init__(self, path_to_file):
        """Create the collaborators for `path_to_file`.

        DataStorageManager1 offers the same words() method and can be
        swapped in for DataStorageManager2 here.
        """
        self.storage = DataStorageManager2(path_to_file)
        self.stop_word_manager = StopWordManager()
        self.word_freq_counter = WordFrequencyManager()

    def run(self):
        """Count every stored word that is not a stop word, then print the sorted result.

        Only the methods words(), is_stop_word(), increment_count() and
        sorted() of the collaborators are used.
        """
        for word in self.storage.words():
            if self.stop_word_manager.is_stop_word(word):
                continue
            self.word_freq_counter.increment_count(word)
        ranked = self.word_freq_counter.sorted()
        print_word_freqs(ranked)
# Run the word count for the sample file when executed as a script.
if __name__ == '__main__':
    WordFrequencyController(testfilepath).run()

@ -0,0 +1,5 @@
[Plugins]
;; Options: plugins/words1.pyc, plugins/words2.pyc
words = plugins/words1.pyc
;; Options: plugins/frequencies1.pyc, plugins/frequencies2.pyc
frequencies = plugins/frequencies1.pyc

@ -0,0 +1,2 @@
# Compile the plugin sources and publish the bytecode to ../plugins.
# -b writes legacy-named files next to the sources (words1.pyc, ...), which is
# what the plugins/*.pyc entries in config.ini refer to. Without it the files
# land in __pycache__ with interpreter-tagged names such as
# words1.cpython-311.pyc.
python -m compileall -b .
cp *.pyc ../plugins

@ -0,0 +1,11 @@
import operator
def top25(word_list):
    """Return the most frequent words of `word_list` as (word, count) pairs.

    NOTE: despite the name, at most ten pairs are returned. Pairs are ordered
    by count, highest first; words with equal counts keep first-seen order.
    """
    tally = {}
    for w in word_list:
        tally[w] = tally.get(w, 0) + 1
    ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:10]

@ -0,0 +1,6 @@
import operator, collections
def top25(word_list):
    """Return the most frequent words of `word_list` as (word, count) pairs.

    NOTE: despite the name, at most ten pairs are returned, most frequent first.
    """
    counts = collections.Counter()
    for w in word_list:
        counts[w] += 1
    return counts.most_common(10)

@ -0,0 +1,14 @@
import sys, re, string
from cppy.cp_util import *
def extract_words(path_to_file):
    """Return the lower-cased words of the UTF-8 file `path_to_file` that are not stop words.

    Every run of non-word characters and underscores is treated as a separator.
    """
    with open(path_to_file,encoding='utf-8') as f:
        str_data = f.read()
    # Raw string: '\W' in a normal literal is an invalid escape sequence,
    # which newer Python versions warn about.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]

@ -0,0 +1,8 @@
import sys, re, string
from cppy.cp_util import *
def extract_words(path_to_file):
    """Return the words of the UTF-8 file `path_to_file` that are not stop words.

    A word is a run of two or more ASCII letters, taken from the lower-cased text.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path_to_file,encoding='utf-8') as f:
        text = f.read().lower()
    words = re.findall('[a-z]{2,}', text)
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]

@ -0,0 +1,21 @@
import configparser, importlib.machinery
from cppy.cp_util import *
def load_plugins():
    """Load the two plugins named in config.ini into the globals `tfwords` and `tffreqs`.

    The working directory is changed to this script's directory, so
    config.ini and the plugin paths listed in its [Plugins] section are
    resolved relative to it.
    """
    # Local imports: the original relied on `os` being re-exported by a star
    # import, and importlib.util / sys are new dependencies of this function.
    import importlib.machinery
    import importlib.util
    import os
    import sys

    global tfwords, tffreqs

    def _load_compiled(name, path):
        # Load a sourceless .pyc module without the deprecated Loader.load_module().
        loader = importlib.machinery.SourcelessFileLoader(name, path)
        spec = importlib.util.spec_from_loader(name, loader)
        module = importlib.util.module_from_spec(spec)
        # Register the module while it executes, as load_module() did, and
        # remove it again if execution fails.
        sys.modules[name] = module
        try:
            loader.exec_module(module)
        except BaseException:
            sys.modules.pop(name, None)
            raise
        return module

    config = configparser.ConfigParser()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    config.read("config.ini")
    words_plugin = config.get("Plugins", "words")
    frequencies_plugin = config.get("Plugins", "frequencies")
    tfwords = _load_compiled('tfwords', words_plugin)
    tffreqs = _load_compiled('tffreqs', frequencies_plugin)
# Load the configured plugins, then let the words plugin extract the words
# and the frequencies plugin rank them.
load_plugins()
word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath ))
# Print each (word, count) pair as 'word - count'.
for (w, c) in word_freqs:
    print(w, '-', c)

@ -0,0 +1,2 @@
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
know. sure "

@ -0,0 +1,107 @@
import re, sys
from cppy.cp_util import *
stops = get_stopwords()
data = {}
# 路径问题,测试输入 test.txt ,演示当前目录下这个文件的处理
#################################################################################
# 服务端
def error_state():
    """Return the error page text together with a link back to the default page."""
    message = "Something wrong"
    back_to_default = ["get", "default", None]
    return message, back_to_default
def quit_handler(args):
    """Terminate the program by raising SystemExit; `args` is ignored."""
    raise SystemExit(" ... ")
def upload_get_handler(args):
    """Return the upload prompt and the form link that posts to 'file'; `args` is ignored."""
    prompt = "Name of file to upload?"
    form_link = ["post", "file"]
    return prompt, form_link
def default_get_handler(args):
    """Return the start menu text and the links behind its two options; `args` is ignored."""
    rep = "".join([
        "What would you like to do?",
        "\n1 - Quit",
        "\n2 - Upload file",
    ])
    links = {}
    links["1"] = ["post", "execution", None]
    links["2"] = ["get", "file_form", None]
    return rep, links
def upload_post_handler(args):
    """Analyse the uploaded file and return the page for its most frequent word.

    `args[0]` is a file name relative to this script's directory. The sorted
    (word, count) list is cached in the global `data` under the absolute
    path. Returns the error page when `args` is None or processing fails.
    """
    # The original relied on `os` being re-exported by a star import.
    import os

    def create_data(fn):
        # Already analysed: reuse the cached result.
        if fn in data:
            return
        word_freqs = {}
        with open(fn) as f:
            for token in re.split("[^a-zA-Z]+", f.read()):
                w = token.lower()
                # Skip the empty strings the split leaves at the edges, and stop words.
                if token and w not in stops:
                    word_freqs[w] = word_freqs.get(w, 0) + 1
        data[fn] = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)

    if args is None:
        return error_state()
    filename = args[0]
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        filename = os.path.join(script_dir, filename)
        create_data(filename)
    except Exception:
        # Not a bare except: SystemExit and KeyboardInterrupt must still
        # be able to end the program.
        print("Unexpected error: %s" % sys.exc_info()[0])
        return error_state()
    return word_get_handler([filename, 0])
def word_get_handler(args):
    """Return the page for the word at rank `args[1]` of the file `args[0]`.

    The page shows the 1-based rank, the word and its count, or the
    placeholder ("no more words", 0) once the index is past the end of the
    list, and links to quit, upload another file or see the next word.
    """
    filename = args[0]
    word_index = args[1]

    if word_index < len(data[filename]):
        word_info = data[filename][word_index]
    else:
        word_info = ("no more words", 0)

    rep = "".join([
        '\n#{0}: {1} - {2}'.format(word_index+1, word_info[0], word_info[1]),
        "\n\nWhat would you like to do next?",
        "\n1 - Quit",
        "\n2 - Upload file",
        "\n3 - See next most-frequently occurring word",
    ])
    links = {}
    links["1"] = ["post", "execution", None]
    links["2"] = ["get", "file_form", None]
    links["3"] = ["get", "word", [filename, word_index+1]]
    return rep, links
# Handler registration: maps "<verb>_<uri>" keys (as built by handle_request)
# to the function that serves that request.
handlers = {"post_execution" : quit_handler,
            "get_default" : default_get_handler,
            "get_file_form" : upload_get_handler,
            "post_file" : upload_post_handler,
            "get_word" : word_get_handler }
# The "server" core
def handle_request(verb, uri, args):
    """Dispatch a request to the handler registered for `verb` and `uri`.

    Unknown verb/uri combinations are served by the "get_default" handler.
    Returns whatever the chosen handler returns.
    """
    key = verb + "_" + uri
    if key not in handlers:
        key = "get" + "_" + "default"
    return handlers[key](args)
#################################################################################
# 仿真简单的浏览器客户端动作
def render_and_get_input(state_representation, links):
    """Show a page and turn the user's reaction into the next request.

    `links` is either a dict (a menu: the typed key selects the request), a
    list (a single link: a "post" link first reads one line of form data and
    appends it as `[text]`; any other link is followed without reading
    input), or anything else, which leads back to the default page.

    Returns the next request as a list `[verb, uri, args]`. When stdin is at
    end-of-file the quit request is returned, so a closed or piped stdin
    ends the program instead of redisplaying the menu forever.
    """
    quit_request = ["post", "execution", None]  # same link as menu option "1"
    print(state_representation)
    sys.stdout.flush()
    if isinstance(links, dict):
        line = sys.stdin.readline()
        if not line:  # readline() returns '' only at EOF
            return quit_request
        choice = line.strip()
        if choice in links:
            return links[choice]
        return ["get", "default", None]
    if isinstance(links, list):
        if links[0] == "post":  # a form: read the data to submit
            line = sys.stdin.readline()
            if not line:
                return quit_request
            links.append([line.strip()])  # add the data at the end
        return links
    return ["get", "default", None]
# Request/response loop: start at the default page, then alternate between
# the "server" producing a page and the "client" turning user input into the
# next request. The loop has no exit of its own; the program ends when a
# handler exits (quit_handler calls sys.exit).
if __name__ == "__main__":
    request = ["get", "default", None]
    while True:
        state_representation, links = handle_request(*request) # "server"
        request = render_and_get_input(state_representation, links) # "client"
Loading…
Cancel
Save