parent
e5dc492333
commit
2bfeabe429
@ -0,0 +1,45 @@
|
|||||||
|
import sys, collections
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
class WordFrequenciesModel:
    """Model: holds the word counts of the most recently loaded file."""

    def __init__(self, path_to_file):
        """Load the initial counts from ``path_to_file``."""
        self.update(path_to_file)

    def update(self, path_to_file):
        """Recount the words of ``path_to_file`` and store them in ``self.freqs``.

        If the file cannot be read, a message is printed and ``self.freqs``
        becomes an empty counter.
        """
        try:
            self.freqs = collections.Counter(extract_file_words(path_to_file))
        except OSError:  # IOError is an alias of OSError in Python 3
            print("File not found")
            # Keep the same type as the success path so consumers never have
            # to distinguish a dict from a Counter.
            self.freqs = collections.Counter()
|
||||||
|
|
||||||
|
|
||||||
|
class WordFrequenciesView:
    """View: presents the data held by a model."""

    def __init__(self, model):
        """Remember the model whose ``freqs`` will be rendered."""
        self._model = model

    def render(self):
        """Sort the model's current frequencies and print them."""
        print_word_freqs(sort_dict(self._model.freqs))
|
||||||
|
|
||||||
|
|
||||||
|
class WordFrequencyController:
    """Controller: drives the interactive load-and-display loop."""

    def __init__(self, model, view):
        """Store the collaborators and render the model's initial state."""
        self._model, self._view = model, view
        view.render()

    def run(self):
        """Prompt for file paths until the user enters 'q' or stdin is exhausted.

        Each entered path is loaded into the model and the view is rendered
        again.
        """
        while True:
            print("Enter the file path (or 'q' to quit): ", file=sys.stderr, flush=True)
            line = sys.stdin.readline()
            if not line:
                # readline() returns '' only at EOF; without this check the
                # loop would spin forever once stdin is closed.
                break
            filename = line.strip()
            if filename.lower() == 'q':
                break
            self._model.update(filename)
            self._view.render()
|
||||||
|
|
||||||
|
|
||||||
|
# Wire model, view and controller together, then start the interactive loop.
m = WordFrequenciesModel(testfilepath)
v = WordFrequenciesView(m)
c = WordFrequencyController(m, v)
c.run()
|
@ -0,0 +1,2 @@
|
|||||||
|
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
|
||||||
|
know. sure "
|
@ -0,0 +1,65 @@
|
|||||||
|
import sys
|
||||||
|
from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QTextEdit, QFileDialog
|
||||||
|
import cppy.cp_util as util
|
||||||
|
|
||||||
|
# 工具函数
|
||||||
|
def extract_words(path_to_file):
    """Return whatever ``util.extract_words`` produces for ``path_to_file``."""
    words = util.extract_words(path_to_file)
    return words
|
||||||
|
|
||||||
|
def frequencies(word_list):
    """Return the result of ``util.get_frequencies`` for ``word_list``."""
    counts = util.get_frequencies(word_list)
    return counts
|
||||||
|
|
||||||
|
def sort(word_freq):
    """Return the result of ``util.sort_dict`` for ``word_freq``."""
    ordered = util.sort_dict(word_freq)
    return ordered
|
||||||
|
|
||||||
|
class MenuApp(QWidget):
    """Small window that shows the ten leading word counts of a chosen file."""

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Create the widgets, lay them out vertically and connect the signals."""
        self.setWindowTitle('终端菜单')
        self.setGeometry(100, 100, 400, 300)

        # Buttons
        self.openFileBtn = QPushButton('上传并打开文件', self)
        self.continueBtn = QPushButton('继续', self)
        self.exitBtn = QPushButton('退出', self)

        # Read-only text area for the results
        self.textEdit = QTextEdit(self)
        self.textEdit.setReadOnly(True)

        # Layout: the three buttons stacked above the text area
        layout = QVBoxLayout()
        for widget in (self.openFileBtn, self.continueBtn, self.exitBtn, self.textEdit):
            layout.addWidget(widget)
        self.setLayout(layout)

        # Connect signals to slots
        self.openFileBtn.clicked.connect(self.openFile)
        self.continueBtn.clicked.connect(self.clearText)
        self.exitBtn.clicked.connect(self.close)

    def openFile(self):
        """Ask for a file and display its first ten (word, count) pairs.

        Does nothing when the dialog is cancelled.
        """
        options = QFileDialog.Options()
        fileName, _ = QFileDialog.getOpenFileName(self, "上传并打开文件", "", "All Files (*);;Text Files (*.txt)", options=options)
        if not fileName:
            # The dialog yields an empty path when the user cancels; passing
            # that on to the word extractor would try to open ''.
            return
        word_freqs = sort(frequencies(extract_words(fileName)))
        lines = [w + '-' + str(c) + '\n' for (w, c) in word_freqs[:10]]
        self.textEdit.setText(''.join(lines))

    def clearText(self):
        """Empty the result area."""
        self.textEdit.clear()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Start the Qt event loop with a single MenuApp window and propagate the
    # loop's exit status to the shell.
    app = QApplication(sys.argv)
    ex = MenuApp()
    ex.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
|
@ -0,0 +1,2 @@
|
|||||||
|
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
|
||||||
|
know. sure "
|
@ -0,0 +1,49 @@
|
|||||||
|
import string
|
||||||
|
from collections import Counter
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
# Shared mutable state that the procedures below read and update.
data = []        # characters of the input text
words = []       # normalized words with stop words removed
word_freqs = []  # (word, count) pairs
|
||||||
|
|
||||||
|
################################
|
||||||
|
# procedures
|
||||||
|
################################
|
||||||
|
def read_file(path_to_file):
    """Append every character of the UTF-8 file to the global ``data`` list."""
    global data
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read()
    # Rebind rather than extend in place, as the procedure has always done.
    data = data + list(text)
|
||||||
|
|
||||||
|
def filter_chars_and_normalize():
    """Turn the characters in ``data`` into the filtered word list ``words``.

    Non-alphanumeric characters become spaces and everything else is
    lower-cased (in place, inside ``data``). The text is then split into
    words, and stop words and single ASCII letters are dropped.
    """
    global data
    global words
    # Slice assignment keeps ``data`` the same list object, matching the
    # element-by-element update it replaces.
    data[:] = [ch.lower() if ch.isalnum() else ' ' for ch in data]

    words = words + ''.join(data).split()

    # Read with the same explicit encoding as the input file so the result
    # does not depend on the platform's default locale.
    with open(stopwordfilepath, encoding='utf-8') as f:
        stop_words = set(f.read().split(','))
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]
|
||||||
|
|
||||||
|
def frequencies():
    """Append one ``(word, 1)`` pair to ``word_freqs`` for every entry of ``words``."""
    global words
    global word_freqs
    for word in words:
        word_freqs.append((word, 1))
|
||||||
|
|
||||||
|
def sort():
    """Replace ``word_freqs`` with (word, count) pairs ordered by falling count.

    The counts are recomputed from ``words``; whatever ``word_freqs`` held
    before is discarded.
    """
    global word_freqs
    tally = Counter(words)
    word_freqs = tally.most_common()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the procedures in order; each one works on the shared globals.
    read_file(testfilepath)
    filter_chars_and_normalize()
    frequencies()
    sort()

    # Show the ten leading entries.
    for word, count in word_freqs[:10]:
        print(word, '-', count)
|
@ -0,0 +1,32 @@
|
|||||||
|
import re
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
|
||||||
|
def filter_chars_and_normalize(str_data):
    """Return the lower-cased words of ``str_data`` that are not stop words.

    Every run of non-word characters or underscores is treated as a
    separator.
    """
    # Raw string: '\W' in a normal literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]
|
||||||
|
|
||||||
|
|
||||||
|
def frequencies(word_list):
    """Return a dict mapping each word in ``word_list`` to its occurrence count."""
    counts = {}
    for token in word_list:
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1
    return counts
|
||||||
|
|
||||||
|
|
||||||
|
def sort(word_freq):
    """Return the (word, count) pairs of ``word_freq`` ordered by falling count.

    Pairs with equal counts keep the dictionary's iteration order.
    """
    pairs = list(word_freq.items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
|
||||||
|
|
||||||
|
|
||||||
|
def print_all(word_freqs, n = 10 ):
    """Print the first ``n`` (word, count) pairs, one ``word - count`` per line."""
    leading = word_freqs[:n]
    for entry in leading:
        word, freq = entry
        print(word, '-', freq)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Pipeline: read -> normalize/filter -> count -> sort -> print.
    text = read_file(testfilepath)
    tokens = filter_chars_and_normalize(text)
    ranked = sort(frequencies(tokens))
    print_all(ranked)
|
@ -0,0 +1,39 @@
|
|||||||
|
import re, operator
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
def print_text(word_freqs, func):
    """Print ``word_freqs`` and then invoke the continuation ``func`` with None."""
    print_word_freqs(word_freqs)
    continuation = func
    continuation(None)
|
||||||
|
|
||||||
|
def frequencies(word_list, func):
    """Count ``word_list`` and hand the counts to ``func``.

    ``func`` receives the result of ``get_frequencies`` together with
    ``print_text`` as the following step.
    """
    counts = get_frequencies(word_list)
    func(counts, print_text)
|
||||||
|
|
||||||
|
def scan(str_data, func):
    """Split ``str_data`` on whitespace and pass the words on to ``func``.

    ``func`` also receives ``frequencies`` as the following step.
    """
    tokens = str_data.split()
    func(tokens, frequencies)
|
||||||
|
|
||||||
|
def filter_chars(str_data, func):
    """Replace separator runs in ``str_data`` with spaces and call ``func``.

    Every run of non-word characters or underscores becomes a single space.
    ``func`` receives the cleaned text together with ``scan`` as the
    following step.
    """
    # Raw string: '\W' in a normal literal is an invalid escape sequence.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)
|
||||||
|
|
||||||
|
def remove_stop_words(word_list, func):
    """Drop stop words from ``word_list`` and pass the rest on to ``func``.

    ``func`` also receives ``sort`` as the following step.
    """
    stop_words = get_stopwords()
    kept = [w for w in word_list if w not in stop_words]
    func(kept, sort)
|
||||||
|
|
||||||
|
def sort(wf, func):
    """Order the items of ``wf`` by falling count and pass them on to ``func``.

    ``func`` also receives ``no_op`` as the final step.
    """
    by_count = operator.itemgetter(1)
    ranked = sorted(wf.items(), key=by_count, reverse=True)
    func(ranked, no_op)
|
||||||
|
|
||||||
|
def no_op(func):
    """Do nothing; this is the last link of the continuation chain."""
    return None
|
||||||
|
|
||||||
|
def normalize(str_data, func):
    """Lower-case ``str_data`` and pass it on to ``func``.

    ``func`` also receives ``remove_stop_words`` as the following step.
    """
    lowered = str_data.lower()
    func(lowered, remove_stop_words)
|
||||||
|
|
||||||
|
def read_file(path_to_file, func):
    """Read the UTF-8 file and pass its text on to ``func``.

    ``func`` also receives ``normalize`` as the following step. The call
    happens after the file has been closed.
    """
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read()
    func(text, normalize)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Start the chain; every step names its own successor.
    read_file(testfilepath, func=filter_chars)
|
@ -0,0 +1,25 @@
|
|||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
# Read the file
with open(testfilepath, encoding='utf-8') as f:
    data = f.read().lower()  # lower-case everything up front

# Replace every run of non-word characters (and underscores) with a space.
# Raw string: '\W' in a normal literal is an invalid escape sequence.
data = re.sub(r'[\W_]+', ' ', data)

# Tokenize
words = data.split()

# Remove stop words
stop_words = get_stopwords()
words = [word for word in words if word not in stop_words]

# Count word frequencies
word_freqs = Counter(words)

# Sort by falling count and print
sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)

print_word_freqs(sorted_word_freqs)
|
@ -0,0 +1,53 @@
|
|||||||
|
from collections import Counter
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
|
||||||
|
class DataStorageManager:
    """Data model: loads a file once and exposes its words."""

    def __init__(self, path_to_file):
        """Read ``path_to_file`` and keep the result of ``re_split`` on its content."""
        self._data = re_split(read_file(path_to_file))

    def words(self):
        """Return the stored words."""
        stored = self._data
        return stored
|
||||||
|
|
||||||
|
|
||||||
|
class StopWordManager:
    """Stop-word model: answers whether a word should be ignored."""

    def __init__(self):
        """Fetch the stop words via ``get_stopwords``."""
        self._stop_words = get_stopwords()

    def is_stop_word(self, word):
        """Return True when ``word`` is one of the stop words."""
        known = self._stop_words
        return word in known
|
||||||
|
|
||||||
|
|
||||||
|
class WordFrequencyManager:
    """Word-frequency model: accumulates counts one word at a time."""

    def __init__(self):
        """Start with an empty counter."""
        self._word_freqs = Counter()

    def increment_count(self, word):
        """Add one occurrence of ``word``."""
        self._word_freqs[word] = self._word_freqs[word] + 1

    def sorted(self):
        """Return (word, count) pairs from most to least common."""
        ranking = self._word_freqs.most_common()
        return ranking
|
||||||
|
|
||||||
|
|
||||||
|
class WordFrequencyController:
    """Coordinates the three managers to print a file's word frequencies."""

    def __init__(self, path_to_file):
        """Create the storage, stop-word and frequency managers."""
        self._storage_manager = DataStorageManager(path_to_file)
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()

    def run(self):
        """Count every non-stop word, then print the sorted result."""
        for w in self._storage_manager.words():
            if self._stop_word_manager.is_stop_word(w):
                continue
            self._word_freq_manager.increment_count(w)

        print_word_freqs(self._word_freq_manager.sorted())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Build the controller for the test file and run it once.
    controller = WordFrequencyController(testfilepath)
    controller.run()
|
@ -0,0 +1,41 @@
|
|||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
def extract_words(obj, path_to_file):
    """Store the words of ``path_to_file`` under ``obj['data']``.

    The words are whatever ``re_split`` returns for the file's content.
    """
    content = read_file(path_to_file)
    obj['data'] = re_split(content)
|
||||||
|
|
||||||
|
def load_stop_words(obj):
    """Store the result of ``get_stopwords`` under ``obj['stop_words']``."""
    stop_words = get_stopwords()
    obj['stop_words'] = stop_words
|
||||||
|
|
||||||
|
def increment_count(obj, w):
    """Add one occurrence of ``w`` to the counts kept in ``obj['freqs']``."""
    freqs = obj['freqs']
    if w in freqs:
        freqs[w] = freqs[w] + 1
    else:
        freqs[w] = 1
|
||||||
|
|
||||||
|
# "Object" built from a plain dict: a data slot plus closures that act as its
# methods. The closures look the dict up by its module-level name when called.
data_storage_obj = {
    # words loaded by 'init'
    'data': [],
    # load the words of the given file into 'data'
    'init': lambda path_to_file: extract_words(data_storage_obj, path_to_file),
    # return the loaded words
    'words': lambda: data_storage_obj['data'],
}
|
||||||
|
|
||||||
|
# "Object" built from a plain dict: the stop-word list plus closures that act
# as its methods.
stop_words_obj = {
    # stop words loaded by 'init'
    'stop_words': [],
    # fill 'stop_words'
    'init': lambda: load_stop_words(stop_words_obj),
    # membership test against 'stop_words'
    'is_stop_word': lambda word: word in stop_words_obj['stop_words'],
}
|
||||||
|
|
||||||
|
# "Object" built from a plain dict: the running counts plus closures that act
# as its methods.
word_freqs_obj = {
    # word -> count
    'freqs': {},
    # add one occurrence of the given word
    'increment_count': lambda w: increment_count(word_freqs_obj, w),
    # counts as ordered by sort_dict
    'sorted': lambda: sort_dict(word_freqs_obj['freqs']),
}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Initialise the two objects that need loading.
    data_storage_obj['init'](testfilepath)
    stop_words_obj['init']()

    # Count every word that is not a stop word.
    for w in data_storage_obj['words']():
        if stop_words_obj['is_stop_word'](w):
            continue
        word_freqs_obj['increment_count'](w)

    # Show the ten leading entries.
    word_freqs = word_freqs_obj['sorted']()
    for (w, c) in word_freqs[:10]:
        print(w, '-', c)
|
@ -0,0 +1,59 @@
|
|||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
class DataStorageManager1:
    """Word source that tokenizes the file content with ``re_split``."""

    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        self._data = re_split(content)

    def words(self):
        """Return the stored words."""
        return self._data
|
||||||
|
|
||||||
|
|
||||||
|
class DataStorageManager2:
    """Word source that keeps runs of two or more lower-case ASCII letters."""

    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        # NOTE(review): only a-z is matched, so this presumably relies on
        # read_file returning lower-cased text -- confirm.
        self._data = re.findall('[a-z]{2,}', content)

    def words(self):
        """Return the stored words."""
        return self._data
|
||||||
|
|
||||||
|
|
||||||
|
class StopWordManager:
    """Answers whether a word is a stop word."""

    def __init__(self):
        """Keep the stop words in a set."""
        stop_words = get_stopwords()
        self._stop_words = set(stop_words)

    def is_stop_word(self, word):
        """Return True when ``word`` is one of the stop words."""
        known = self._stop_words
        return word in known
|
||||||
|
|
||||||
|
|
||||||
|
class WordFrequencyManager:
    """Accumulates word counts one word at a time."""

    def __init__(self):
        """Start with an empty counter."""
        self.word_freqs = Counter()

    def increment_count(self, word):
        """Add one occurrence of ``word``."""
        self.word_freqs[word] = self.word_freqs[word] + 1

    def sorted(self):
        """Return (word, count) pairs from most to least common."""
        ranking = self.word_freqs.most_common()
        return ranking
|
||||||
|
|
||||||
|
|
||||||
|
#
# Application class
#
class WordFrequencyController:
    """Coordinates storage, stop-word filter and counter."""

    def __init__(self, path_to_file):
        # DataStorageManager1(path_to_file) is an interchangeable alternative.
        self.storage = DataStorageManager2(path_to_file)
        self.stop_word_manager = StopWordManager()
        self.word_freq_counter = WordFrequencyManager()

    def run(self):
        """Count every non-stop word, then print the sorted result.

        Can be viewed as protocol-oriented programming: only the methods the
        collaborators offer matter, not their classes.
        """
        for word in self.storage.words():
            if self.stop_word_manager.is_stop_word(word):
                continue
            self.word_freq_counter.increment_count(word)

        print_word_freqs(self.word_freq_counter.sorted())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Build the controller for the test file and run it once.
    controller = WordFrequencyController(testfilepath)
    controller.run()
|
@ -0,0 +1,92 @@
|
|||||||
|
import abc, re
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
#
# Interfaces
#
class IDataStorage(metaclass=abc.ABCMeta):
    """Interface for objects that supply the words to be counted."""

    @abc.abstractmethod
    def words(self):
        """Return the words to process."""
|
||||||
|
|
||||||
|
class IStopWordFilter(metaclass=abc.ABCMeta):
    """Interface for objects that decide whether a word is a stop word."""

    @abc.abstractmethod
    def is_stop_word(self, word):
        """Return whether ``word`` should be ignored."""
|
||||||
|
|
||||||
|
class IWordFrequencyCounter(metaclass=abc.ABCMeta):
    """Interface for objects that accumulate and rank word counts."""

    @abc.abstractmethod
    def increment_count(self, word):
        """Record one occurrence of ``word``."""

    @abc.abstractmethod
    def sorted(self):
        """Return the accumulated counts in sorted order."""
|
||||||
|
|
||||||
|
#
# Concrete classes
#
class DataStorageManager1:
    """Word source that tokenizes the file content with ``re_split``."""

    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        self._data = re_split(content)

    def words(self):
        """Return the stored words."""
        return self._data
|
||||||
|
|
||||||
|
|
||||||
|
class DataStorageManager2:
    """Word source that keeps runs of two or more lower-case ASCII letters."""

    def __init__(self, path_to_file):
        content = read_file(path_to_file)
        # NOTE(review): only a-z is matched, so this presumably relies on
        # read_file returning lower-cased text -- confirm.
        self._data = re.findall('[a-z]{2,}', content)

    def words(self):
        """Return the stored words."""
        return self._data
|
||||||
|
|
||||||
|
|
||||||
|
class StopWordManager:
    """Answers whether a word is a stop word."""

    def __init__(self):
        """Fetch the stop words via ``get_stopwords``."""
        self._stop_words = get_stopwords()

    def is_stop_word(self, word):
        """Return True when ``word`` is one of the stop words."""
        known = self._stop_words
        return word in known
|
||||||
|
|
||||||
|
|
||||||
|
class WordFrequencyManager:
    """Accumulates word counts in a plain dict."""

    def __init__(self):
        """Start with no counts."""
        self._word_freqs = {}

    def increment_count(self, word):
        """Add one occurrence of ``word``."""
        previous = self._word_freqs.get(word, 0)
        self._word_freqs[word] = previous + 1

    def sorted(self):
        """Return the counts as ordered by ``sort_dict``."""
        return sort_dict(self._word_freqs)
|
||||||
|
|
||||||
|
|
||||||
|
#
# Register the concrete classes with the abstract interfaces (optional: the
# controller only relies on the methods being present).
#
# IDataStorage.register(DataStorageManager1)
IDataStorage.register(DataStorageManager2)
IStopWordFilter.register(StopWordManager)
IWordFrequencyCounter.register(WordFrequencyManager)
|
||||||
|
|
||||||
|
|
||||||
|
#
# Application class
#
class WordFrequencyController:
    """Coordinates storage, stop-word filter and counter."""

    def __init__(self, path_to_file):
        # DataStorageManager1(path_to_file) is an interchangeable alternative.
        self.storage = DataStorageManager2(path_to_file)
        self.stop_word_manager = StopWordManager()
        self.word_freq_counter = WordFrequencyManager()

    def run(self):
        """Count every non-stop word, then print the sorted result.

        Can be viewed as protocol-oriented programming: only the methods the
        collaborators offer matter, not their classes.
        """
        for word in self.storage.words():
            if self.stop_word_manager.is_stop_word(word):
                continue
            self.word_freq_counter.increment_count(word)

        print_word_freqs(self.word_freq_counter.sorted())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Build the controller for the test file and run it once.
    controller = WordFrequencyController(testfilepath)
    controller.run()
|
Binary file not shown.
@ -0,0 +1,5 @@
|
|||||||
|
[Plugins]
|
||||||
|
;; Options: plugins/words1.pyc, plugins/words2.pyc
|
||||||
|
words = plugins/words1.pyc
|
||||||
|
;; Options: plugins/frequencies1.pyc, plugins/frequencies2.pyc
|
||||||
|
frequencies = plugins/frequencies1.pyc
|
@ -0,0 +1,2 @@
|
|||||||
|
# Compile with -b so each module's byte-code is written next to its source as
# <module>.pyc -- the file names config.ini refers to -- instead of
# __pycache__/<module>.cpython-XY.pyc.
python -m compileall -b .
cp *.pyc ../plugins
|
@ -0,0 +1,11 @@
|
|||||||
|
import operator
|
||||||
|
|
||||||
|
def top25(word_list, limit=10):
    """Return the most frequent words of ``word_list`` as (word, count) pairs.

    Pairs are ordered by falling count; words with equal counts keep the
    order of their first appearance. At most ``limit`` pairs are returned
    (10 by default, the cut-off this plugin has always used despite its
    name).
    """
    word_freqs = {}
    for w in word_list:
        word_freqs[w] = word_freqs.get(w, 0) + 1
    ranked = sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:limit]
|
||||||
|
|
@ -0,0 +1,6 @@
|
|||||||
|
import operator, collections
|
||||||
|
|
||||||
|
def top25(word_list, limit=10):
    """Return the most frequent words of ``word_list`` as (word, count) pairs.

    Pairs are ordered by falling count. At most ``limit`` pairs are returned
    (10 by default, the cut-off this plugin has always used despite its
    name).
    """
    # Counter consumes any iterable directly; no wrapping generator needed.
    counts = collections.Counter(word_list)
    return counts.most_common(limit)
|
||||||
|
|
@ -0,0 +1,14 @@
|
|||||||
|
import sys, re, string
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
def extract_words(path_to_file):
    """Return the lower-cased, non-stop words of the UTF-8 file.

    Every run of non-word characters or underscores is treated as a
    separator.
    """
    with open(path_to_file, encoding='utf-8') as f:
        str_data = f.read()
    # Raw string: '\W' in a normal literal is an invalid escape sequence.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()

    stop_words = get_stopwords()

    return [w for w in word_list if w not in stop_words]
|
||||||
|
|
@ -0,0 +1,8 @@
|
|||||||
|
import sys, re, string
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
def extract_words(path_to_file):
    """Return the non-stop words of the UTF-8 file.

    A word is a run of two or more ASCII letters in the lower-cased text.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path_to_file, encoding='utf-8') as f:
        text = f.read().lower()
    words = re.findall('[a-z]{2,}', text)
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,21 @@
|
|||||||
|
import configparser, importlib.machinery
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
|
||||||
|
def load_plugins():
    """Load the two byte-code plugins named in ``config.ini``.

    The working directory is changed to this script's directory so that
    ``config.ini`` and the plugin paths inside it resolve relative to the
    script. The loaded modules are bound to the globals ``tfwords`` and
    ``tffreqs``.
    """
    # Function-scope import: the file itself only imports importlib.machinery.
    from importlib.util import module_from_spec, spec_from_file_location

    global tfwords, tffreqs

    def _load_sourceless(name, path):
        # Spec-based loading replaces the deprecated Loader.load_module().
        # Unlike load_module(), this does not add the module to sys.modules.
        loader = importlib.machinery.SourcelessFileLoader(name, path)
        spec = spec_from_file_location(name, path, loader=loader)
        module = module_from_spec(spec)
        loader.exec_module(module)
        return module

    config = configparser.ConfigParser()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    config.read("config.ini")
    words_plugin = config.get("Plugins", "words")
    frequencies_plugin = config.get("Plugins", "frequencies")
    tfwords = _load_sourceless('tfwords', words_plugin)
    tffreqs = _load_sourceless('tffreqs', frequencies_plugin)
|
||||||
|
|
||||||
|
# Load the configured plugins, then run the extract -> count pipeline.
load_plugins()
word_freqs = tffreqs.top25(
    tfwords.extract_words(testfilepath)
)

for (w, c) in word_freqs:
    print(w, '-', c)
|
||||||
|
|
@ -0,0 +1,2 @@
|
|||||||
|
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
|
||||||
|
know. sure "
|
@ -0,0 +1,107 @@
|
|||||||
|
import re, sys
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
# Stop words consulted while counting an uploaded file.
stops = get_stopwords()
# "Database": file path -> list of (word, count) pairs, highest count first.
data = {}
|
||||||
|
|
||||||
|
# 路径问题,测试输入 test.txt ,演示当前目录下这个文件的处理
|
||||||
|
#################################################################################
|
||||||
|
# 服务端
|
||||||
|
def error_state():
    """Return the error message plus a link back to the default page."""
    message = "Something wrong"
    back_to_default = ["get", "default", None]
    return message, back_to_default
|
||||||
|
|
||||||
|
def quit_handler(args):
    """Terminate the program; ``args`` is ignored."""
    farewell = " ... "
    sys.exit(farewell)
|
||||||
|
|
||||||
|
def upload_get_handler(args):
    """Return the upload prompt and the "form" link that posts the file name.

    ``args`` is ignored.
    """
    prompt = "Name of file to upload?"
    form = ["post", "file"]
    return prompt, form
|
||||||
|
|
||||||
|
def default_get_handler(args):
    """Return the main menu text and the links for its two choices.

    ``args`` is ignored.
    """
    rep = "".join([
        "What would you like to do?",
        "\n1 - Quit",
        "\n2 - Upload file",
    ])
    links = {
        "1": ["post", "execution", None],
        "2": ["get", "file_form", None],
    }
    return rep, links
|
||||||
|
|
||||||
|
def upload_post_handler(args):
    """Count the words of the uploaded file and show its most frequent word.

    ``args`` is a sequence whose first item is a file name relative to this
    script's directory. Returns the same (text, links) pair as
    ``word_get_handler`` for the first word, or ``error_state()`` when
    ``args`` is None or the file cannot be processed.
    """
    def create_data(fn):
        """Store the sorted (word, count) pairs of ``fn`` in ``data`` once."""
        if fn in data:
            return
        word_freqs = {}
        # Explicit encoding so the result does not depend on the locale.
        with open(fn, encoding='utf-8') as f:
            for token in re.split("[^a-zA-Z]+", f.read()):
                w = token.lower()
                if token and w not in stops:
                    word_freqs[w] = word_freqs.get(w, 0) + 1
        data[fn] = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)

    if args is None:
        return error_state()
    filename = args[0]
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        filename = os.path.join(script_dir, filename)
        create_data(filename)
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors are turned into the error page.
        print("Unexpected error: %s" % sys.exc_info()[0])
        return error_state()
    return word_get_handler([filename, 0])
|
||||||
|
|
||||||
|
def word_get_handler(args):
    """Return the page for one ranked word of an uploaded file.

    ``args`` is ``[filename, word_index]``. The page shows the pair stored
    at that zero-based index (numbered from 1 in the text), followed by the
    menu; link "3" points at the following index.
    """
    def get_word(filename, word_index):
        """Return the stored pair, or a placeholder past the end of the list."""
        ranked = data[filename]
        if word_index >= len(ranked):
            return ("no more words", 0)
        return ranked[word_index]

    filename, word_index = args[0], args[1]
    word_info = get_word(filename, word_index)
    rep = "".join([
        '\n#{0}: {1} - {2}'.format(word_index+1, word_info[0], word_info[1]),
        "\n\nWhat would you like to do next?",
        "\n1 - Quit",
        "\n2 - Upload file",
        "\n3 - See next most-frequently occurring word",
    ])
    links = {
        "1": ["post", "execution", None],
        "2": ["get", "file_form", None],
        "3": ["get", "word", [filename, word_index+1]],
    }
    return rep, links
|
||||||
|
|
||||||
|
# Handler registration
|
||||||
|
# Dispatch table: "<verb>_<uri>" -> handler function.
handlers = {
    "post_execution": quit_handler,
    "get_default": default_get_handler,
    "get_file_form": upload_get_handler,
    "post_file": upload_post_handler,
    "get_word": word_get_handler,
}
|
||||||
|
|
||||||
|
# The "server" core
|
||||||
|
def handle_request(verb, uri, args):
    """Dispatch a request to its handler and return the handler's result.

    The handler is looked up under ``verb + "_" + uri``; unknown
    combinations fall back to the "get_default" handler.
    """
    def handler_key(verb, uri):
        return verb + "_" + uri

    key = handler_key(verb, uri)
    if key not in handlers:
        key = handler_key("get", "default")
    return handlers[key](args)
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# 仿真简单的浏览器客户端动作
|
||||||
|
def render_and_get_input(state_representation, links):
    """Show a page and turn the user's reaction into the next request.

    * ``links`` is a dict: read one line from stdin and return the request
      stored under that choice, or the default request for unknown input.
    * ``links`` is a list starting with "post": read one line from stdin
      and return a new request with ``[line]`` appended as form data.
    * ``links`` is any other list: return it unchanged, without reading
      input.
    * anything else: return the default request.
    """
    print(state_representation)
    sys.stdout.flush()
    if isinstance(links, dict):
        choice = sys.stdin.readline().strip()
        return links.get(choice, ["get", "default", None])
    if isinstance(links, list):
        if links[0] == "post":  # collect the "form" data
            form_value = sys.stdin.readline().strip()
            # Build a new list so the handler's list is left untouched.
            return links + [[form_value]]
        return links  # get action: no user input needed
    return ["get", "default", None]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Start on the default page, then alternate forever between the "server"
    # (produce a page for the request) and the "client" (show the page and
    # turn the user's input into the next request).
    request = ["get", "default", None]
    while True:
        state_representation, links = handle_request(*request)
        request = render_and_get_input(state_representation, links)
|
Loading…
Reference in new issue