parent
ebe28f7670
commit
88606f2bce
@ -1,6 +1,6 @@
|
||||
import operator, collections
|
||||
import collections
|
||||
|
||||
def top_word(word_list, limit=10):
    """Return the most frequent words in *word_list* with their counts.

    Args:
        word_list: Iterable of hashable items (words) to count.
        limit: Maximum number of (word, count) pairs to return.
            Defaults to 10, the size the original code hard-coded.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, as produced by ``collections.Counter.most_common``.
    """
    # Counter consumes any iterable directly; no wrapping generator needed.
    counts = collections.Counter(word_list)
    return counts.most_common(limit)


# The same hunk also shows this function under its earlier name; keep that
# name bound so callers written against either spelling keep working.
top25 = top_word
|
||||
|
||||
|
@ -1,14 +0,0 @@
|
||||
import sys, re, string
|
||||
from cppy.cp_util import *
|
||||
|
||||
def extract_words(path_to_file):
    """Read a UTF-8 text file and return its lower-cased words, minus stop words.

    Every run of non-word characters and/or underscores is replaced by a
    single space before the text is lower-cased and split on whitespace.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of words, in file order, that are not contained in the
        collection returned by ``get_stopwords()``.
    """
    with open(path_to_file, encoding='utf-8') as f:
        str_data = f.read()

    # Raw string: in a plain literal '\W' is an invalid escape sequence,
    # which current Python versions warn about.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()

    # NOTE(review): get_stopwords is not defined in this block; presumably it
    # comes from the `cppy.cp_util` star import -- confirm.
    stop_words = get_stopwords()

    return [w for w in word_list if w not in stop_words]
|
||||
|
@ -1,8 +0,0 @@
|
||||
import sys, re, string
|
||||
from cppy.cp_util import *
|
||||
|
||||
def extract_words(path_to_file):
    """Read a UTF-8 text file and return its words, minus stop words.

    The text is lower-cased and a word is any run of two or more letters
    in the range a-z.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of words, in file order, that are not contained in the
        collection returned by ``get_stopwords()``.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path_to_file, encoding='utf-8') as f:
        text = f.read().lower()

    words = re.findall('[a-z]{2,}', text)

    # NOTE(review): get_stopwords is not defined in this block; presumably it
    # comes from the `cppy.cp_util` star import -- confirm.
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]
|
||||
|
Binary file not shown.
Binary file not shown.
@ -1,19 +1,19 @@
|
||||
import configparser, importlib.machinery
|
||||
from cppy.cp_util import *
|
||||
|
||||
|
||||
def load_plugins():
|
||||
config = configparser.ConfigParser()
|
||||
config = configparser.ConfigParser()
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
os.chdir(script_dir)
|
||||
config.read("config.ini")
|
||||
words_plugin = config.get("Plugins", "words")
|
||||
os.chdir(script_dir)
|
||||
config.read("config.ini")
|
||||
frequencies_plugin = config.get("Plugins", "frequencies")
|
||||
global tfwords, tffreqs
|
||||
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
|
||||
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
|
||||
|
||||
global get_frequencies
|
||||
get_frequencies = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
|
||||
|
||||
|
||||
load_plugins()
|
||||
word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath ))
|
||||
|
||||
wordlist = extract_file_words( testfilepath )
|
||||
word_freqs = get_frequencies.top_word( wordlist )
|
||||
print_word_freqs(word_freqs)
|
Loading…
Reference in new issue