dev
zj3D 8 months ago
parent ebe28f7670
commit 88606f2bce

@ -1,6 +1,6 @@
import operator import operator
def top25(word_list): def top_word(word_list):
word_freqs = {} word_freqs = {}
for w in word_list: for w in word_list:
if w in word_freqs: if w in word_freqs:

@ -1,6 +1,6 @@
import operator, collections import collections
def top25(word_list): def top_word(word_list):
counts = collections.Counter(w for w in word_list) counts = collections.Counter(w for w in word_list)
return counts.most_common(10) return counts.most_common(10)

@ -1,14 +0,0 @@
import sys, re, string
from cppy.cp_util import *
def extract_words(path_to_file):
    """Return the lower-cased words of a text file, minus stop words.

    The file is read as UTF-8. Every run of non-word characters and
    underscores is replaced by a single space, the text is lower-cased and
    split on whitespace, and words found in the stop-word collection are
    dropped.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of the remaining words, in the order they occur in the file.
    """
    with open(path_to_file, encoding='utf-8') as f:
        str_data = f.read()
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    # get_stopwords is not defined in this block; presumably it comes from
    # the `cppy.cp_util` star import -- confirm.
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]

@ -1,8 +0,0 @@
import sys, re, string
from cppy.cp_util import *
def extract_words(path_to_file):
    """Return the words of a text file, minus stop words.

    The file is read as UTF-8 and lower-cased. A word is any run of two or
    more characters in the range a-z. Words found in the stop-word
    collection are dropped.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of the remaining words, in the order they occur in the file.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path_to_file, encoding='utf-8') as f:
        text = f.read().lower()
    words = re.findall('[a-z]{2,}', text)
    # get_stopwords is not defined in this block; presumably it comes from
    # the `cppy.cp_util` star import -- confirm.
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]

@ -1,19 +1,19 @@
import configparser, importlib.machinery import configparser, importlib.machinery
from cppy.cp_util import * from cppy.cp_util import *
def load_plugins(): def load_plugins():
config = configparser.ConfigParser() config = configparser.ConfigParser()
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir) os.chdir(script_dir)
config.read("config.ini") config.read("config.ini")
words_plugin = config.get("Plugins", "words")
frequencies_plugin = config.get("Plugins", "frequencies") frequencies_plugin = config.get("Plugins", "frequencies")
global tfwords, tffreqs
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module() global get_frequencies
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module() get_frequencies = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
load_plugins() load_plugins()
word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath ))
wordlist = extract_file_words( testfilepath )
word_freqs = get_frequencies.top_word( wordlist )
print_word_freqs(word_freqs) print_word_freqs(word_freqs)
Loading…
Cancel
Save