You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.5 KiB

9 months ago
from collections import Counter
from cppy.cp_util import re_split,read_file,testfilepath,get_stopwords
from createDb import TextFile, WordFrequency,engine
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import sessionmaker
# Session factory bound to the engine imported from createDb; calling Session() opens a new session.
Session = sessionmaker(bind=engine)
def store_textfile(session, filepath):
    """Persist the file at *filepath* as a ``TextFile`` row.

    The file content is loaded with ``read_file`` and stored together with
    its path, then the session is committed.

    Args:
        session: Session used to add and commit the new row.
        filepath: Path of the text file to load and store.

    Returns:
        The committed ``TextFile`` object, or ``None`` when the commit
        raised ``IntegrityError`` (in which case the session is rolled back
        and a message is printed).
    """
    record = TextFile(filepath=filepath, content=read_file(filepath))
    session.add(record)
    try:
        session.commit()
    except IntegrityError:
        # Presumably a uniqueness constraint on the file path (the schema
        # lives in createDb); undo the pending insert and report it.
        session.rollback()
        print(f"File {filepath} already exists in the database.")
        return None
    else:
        return record
def update_word_frequencies(session, textfile):
    """Count the non-stopword words of *textfile* and store the counts.

    The content is lower-cased, split with ``re_split``, filtered against
    ``get_stopwords()``, and tallied. One ``WordFrequency`` row per distinct
    word is added to the session, which is then committed.

    Args:
        session: Session used to add and commit the rows.
        textfile: Object exposing a ``content`` string; it is also attached
            to every ``WordFrequency`` row created here.

    Returns:
        None. If the commit fails, the session is rolled back and the error
        is printed instead of being raised.
    """
    tokens = re_split(textfile.content.lower())
    excluded = get_stopwords()
    tally = Counter(token for token in tokens if token not in excluded)

    session.add_all(
        WordFrequency(word=term, frequency=occurrences, textfile=textfile)
        for term, occurrences in tally.items()
    )

    try:
        session.commit()
    except Exception as e:
        # Best-effort: leave the session usable and report the failure.
        session.rollback()
        print(f"Error updating word frequencies: {e}")
def process_textfile(filepath):
    """Store a text file and its word frequencies in the database.

    Opens a new session, stores the file with ``store_textfile`` and, when
    that returns a stored object, records its word counts with
    ``update_word_frequencies``.

    Args:
        filepath: Path of the text file to process.

    Returns:
        None.

    Raises:
        Whatever ``store_textfile`` or ``update_word_frequencies`` let
        propagate (for example an error while reading the file); the
        session is closed before the exception leaves this function.
    """
    session = Session()
    try:
        textfile = store_textfile(session, filepath)
        # store_textfile returns None when the file could not be inserted;
        # there is nothing to count in that case.
        if textfile is not None:
            update_word_frequencies(session, textfile)
    finally:
        # Always release the session, even if a step above raised.
        session.close()
# Script entry point: when run directly, process the sample file whose path
# is imported as ``testfilepath``.
if __name__ == "__main__":
    process_textfile(testfilepath)