"""Store a text file and its word frequencies in the database.

The script reads a text file, persists it as a ``TextFile`` row and then
persists one ``WordFrequency`` row per distinct non-stopword in the file.
"""

from collections import Counter

from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import sessionmaker

from cppy.cp_util import re_split, read_file, testfilepath, get_stopwords
from createDb import TextFile, WordFrequency, engine

# Session factory bound to the engine exported by createDb.
Session = sessionmaker(bind=engine)


def store_textfile(session, filepath):
    """Read *filepath* and persist it as a ``TextFile`` row.

    Args:
        session: Open SQLAlchemy session used for the insert.
        filepath: Path of the text file to read and store.

    Returns:
        The newly added ``TextFile`` instance, or ``None`` when the commit
        raised ``IntegrityError`` (reported to the user as the file already
        existing in the database).
    """
    content = read_file(filepath)
    textfile = TextFile(filepath=filepath, content=content)
    session.add(textfile)
    try:
        session.commit()
    except IntegrityError:
        # Leave the session usable for the caller after the failed insert.
        session.rollback()
        print(f"File {filepath} already exists in the database.")
        return None
    return textfile


def update_word_frequencies(session, textfile):
    """Count the non-stopwords of *textfile* and persist the counts.

    The content is lower-cased and split with ``re_split``; every word that
    is not a stopword gets one ``WordFrequency`` row holding its count.

    Args:
        session: Open SQLAlchemy session used for the inserts.
        textfile: ``TextFile`` instance whose ``content`` is analysed and
            which the new ``WordFrequency`` rows reference.

    Any exception raised by the commit is rolled back and printed rather
    than propagated.
    """
    # Build the lookup once as a set so each membership test is O(1),
    # regardless of the container type get_stopwords() returns.
    stopwords = frozenset(get_stopwords())
    words = re_split(textfile.content.lower())
    word_counts = Counter(word for word in words if word not in stopwords)

    session.add_all(
        WordFrequency(word=word, frequency=count, textfile=textfile)
        for word, count in word_counts.items()
    )
    try:
        session.commit()
    except Exception as e:
        # Best-effort: keep the stored text file, report the failure.
        session.rollback()
        print(f"Error updating word frequencies: {e}")


def process_textfile(filepath):
    """Store *filepath* in the database together with its word frequencies.

    Word frequencies are only computed when the file was newly stored.

    Args:
        filepath: Path of the text file to process.
    """
    session = Session()
    try:
        textfile = store_textfile(session, filepath)
        if textfile is not None:
            update_word_frequencies(session, textfile)
    finally:
        # Always release the session, even if reading the file or one of
        # the database operations raises unexpectedly.
        session.close()


if __name__ == '__main__':
    process_textfile(testfilepath)