parent
							
								
									a647f06f32
								
							
						
					
					
						commit
						66ecf28d15
					
				
											
												Binary file not shown.
											
										
									
								| @ -0,0 +1,43 @@ | ||||
| from sqlalchemy import create_engine, Column, Integer, String, ForeignKey   | ||||
| from sqlalchemy.ext.declarative import declarative_base   | ||||
| from sqlalchemy.orm import relationship   | ||||
| import os | ||||
| from cppy.cp_util import db_filename | ||||
| 
 | ||||
| 
 | ||||
# Build the path of the SQLite database file, located in the same directory
# as this module.
current_dir = os.path.dirname(os.path.abspath(__file__))
db_file_path = os.path.join(current_dir, db_filename)
DATABASE_URI = f"sqlite:///{db_file_path}"

# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()

# Database engine; echo=True makes SQLAlchemy log the SQL statements it emits.
engine = create_engine(DATABASE_URI, echo=True)
| 
 | ||||
|    | ||||
class TextFile(Base):
    """ORM model mapped to the ``text_files`` table."""

    __tablename__ = "text_files"

    id = Column(Integer, primary_key=True)
    # Path of the file; declared unique, so a duplicate path violates the constraint.
    filepath = Column(String, unique=True)
    # Text stored for the file.
    content = Column(String)
    # Related WordFrequency objects; paired with WordFrequency.textfile.
    words = relationship("WordFrequency", back_populates="textfile")
|    | ||||
class WordFrequency(Base):
    """ORM model mapped to the ``word_frequencies`` table."""

    __tablename__ = "word_frequencies"

    id = Column(Integer, primary_key=True)
    # The word and the count recorded for it.
    word = Column(String)
    frequency = Column(Integer)
    # Foreign key to text_files.id, identifying the file the count belongs to.
    textfile_id = Column(Integer, ForeignKey("text_files.id"))
    # Related TextFile object; paired with TextFile.words.
    textfile = relationship("TextFile", back_populates="words")
| 
 | ||||
| 
 | ||||
if __name__ == '__main__':
    # Start from an empty database: delete any existing database file.
    # Removing directly and tolerating a missing file avoids the window
    # between a separate existence check and the delete.
    try:
        os.remove(db_file_path)
    except FileNotFoundError:
        pass

    # Create the tables for every model registered on Base.
    Base.metadata.create_all(engine)
| @ -0,0 +1,49 @@ | ||||
| from collections import Counter   | ||||
| from cppy.cp_util import re_split,read_file,testfilepath,get_stopwords | ||||
| from createDb import TextFile, WordFrequency,engine | ||||
| 
 | ||||
| from sqlalchemy.exc import IntegrityError   | ||||
| from sqlalchemy.orm import sessionmaker | ||||
| 
 | ||||
| 
 | ||||
# Session factory bound to the engine imported from createDb; calling
# Session() returns a new session that uses that engine.
Session = sessionmaker(bind=engine)
| 
 | ||||
| 
 | ||||
def store_textfile(session, filepath):
    """Read the file at ``filepath`` and persist it as a ``TextFile`` row.

    Args:
        session: SQLAlchemy session used to add and commit the row.
        filepath: Path handed to ``read_file`` and stored on the row.

    Returns:
        The committed ``TextFile`` object, or ``None`` when the commit raises
        ``IntegrityError`` (reported as the file already being in the
        database); in that case the session is rolled back.
    """
    record = TextFile(filepath=filepath, content=read_file(filepath))
    session.add(record)
    try:
        session.commit()
    except IntegrityError:
        # Undo the failed insert so the session remains usable.
        session.rollback()
        print(f"File {filepath} already exists in the database.")
        return None
    else:
        return record
|    | ||||
def update_word_frequencies(session, textfile):
    """Count the non-stop-words of ``textfile`` and store one row per word.

    The content is lower-cased, split with ``re_split``, filtered against
    ``get_stopwords()``, and counted. A ``WordFrequency`` row linked to
    ``textfile`` is added for each distinct remaining word, then committed.

    Args:
        session: SQLAlchemy session used to add and commit the rows.
        textfile: ``TextFile`` object whose ``content`` is analysed.

    Returns:
        None. If the commit fails, the session is rolled back and the error
        is printed instead of being raised.
    """
    tokens = re_split(textfile.content.lower())
    # Build the stop-word set once so each membership test below is O(1)
    # regardless of the container type get_stopwords() returns.
    stopwords = set(get_stopwords())
    word_counts = Counter(word for word in tokens if word not in stopwords)

    session.add_all([
        WordFrequency(word=word, frequency=count, textfile=textfile)
        for word, count in word_counts.items()
    ])
    try:
        session.commit()
    except Exception as e:
        # Best-effort: discard the pending rows and report the failure.
        session.rollback()
        print(f"Error updating word frequencies: {e}")
|    | ||||
def process_textfile(filepath):
    """Store the file at ``filepath`` and record its word frequencies.

    Opens a new session, stores the file with ``store_textfile`` and, when
    that returns a row, calls ``update_word_frequencies`` for it.

    Args:
        filepath: Path of the text file to process.
    """
    session = Session()
    try:
        textfile = store_textfile(session, filepath)
        if textfile is not None:
            update_word_frequencies(session, textfile)
    finally:
        # Always release the session, even if reading or storing raised.
        session.close()
| 
 | ||||
| 
 | ||||
| 
 | ||||
# Script entry point: process the test file path imported from cppy.cp_util.
if __name__ == '__main__':
    process_textfile(testfilepath)
|      | ||||
					Loading…
					
					
				
		Reference in new issue