You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
from cppy.cp_util import testfilepath
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
|
from createDb import TextFile, WordFrequency,engine
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_top_n_word_frequencies(filepath, n=10):
    """Print the ``n`` highest word frequencies stored for ``filepath``.

    Fetches the first ``TextFile`` row whose ``filepath`` equals the given
    value. If one exists, its ``WordFrequency`` rows are queried in descending
    ``frequency`` order, limited to ``n``, and each is printed as
    ``word: frequency``. Otherwise a "not found" message is printed.

    Args:
        filepath: Value matched against ``TextFile.filepath``.
        n: Maximum number of word/frequency rows to print. Defaults to 10.

    Returns:
        None. Results are written to standard output.
    """
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        textfile = session.query(TextFile).filter_by(filepath=filepath).first()
        if textfile:
            # Query the word frequencies, sort by frequency descending,
            # then keep only the first N rows.
            word_freqs = (
                session.query(WordFrequency)
                .filter_by(textfile=textfile)
                .order_by(WordFrequency.frequency.desc())
                .limit(n)
                .all()
            )
            # Print the N most frequent words.
            for wf in word_freqs:
                print(f"{wf.word}: {wf.frequency}")
        else:
            print(f"File {filepath} not found in the database.")
    finally:
        # Always release the session, even if one of the queries raised;
        # otherwise the underlying connection would leak.
        session.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run as a script: print the default top-10 words for the test file
    # path imported from cppy.cp_util.
    get_top_n_word_frequencies(testfilepath)
|
|
|
|
|
|