import os.path
import sqlite3
from contextlib import closing

from cppy.cp_util import testfilepath, db_filename, extract_file_words

# Database schema: maps each table name to its CREATE statement.
# NOTE(review): doc_name is declared INTEGER, yet the loader below stores a
# file-name stem (a string) in it. SQLite's type affinity tolerates this, but
# the column should probably be TEXT -- confirm before changing the schema.
TABLES = {
    'words': '''CREATE TABLE IF NOT EXISTS words (
        doc_name INTEGER NOT NULL,
        value TEXT NOT NULL
    )''',
}


def create_db_schema(connection: sqlite3.Connection) -> None:
    """Create every table listed in ``TABLES`` and commit.

    Args:
        connection: Open SQLite connection on which the tables are created.
    """
    # closing() guarantees the cursor is released even if a statement fails.
    with closing(connection.cursor()) as cursor:
        for create_sql in TABLES.values():
            cursor.execute(create_sql)
    connection.commit()


def load_file_into_database(path_to_file, connection: sqlite3.Connection) -> None:
    """Insert one ``words`` row per word extracted from ``path_to_file``.

    The document name stored with each row is the file's base name up to its
    first ``.``.

    Args:
        path_to_file: Path of the text file to load. It is handed to
            ``extract_file_words`` (presumably yielding the file's words;
            verify against ``cppy.cp_util``).
        connection: Open SQLite connection that already has the ``words`` table.
    """
    words = extract_file_words(path_to_file)
    # Derive the name from the file actually being loaded, not from the
    # module-level ``testfilepath`` default.
    doc_name = os.path.basename(path_to_file).split('.')[0]
    with closing(connection.cursor()) as cursor:
        cursor.executemany(
            "INSERT INTO words (doc_name, value) VALUES (?, ?)",
            ((doc_name, word) for word in words),
        )
    connection.commit()


#######################################################
# Build the database and load the data into it
#######################################################

# Build the full path of the database file, next to this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
db_file_path = os.path.join(current_dir, db_filename)

# Always start from a fresh database so repeated runs do not accumulate rows.
if os.path.exists(db_file_path):
    os.remove(db_file_path)

# sqlite3's own context manager only commits/rolls back; closing() is what
# actually closes the connection when the block ends.
with closing(sqlite3.connect(db_file_path)) as connection:
    create_db_schema(connection)
    load_file_into_database(testfilepath, connection)

# Query and print the ten most frequent words.
with closing(sqlite3.connect(db_file_path)) as connection:
    with closing(connection.cursor()) as c:
        c.execute("SELECT value, COUNT(*) as C FROM words GROUP BY value ORDER BY C DESC LIMIT 10")
        for row in c.fetchall():
            print(row[0], '-', row[1])