"""Term-frequency example backed by SQLite.

Running this file rebuilds a SQLite database located next to it, loads the
words of the test file into that database, and prints the ten most frequent
words together with their counts.
"""
import os.path
import sqlite3
from contextlib import closing, suppress

from cppy.cp_util import testfilepath, db_filename, extract_file_words

# Database schema: one CREATE statement per table, keyed by table name.
TABLES = {
    'documents': '''CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL
    )''',
    # `id` is declared explicitly so that the foreign key in `characters`
    # (word_id -> words.id) points at a column that actually exists.
    'words': '''CREATE TABLE IF NOT EXISTS words (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        doc_id INTEGER NOT NULL,
        value TEXT NOT NULL,
        FOREIGN KEY (doc_id) REFERENCES documents (id)
    )''',
    'characters': '''CREATE TABLE IF NOT EXISTS characters (
        word_id INTEGER NOT NULL,
        value TEXT NOT NULL,
        FOREIGN KEY (word_id) REFERENCES words (id)
    )''',
}


def create_db_schema(connection):
    """Create every table listed in ``TABLES`` and commit.

    Args:
        connection: An open ``sqlite3`` connection.

    The statements use ``IF NOT EXISTS``, so calling this on a database
    that already has the tables leaves them untouched.
    """
    # closing() guarantees the cursor is released even if a statement fails.
    with closing(connection.cursor()) as cursor:
        for ddl in TABLES.values():
            cursor.execute(ddl)
    connection.commit()


def load_file_into_database(path_to_file, connection):
    """Store a document, its words, and each word's characters.

    Args:
        path_to_file: Path of the text file; also stored as the document
            name.
        connection: An open ``sqlite3`` connection whose schema was created
            by ``create_db_schema``.

    The inserts are committed only after the whole file has been processed.
    If an error occurs, nothing is committed here and the exception
    propagates to the caller.
    """
    words = extract_file_words(path_to_file)

    with closing(connection.cursor()) as cursor:
        cursor.execute(
            "INSERT INTO documents (name) VALUES (?)", (path_to_file,)
        )
        doc_id = cursor.lastrowid

        for word in words:
            cursor.execute(
                "INSERT INTO words (doc_id, value) VALUES (?, ?)",
                (doc_id, word),
            )
            # Read lastrowid before the batch insert below.
            word_id = cursor.lastrowid
            cursor.executemany(
                "INSERT INTO characters (word_id, value) VALUES (?, ?)",
                ((word_id, char) for char in word),
            )

    connection.commit()


#######################################################
# Build the database and load the data
#######################################################

# Directory that contains this file.
current_dir = os.path.dirname(os.path.abspath(__file__))
# Full path of the database file.
db_file_path = os.path.join(current_dir, db_filename)

# Always start from a fresh database: drop any file left by a previous run.
with suppress(FileNotFoundError):
    os.remove(db_file_path)

# sqlite3's own context manager only commits or rolls back; closing() is
# what actually closes the connection.
with closing(sqlite3.connect(db_file_path)) as connection:
    with connection:
        create_db_schema(connection)
        load_file_into_database(testfilepath, connection)

# Query and print the ten most frequent words.
with closing(sqlite3.connect(db_file_path)) as connection:
    with closing(connection.cursor()) as c:
        c.execute(
            "SELECT value, COUNT(*) as C FROM words "
            "GROUP BY value ORDER BY C DESC LIMIT 10"
        )
        for row in c.fetchall():
            print(row[0], '-', row[1])