"""Count word frequencies of a text file using an on-disk SQLite database.

On first run the database file is created, the schema is installed and the
words of the input file are loaded. Every run then prints the ten most
frequent words, one ``word - count`` pair per line.
"""
import contextlib
import os.path
import sqlite3

from cppy.cp_util import *

# Database table schema. The dict order matters: tables are created in this
# order, parents before the tables that reference them.
TABLES = {
    'documents': '''CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL
    )''',
    # `id` is declared explicitly because characters.word_id references
    # words (id); without it the foreign key points at a missing column.
    'words': '''CREATE TABLE IF NOT EXISTS words (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        doc_id INTEGER NOT NULL,
        value TEXT NOT NULL,
        FOREIGN KEY (doc_id) REFERENCES documents (id)
    )''',
    'characters': '''CREATE TABLE IF NOT EXISTS characters (
        word_id INTEGER NOT NULL,
        value TEXT NOT NULL,
        FOREIGN KEY (word_id) REFERENCES words (id)
    )''',
}


def create_db_schema(connection):
    """Create every table listed in ``TABLES`` if it does not exist yet.

    Args:
        connection: An open ``sqlite3`` connection. The statements are
            committed before returning.
    """
    # One cursor is enough for all statements; closing() releases it even
    # when a CREATE statement fails.
    with contextlib.closing(connection.cursor()) as c:
        for sql in TABLES.values():
            c.execute(sql)
        connection.commit()


def load_file_into_database(path_to_file, connection):
    """Insert a document, its words and each word's characters.

    Args:
        path_to_file: Path of the text file; it is also stored as the
            document name.
        connection: An open ``sqlite3`` connection with the schema installed.

    The words come from ``extract_file_words``, which is not defined in this
    file (presumably provided by ``cppy.cp_util`` -- verify).
    """
    words = extract_file_words(path_to_file)
    # `with connection` commits on success and rolls back if an insert
    # raises; closing() guarantees the cursor is released in both cases.
    with connection, contextlib.closing(connection.cursor()) as c:
        c.execute("INSERT INTO documents (name) VALUES (?)", (path_to_file,))
        doc_id = c.lastrowid
        for w in words:
            c.execute("INSERT INTO words (doc_id, value) VALUES (?, ?)",
                      (doc_id, w))
            word_id = c.lastrowid
            # Insert all characters of the word in a single call.
            c.executemany(
                "INSERT INTO characters (word_id, value) VALUES (?, ?)",
                ((word_id, char) for char in w))


# Build the database and load the data on first run.
db_path = 'tfdb'
if not os.path.isfile(db_path):
    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # is what actually closes the connection.
        with contextlib.closing(sqlite3.connect(db_path)) as connection:
            create_db_schema(connection)
            # `testfilepath` is not defined in this file (presumably
            # provided by cppy.cp_util -- verify).
            load_file_into_database(testfilepath, connection)
    except Exception:
        # A failed build would otherwise leave a partial file behind, and the
        # isfile() check above would skip the load on every later run.
        if os.path.isfile(db_path):
            os.remove(db_path)
        raise

# Query and print the ten most frequent words.
with contextlib.closing(sqlite3.connect(db_path)) as connection:
    with contextlib.closing(connection.cursor()) as c:
        c.execute("SELECT value, COUNT(*) as C FROM words GROUP BY value ORDER BY C DESC LIMIT 10")
        for row in c.fetchall():
            print(row[0], '-', row[1])