|
|
@ -1,5 +1,5 @@
|
|
|
|
import sqlite3, os.path
|
|
|
|
import sqlite3, os.path
|
|
|
|
from cppy.cp_util import *
|
|
|
|
from cppy.cp_util import testfilepath,db_filename,extract_file_words
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 数据库表结构
|
|
|
|
# 数据库表结构
|
|
|
@ -45,17 +45,26 @@ def load_file_into_database(path_to_file, connection):
|
|
|
|
connection.commit()
|
|
|
|
connection.commit()
|
|
|
|
c.close()
|
|
|
|
c.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#######################################################
|
|
|
|
# 建数据库,处理数据入库
|
|
|
|
# 建数据库,处理数据入库
|
|
|
|
db_path = 'tfdb'
|
|
|
|
#######################################################
|
|
|
|
if not os.path.isfile(db_path):
|
|
|
|
|
|
|
|
with sqlite3.connect(db_path) as connection:
|
|
|
|
# 获取当前文件所在的目录
|
|
|
|
|
|
|
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
|
|
|
|
# 构造数据库文件的完整路径
|
|
|
|
|
|
|
|
db_file_path = os.path.join(current_dir, db_filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if os.path.exists(db_file_path):
|
|
|
|
|
|
|
|
os.remove(db_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if not os.path.isfile(db_file_path):
|
|
|
|
|
|
|
|
with sqlite3.connect(db_file_path) as connection:
|
|
|
|
create_db_schema(connection)
|
|
|
|
create_db_schema(connection)
|
|
|
|
load_file_into_database(testfilepath, connection)
|
|
|
|
load_file_into_database(testfilepath, connection)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 查询输出
|
|
|
|
# 查询输出
|
|
|
|
with sqlite3.connect(db_path) as connection:
|
|
|
|
with sqlite3.connect(db_file_path) as connection:
|
|
|
|
c = connection.cursor()
|
|
|
|
c = connection.cursor()
|
|
|
|
c.execute("SELECT value, COUNT(*) as C FROM words GROUP BY value ORDER BY C DESC LIMIT 10")
|
|
|
|
c.execute("SELECT value, COUNT(*) as C FROM words GROUP BY value ORDER BY C DESC LIMIT 10")
|
|
|
|
for row in c.fetchall():
|
|
|
|
for row in c.fetchall():
|
|
|
|