You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

61 lines
1.8 KiB

9 months ago
import sqlite3, os.path
9 months ago
from cppy.cp_util import testfilepath,db_filename,extract_file_words
9 months ago
# Database table schema.
# Maps each table name to the DDL statement that creates it.
TABLES = {
    # One row per word occurrence: the document it came from and the word.
    # doc_name is TEXT because the loader stores a file-name stem in it; with
    # INTEGER affinity SQLite would silently turn a stem such as "007" into 7.
    'words': '''CREATE TABLE IF NOT EXISTS words (
        doc_name TEXT NOT NULL,
        value TEXT NOT NULL
    )''',
}
# Create the database tables
def create_db_schema(connection):
    """Create every table declared in ``TABLES`` on *connection*.

    Args:
        connection: An open database connection providing ``cursor()`` and
            ``commit()`` (the script passes a ``sqlite3`` connection).

    Any exception raised while executing a statement propagates to the
    caller; the cursor is closed either way.
    """
    cursor = connection.cursor()
    try:
        # Only the DDL text is needed; the table name (dict key) is unused.
        for ddl in TABLES.values():
            cursor.execute(ddl)
        connection.commit()
    finally:
        cursor.close()
def load_file_into_database(path_to_file, connection):
    """Insert the words of one file into the ``words`` table.

    Args:
        path_to_file: Path of the file to load. It is passed to
            ``extract_file_words`` and its base name (up to the first ``.``)
            is stored as ``doc_name`` on every inserted row.
        connection: An open database connection providing ``cursor()`` and
            ``commit()``.

    Any exception raised while inserting propagates to the caller; the
    cursor is closed either way.
    """
    words = extract_file_words(path_to_file)
    # Derive the document name from the file actually being loaded, not from
    # the module-level default test file.
    doc_name = os.path.basename(path_to_file).split('.')[0]

    cursor = connection.cursor()
    try:
        cursor.executemany(
            "INSERT INTO words (doc_name, value) VALUES (?, ?)",
            ((doc_name, word) for word in words),
        )
        connection.commit()
    finally:
        cursor.close()
9 months ago
#######################################################
# Build the database and load the data into it
#######################################################
# Build the full path of the database file (same directory as this script)
current_dir = os.path.dirname(os.path.abspath(__file__))
db_file_path = os.path.join(current_dir, db_filename)

# Always start from a fresh database file so that repeated runs do not
# accumulate duplicate rows.
if os.path.exists(db_file_path):
    os.remove(db_file_path)

# "with connection" only commits or rolls back the transaction; it does not
# close the connection, so close it explicitly.
connection = sqlite3.connect(db_file_path)
try:
    with connection:
        create_db_schema(connection)
        load_file_into_database(testfilepath, connection)
finally:
    connection.close()

# Query and print the ten most frequent words
connection = sqlite3.connect(db_file_path)
try:
    c = connection.cursor()
    c.execute("SELECT value, COUNT(*) as C FROM words GROUP BY value ORDER BY C DESC LIMIT 10")
    for row in c.fetchall():
        print(row[0], '-', row[1])
    c.close()
finally:
    connection.close()
'''
也可以把数据库看做解决共享数据的竞争死锁的办法
不过本例中的计算太快
用数据库共享数据成本太高
'''