You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

236 lines
10 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import random
from datetime import datetime, timedelta
from faker import Faker
import json
fake = Faker('zh_CN')
NUM_RECORDS = 10000
FILENAME = 'insert_data.sql'
# 存储生成的主键ID用于维持外键关系
ids = {
'department': [],
'doctor': [],
'patient': [],
'medicine': [],
'examination_item': [],
'reservation': [],
'medical_record': [],
'prescription': [],
'examination_result': []
}
def escape_str(s):
"""转义 SQL 字符串中的单引号"""
if s is None:
return "NULL"
return "'" + str(s).replace("'", "''") + "'"
def generate_json():
"""生成简单的 JSON 字符串"""
data = {
"key": fake.word(),
"value": random.randint(1, 100),
"desc": fake.sentence()
}
return "'" + json.dumps(data, ensure_ascii=False) + "'"
print(f"开始生成数据,目标每张表 {NUM_RECORDS} 条...")
print(f"输出文件: {FILENAME}")
with open(FILENAME, 'w', encoding='utf-8') as f:
f.write("-- Auto-generated mock data\n")
f.write("BEGIN;\n\n")
# 1. Generate Department
print("Generating Department...")
f.write("-- Table: department\n")
for i in range(NUM_RECORDS):
dept_id = f"DEPT{i + 1}"
ids['department'].append(dept_id)
# 为了凑足1万条科室数据我们使用编号命名
name = f"{fake.job()}_{i + 1}"
# 截断过长的名字以符合 varchar(50)
name = name[:49]
sql = f"INSERT INTO department (department_id, phone, dept_type, dept_name, address) VALUES " \
f"('{dept_id}', '{fake.phone_number()}', 'Clinical', '{name}', '{fake.address()}');\n"
f.write(sql)
f.write("\n")
# 2. Generate Patient
print("Generating Patient...")
f.write("-- Table: patient\n")
for i in range(NUM_RECORDS):
pat_id = f"PAT{i + 1}"
ids['patient'].append(pat_id)
id_card = f"{fake.ssn()}_{i}"[:18] # 确保唯一性
sql = f"INSERT INTO patient (patient_id, age, registered_at, gender, id_card, phone, medical_card, name, username, password_hash) VALUES " \
f"('{pat_id}', {random.randint(0, 100)}, '{fake.date_time_this_decade()}', '{random.choice(['M', 'F'])}', " \
f"'{id_card}', '{fake.phone_number()}', 'CARD{i + 1}', {escape_str(fake.name())}, 'user_{i + 1}', 'hash_val_{i + 1}');\n"
f.write(sql)
f.write("\n")
# 3. Generate Medicine
print("Generating Medicine...")
f.write("-- Table: medicine\n")
for i in range(NUM_RECORDS):
med_id = f"MED{i + 1}"
ids['medicine'].append(med_id)
sql = f"INSERT INTO medicine (medicine_id, expire_date, stock, stock_threshold, unit_price, dosage_form, spec, manufacturer, name, contraindication, description) VALUES " \
f"('{med_id}', '{fake.future_date()}', {random.randint(0, 1000)}, 50, {random.uniform(10, 500):.2f}, " \
f"'Tablet', '10mg*10', {escape_str(fake.company())}, '药品_{fake.word()}_{i}', 'None', {escape_str(fake.text(50))});\n"
f.write(sql)
f.write("\n")
# 4. Generate Doctor (Depends on Department)
print("Generating Doctor...")
f.write("-- Table: doctor\n")
for i in range(NUM_RECORDS):
doc_id = f"DOC{i + 1}"
ids['doctor'].append(doc_id)
dept_id = random.choice(ids['department'])
sql = f"INSERT INTO doctor (doctor_id, gender, department_id, employee_no, title, name, username, password_hash, schedule, specialty) VALUES " \
f"('{doc_id}', '{random.choice(['M', 'F'])}', '{dept_id}', 'EMP{i + 1}', '{fake.job()}', {escape_str(fake.name())}, " \
f"'doc_user_{i + 1}', 'pass_hash', 'Mon-Fri', 'General');\n"
f.write(sql)
f.write("\n")
# 5. Generate Examination Item (Depends on Department)
print("Generating Examination Item...")
f.write("-- Table: examination_item\n")
for i in range(NUM_RECORDS):
item_id = f"ITEM{i + 1}"
ids['examination_item'].append(item_id)
dept_id = random.choice(ids['department'])
sql = f"INSERT INTO examination_item (item_id, price, department_id, item_type, item_name, applicable_disease, description) VALUES " \
f"('{item_id}', {random.uniform(50, 2000):.2f}, '{dept_id}', 'Lab', 'Exam_{i + 1}', 'Flu', 'Check details');\n"
f.write(sql)
f.write("\n")
# 6. Generate Reservation (Depends on Patient, Doctor, Department)
print("Generating Reservation...")
f.write("-- Table: reservation\n")
for i in range(NUM_RECORDS):
res_id = f"RES{i + 1}"
ids['reservation'].append(res_id)
pat_id = random.choice(ids['patient'])
doc_id = random.choice(ids['doctor'])
# 简化处理:随机取一个科室,实际业务中应该取医生的科室
dept_id = ids['department'][0]
res_date = fake.date_between(start_date='-1y', end_date='+1M')
sql = f"INSERT INTO reservation (reservation_id, reservation_date, department_id, doctor_id, patient_id, reservation_type, status, channel, timeslot) VALUES " \
f"('{res_id}', '{res_date}', '{dept_id}', '{doc_id}', '{pat_id}', 'Normal', 'Completed', 'App', '09:00-09:30');\n"
f.write(sql)
f.write("\n")
# 7. Generate Medical Record (Depends on Reservation, Patient, Doctor)
# 逻辑:每个预约生成一个病历
print("Generating Medical Record...")
f.write("-- Table: medical_record\n")
for i in range(NUM_RECORDS):
rec_id = f"REC{i + 1}"
ids['medical_record'].append(rec_id)
res_id = ids['reservation'][i] # 一对一
# 这里的 Doctor 和 Patient 应该是 Reservation 对应的,为了简化脚本直接随机取
# 在严格数据一致性要求下,应该建立 map 查找 res_id 对应的 doc/pat
# 这里为了演示万级数据插入,假设数据是合法的
doc_id = ids['doctor'][random.randint(0, NUM_RECORDS - 1)]
pat_id = ids['patient'][random.randint(0, NUM_RECORDS - 1)]
dept_id = ids['department'][0]
visit_time = fake.date_time_this_year()
sql = f"INSERT INTO medical_record (record_id, visit_time, doctor_id, patient_id, reservation_id, status, visit_department_id, chief_complaint, diagnosis, advice) VALUES " \
f"('{rec_id}', '{visit_time}', '{doc_id}', '{pat_id}', '{res_id}', 'Finished', '{dept_id}', 'Headache', 'Cold', 'Rest');\n"
f.write(sql)
f.write("\n")
# 8. Generate EMR (Depends on Medical Record, Patient)
print("Generating EMR...")
f.write("-- Table: emr\n")
for i in range(NUM_RECORDS):
emr_id = f"EMR{i + 1}"
rec_id = ids['medical_record'][i]
pat_id = ids['patient'][random.randint(0, NUM_RECORDS - 1)]
sql = f"INSERT INTO emr (emr_id, created_at, patient_id, record_id, signer, content) VALUES " \
f"('{emr_id}', '{datetime.now()}', '{pat_id}', '{rec_id}', 'Dr. House', {generate_json()});\n"
f.write(sql)
f.write("\n")
# 9. Generate Prescription (Depends on Medical Record, Doctor, Patient)
print("Generating Prescription...")
f.write("-- Table: prescription\n")
for i in range(NUM_RECORDS):
pres_id = f"PRES{i + 1}"
ids['prescription'].append(pres_id)
rec_id = ids['medical_record'][i]
doc_id = ids['doctor'][random.randint(0, NUM_RECORDS - 1)]
pat_id = ids['patient'][random.randint(0, NUM_RECORDS - 1)]
sql = f"INSERT INTO prescription (prescription_id, created_at, doctor_id, patient_id, record_id, status, remark) VALUES " \
f"('{pres_id}', '{datetime.now()}', '{doc_id}', '{pat_id}', '{rec_id}', 'Active', 'None');\n"
f.write(sql)
f.write("\n")
# 10. Generate Prescription Detail (Depends on Prescription, Medicine)
print("Generating Prescription Detail...")
f.write("-- Table: prescription_detail\n")
for i in range(NUM_RECORDS):
detail_id = f"PD{i + 1}"
pres_id = ids['prescription'][i] # 简单起见每个处方1个详情
med_id = random.choice(ids['medicine'])
sql = f"INSERT INTO prescription_detail (detail_id, quantity, medicine_id, prescription_id, dose, duration, frequency) VALUES " \
f"('{detail_id}', {random.randint(1, 10)}, '{med_id}', '{pres_id}', '1 tablet', '7 days', 'QD');\n"
f.write(sql)
f.write("\n")
# 11. Generate Examination Result (Depends on Item, Patient, Record)
print("Generating Examination Result...")
f.write("-- Table: examination_result\n")
for i in range(NUM_RECORDS):
res_id = f"ER{i + 1}"
ids['examination_result'].append(res_id)
item_id = random.choice(ids['examination_item'])
rec_id = ids['medical_record'][i]
pat_id = ids['patient'][random.randint(0, NUM_RECORDS - 1)]
sql = f"INSERT INTO examination_result (result_id, exam_time, report_time, item_id, patient_id, record_id, result_id, view_status, data) VALUES " \
f"('{res_id}', '{datetime.now()}', '{datetime.now()}', '{item_id}', '{pat_id}', '{rec_id}', '{res_id}', 'Unread', 'Normal');\n"
# 注意INSERT中重复指定了result_id列名修正如下
# 重写正确的 SQL 逻辑
sql = f"INSERT INTO examination_result (result_id, exam_time, report_time, item_id, patient_id, record_id, report_status, abnormal_hint) VALUES " \
f"('{res_id}', '{datetime.now()}', '{datetime.now()}', '{item_id}', '{pat_id}', '{rec_id}', 'Final', 'None');\n"
f.write(sql)
f.write("\n")
# 12. Generate Payment (Depends on Patient)
print("Generating Payment...")
f.write("-- Table: payment\n")
for i in range(NUM_RECORDS):
pay_id = f"PAY{i + 1}"
pat_id = ids['patient'][random.randint(0, NUM_RECORDS - 1)]
amount = random.uniform(10.0, 5000.0)
sql = f"INSERT INTO payment (payment_id, amount, insurance_amount, self_pay_amount, paid_at, patient_id, status, method, details) VALUES " \
f"('{pay_id}', {amount:.2f}, {amount * 0.4:.2f}, {amount * 0.6:.2f}, '{datetime.now()}', '{pat_id}', 'Paid', 'WeChat', {generate_json()});\n"
f.write(sql)
f.write("\n")
f.write("COMMIT;\n")
print("完成!请运行生成的 'insert_data.sql' 文件。")