Batch import flight data

dev-clawer
Lin 6 months ago
parent 9ce47b9e8a
commit d4e76dba28

@ -1,9 +1,8 @@
import pandas as pd import pandas as pd
import mysql.connector import mysql.connector
from mysql.connector import Error from mysql.connector import Error
import os
# 读取CSV文件 from datetime import datetime, timedelta
df = pd.read_csv('D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main\\2024-10-21\\2024-10-20\\天津-贵阳.csv')
# 数据库连接配置 # 数据库连接配置
db_config = { db_config = {
@ -14,6 +13,44 @@ db_config = {
'password': '123456' 'password': '123456'
} }
def import_csv_to_db(file_path, cursor):
    """Upsert every row of one flight CSV file into the ``flight`` table.

    Each CSV row becomes one ``INSERT ... ON DUPLICATE KEY UPDATE``
    statement executed on ``cursor``. This function does not commit;
    the caller owns the transaction.

    Args:
        file_path: Path of the CSV file to read with ``pandas.read_csv``.
        cursor: Database cursor exposing ``execute(sql, params)``.

    Raises:
        KeyError: If a row is processed and an expected column is absent.
    """
    # CSV column names, listed in the same order as the INSERT placeholders.
    csv_columns = (
        '航班号',
        '出发城市',
        '到达城市',
        '出发机场',
        '到达机场',
        '出发时间',
        '到达时间',
        '出发日期',
        '出发延误时间',
        'economy_origin',
        '经济舱餐食信息',
        '经济舱座椅间距',
        '出发机场三字码',
        '到达机场三字码',
    )

    # The statement is identical for every row, so build it once per file.
    sql = """INSERT INTO flight (f_n, f_s_p, f_a_p, f_s_a, f_a_a, f_s_t, f_a_t, f_Date, f_Delay, f_p, f_food, f_wide, f_depcode, f_dstcode)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        f_s_p = VALUES(f_s_p),
        f_a_p = VALUES(f_a_p),
        f_s_a = VALUES(f_s_a),
        f_a_a = VALUES(f_a_a),
        f_s_t = VALUES(f_s_t),
        f_a_t = VALUES(f_a_t),
        f_Delay = VALUES(f_Delay),
        f_p = VALUES(f_p),
        f_food = VALUES(f_food),
        f_wide = VALUES(f_wide),
        f_depcode = VALUES(f_depcode),
        f_dstcode = VALUES(f_dstcode);"""

    df = pd.read_csv(file_path)
    for _, row in df.iterrows():
        # pandas represents empty CSV cells as NaN; pass None instead so the
        # driver binds SQL NULL rather than a float NaN.
        values = tuple(
            None if pd.isna(row[column]) else row[column]
            for column in csv_columns
        )
        cursor.execute(sql, values)
try: try:
# 连接到数据库 # 连接到数据库
conn = mysql.connector.connect(**db_config) conn = mysql.connector.connect(**db_config)
@ -21,33 +58,27 @@ try:
if conn.is_connected(): if conn.is_connected():
cursor = conn.cursor() cursor = conn.cursor()
# 为每行数据创建插入语句 # 设置日期范围
for index, row in df.iterrows(): start_date = datetime(2024, 10, 22)
sql = """INSERT INTO flight (f_n, f_s_p, f_a_p, f_s_a, f_a_a, f_s_t, f_a_t, f_Date, f_Delay, f_p, f_food, f_wide, f_depcode, f_dstcode) end_date = datetime(2024, 11, 1)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""" current_date = start_date
values = ( while current_date <= end_date:
row['航班号'], folder_name = current_date.strftime("%Y-%m-%d")
row['出发城市'], folder_path = os.path.join("D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main", folder_name, "2024-10-21")
row['到达城市'],
row['出发机场'], if os.path.exists(folder_path):
row['到达机场'], for file_name in os.listdir(folder_path):
row['出发时间'], if file_name.endswith('.csv'):
row['到达时间'], file_path = os.path.join(folder_path, file_name)
row['出发日期'], import_csv_to_db(file_path, cursor)
row['出发延误时间'], print(f"已导入文件: {file_path}")
row['economy_origin'],
row['经济舱餐食信息'], current_date += timedelta(days=1)
row['经济舱座椅间距'],
row['出发机场三字码'],
row['到达机场三字码']
)
cursor.execute(sql, values)
# 提交更改 # 提交更改
conn.commit() conn.commit()
print("数据成功插入到数据库") print("所有数据成功插入到数据库")
except Error as e: except Error as e:
print(f"连接数据库时出错: {e}") print(f"连接数据库时出错: {e}")

Loading…
Cancel
Save