Compare commits

...

1 Commits

Author SHA1 Message Date
sgqt 948df89ba5 添加注释
3 weeks ago

@ -0,0 +1,73 @@
import pandas as pd
import os
from datetime import datetime, timedelta
def get_departure_destination(file_name):
    """Return the route label encoded in a CSV file name.

    The label is the file name with its final extension removed, e.g.
    ``"A-B.csv"`` -> ``"A-B"``.  Leading directory components are dropped
    first, so passing a full path yields the same label as passing the bare
    file name (the label is later used to build an output file name, so it
    must not contain path separators).

    Args:
        file_name: File name (or path) of a route CSV file.

    Returns:
        The file's base name without its extension.
    """
    base_name = os.path.basename(file_name)
    route, _extension = os.path.splitext(base_name)
    return route
def merge_csv_files(csv_files, output_xlsx):
    """Merge several flight CSV files into a single Excel workbook.

    For every CSV file the departure-date column ('出发日期') is set to the
    name of the directory two levels above the file, a fixed subset of
    columns is kept, and 'economy_origin' is renamed to '票价'.  The
    resulting frames are concatenated and written to ``output_xlsx``.

    Args:
        csv_files: Iterable of paths to the CSV files to merge.
        output_xlsx: Path of the Excel file to write.

    Returns:
        None.  When ``csv_files`` is empty nothing is written, because
        ``pd.concat`` cannot concatenate an empty list.

    Raises:
        KeyError: If a CSV file lacks one of the selected columns.
    """
    # Columns kept in the output; identical for every file, so built once.
    selected_columns = [
        '航班号', '航空公司', '出发日期', '出发时间', '到达时间',
        '中转信息', 'economy_origin', '经济舱餐食信息', '经济舱座椅间距', '出发延误时间'
    ]

    frames = []
    for csv_file in csv_files:
        df = pd.read_csv(csv_file)
        # The departure date is taken from the grandparent directory name.
        df['出发日期'] = os.path.basename(os.path.dirname(os.path.dirname(csv_file)))
        # Keep only the selected columns and expose the fare as '票价'.
        frames.append(df[selected_columns].rename(columns={'economy_origin': '票价'}))

    if not frames:
        # Nothing to merge: avoid pd.concat's "No objects to concatenate".
        return

    merged_df = pd.concat(frames, ignore_index=True)
    merged_df.to_excel(output_xlsx, index=False, engine='openpyxl')
# Date range of the per-day input folders to scan (both ends inclusive).
start_date = datetime(2024, 10, 22)
end_date = datetime(2024, 11, 1)
# Input and output folder paths.
input_base_path = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main"
output_folder = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main\\xlsx_output"
# Make sure the output folder exists; exist_ok avoids a check-then-create race.
os.makedirs(output_folder, exist_ok=True)
# Maps each route (CSV file name without extension) to the CSV paths found
# for it across all scanned date folders.
route_files = {}
current_date = start_date
while current_date <= end_date:
    folder_name = current_date.strftime("%Y-%m-%d")
    # NOTE(review): the fixed "2024-10-22" sub-folder is presumably the day
    # the data was collected -- confirm before reusing with other data sets.
    folder_path = os.path.join(input_base_path, folder_name, "2024-10-22")
    # isdir (not exists) so a stray file with that name is skipped rather
    # than making os.listdir fail.
    if os.path.isdir(folder_path):
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.csv'):
                csv_path = os.path.join(folder_path, file_name)
                route = get_departure_destination(file_name)
                route_files.setdefault(route, []).append(csv_path)
    current_date += timedelta(days=1)
# Merge the files of every route and save one workbook per route.
for route, files in route_files.items():
    output_xlsx = os.path.join(output_folder, f"{route}.xlsx")
    merge_csv_files(files, output_xlsx)
    print(f"已合并并保存路线: {route} -> {output_xlsx}")
print("所有CSV文件已成功合并为XLSX文件并筛选了指定的列")

File diff suppressed because it is too large Load Diff

@ -0,0 +1,90 @@
import pandas as pd
import mysql.connector
from mysql.connector import Error
import os
from datetime import datetime, timedelta
# Database connection settings, passed as keyword arguments to
# mysql.connector.connect().
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a local config file instead.
db_config = dict(
    host='localhost',  # host name only, without a port suffix
    port=3307,  # the port is given as its own entry
    database='fly_ticket',
    user='root',
    password='123456',
)
def import_csv_to_db(file_path, cursor):
    """Insert (or update) every row of a flight CSV file into the flight table.

    Each CSV row is written with ``INSERT ... ON DUPLICATE KEY UPDATE`` so an
    existing row with the same key has its remaining columns refreshed.
    Cells that are missing in the CSV (read by pandas as NaN) are sent as
    ``None`` so the driver stores SQL NULL instead of receiving a float NaN.

    Args:
        file_path: Path of the CSV file to import.
        cursor: Database cursor exposing ``execute(sql, params)``.  The
            caller is responsible for committing the transaction.

    Raises:
        KeyError: If the CSV file lacks one of the required columns.
    """
    # The statement is identical for every row, so it is built once.
    sql = """INSERT INTO flight (f_n, f_s_p, f_a_p, f_s_a, f_a_a, f_s_t, f_a_t, f_Date, f_Delay, f_p, f_food, f_wide, f_depcode, f_dstcode)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        f_s_p = VALUES(f_s_p),
        f_a_p = VALUES(f_a_p),
        f_s_a = VALUES(f_s_a),
        f_a_a = VALUES(f_a_a),
        f_s_t = VALUES(f_s_t),
        f_a_t = VALUES(f_a_t),
        f_Delay = VALUES(f_Delay),
        f_p = VALUES(f_p),
        f_food = VALUES(f_food),
        f_wide = VALUES(f_wide),
        f_depcode = VALUES(f_depcode),
        f_dstcode = VALUES(f_dstcode);"""
    # CSV columns, in the same order as the placeholders in the statement.
    csv_columns = [
        '航班号',
        '出发城市',
        '到达城市',
        '出发机场',
        '到达机场',
        '出发时间',
        '到达时间',
        '出发日期',
        '出发延误时间',
        'economy_origin',
        '经济舱餐食信息',
        '经济舱座椅间距',
        '出发机场三字码',
        '到达机场三字码',
    ]

    df = pd.read_csv(file_path)
    for record in df[csv_columns].itertuples(index=False, name=None):
        # Replace pandas' missing-value markers with None (SQL NULL).
        values = tuple(None if pd.isna(cell) else cell for cell in record)
        cursor.execute(sql, values)
# Pre-initialise both handles so the cleanup in ``finally`` can test them
# directly, even when connecting or creating the cursor fails.
conn = None
cursor = None
try:
    # Connect to the database.
    conn = mysql.connector.connect(**db_config)
    if conn.is_connected():
        cursor = conn.cursor()
        # Date range of the per-day input folders (both ends inclusive).
        start_date = datetime(2024, 10, 22)
        end_date = datetime(2024, 11, 1)
        current_date = start_date
        while current_date <= end_date:
            folder_name = current_date.strftime("%Y-%m-%d")
            folder_path = os.path.join("D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main", folder_name, "2024-10-22")
            # isdir (not exists) so a stray file with that name is skipped
            # rather than making os.listdir fail.
            if os.path.isdir(folder_path):
                for file_name in os.listdir(folder_path):
                    if file_name.endswith('.csv'):
                        file_path = os.path.join(folder_path, file_name)
                        import_csv_to_db(file_path, cursor)
                        print(f"已导入文件: {file_path}")
            current_date += timedelta(days=1)
        # Commit once, after every file has been imported.
        conn.commit()
        print("所有数据成功插入到数据库")
except Error as e:
    print(f"连接数据库时出错: {e}")
finally:
    if conn is not None and conn.is_connected():
        # The cursor may not exist if conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
        conn.close()
        print("数据库连接已关闭")
Loading…
Cancel
Save