diff --git a/csv_to_xlsx_converter.py b/csv_to_xlsx_converter.py new file mode 100644 index 0000000..c1c3581 --- /dev/null +++ b/csv_to_xlsx_converter.py @@ -0,0 +1,73 @@ +import pandas as pd +import os +from datetime import datetime, timedelta + +def get_departure_destination(file_name): + name_without_extension = os.path.splitext(file_name)[0] + return name_without_extension + +def merge_csv_files(csv_files, output_xlsx): + all_dfs = [] + for csv_file in csv_files: + df = pd.read_csv(csv_file) + # 添加日期列 + date = os.path.basename(os.path.dirname(os.path.dirname(csv_file))) + df['出发日期'] = date + + # 选择指定的列 + selected_columns = [ + '航班号', '航空公司', '出发日期', '出发时间', '到达时间', + '中转信息', 'economy_origin', '经济舱餐食信息', '经济舱座椅间距', '出发延误时间' + ] + df = df[selected_columns] + + # 重命名 'economy_origin' 为 '票价' + df = df.rename(columns={'economy_origin': '票价'}) + + all_dfs.append(df) + + # 合并所有数据框 + merged_df = pd.concat(all_dfs, ignore_index=True) + + # 保存为Excel文件 + merged_df.to_excel(output_xlsx, index=False, engine='openpyxl') + +# 设置日期范围 +start_date = datetime(2024, 10, 22) +end_date = datetime(2024, 11, 1) + +# 设置输入和输出文件夹路径 +input_base_path = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main" +output_folder = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main\\xlsx_output" + +# 确保输出文件夹存在 +if not os.path.exists(output_folder): + os.makedirs(output_folder) + +# 用于存储同一始发地和目的地的CSV文件 +route_files = {} + +current_date = start_date +while current_date <= end_date: + folder_name = current_date.strftime("%Y-%m-%d") + folder_path = os.path.join(input_base_path, folder_name, "2024-10-22") + + if os.path.exists(folder_path): + for file_name in os.listdir(folder_path): + if file_name.endswith('.csv'): + csv_path = os.path.join(folder_path, file_name) + route = get_departure_destination(file_name) + + if route not in route_files: + route_files[route] = [] + route_files[route].append(csv_path) + + current_date += timedelta(days=1) + +# 合并并保存每个路线的文件 +for route, files in route_files.items(): + output_xlsx = os.path.join(output_folder, f"{route}.xlsx") + merge_csv_files(files, output_xlsx) + print(f"已合并并保存路线: {route} -> {output_xlsx}") + +print("所有CSV文件已成功合并为XLSX文件,并筛选了指定的列")