csv convert to xlsx

2 years ago · 1c9ec9b167
parent 3eecc9ed60
commit 1c9ec9b167
1 changed files with 73 additions and 0 deletions
--- a/csv_to_xlsx_converter.py
+++ b/csv_to_xlsx_converter.py
@ -0,0 +1,73 @@
+import pandas as pd
+import os
+from datetime import datetime, timedelta
+
+def get_departure_destination(file_name):
+    """Return the route label for a CSV file: its name minus the last extension.
+
+    Only the final extension is removed (``os.path.splitext`` semantics);
+    everything before it is returned unchanged.
+    """
+    route_label, _extension = os.path.splitext(file_name)
+    return route_label
+
+def merge_csv_files(csv_files, output_xlsx):
+    """Merge several flight CSV files into a single Excel workbook.
+
+    Each CSV is read, given a '出发日期' column whose value is the name of the
+    directory two levels above the file, reduced to a fixed set of columns,
+    and appended to one combined table that is written to ``output_xlsx``.
+
+    Args:
+        csv_files: Paths of the CSV files to merge, in output order.
+        output_xlsx: Path of the Excel file to write.
+    """
+    # Columns kept in the workbook, in output order.
+    kept_columns = [
+        '航班号', '航空公司', '出发日期', '出发时间', '到达时间',
+        '中转信息', 'economy_origin', '经济舱餐食信息', '经济舱座椅间距', '出发延误时间'
+    ]
+
+    frames = []
+    for path in csv_files:
+        frame = pd.read_csv(path)
+        # The grandparent directory name supplies the date column.
+        grandparent_dir = os.path.dirname(os.path.dirname(path))
+        frame['出发日期'] = os.path.basename(grandparent_dir)
+        # Keep only the selected columns; 'economy_origin' is exported under
+        # the header '票价'.
+        frames.append(
+            frame[kept_columns].rename(columns={'economy_origin': '票价'})
+        )
+
+    # Stack all per-file tables and renumber the rows from zero.
+    combined = pd.concat(frames, ignore_index=True)
+
+    # Write the result as an Excel file without the index column.
+    combined.to_excel(output_xlsx, index=False, engine='openpyxl')
+
+# Date range to scan; both end points are included by the loop below.
+start_date = datetime(2024, 10, 22)
+end_date = datetime(2024, 11, 1)
+
+# Input and output folder paths.
+input_base_path = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main"
+output_folder = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main\\xlsx_output"
+
+# Fixed subfolder looked up inside every dated folder.
+# NOTE(review): presumably the date on which the crawl was run - confirm.
+crawl_folder_name = "2024-10-22"
+
+# Make sure the output folder exists; exist_ok avoids a check-then-create race.
+os.makedirs(output_folder, exist_ok=True)
+
+# CSV paths grouped by route (the CSV file name without its extension).
+route_files = {}
+
+current_date = start_date
+while current_date <= end_date:
+    folder_name = current_date.strftime("%Y-%m-%d")
+    folder_path = os.path.join(input_base_path, folder_name, crawl_folder_name)
+
+    # isdir rather than exists: a stray file at this path is skipped instead
+    # of making os.listdir raise.
+    if os.path.isdir(folder_path):
+        for file_name in os.listdir(folder_path):
+            if file_name.endswith('.csv'):
+                csv_path = os.path.join(folder_path, file_name)
+                route = get_departure_destination(file_name)
+                route_files.setdefault(route, []).append(csv_path)
+
+    current_date += timedelta(days=1)
+
+# Merge and save one workbook per route.
+for route, files in route_files.items():
+    output_xlsx = os.path.join(output_folder, f"{route}.xlsx")
+    merge_csv_files(files, output_xlsx)
+    print(f"已合并并保存路线: {route} -> {output_xlsx}")
+
+print("所有CSV文件已成功合并为XLSX文件，并筛选了指定的列")