From 948df89ba51a4abbc680277cc1b04906f584186f Mon Sep 17 00:00:00 2001
From: sgqt <1036203584@qq.com>
Date: Wed, 6 Nov 2024 09:36:48 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=B3=A8=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/clawer/csv_to_xlsx_converter.py    |   73 ++
 src/clawer/ctrip_flights_scraper_V3.py | 1420 ++++++++++++++++++++++++
 src/clawer/db_import.py                |   90 ++
 3 files changed, 1583 insertions(+)
 create mode 100644 src/clawer/csv_to_xlsx_converter.py
 create mode 100644 src/clawer/ctrip_flights_scraper_V3.py
 create mode 100644 src/clawer/db_import.py

diff --git a/src/clawer/csv_to_xlsx_converter.py b/src/clawer/csv_to_xlsx_converter.py
new file mode 100644
index 0000000..c1c3581
--- /dev/null
+++ b/src/clawer/csv_to_xlsx_converter.py
@@ -0,0 +1,73 @@
+import pandas as pd
+import os
+from datetime import datetime, timedelta
+
+def get_departure_destination(file_name):
+    name_without_extension = os.path.splitext(file_name)[0]
+    return name_without_extension
+
+def merge_csv_files(csv_files, output_xlsx):
+    all_dfs = []
+    for csv_file in csv_files:
+        df = pd.read_csv(csv_file)
+        # 添加日期列
+        date = os.path.basename(os.path.dirname(os.path.dirname(csv_file)))
+        df['出发日期'] = date
+        
+        # 选择指定的列
+        selected_columns = [
+            '航班号', '航空公司', '出发日期', '出发时间', '到达时间', 
+            '中转信息', 'economy_origin', '经济舱餐食信息', '经济舱座椅间距', '出发延误时间'
+        ]
+        df = df[selected_columns]
+        
+        # 重命名 'economy_origin' 为 '票价'
+        df = df.rename(columns={'economy_origin': '票价'})
+        
+        all_dfs.append(df)
+    
+    # 合并所有数据框
+    merged_df = pd.concat(all_dfs, ignore_index=True)
+    
+    # 保存为Excel文件
+    merged_df.to_excel(output_xlsx, index=False, engine='openpyxl')
+
+# 设置日期范围
+start_date = datetime(2024, 10, 22)
+end_date = datetime(2024, 11, 1)
+
+# 设置输入和输出文件夹路径
+input_base_path = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main"
+output_folder = "D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main\\xlsx_output"
+
+# 确保输出文件夹存在
+if not os.path.exists(output_folder):
+    os.makedirs(output_folder)
+
+# 用于存储同一始发地和目的地的CSV文件
+route_files = {}
+
+current_date = start_date
+while current_date <= end_date:
+    folder_name = current_date.strftime("%Y-%m-%d")
+    folder_path = os.path.join(input_base_path, folder_name, "2024-10-22")
+    
+    if os.path.exists(folder_path):
+        for file_name in os.listdir(folder_path):
+            if file_name.endswith('.csv'):
+                csv_path = os.path.join(folder_path, file_name)
+                route = get_departure_destination(file_name)
+                
+                if route not in route_files:
+                    route_files[route] = []
+                route_files[route].append(csv_path)
+    
+    current_date += timedelta(days=1)
+
+# 合并并保存每个路线的文件
+for route, files in route_files.items():
+    output_xlsx = os.path.join(output_folder, f"{route}.xlsx")
+    merge_csv_files(files, output_xlsx)
+    print(f"已合并并保存路线: {route} -> {output_xlsx}")
+
+print("所有CSV文件已成功合并为XLSX文件，并筛选了指定的列")
diff --git a/src/clawer/ctrip_flights_scraper_V3.py b/src/clawer/ctrip_flights_scraper_V3.py
new file mode 100644
index 0000000..ff0e93f
--- /dev/null
+++ b/src/clawer/ctrip_flights_scraper_V3.py
@@ -0,0 +1,1420 @@
+import magic
+import io
+import os
+import gzip
+import time
+import json
+import requests
+import pandas as pd
+from seleniumwire import webdriver
+from datetime import datetime as dt, timedelta
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+from datetime import datetime
+
+# 爬取的城市
+crawal_citys = ["杭州", "天津"]
+
+# 爬取日期范围：起始日期。格式'2023-12-01'
+begin_date = "2024-11-6"
+
+# 爬取日期范围：结束日期。格式'2023-12-31'
+end_date = "2024-11-6"
+
+# 爬取T+N，即N天后
+start_interval = 1
+
+# 爬取的日期
+crawal_days = 60
+
+# 设置各城市爬取的时间间隔（单位：秒）
+crawal_interval = 5
+
+# 日期间隔
+days_interval = 1
+
+# 设置页面加载的最长等待时间（单位：秒）
+max_wait_time = 10
+
+# 最大错误重试次数
+max_retry_time = 5
+
+# 是否只抓取直飞信息（True: 只抓取直飞，False: 抓取所有航班）
+direct_flight = False
+
+# 是否删除不重要的信息
+del_info = False
+
+# 是否重命DataFrame的列名
+rename_col = True
+
+# 调试截图
+enable_screenshot = False
+
+# 允许登录（可能必须要登录才能获取数据）
+login_allowed = True
+
+# 账号
+accounts = ['', '']
+
+# 密码
+passwords = ['', '']
+
+# 利用stealth.min.js隐藏selenium特征
+stealth_js_path = './stealth.min.js'
+
+
+# 定义下载stealth.min.js的函数
+def download_stealth_js(file_path,
+                        url='https://raw.githubusercontent.com/requireCool/stealth.min.js/main/stealth.min.js'):
+    if not os.path.exists(file_path):
+        print(f"{file_path} not found, downloading...")
+        response = requests.get(url)
+        response.raise_for_status()  # 确保请求成功
+        with open(file_path, 'w') as file:
+            file.write(response.text)
+        print(f"{file_path} downloaded.")
+    else:
+        print(f"{file_path} already exists, no need to download.")
+
+
+def init_driver():
+    options = webdriver.ChromeOptions()  # 改为ChromeOptions
+    options.add_argument("--incognito")  # 隐身模式（无痕模式）
+    # options.add_argument('--headless')  # 启用无头模式
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument("--disable-blink-features")
+    options.add_argument("--disable-blink-features=AutomationControlled")
+    options.add_argument("--disable-extensions")
+    options.add_argument("--pageLoadStrategy=eager")
+    options.add_argument("--disable-gpu")
+    options.add_argument("--disable-software-rasterizer")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument("--ignore-certificate-errors")
+    options.add_argument("--ignore-certificate-errors-spki-list")
+    options.add_argument("--ignore-ssl-errors")
+    options.add_experimental_option("excludeSwitches", ["enable-automation"])  # 不显示正在受自动化软件控制的提示
+
+    # 如果需要指定Chrome驱动的路径，取消下面这行的注释并设置正确的路径
+    # chromedriver_path = '/path/to/chromedriver'
+
+    driver = webdriver.Chrome(options=options)  # 改为Chrome，如果需要指定路径，可以加上executable_path参数
+
+    try:
+        download_stealth_js(stealth_js_path)
+        # 读取并注入stealth.min.js
+        with open(stealth_js_path, 'r') as file:
+            stealth_js = file.read()
+            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": stealth_js})
+    except Exception as e:
+        print(e)
+
+    driver.maximize_window()
+
+    return driver
+
+
+def gen_citys(crawal_citys):
+    # 生成城市组合表
+    citys = []
+    ytic = list(reversed(crawal_citys))
+    for m in crawal_citys:
+        for n in ytic:
+            if m == n:
+                continue
+            else:
+                citys.append([m, n])
+    return citys
+
+
+def generate_flight_dates(n, begin_date, end_date, start_interval, days_interval):
+    flight_dates = []
+
+    if begin_date:
+        begin_date = dt.strptime(begin_date, "%Y-%m-%d")
+    elif start_interval:
+        begin_date = dt.now() + timedelta(days=start_interval)
+
+    for i in range(0, n, days_interval):
+        flight_date = begin_date + timedelta(days=i)
+
+        flight_dates.append(flight_date.strftime("%Y-%m-%d"))
+
+    # 如果有结束日期，确保生成的日期不超过结束日期
+    if end_date:
+        end_date = dt.strptime(end_date, "%Y-%m-%d")
+        flight_dates = [date for date in flight_dates if dt.strptime(date, "%Y-%m-%d") <= end_date]
+        # 继续生成日期直到达到或超过结束日期
+        while dt.strptime(flight_dates[-1], "%Y-%m-%d") < end_date:
+            next_date = dt.strptime(flight_dates[-1], "%Y-%m-%d") + timedelta(days=days_interval)
+            if next_date <= end_date:
+                flight_dates.append(next_date.strftime("%Y-%m-%d"))
+            else:
+                break
+
+    return flight_dates
+
+
+# element_to_be_clickable 函数来替代 expected_conditions.element_to_be_clickable 或 expected_conditions.visibility_of_element_located
+
+
+def element_to_be_clickable(element):
+    def check_clickable(driver):
+        try:
+            if element.is_enabled() and element.is_displayed():
+                return element  # 当条件满足时，返回元素本身
+            else:
+                return False
+        except:
+            return False
+
+    return check_clickable
+
+
+class DataFetcher(object):
+    def __init__(self, driver):
+        self.driver = driver
+        self.date = None
+        self.city = None
+        self.err = 0  # 错误重试次数
+        self.switch_acc = 0  # 切换账户
+        self.comfort_data = None  # 新添加的属性
+
+    def refresh_driver(self):
+        try:
+            self.driver.refresh()
+        except Exception as e:
+            # 错误次数+1
+            self.err += 1
+
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} refresh_driver:刷新页面失败，错误类型：{type(e).__name__}, 详细错误信息：{str(e).split("Stacktrace:")[0]}'
+            )
+
+            # 保存错误截图
+            if enable_screenshot:
+                self.driver.save_screenshot(
+                    f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                )
+            if self.err < max_retry_time:
+                # 刷新页面
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} refresh_driver：刷新页面')
+                self.refresh_driver()
+
+            # 判断错误次数
+            if self.err >= max_retry_time:
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,refresh_driver:不继续重试'
+                )
+
+    def remove_btn(self):
+        try:
+            # WebDriverWait(self.driver, max_wait_time).until(lambda d: d.execute_script('return typeof jQuery !== "undefined"'))
+            # 移除提醒
+            self.driver.execute_script("document.querySelectorAll('.notice-box').forEach(element => element.remove());")
+            # 移除在线客服
+            self.driver.execute_script(
+                "document.querySelectorAll('.shortcut, .shortcut-link').forEach(element => element.remove());")
+            # 移除分享链接
+            self.driver.execute_script("document.querySelectorAll('.shareline').forEach(element => element.remove());")
+            '''
+            # 使用JavaScript除有的<dl>标签
+            self.driver.execute_script("""
+                var elements = document.getElementsByTagName('dl');
+                while(elements.length > 0){
+                    elements[0].parentNode.removeChild(elements[0]);
+                }
+            """)
+            '''
+        except Exception as e:
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} remove_btn:提醒移除失败，错误类型：{type(e).__name__}, 详细错误信息：{str(e).split("Stacktrace:")[0]}'
+            )
+
+    def check_verification_code(self):
+        try:
+            # 检查是否有验证码元素，如果有，则需要人工处理
+            if (len(self.driver.find_elements(By.ID, "verification-code")) + len(
+                    self.driver.find_elements(By.CLASS_NAME, "alert-title"))):
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code：验证码被触发verification-code/alert-title，请手动完成验证。')
+
+                # 等待用户手动处理验证码
+                input("请完成验证码，然后按回车键继续...")
+
+                # 等待页面加载完成
+                WebDriverWait(self.driver, max_wait_time).until(
+                    EC.presence_of_element_located((By.CLASS_NAME, "pc_home-jipiao"))
+                )
+
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code：验证码处理完成，继续执行。')
+
+                # 移除注意事项
+                self.remove_btn()
+                return True
+            else:
+                # 移除注意事项
+                self.remove_btn()
+                # 如果没有找到验证码元素，则说明页面加载成功，没有触发验证码
+                return True
+        except Exception as e:
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:未知错误，错误类型：{type(e).__name__}, 详细错误信息：{str(e).split("Stacktrace:")[0]}'
+            )
+            return False
+
+    def login(self):
+        if login_allowed:
+
+            account = accounts[self.switch_acc % len(accounts)]
+            password = passwords[self.switch_acc % len(passwords)]
+
+            try:
+                if len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")) == 0:
+                    print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:未弹出登录界面')
+                    WebDriverWait(self.driver, max_wait_time).until(
+                        EC.presence_of_element_located((By.CLASS_NAME, "tl_nfes_home_header_login_wrapper_siwkn")))
+                    # 点击飞机图标，返回主界面
+                    ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(
+                        self.driver.find_element(By.CLASS_NAME, "tl_nfes_home_header_login_wrapper_siwkn")))
+                    ele.click()
+                    # 等待页面加
+                    WebDriverWait(self.driver, max_wait_time).until(
+                        EC.presence_of_element_located((By.CLASS_NAME, "lg_loginwrap")))
+                else:
+                    print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:已经弹出登录界面')
+
+                ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(
+                    self.driver.find_elements(By.CLASS_NAME, "r_input.bbz-js-iconable-input")[0]))
+                ele.send_keys(account)
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:输入账户成功')
+
+                ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(
+                    self.driver.find_element(By.CSS_SELECTOR,
+                                             "div[data-testid='accountPanel'] input[data-testid='passwordInput']")))
+                ele.send_keys(password)
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:输入密码成功')
+
+                ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(
+                    self.driver.find_element(By.CSS_SELECTOR, '[for="checkboxAgreementInput"]')))
+                ele.click()
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:勾选同意成功')
+
+                ele = WebDriverWait(self.driver, max_wait_time).until(
+                    element_to_be_clickable(self.driver.find_elements(By.CLASS_NAME, "form_btn.form_btn--block")[0]))
+                ele.click()
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login：登录成功')
+                # 保存登录截图
+                if enable_screenshot:
+                    self.driver.save_screenshot(
+                        f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                    )
+                time.sleep(crawal_interval * 3)
+            except Exception as e:
+                # 错误次数+1
+                self.err += 1
+                # 用f字符串格式化错误类型和错误信息，提供更多的调试信息
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login：页面加载或元素操作失败，错误类型：{type(e).__name__}, 详细误信息：{str(e).split("Stacktrace:")[0]}'
+                )
+
+                # 保存错误截图
+                if enable_screenshot:
+                    self.driver.save_screenshot(
+                        f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                    )
+
+                if self.err < max_retry_time:
+                    # 刷新页面
+                    print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login：刷新页面')
+                    self.refresh_driver()
+                    # 检查注意事项和验证码
+                    if self.check_verification_code():
+                        # 重试
+                        self.login()
+                # 判断错误次数
+                if self.err >= max_retry_time:
+                    print(
+                        f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,login:重新尝试加载页面，这次指定需要重定向到首页'
+                    )
+
+    def get_page(self, reset_to_homepage=0):
+        next_stage_flag = False
+        try:
+            if reset_to_homepage == 1:
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试前往首页...')
+                start_time = time.time()
+                # 前往首页
+                self.driver.get(
+                    "https://flights.ctrip.com/online/channel/domestic")
+                end_time = time.time()
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 前往首页耗时: {end_time - start_time:.2f} 秒')
+
+            print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面 URL: {self.driver.current_url}')
+            print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面标题: {self.driver.title}')
+
+            # 检查注意事项和验证码
+            if self.check_verification_code():
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 等待页面加载完成...')
+                WebDriverWait(self.driver, max_wait_time).until(
+                    EC.presence_of_element_located(
+                        (By.CLASS_NAME, "pc_home-jipiao"))
+                )
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 页面加载完成')
+
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试点击飞机图标...')
+                # 点击飞机图标，返回主界面
+                ele = WebDriverWait(self.driver, max_wait_time).until(
+                    element_to_be_clickable(
+                        self.driver.find_element(
+                            By.CLASS_NAME, "pc_home-jipiao")
+                    )
+                )
+                ele.click()
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 成功点击飞机图标')
+
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试选择单程...')
+                # 单程
+                ele = WebDriverWait(self.driver, max_wait_time).until(
+                    element_to_be_clickable(
+                        self.driver.find_elements(
+                            By.CLASS_NAME, "radio-label")[0]
+                    )
+                )
+                ele.click()
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 成功选择单程')
+
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试点击搜索按钮...')
+                # 搜索
+                ele = WebDriverWait(self.driver, max_wait_time).until(
+                    element_to_be_clickable(
+                        self.driver.find_element(By.CLASS_NAME, "search-btn")
+                    )
+                )
+                ele.click()
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 成功点击搜索按钮')
+
+                next_stage_flag = True
+        except Exception as e:
+            # 用f字符串格式化错误类型和错误信息，提供更多的调试信息
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_page：页面加载或元素操作失败，错误类型：{type(e).__name__}, 详细错误信息：{str(e).split("Stacktrace:")[0]}'
+            )
+            print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面 URL: {self.driver.current_url}')
+            print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面标题: {self.driver.title}')
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面源代码: {self.driver.page_source[:500]}...')  # 只打印前500个字符
+
+            # 保存错误截图
+            if enable_screenshot:
+                screenshot_path = f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                self.driver.save_screenshot(screenshot_path)
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误截图已保存: {screenshot_path}')
+
+            # 重新尝试加载页面，这次指定需要重定向到首页
+            print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 重新尝试加载页面，这次指定需要重定向到首页')
+            self.get_page(1)
+        else:
+            if next_stage_flag:
+                # 继续下一步
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 页面加载成功，继续下一步')
+                self.change_city()
+            else:
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 页面加载成功，但未能完成所有操作')
+
+    def change_city(self):
+        next_stage_flag = False
+        try:
+            # 等待页面完成加载
+            WebDriverWait(self.driver, max_wait_time).until(
+                EC.presence_of_element_located(
+                    (By.CLASS_NAME, "form-input-v3"))
+            )
+
+            # 检查注意事项和验证码
+            if self.check_verification_code():
+                # 若出发地与目标值不符，则更改出发地
+                while self.city[0] not in self.driver.find_elements(
+                        By.CLASS_NAME, "form-input-v3"
+                )[0].get_attribute("value"):
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[0]
+                        )
+                    )
+                    ele.click()
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[0]
+                        )
+                    )
+                    ele.send_keys(Keys.CONTROL + "a")
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[0]
+                        )
+                    )
+                    ele.send_keys(self.city[0])
+
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换城市【0】-{self.driver.find_elements(By.CLASS_NAME, "form-input-v3")[0].get_attribute("value")}'
+                )
+
+                # 若目的地与目标值不符，则更改目的地
+                while self.city[1] not in self.driver.find_elements(
+                        By.CLASS_NAME, "form-input-v3"
+                )[1].get_attribute("value"):
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[1]
+                        )
+                    )
+                    ele.click()
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[1]
+                        )
+                    )
+                    ele.send_keys(Keys.CONTROL + "a")
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[1]
+                        )
+                    )
+                    ele.send_keys(self.city[1])
+
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换城市【1】-{self.driver.find_elements(By.CLASS_NAME, "form-input-v3")[1].get_attribute("value")}'
+                )
+
+                while (
+                        self.driver.find_elements(By.CSS_SELECTOR, "[aria-label=请选择日期]")[
+                            0
+                        ].get_attribute("value")
+                        != self.date
+                ):
+                    # 点击日期选择
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_element(
+                                By.CLASS_NAME, "modifyDate.depart-date"
+                            )
+                        )
+                    )
+                    ele.click()
+
+                    if int(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "date-picker.date-picker-block"
+                            )[1]
+                                    .find_element(By.CLASS_NAME, "year")
+                                    .text[:-1]
+                    ) < int(self.date[:4]):
+                        ele = WebDriverWait(self.driver, max_wait_time).until(
+                            element_to_be_clickable(
+                                self.driver.find_elements(
+                                    By.CLASS_NAME,
+                                    "in-date-picker.icon.next-ico.iconf-right",
+                                )[1]
+                            )
+                        )
+                        print(
+                            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "year").text[:-1])}小于 {int(self.date[:4])} 向右点击'
+                        )
+                        ele.click()
+
+                    if int(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "date-picker.date-picker-block"
+                            )[0]
+                                    .find_element(By.CLASS_NAME, "year")
+                                    .text[:-1]
+                    ) > int(self.date[:4]):
+                        ele = WebDriverWait(self.driver, max_wait_time).until(
+                            element_to_be_clickable(
+                                self.driver.find_elements(
+                                    By.CLASS_NAME,
+                                    "in-date-picker.icon.prev-ico.iconf-left",
+                                )[0]
+                            )
+                        )
+                        print(
+                            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "year").text[:-1])}大于 {int(self.date[:4])} 向左点击'
+                        )
+                        ele.click()
+
+                    if int(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "date-picker.date-picker-block"
+                            )[0]
+                                    .find_element(By.CLASS_NAME, "year")
+                                    .text[:-1]
+                    ) == int(self.date[:4]):
+                        if int(
+                                self.driver.find_elements(
+                                    By.CLASS_NAME, "date-picker.date-picker-block"
+                                )[0]
+                                        .find_element(By.CLASS_NAME, "month")
+                                        .text[:-1]
+                        ) > int(self.date[5:7]):
+                            ele = WebDriverWait(self.driver, max_wait_time).until(
+                                element_to_be_clickable(
+                                    self.driver.find_elements(
+                                        By.CLASS_NAME,
+                                        "in-date-picker.icon.prev-ico.iconf-left",
+                                    )[0]
+                                )
+                            )
+                            print(
+                                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "month").text[:-1])}大于 {int(self.date[5:7])} 左点击'
+                            )
+                            ele.click()
+
+                    if int(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "date-picker.date-picker-block"
+                            )[1]
+                                    .find_element(By.CLASS_NAME, "year")
+                                    .text[:-1]
+                    ) == int(self.date[:4]):
+                        if int(
+                                self.driver.find_elements(
+                                    By.CLASS_NAME, "date-picker.date-picker-block"
+                                )[1]
+                                        .find_element(By.CLASS_NAME, "month")
+                                        .text[:-1]
+                        ) < int(self.date[5:7]):
+                            ele = WebDriverWait(self.driver, max_wait_time).until(
+                                element_to_be_clickable(
+                                    self.driver.find_elements(
+                                        By.CLASS_NAME,
+                                        "in-date-picker.icon.next-ico.iconf-right",
+                                    )[1]
+                                )
+                            )
+                            print(
+                                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "month").text[:-1])}小于 {int(self.date[5:7])} 向右点击'
+                            )
+                            ele.click()
+
+                    for m in self.driver.find_elements(
+                            By.CLASS_NAME, "date-picker.date-picker-block"
+                    ):
+                        if int(m.find_element(By.CLASS_NAME, "year").text[:-1]) != int(
+                                self.date[:4]
+                        ):
+                            continue
+
+                        if int(m.find_element(By.CLASS_NAME, "month").text[:-1]) != int(
+                                self.date[5:7]
+                        ):
+                            continue
+
+                        for d in m.find_elements(By.CLASS_NAME, "date-d"):
+                            if int(d.text) == int(self.date[-2:]):
+                                ele = WebDriverWait(self.driver, max_wait_time).until(
+                                    element_to_be_clickable(d)
+                                )
+                                ele.click()
+                                break
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：更换日期-{self.driver.find_elements(By.CSS_SELECTOR, "[aria-label=请选择日期]")[0].get_attribute("value")}'
+                )
+
+                while "(" not in self.driver.find_elements(
+                        By.CLASS_NAME, "form-input-v3"
+                )[0].get_attribute("value"):
+                    # Enter搜索
+                    # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1]))
+                    # ele.send_keys(Keys.ENTER)
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[0]
+                        )
+                    )
+                    ele.click()
+
+                    # 通过低价提醒按钮实现enter键换页
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "low-price-remind"
+                            )[0]
+                        )
+                    )
+                    ele.click()
+
+                while "(" not in self.driver.find_elements(
+                        By.CLASS_NAME, "form-input-v3"
+                )[1].get_attribute("value"):
+                    # Enter搜索
+                    # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1]))
+                    # ele.send_keys(Keys.ENTER)
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "form-input-v3")[1]
+                        )
+                    )
+                    ele.click()
+
+                    # 通过低价提醒按钮实现enter键换页
+                    ele = WebDriverWait(self.driver, max_wait_time).until(
+                        element_to_be_clickable(
+                            self.driver.find_elements(
+                                By.CLASS_NAME, "low-price-remind"
+                            )[0]
+                        )
+                    )
+                    ele.click()
+
+                next_stage_flag = True
+
+        except Exception as e:
+            # 错误次数+1
+            self.err += 1
+
+            # 保存错误截图
+            if enable_screenshot:
+                self.driver.save_screenshot(
+                    f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                )
+
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city：更换市和日期失败，错误类型：{type(e).__name__}, 详细错误信息：{str(e).split("Stacktrace:")[0]}'
+            )
+
+            # 检查注意事项和验证码
+            if self.check_verification_code():
+                if self.err < max_retry_time:
+                    if len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")):
+                        print(
+                            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：检测到登录弹窗，需要登录'
+                        )
+                        self.login()
+                    # 重试
+                    print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：重试')
+                    self.change_city()
+                # 判断错误次数
+                if self.err >= max_retry_time:
+                    print(
+                        f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:重新尝试加载页面，这次指定需要重定向到首页'
+                    )
+
+                    # 删除本次请求
+                    del self.driver.requests
+
+                    # 置错计数
+                    self.err = 0
+
+                    # 重新尝试加载页面，这次指定需要重定向到首页
+                    self.get_page(1)
+        else:
+            if next_stage_flag:
+                # 若无错误，执行下一步
+                self.get_data()
+
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city：成功更换城市和日期，当前路线为：{self.city[0]}-{self.city[1]}')
+
+    def get_data(self):
+        try:
+            # 等待响应加载完成
+            self.predata = self.driver.wait_for_request(
+                "/international/search/api/search/batchSearch?.*", timeout=max_wait_time
+            )
+            # 捕获 getFlightComfort 数据
+            self.comfort_data = self.capture_flight_comfort_data()
+
+            rb = dict(json.loads(self.predata.body).get("flightSegments")[0])
+
+
+
+        except Exception as e:
+            # 错误次数+1
+            self.err += 1
+
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:获取数据超时，错误类型：{type(e).__name__}, 错误详细：{str(e).split("Stacktrace:")[0]}'
+            )
+
+            # 保存错误截图
+            if enable_screenshot:
+                self.driver.save_screenshot(
+                    f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                )
+
+            # 删除本次请求
+            del self.driver.requests
+
+            if self.err < max_retry_time:
+                # 刷新页面
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_data：刷新页面')
+                self.refresh_driver()
+
+                # 检查注意事项和验证码
+                if self.check_verification_code():
+                    # 重试
+                    self.get_data()
+
+            # 判断错误次数
+            if self.err >= max_retry_time:
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 误次数【{self.err}-{max_retry_time}】,get_data:重新尝试加载页面，这次指定需要重定向到首页'
+                )
+
+                # 重置错误计数
+                self.err = 0
+                # 重新尝试加载页面，这次指定需要重定向到首页
+                self.get_page(1)
+        else:
+            # 删除本次请求
+            del self.driver.requests
+
+            # 检查数据获取正确性
+            if (
+                    rb["departureCityName"] == self.city[0]
+                    and rb["arrivalCityName"] == self.city[1]
+                    and rb["departureDate"] == self.date
+            ):
+                print(f"get_data:城市匹配成功：出发地-{self.city[0]}，目的地-{self.city[1]}")
+
+                # 重置错误计数
+                self.err = 0
+
+                # 若无错误，执行下一步
+                self.decode_data()
+            else:
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:刷新页面')
+                # 错误次数+1
+                self.err += 1
+
+                # 保存错误截图
+                if enable_screenshot:
+                    self.driver.save_screenshot(
+                        f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                    )
+
+                # 重新更换城
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_data：重新更换城市:{rb["departureCityName"]}-{rb["arrivalCityName"]}-{rb["departureDate"]}'
+                )
+
+                # 检查注意事项和验证码
+                if self.check_verification_code():
+                    # 重试
+                    self.change_city()
+
+    def decode_data(self):
+        try:
+            # 使用python-magic库检查MIME类型
+            mime = magic.Magic()
+            file_type = mime.from_buffer(self.predata.response.body)
+
+            buf = io.BytesIO(self.predata.response.body)
+
+            if "gzip" in file_type:
+                gf = gzip.GzipFile(fileobj=buf)
+                self.dedata = gf.read().decode("UTF-8")
+            elif "JSON data" in file_type:
+                print(buf.read().decode("UTF-8"))
+            else:
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 未知的压缩格式：{file_type}')
+
+            self.dedata = json.loads(self.dedata)
+
+        except Exception as e:
+            # 错误次数+1
+            self.err += 1
+
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:数据解码失败，错误类型：{type(e).__name__}, 错误详细：{str(e).split("Stacktrace:")[0]}'
+            )
+
+            # 保存错误截图
+            if enable_screenshot:
+                self.driver.save_screenshot(
+                    f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
+                )
+
+            # 删除本次请求
+            del self.driver.requests
+
+            if self.err < max_retry_time:
+                # 刷新页面
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} decode_data：刷新页面')
+                self.refresh_driver()
+
+                # 检查注意事项和验证码
+                if self.check_verification_code():
+                    # 试
+                    self.get_data()
+            # 判错误次数
+            if self.err >= max_retry_time:
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:重新尝试加载页面，这次指定需要重定向到首页'
+                )
+
+                # 重置错误计数
+                self.err = 0
+
+                # 重新尝试加载页面，这次指定需要重定向到首页
+                self.get_page(1)
+        else:
+            # 重置错误计数
+            self.err = 0
+
+            # 若无误，执行下一步
+            self.check_data()
+
+    def check_data(self):
+        try:
+            self.flightItineraryList = self.dedata["data"]["flightItineraryList"]
+            # 倒序遍历,删除转机航班
+            for i in range(len(self.flightItineraryList) - 1, -1, -1):
+                if (
+                        self.flightItineraryList[i]["flightSegments"][0]["transferCount"]
+                        != 0
+                ):
+                    self.flightItineraryList.pop(i)
+            if len(self.flightItineraryList) == 0 and direct_flight:
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 不存在直航航班:{self.city[0]}-{self.city[1]}')
+                # 重置错误计数
+                self.err = 0
+                return 0
+        except Exception as e:
+            # 错误次数+1
+            self.err += 1
+            print(
+                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 数据检查出错：不存在航班，错误类型：{type(e).__name__}, 错误详细：{str(e).split("Stacktrace:")[0]}'
+            )
+            print(self.dedata)
+            if self.err < max_retry_time:
+                if 'searchErrorInfo' in self.dedata["data"]:
+                    # 重置错误计数
+                    self.err = 0
+                    return 0
+                else:
+                    if "'needUserLogin': True" in str(self.dedata["data"]):
+                        print(
+                            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:必须要登录才能查看数据，这次指定需要重定向到首页'
+                        )
+                        # 重新尝试加载页面，这次指定需要重定向到首页
+                        self.login()
+
+                    # 刷新页面
+                    print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_data：刷新页面')
+                    self.refresh_driver()
+                    # 检查注意事项和验证码
+                    if self.check_verification_code():
+                        # 重试
+                        self.get_data()
+            # 判断错误次数
+            if self.err >= max_retry_time:
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:重新尝试加载页面，这次指定需要重定向到首页'
+                )
+
+                # 重置错误计数
+                self.err = 0
+
+                # 重新尝试加载页面，这次指定需要重定向到首页
+                self.get_page(1)
+        else:
+            # 重置错误计数
+            self.err = 0
+            self.proc_flightSegments()
+            self.proc_priceList()
+            self.mergedata()
+
+    def proc_flightSegments(self):
+        self.flights = pd.DataFrame()
+
+        for flightlist in self.flightItineraryList:
+            flightlist = flightlist["flightSegments"][0]["flightList"]
+            flightUnitList = dict(flightlist[0])
+
+            departureday = flightUnitList["departureDateTime"].split(" ")[0]
+            departuretime = flightUnitList["departureDateTime"].split(" ")[1]
+
+            arrivalday = flightUnitList["arrivalDateTime"].split(" ")[0]
+            arrivaltime = flightUnitList["arrivalDateTime"].split(" ")[1]
+
+            # 处理 stopList
+            if 'stopList' in flightUnitList and flightUnitList['stopList']:
+                stop_info = []
+                for stop in flightUnitList['stopList']:
+                    stop_info.append(f"{stop['cityName']}({stop['airportName']}, {stop['duration']}分钟)")
+                flightUnitList['stopInfo'] = ' -> '.join(stop_info)
+            else:
+                flightUnitList['stopInfo'] = '无中转'
+
+            if del_info:
+                # 删除一些不重要的信息
+                dellist = [
+                    "sequenceNo",
+                    "marketAirlineCode",
+                    "departureProvinceId",
+                    "departureCityId",
+                    "departureCityCode",
+                    "departureAirportShortName",
+                    "departureTerminal",
+                    "arrivalProvinceId",
+                    "arrivalCityId",
+                    "arrivalCityCode",
+                    "arrivalAirportShortName",
+                    "arrivalTerminal",
+                    "transferDuration",
+                    "stopList",  # 删除原始的 stopList
+                    "leakedVisaTagSwitch",
+                    "trafficType",
+                    "highLightPlaneNo",
+                    "mealType",
+                    "operateAirlineCode",
+                    "arrivalDateTime",
+                    "departureDateTime",
+                    "operateFlightNo",
+                    "operateAirlineName",
+                ]
+                for value in dellist:
+                    flightUnitList.pop(value, None)
+
+            # 更新日期格式
+            flightUnitList.update(
+                {
+                    "departureday": departureday,
+                    "departuretime": departuretime,
+                    "arrivalday": arrivalday,
+                    "arrivaltime": arrivaltime,
+                }
+            )
+
+            self.flights = pd.concat(
+                [
+                    self.flights,
+                    pd.DataFrame.from_dict(flightUnitList, orient="index").T,
+                ],
+                ignore_index=True,
+            )
+
+    def proc_priceList(self):
+        self.prices = pd.DataFrame()
+
+        for flightlist in self.flightItineraryList:
+            flightNo = flightlist["itineraryId"].split("_")[0]
+            priceList = flightlist["priceList"]
+
+            # 经济舱，经济舱折扣
+            economy, economy_tax, economy_total, economy_full = [], [], [], []
+            economy_origin_price, economy_tax_price, economy_total_price, economy_full_price = "", "", "", ""
+            # 商务舱，商务舱折扣
+            bussiness, bussiness_tax, bussiness_total, bussiness_full = [], [], [], []
+            bussiness_origin_price, bussiness_tax_price, bussiness_total_price, bussiness_full_price = "", "", "", ""
+
+            for price in priceList:
+                # print("Price dictionary keys:", price.keys())
+                # print("Full price dictionary:", json.dumps(price, indent=2))
+
+                adultPrice = price["adultPrice"]
+                childPrice = price.get("childPrice", adultPrice)  # 如果没有childPrice，使用adultPrice
+                freeOilFeeAndTax = price["freeOilFeeAndTax"]
+                sortPrice = price["sortPrice"]
+
+                # 估算税费（如果需要的话）
+                estimatedTax = sortPrice - adultPrice if not freeOilFeeAndTax else 0
+
+                miseryIndex = price["miseryIndex"]
+                cabin = price["cabin"]
+
+                # 经济舱
+                if cabin == "Y":
+                    economy.append(adultPrice)
+                    economy_tax.append(estimatedTax)
+                    economy_full.append(miseryIndex)
+                    economy_total.append(adultPrice + estimatedTax)
+                # 商务舱
+                elif cabin == "C":
+                    bussiness.append(adultPrice)
+                    bussiness_tax.append(estimatedTax)
+                    bussiness_full.append(miseryIndex)
+                    bussiness_total.append(adultPrice + estimatedTax)
+
+            # 初始化变量
+            economy_min_index = None
+            bussiness_min_index = None
+
+            if economy_total != []:
+                economy_total_price = min(economy_total)
+                economy_min_index = economy_total.index(economy_total_price)
+
+            if bussiness_total != []:
+                bussiness_total_price = min(bussiness_total)
+                bussiness_min_index = bussiness_total.index(bussiness_total_price)
+
+            if economy_min_index is not None:
+                economy_origin_price = economy[economy_min_index]
+                economy_tax_price = economy_tax[economy_min_index]
+                economy_full_price = economy_full[economy_min_index]
+
+            if bussiness_min_index is not None:
+                bussiness_origin_price = bussiness[bussiness_min_index]
+                bussiness_tax_price = bussiness_tax[bussiness_min_index]
+                bussiness_full_price = bussiness_full[bussiness_min_index]
+
+            price_info = {
+                "flightNo": flightNo,
+                "economy_origin": economy_origin_price,
+                "economy_tax": economy_tax_price,
+                "economy_total": economy_total_price,
+                "economy_full": economy_full_price,
+                "bussiness_origin": bussiness_origin_price,
+                "bussiness_tax": bussiness_tax_price,
+                "bussiness_total": bussiness_total_price,
+                "bussiness_full": bussiness_full_price,
+            }
+
+            # self.prices=self.prices.append(price_info,ignore_index=True)
+            self.prices = pd.concat(
+                [self.prices, pd.DataFrame(price_info, index=[0])], ignore_index=True
+            )
+
+    ##
+    def mergedata(self):
+        try:
+            self.df = self.flights.merge(self.prices, on=["flightNo"])
+            print(f"合并后的航班数据形状: {self.df.shape}")
+            print(f"合并后的航班数据列: {self.df.columns}")
+
+            self.df["dateGetTime"] = dt.now().strftime("%Y-%m-%d")
+
+            print(f"获取到的舒适度数据: {self.comfort_data}")
+
+            if self.comfort_data:
+                comfort_df = pd.DataFrame.from_dict(self.comfort_data, orient='index')
+                comfort_df.reset_index(inplace=True)
+                comfort_df.rename(columns={'index': 'flight_no'}, inplace=True)
+
+                print(f"舒适度数据形状: {comfort_df.shape}")
+                print(f"舒适度数据列: {comfort_df.columns}")
+                print(f"舒适度数据前几行: \n{comfort_df.head()}")
+
+                # 检查 operateFlightNo 列是否存在
+                if 'operateFlightNo' in self.df.columns:
+                    print(f"合并前的 operateFlightNo 唯一值: {self.df['operateFlightNo'].unique()}")
+                    # 创建一个临时列来存储用于匹配的航班号
+                    self.df['match_flight_no'] = self.df['operateFlightNo'].fillna(self.df['flightNo'])
+                else:
+                    print("警告: operateFlightNo 列不存在于数据中,将使用 flightNo 进行匹配")
+                    self.df['match_flight_no'] = self.df['flightNo']
+
+                print(f"现有的列: {self.df.columns}")
+                print(f"合并前的 flight_no 唯一值: {comfort_df['flight_no'].unique()}")
+
+                # 使用 left join 来合并数据
+                self.df = self.df.merge(comfort_df, left_on='match_flight_no', right_on='flight_no', how='left')
+
+                print(f"合并后的数据形状: {self.df.shape}")
+                print(f"合并后的数据列: {self.df.columns}")
+
+                # 删除临时列和多余的flight_no列
+                self.df.drop(['match_flight_no', 'flight_no'], axis=1, inplace=True, errors='ignore')
+
+            if rename_col:
+                # 对pandas的columns进行重命名
+                order = [
+                    "数据获取日期",
+                    "航班号",
+                    "航空公司",
+                    "出发日期",
+                    "出发时间",
+                    "到达日期",
+                    "到达时间",
+                    "飞行时长",
+                    "出发国家",
+                    "出发城市",
+                    "出发机场",
+                    "出发机场三字码",
+                    "到达国家",
+                    "到达城市",
+                    "到达机场",
+                    "到达机场三字码",
+                    "飞机型号",
+                    "飞机尺寸",
+                    "飞机型号三字码",
+                    "到达准点率",
+                    "停留次数",
+                    "中转信息",  # 新增字段
+                ]
+
+                origin = [
+                    "dateGetTime",
+                    "flightNo",
+                    "marketAirlineName",
+                    "departureday",
+                    "departuretime",
+                    "arrivalday",
+                    "arrivaltime",
+                    "duration",
+                    "departureCountryName",
+                    "departureCityName",
+                    "departureAirportName",
+                    "departureAirportCode",
+                    "arrivalCountryName",
+                    "arrivalCityName",
+                    "arrivalAirportName",
+                    "arrivalAirportCode",
+                    "aircraftName",
+                    "aircraftSize",
+                    "aircraftCode",
+                    "arrivalPunctuality",
+                    "stopCount",
+                    "stopInfo",  # 新增字段
+                ]
+
+                columns = dict(zip(origin, order))
+
+                # 添加舒适度数据的列名映射
+                comfort_columns = {
+                    'departure_delay_time': '出发延误时间',
+                    'departure_bridge_rate': '出发廊桥率',
+                    'arrival_delay_time': '到达延误时间',
+                    'plane_type': '飞机类型',
+                    'plane_width': '飞机宽度',
+                    'plane_age': '飞机机龄',
+                    'Y_has_meal': '经济舱是否有餐食',
+                    'Y_seat_tilt': '经济舱座椅倾斜度',
+                    'Y_seat_width': '经济舱座椅宽度',
+                    'Y_seat_pitch': '经济舱座椅间距',
+                    'Y_meal_msg': '经济舱餐食信息',
+                    'Y_power': '经济舱电源',
+                    'C_has_meal': '商务舱是否有餐食',
+                    'C_seat_tilt': '商务舱座椅倾斜度',
+                    'C_seat_width': '商务舱座椅宽度',
+                    'C_seat_pitch': '商务舱座椅间距',
+                    'C_meal_msg': '商务舱餐食信息',
+                    'C_power': '商务舱电源',
+                }
+                columns.update(comfort_columns)
+
+                self.df = self.df.rename(columns=columns)
+
+                if del_info:
+                    self.df = self.df[order + list(comfort_columns.values())]
+
+            files_dir = os.path.join(
+                os.getcwd(), self.date, dt.now().strftime("%Y-%m-%d")
+            )
+
+            if not os.path.exists(files_dir):
+                os.makedirs(files_dir)
+
+            filename = os.path.join(
+                files_dir, f"{self.city[0]}-{self.city[1]}.csv")
+
+            self.df.to_csv(filename, encoding="UTF-8", index=False)  #保存为CSV文件
+
+            print(f'\n{time.strftime("%Y-%m-%d_%H-%M-%S")} 数据爬取完成 {filename}\n')  #输出爬取成功信息
+
+            return 0
+#捕获在合并数据发生的异常
+        except Exception as e:
+            print(f"合并数据失败 {str(e)}")
+            print(f"错误类型: {type(e).__name__}")
+            print(f"错误详情: {str(e)}")
+            import traceback
+            print(f"错误堆栈: {traceback.format_exc()}")
+            return 0
+
+    ##爬取航班舒适度信息
+    def capture_flight_comfort_data(self):
+        try:
+            # 滚动页面到底部以加载所有内容
+            last_height = self.driver.execute_script("return document.body.scrollHeight")
+            while True:
+                # 分步滚动页面
+                for i in range(10):  # 将页面分成10步滚动
+                    scroll_height = last_height * (i + 1) / 3
+                    self.driver.execute_script(f"window.scrollTo(0, {scroll_height});")
+                    time.sleep(0.5)  # 每一小步等待0.5秒
+
+                # 等待页面加载
+                time.sleep(3)  # 滚动到底部后多等待3秒
+
+                # 计算新的滚动高度并与最后的滚动高度进行比较
+                new_height = self.driver.execute_script("return document.body.scrollHeight")
+                if new_height == last_height:
+                    break
+                last_height = new_height
+
+            comfort_requests = self.driver.requests#获取页面加载过程中发出的所有网络请求
+            #初始化一些变量用于统计和标记相关请求
+            comfort_data = {}
+            batch_comfort_found = False
+            getFlightComfort_requests_count = 0
+            total_requests_count = len(comfort_requests)
+
+            print(f"\n{time.strftime('%Y-%m-%d_%H-%M-%S')} 开始分析请求，总请求数：{total_requests_count}")
+            #遍历所有网络请求
+            for request in comfort_requests:
+                if "/search/api/flight/comfort/batchGetComfortTagList" in request.url:
+                    batch_comfort_found = True
+                    print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 找到 batchGetComfortTagList 请求")
+                    continue#如果请求 URL 包含 "/search/api/flight/comfort/batchGetComfortTagList"，标记 batch_comfort_found 为 True 并继续下一个请求
+
+                if "/search/api/flight/comfort/getFlightComfort" in request.url:
+                    getFlightComfort_requests_count += 1
+                    print(
+                        f"\n{time.strftime('%Y-%m-%d_%H-%M-%S')} 捕获到第 {getFlightComfort_requests_count} 个 getFlightComfort 请求:")
+                    print(f"URL: {request.url}")
+                    #如果请求 URL 包含 "/search/api/flight/comfort/getFlightComfort"：统计此类请求数量并打印请求信息
+
+                    #解析请求体（payload）获取航班号，如果解析失败打印错误信息并继续下一个请求。
+                    try:
+                        payload = json.loads(request.body.decode('utf-8'))
+                        flight_no = payload.get('flightNoList', ['Unknown'])[0]
+                        print(f"请求的航班号: {flight_no}")
+                    except Exception as e:
+                        print(f"无法解析请求 payload: {str(e)}")
+                        continue
+                    #如果请求有响应
+                    if request.response:
+                        print(f"响应状态码: {request.response.status_code}")#打印状态码
+                        body = request.response.body
+                        if request.response.headers.get('Content-Encoding', '').lower() == 'gzip':
+                            body = gzip.decompress(body)#解压gzip
+
+                        try:
+                            json_data = json.loads(body.decode('utf-8')) #解析响应体为 JSON 格式
+                            #解析成功且响应状态正常，提取舒适度数据
+                            print(
+                                f"响应数据: {json.dumps(json_data, indent=2, ensure_ascii=False)[:500]}...")  # 打印前500个字符
+                            if json_data['status'] == 0 and json_data['msg'] == 'success':
+                                flight_comfort = json_data['data']
+
+                                punctuality = flight_comfort['punctualityInfo']
+                                plane_info = flight_comfort['planeInfo']
+                                cabin_info = {cabin['cabin']: cabin for cabin in flight_comfort['cabinInfoList']}
+                                #整理到 processed_data 字典
+                                processed_data = {
+                                    'departure_delay_time': punctuality['departureDelaytime'],
+                                    'departure_bridge_rate': punctuality['departureBridge'],
+                                    'arrival_delay_time': punctuality['arrivalDelaytime'],
+                                    'plane_type': plane_info['planeTypeName'],
+                                    'plane_width': plane_info['planeWidthCategory'],
+                                    'plane_age': plane_info['planeAge']
+                                }
+                                #客舱类型标识
+                                for cabin_type in ['Y', 'C']:
+                                    if cabin_type in cabin_info:
+                                        cabin = cabin_info[cabin_type]
+                                        processed_data.update({
+                                            f'{cabin_type}_has_meal': cabin['hasMeal'],
+                                            f'{cabin_type}_seat_tilt': cabin['seatTilt']['value'],
+                                            f'{cabin_type}_seat_width': cabin['seatWidth']['value'],
+                                            f'{cabin_type}_seat_pitch': cabin['seatPitch']['value'],
+                                            f'{cabin_type}_meal_msg': cabin['mealMsg']
+                                        })
+                                        if 'power' in cabin:
+                                            processed_data[f'{cabin_type}_power'] = cabin['power']
+
+                                #添加到 comfort_data 字典中，同时打印成功提取信息
+                                comfort_data[flight_no] = processed_data
+                                print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 成功提取航班 {flight_no} 的舒适度数据")
+                            else:
+                                print(
+                                    f"{time.strftime('%Y-%m-%d_%H-%M-%S')} getFlightComfort 响应状态异常: {json_data['status']}, {json_data['msg']}")
+                        except Exception as e:
+                            print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 处理 getFlightComfort 响应时出错: {str(e)}")
+                    else:
+                        print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} getFlightComfort 请求没有响应")
+
+            print(f"\n{time.strftime('%Y-%m-%d_%H-%M-%S')} 请求分析完成")
+            print(f"总请求数: {total_requests_count}")
+            print(f"batchGetComfortTagList 请求是否找到: {batch_comfort_found}")
+            print(f"getFlightComfort 请求数: {getFlightComfort_requests_count}")
+            print(f"成功提取的舒适度数据数: {len(comfort_data)}")
+
+            if comfort_data:
+                # 创建舒适度DataFrame
+                comfort_df = pd.DataFrame.from_dict(comfort_data, orient='index')
+                comfort_df.reset_index(inplace=True)
+                comfort_df.rename(columns={'index': 'flight_no'}, inplace=True)
+
+                # 保存舒适度数据为CSV文件
+                # save_dir = os.path.join(os.getcwd(), self.date, datetime.now().strftime("%Y-%m-%d"))
+                # os.makedirs(save_dir, exist_ok=True)
+
+                # comfort_filename = os.path.join(save_dir, f"{self.city[0]}-{self.city[1]}_comfort.csv")
+                # comfort_df.to_csv(comfort_filename, encoding="UTF-8", index=False)
+                # print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 航班舒适度数据已保存到 {comfort_filename}")
+
+                return comfort_data
+            #没有提取到数据，打印未捕获到数据的提示和可能的原因
+            else:
+                print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 未捕获到任何 getFlightComfort 数据")
+                print("可能的原因:")
+                print("1. 网页没有加载完全")
+                print("2. 网站结构可能已经改变")
+                print("3. 网络连接问题")
+                print("4. 请求被网站拦截或限制")
+                return None
+        #捕获到异常，打印异常发生的时间、错误信息、错误类型、错误详情和详细的错误堆栈跟踪，最后返回 None。
+        except Exception as e:
+            print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 捕获 getFlightComfort 数据时出错：{str(e)}")
+            print(f"错误类型: {type(e).__name__}")
+            print(f"错误详情: {str(e)}")
+            import traceback
+            print(f"错误堆栈: {traceback.format_exc()}")
+            return None
+
+
+if __name__ == "__main__":
+
+    driver = init_driver()
+
+    citys = gen_citys(crawal_citys)
+
+    flight_dates = generate_flight_dates(crawal_days, begin_date, end_date, start_interval, days_interval)
+
+    Flight_DataFetcher = DataFetcher(driver)
+
+    for city in citys:
+        Flight_DataFetcher.city = city
+
+        for flight_date in flight_dates:
+            Flight_DataFetcher.date = flight_date
+
+            if os.path.exists(
+                    os.path.join(os.getcwd(), flight_date, dt.now().strftime("%Y-%m-%d"), f"{city[0]}-{city[1]}.csv")):
+                print(
+                    f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 文件已存在:{os.path.join(os.getcwd(), flight_date, dt.now().strftime("%Y-%m-%d"), f"{city[0]}-{city[1]}.csv")}')
+                continue
+            elif ('http' not in Flight_DataFetcher.driver.current_url):
+                print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前的URL是：{driver.current_url}')
+                # 初始化页面
+                Flight_DataFetcher.get_page(1)
+
+            else:
+                # 后续运行只需更换出发与目的地
+                Flight_DataFetcher.change_city()
+
+            time.sleep(crawal_interval)
+
+    # 运行结束退出
+    try:
+        driver = Flight_DataFetcher.driver
+        driver.quit()
+    except Exception as e:
+        print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} An error occurred while quitting the driver: {e}')
+
+    print(f'\n{time.strftime("%Y-%m-%d_%H-%M-%S")} 程序运行完成！！！！')
diff --git a/src/clawer/db_import.py b/src/clawer/db_import.py
new file mode 100644
index 0000000..774d8ed
--- /dev/null
+++ b/src/clawer/db_import.py
@@ -0,0 +1,90 @@
+import pandas as pd
+import mysql.connector
+from mysql.connector import Error
+import os
+from datetime import datetime, timedelta
+
+# 数据库连接配置
+db_config = {
+    'host': 'localhost',  # 修改这里，去掉端口号
+    'port': 3307,  # 单独指定端口号
+    'database': 'fly_ticket',
+    'user': 'root',
+    'password': '123456'
+}
+
+def import_csv_to_db(file_path, cursor):
+    df = pd.read_csv(file_path)
+    for index, row in df.iterrows():
+        sql = """INSERT INTO flight (f_n, f_s_p, f_a_p, f_s_a, f_a_a, f_s_t, f_a_t, f_Date, f_Delay, f_p, f_food, f_wide, f_depcode, f_dstcode)
+                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                 ON DUPLICATE KEY UPDATE 
+                 f_s_p = VALUES(f_s_p), 
+                 f_a_p = VALUES(f_a_p), 
+                 f_s_a = VALUES(f_s_a), 
+                 f_a_a = VALUES(f_a_a), 
+                 f_s_t = VALUES(f_s_t), 
+                 f_a_t = VALUES(f_a_t), 
+                 f_Delay = VALUES(f_Delay), 
+                 f_p = VALUES(f_p), 
+                 f_food = VALUES(f_food), 
+                 f_wide = VALUES(f_wide), 
+                 f_depcode = VALUES(f_depcode), 
+                 f_dstcode = VALUES(f_dstcode);"""
+        
+        values = (
+            row['航班号'],
+            row['出发城市'],
+            row['到达城市'],
+            row['出发机场'],
+            row['到达机场'],
+            row['出发时间'],
+            row['到达时间'],
+            row['出发日期'],
+            row['出发延误时间'],
+            row['economy_origin'],
+            row['经济舱餐食信息'],
+            row['经济舱座椅间距'],
+            row['出发机场三字码'],
+            row['到达机场三字码']
+        )
+        
+        cursor.execute(sql, values)
+
+try:
+    # 连接到数据库
+    conn = mysql.connector.connect(**db_config)
+    
+    if conn.is_connected():
+        cursor = conn.cursor()
+        
+        # 设置日期范围
+        start_date = datetime(2024, 10, 22)
+        end_date = datetime(2024, 11, 1)
+        current_date = start_date
+
+        while current_date <= end_date:
+            folder_name = current_date.strftime("%Y-%m-%d")
+            folder_path = os.path.join("D:\\college\\SE2\\Ctrip-Crawler-main\\Ctrip-Crawler-main", folder_name, "2024-10-22")
+            
+            if os.path.exists(folder_path):
+                for file_name in os.listdir(folder_path):
+                    if file_name.endswith('.csv'):
+                        file_path = os.path.join(folder_path, file_name)
+                        import_csv_to_db(file_path, cursor)
+                        print(f"已导入文件: {file_path}")
+            
+            current_date += timedelta(days=1)
+        
+        # 提交更改
+        conn.commit()
+        print("所有数据成功插入到数据库")
+
+except Error as e:
+    print(f"连接数据库时出错: {e}")
+
+finally:
+    if 'conn' in locals() and conn.is_connected():
+        cursor.close()
+        conn.close()
+        print("数据库连接已关闭")