Merge branch 'dev-clawer' into dev

Merge the main System and FlightInformation-clawer into the same branch to form a complete system
9 months ago · a29f33b493
parent 8188fb18cb f1052b180b
commit a29f33b493
24 changed files with 5096 additions and 0 deletions
--- a/DataMaintenance/.scannerwork/.sonar_lock
+++ b/DataMaintenance/.scannerwork/.sonar_lock
--- a/DataMaintenance/.scannerwork/report-task.txt
+++ b/DataMaintenance/.scannerwork/report-task.txt
@ -0,0 +1,6 @@
+projectKey=clawer
+serverUrl=http://localhost:9000
+serverVersion=7.8.0.26217
+dashboardUrl=http://localhost:9000/dashboard?id=clawer
+ceTaskId=AZMv5JVBnAUFl5pPDUTm
+ceTaskUrl=http://localhost:9000/api/ce/task?id=AZMv5JVBnAUFl5pPDUTm
--- a/DataMaintenance/LICENSE
+++ b/DataMaintenance/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Suysker
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/version/ctrip_flights_scraper_V3.5.py
+++ b/version/ctrip_flights_scraper_V3.5.py
--- a/version/gen_proxy_servers.py
+++ b/version/gen_proxy_servers.py
@ -0,0 +1,157 @@
+import os
+import re
+import subprocess
+
+# Module-level state shared by start_proxy_servers() and switch_proxy_server().
+# proxy_switch_count: incremented on every successful switch_proxy_server() rotation.
+proxy_switch_count = 0
+# iface_ipv6_dict: interface name -> IPv6 address, filled in by start_proxy_servers().
+iface_ipv6_dict = {}
+
+def is_root():
+    return os.geteuid() == 0
+
+def interface_usable(interface_name, skip_check=False, ipv6_address='2400:3200::1', max_retries=3):
+    if skip_check:
+        return True
+    current_try = 0
+    while current_try < max_retries:
+        try:
+            cmd_result = subprocess.run(["ping", "-c", "1", "-I", interface_name, ipv6_address], stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=5)
+            if cmd_result.returncode == 0:
+                return True  # 成功ping通，直接返回True
+        except subprocess.TimeoutExpired:
+            print(f"Ping attempt {current_try + 1} of {max_retries} timed out. Retrying...")
+        except subprocess.SubprocessError as e:
+            # 捕获其他subprocess相关的异常
+            print(f"An error occurred while trying to ping: {e}. Retrying...")
+        current_try += 1
+    return False  # 所有尝试后仍未成功，返回False
+
+def get_existing_interfaces(base_interface='eth0'):
+    cmd_result = subprocess.run(["ip", "addr", "show"], stdout=subprocess.PIPE)
+    output = cmd_result.stdout.decode()
+    
+    # 匹配接口名称
+    iface_pattern = re.compile(re.escape(base_interface) + r'_([0-9]+)@')
+    iface_matches = iface_pattern.findall(output)
+    
+    # 构建完整的接口名称列表
+    interfaces = [f"{base_interface}_{match}" for match in iface_matches]
+
+    # 初始化字典来存储接口名称与其IPv6地址的映射
+    iface_ipv6_dict = {}
+
+    for iface in interfaces:
+        # 对于每个接口，查找其IPv6地址，这里假设只提取第一个IPv6地址
+        # 注意：需要确保只匹配特定接口的IPv6地址，因此使用iface作为正则表达式的一部分
+        cmd_result = subprocess.run(["ip", "addr", "show", iface], stdout=subprocess.PIPE)
+        output = cmd_result.stdout.decode()
+        ipv6_pattern = re.compile(r"inet6\s+([0-9a-f:]+)\/\d+")
+        ipv6_matches = ipv6_pattern.findall(output)
+        
+        # 过滤掉以"fe80"开头的IPv6地址
+        ipv6_addresses = [addr for addr in ipv6_matches if not addr.startswith("fe80")]
+        
+        # 如果存在非链路本地的IPv6地址，只取第一个地址
+        if ipv6_addresses:
+            iface_ipv6_dict[iface] = ipv6_addresses[0]
+
+    return iface_ipv6_dict
+
+def execute_ip6tables_command(command):
+    sudo_cmd = ["sudo"] if not is_root() else []
+    cmd = sudo_cmd + command.split()
+    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+def switch_proxy_server(mode='normal'):
+    global proxy_switch_count
+    global iface_ipv6_dict
+    
+    if mode == 'normal':
+        if iface_ipv6_dict:
+            proxy_switch_count += 1
+            proxy_index = proxy_switch_count % len(iface_ipv6_dict)
+            selected_interface = list(iface_ipv6_dict.keys())[proxy_index]
+            ipv6_address = iface_ipv6_dict[selected_interface]
+            # 清空自定义链
+            execute_ip6tables_command('ip6tables -t nat -F FAKE_IPV6_CHAIN')
+            # 添加SNAT规则
+            execute_ip6tables_command(f'ip6tables -t nat -A FAKE_IPV6_CHAIN -j SNAT --to-source {ipv6_address}')
+            
+            print(f"Using interface: {selected_interface}, Connecting to: {ipv6_address}")
+
+def create_ipv6_addresses(n, base_interface='eth0', delete_interface=True):
+    sudo_cmd = ["sudo"] if not is_root() else []
+    if delete_interface:
+        delete_ipv6_addresses(base_interface)
+    existing_interfaces = list(get_existing_interfaces(base_interface).keys())
+    interfaces = []
+    for i in range(1, n + 1):
+        interface_name = f"{base_interface}_{i}"
+        
+        # Check if the interface exists, if yes, delete it first
+        if interface_name in existing_interfaces:
+            if interface_usable(interface_name):
+                print(f"Interface {interface_name} already exists. Skipping creation.")
+                interfaces.append(interface_name)
+                continue
+            else:
+                subprocess.run(sudo_cmd + ["ip", "link", "delete", interface_name])
+        
+        # Now add the interface
+        subprocess.run(sudo_cmd + ["ip", "link", "add", "link", base_interface, interface_name, "type", "macvlan", "mode", "bridge"])
+        subprocess.run(sudo_cmd + ["ip", "link", "set", interface_name, "up"])
+        #subprocess.run(sudo_cmd + ["dhclient", "-6", "-nw", interface_name])
+        interfaces.append(interface_name)
+    return interfaces
+
+def delete_ipv6_addresses(base_interface='eth0'):
+    sudo_cmd = ["sudo"] if not is_root() else []
+    existing_interfaces = list(get_existing_interfaces(base_interface).keys())
+    
+    for interface_name in existing_interfaces:
+        subprocess.run(sudo_cmd + ["ip", "link", "delete", interface_name])
+
+def stop_proxy_servers(base_interface='eth0', delete_interface=True):
+    # 删除流量重定向到自定义链
+    execute_ip6tables_command('ip6tables -t nat -D POSTROUTING -j FAKE_IPV6_CHAIN')
+    # 删除自定义链
+    execute_ip6tables_command('ip6tables -t nat -X FAKE_IPV6_CHAIN')
+    
+    if delete_interface:
+        print("正在关闭代理服务器...")
+        print("删除IPv6地址...")
+        delete_ipv6_addresses(base_interface)
+        print("代理服务器已关闭.")
+    else:
+        print("正在关闭代理服务器...")
+        print("代理服务器已关闭.")
+
+def start_proxy_servers(n, mode='normal', base_interface='eth0', delete_interface=True):
+    global iface_ipv6_dict
+    
+    interfaces = create_ipv6_addresses(n, base_interface, delete_interface)
+    #获取生成的接口及IP
+    iface_ipv6_dict = get_existing_interfaces(base_interface)
+
+    if iface_ipv6_dict:
+        # 删除流量重定向到自定义链
+        execute_ip6tables_command('ip6tables -t nat -D POSTROUTING -j FAKE_IPV6_CHAIN')
+        # 删除自定义链
+        execute_ip6tables_command('ip6tables -t nat -X FAKE_IPV6_CHAIN')
+        
+        # 创建自定义链
+        execute_ip6tables_command('ip6tables -t nat -N FAKE_IPV6_CHAIN')
+        # 流量重定向到自定义链
+        execute_ip6tables_command(f'ip6tables -t nat -A POSTROUTING -o {base_interface} -j FAKE_IPV6_CHAIN')
+    
+        if mode == 'normal':
+            selected_interface = list(iface_ipv6_dict.keys())[0]
+            ipv6_address = iface_ipv6_dict[selected_interface]
+            # 添加SNAT规则
+            execute_ip6tables_command(f'ip6tables -t nat -A FAKE_IPV6_CHAIN -j SNAT --to-source {ipv6_address}')
+    
+            print(f"Using interface: {selected_interface}, Connecting to: {ipv6_address}")
+        elif mode == 'random':
+            for index, (interface, ipv6_address) in enumerate(iface_ipv6_dict.items()):
+                adjusted_probability = 1/(len(iface_ipv6_dict)-index)
+                execute_ip6tables_command(f'ip6tables -t nat -A FAKE_IPV6_CHAIN -m statistic --mode random --probability {adjusted_probability} -j SNAT --to-source {ipv6_address}')
--- a/DataMaintenance/README.md
+++ b/DataMaintenance/README.md
@ -0,0 +1,50 @@
+# Ctrip-Crawler
+
+
+
+## 概述
+
+Ctrip-Crawler 是一个携程航班信息的专业爬虫工具，主要基于 Selenium 框架进行实现。
+通过 requests 直接访问携程 API 的方式，由于 IP 限制和 JS 逆向工程的挑战，已不再适用（会报错）。
+
+携程支持IPV6访问，因此可以通过生成大量IPV6规避 IP 限制。
+
+
+
+## 主要特性
+
+Selenium 自动化框架：与直接请求 API 的方法不同，该项目基于 Selenium，提供高度可定制和交互式的浏览器模拟。
+
+灵活的错误处理机制：针对不同类型的异常（如超时、验证码出现、未知错误等），实施相应的处理策略，包括重试和人工干预。
+
+IP限制解决方案：利用页面特性和用户模拟，规避了 IP 限制，提高了爬取稳定性。
+
+数据校验与解析：对获取的数据进行严格的数据质量和完整性校验，包括 gzip 解压缩和 JSON 格式解析。
+
+版本迭代与优化：V2 版本解决了验证码问题；V3 版本提高了系统的稳定性和可用性；V3.5 版本增加了 Linux 系统下多 IPv6 网口的生成与代理。
+
+
+
+## 文档和教程
+
+详细的使用指南和开发文档可在以下博客中查看：
+
+[基于selenium的携程机票爬取程序](https://blog.suysker.xyz/archives/35)
+
+[基于selenium的携程机票爬取程序V2](https://blog.suysker.xyz/archives/139)
+
+[基于request的携程机票爬取程序](https://blog.suysker.xyz/archives/37)
+
+[基于request的航班历史票价爬取](https://blog.suysker.xyz/archives/36)
+
+
+
+## TO DO
+
+V4.0增加多线程分片运行……
+
+
+
+## 贡献与反馈
+
+如果你有更好的优化建议或发现任何 bug，请通过 Issues 或 Pull Requests 与我们交流。我们非常欢迎各种形式的贡献！
--- a/DataMaintenance/csv_to_xlsx_converter.py
+++ b/DataMaintenance/csv_to_xlsx_converter.py
@ -0,0 +1,73 @@
+import pandas as pd
+import os
+from datetime import datetime, timedelta
+
+def get_departure_destination(file_name):
+    name_without_extension = os.path.splitext(file_name)[0]
+    return name_without_extension
+
+def merge_csv_files(csv_files, output_xlsx):
+    all_dfs = []
+    for csv_file in csv_files:
+        df = pd.read_csv(csv_file)
+        # 添加日期列
+        date = os.path.basename(os.path.dirname(os.path.dirname(csv_file)))
+        df['出发日期'] = date
+        
+        # 选择指定的列
+        selected_columns = [
+            '航班号','出发城市','到达城市', '航空公司', '出发日期', '出发时间', '到达时间', 
+            '中转信息', 'economy_origin', '经济舱餐食信息', '经济舱座椅间距', '出发延误时间'
+        ]
+        df = df[selected_columns]
+        
+        # 重命名 'economy_origin' 为 '票价'
+        df = df.rename(columns={'economy_origin': '票价'})
+        
+        all_dfs.append(df)
+    
+    # 合并所有数据框
+    merged_df = pd.concat(all_dfs, ignore_index=True)
+    
+    # 保存为Excel文件
+    merged_df.to_excel(output_xlsx, index=False, engine='openpyxl')
+
+# 设置日期范围
+start_date = datetime(2024, 11, 12)# 起始日期
+end_date = datetime(2024, 11, 19)# 结束日期
+clawer_date = datetime(2024, 11, 12)# 爬虫日期
+# 设置输入和输出文件夹路径
+input_base_path = "./"
+output_folder = "./xlsx_output"
+
+# 确保输出文件夹存在
+if not os.path.exists(output_folder):
+    os.makedirs(output_folder)
+
+# 用于存储同一始发地和目的地的CSV文件
+route_files = {}
+
+current_date = start_date
+while current_date <= end_date:
+    folder_name = current_date.strftime("%Y-%m-%d")
+    folder_path = os.path.join(input_base_path, folder_name, clawer_date.strftime("%Y-%m-%d"))
+    
+    if os.path.exists(folder_path):
+        for file_name in os.listdir(folder_path):
+            if file_name.endswith('.csv'):
+                csv_path = os.path.join(folder_path, file_name)
+                route = get_departure_destination(file_name)
+                
+                if route not in route_files:
+                    route_files[route] = []
+                route_files[route].append(csv_path)
+    
+    current_date += timedelta(days=1)
+
+# 合并并保存每个路线的文件
+for route, files in route_files.items():
+    output_xlsx = os.path.join(output_folder, f"{route}.xlsx")
+    merge_csv_files(files, output_xlsx)
+    print(f"已合并并保存路线: {route} -> {output_xlsx}")
+
+print("所有CSV文件已成功合并为XLSX文件，并筛选了指定的列")
--- a/DataMaintenance/ctrip_flights_scraper_V3(undetected_chromedriver).py
+++ b/DataMaintenance/ctrip_flights_scraper_V3(undetected_chromedriver).py
--- a/DataMaintenance/ctrip_flights_scraper_V3.py
+++ b/DataMaintenance/ctrip_flights_scraper_V3.py
--- a/DataMaintenance/db_import.py
+++ b/DataMaintenance/db_import.py
@ -0,0 +1,90 @@
+import pandas as pd
+import mysql.connector
+from mysql.connector import Error
+import os
+from datetime import datetime, timedelta
+
+# 数据库连接配置
+db_config = {
+    'host': '152.136.166.253',  # 修改这里，去掉端口号
+    'port': 8989,  # 单独指定端口号
+    'database': 'fly_ticket',
+    'user': 'root',
+    'password': 'Cauc@2024'
+}
+
+def import_csv_to_db(file_path, cursor):
+    df = pd.read_csv(file_path)
+    for index, row in df.iterrows():
+        sql = """INSERT INTO flight (f_n, f_s_p, f_a_p, f_s_a, f_a_a, f_s_t, f_a_t, f_Date, f_Delay, f_p, f_food, f_wide, f_depcode, f_dstcode)
+                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                 ON DUPLICATE KEY UPDATE 
+                 f_s_p = VALUES(f_s_p), 
+                 f_a_p = VALUES(f_a_p), 
+                 f_s_a = VALUES(f_s_a), 
+                 f_a_a = VALUES(f_a_a), 
+                 f_s_t = VALUES(f_s_t), 
+                 f_a_t = VALUES(f_a_t), 
+                 f_Delay = VALUES(f_Delay), 
+                 f_p = VALUES(f_p), 
+                 f_food = VALUES(f_food), 
+                 f_wide = VALUES(f_wide), 
+                 f_depcode = VALUES(f_depcode), 
+                 f_dstcode = VALUES(f_dstcode);"""
+        
+        values = (
+            row['航班号'],
+            row['出发城市'],
+            row['到达城市'],
+            row['出发机场'],
+            row['到达机场'],
+            row['出发时间'],
+            row['到达时间'],
+            row['出发日期'],
+            row['出发延误时间'],
+            row['economy_origin'],
+            row['经济舱餐食信息'],
+            row['经济舱座椅间距'],
+            row['出发机场三字码'],
+            row['到达机场三字码']
+        )
+        
+        cursor.execute(sql, values)
+
+try:
+    # 连接到数据库
+    conn = mysql.connector.connect(**db_config)
+    
+    if conn.is_connected():
+        cursor = conn.cursor()
+        
+        # 设置日期范围
+        start_date = datetime(2024, 11, 12)
+        end_date = datetime(2024, 11, 20)
+        current_date = start_date
+
+        while current_date <= end_date:
+            folder_name = current_date.strftime("%Y-%m-%d")
+            folder_path = os.path.join("D:\college\SE2\Ctrip-Crawler-main\Ctrip-Crawler-withComfortInfo", folder_name, "2024-11-12")
+            
+            if os.path.exists(folder_path):
+                for file_name in os.listdir(folder_path):
+                    if file_name.endswith('.csv'):
+                        file_path = os.path.join(folder_path, file_name)
+                        import_csv_to_db(file_path, cursor)
+                        print(f"已导入文件: {file_path}")
+            
+            current_date += timedelta(days=1)
+        
+        # 提交更改
+        conn.commit()
+        print("所有数据成功插入到数据库")
+
+except Error as e:
+    print(f"连接数据库时出错: {e}")
+
+finally:
+    if 'conn' in locals() and conn.is_connected():
+        cursor.close()
+        conn.close()
+        print("数据库连接已关闭")
--- a/version/ctrip_flights_scraper.py
+++ b/version/ctrip_flights_scraper.py
@ -0,0 +1,412 @@
+import io
+import os
+import gzip
+import time
+import json
+import random
+import requests
+import threading
+import pandas as pd
+from seleniumwire import webdriver
+from datetime import datetime as dt,timedelta
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.common.exceptions import TimeoutException,StaleElementReferenceException,ElementNotInteractableException,ElementClickInterceptedException # 加载异常
+
+
+def getcitycode():
+    cityname,code=[],[]
+    #采用携程的api接口
+    city_url='https://flights.ctrip.com/online/api/poi/get?v='+str(random.random())
+    headers={
+        'dnt':'1',
+        'referer':'https://verify.ctrip.com/',
+        'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
+        }
+    r=requests.get(city_url,headers=headers)
+    citys=json.loads(r.text).get('data')
+    for city in citys:
+        if city =='热门':
+            continue
+        for key in city:
+            try:
+                for k in citys[city][key]:
+                    cityname.append(k['display'])
+                    code.append(k['data'])
+            except:
+                continue
+    citycode=dict(zip(cityname,code))
+    
+    return cityname,citycode
+
+
+
+class FLIGHT(object):
+    def __init__(self):
+        self.url = 'https://flights.ctrip.com/online/list/oneway' #携程机票查询页面
+        self.chromeDriverPath = 'C:/Program Files/Google/Chrome/Application/chromedriver' #chromedriver位置
+        self.options = webdriver.ChromeOptions() # 创建一个配置对象
+        #self.options.add_argument('--incognito')  # 隐身模式（无痕模式）
+        #self.options.add_argument('User-Agent=%s'%UserAgent().random) # 替换User-Agent
+        self.options.add_argument("--disable-blink-features")
+        self.options.add_argument("--disable-blink-features=AutomationControlled")
+        self.options.add_experimental_option("excludeSwitches", ['enable-automation'])# 不显示正在受自动化软件控制
+        self.driver = webdriver.Chrome(executable_path=self.chromeDriverPath,chrome_options=self.options)
+        self.driver.maximize_window()
+        self.err=0#错误重试次数
+          
+    
+    def getpage(self): 
+        ##############获取地区码
+        self.startcode=self.citycode[self.city[0]][-3:]
+        self.endcode=self.citycode[self.city[1]][-3:]
+        
+        ##############生成访问链接
+        flights_url=self.url+'-'+self.startcode+'-'+self.endcode+'?&depdate='+self.date    
+        print(flights_url)
+        ##############设置加载超时阈值
+        self.driver.set_page_load_timeout(300)
+        try:
+            self.driver.get(flights_url)
+        except:
+            print('页面连接失败')
+            self.driver.close()
+            self.getpage()
+        else:
+            try:
+                ##############判断是否存在验证码
+                self.driver.find_element(By.CLASS_NAME,"basic-alert.alert-giftinfo")
+                print('等待2小时后重试')
+                time.sleep(7200)
+                self.getpage()
+            except:
+                ##############不存在验证码，执行下一步
+                self.remove_btn()
+
+    def remove_btn(self):
+        try:
+            js_remove="$('.notice-box').remove();"
+            self.driver.execute_script(js_remove)
+        except Exception as e:
+            print('防疫移除失败',e)
+        else:
+            self.changecity()
+
+    
+    
+    def changecity(self):
+        try:
+        	#获取出发地与目的地元素位置
+            its=self.driver.find_elements(By.CLASS_NAME,'form-input-v3')
+            
+            #若出发地与目标值不符，则更改出发地
+            while self.city[0] not in its[0].get_attribute('value'):    
+                its[0].click()
+                time.sleep(0.5)
+                its[0].send_keys(Keys.CONTROL + 'a')
+                time.sleep(0.5)
+                its[0].send_keys(self.city[0])
+
+            time.sleep(0.5)
+
+            #若目的地与目标值不符，则更改目的地
+            while self.city[1] not in its[1].get_attribute('value'):
+                its[1].click()
+                time.sleep(0.5)
+                its[1].send_keys(Keys.CONTROL + 'a')
+                time.sleep(0.5)
+                its[1].send_keys(self.city[1])
+            
+            time.sleep(0.5)
+            try:
+                #通过低价提醒按钮实现enter键换页
+                self.driver.implicitly_wait(5) # seconds
+                self.driver.find_elements(By.CLASS_NAME,'low-price-remind')[0].click()
+            except IndexError as e:
+                print('\n更换城市错误 找不到元素',e)
+                #以防万一
+                its[1].send_keys(Keys.ENTER)
+            
+            print('\n更换城市成功',self.city[0]+'-'+self.city[1])
+        except (ElementNotInteractableException,StaleElementReferenceException,ElementClickInterceptedException,ElementClickInterceptedException) as e:
+            print('\n更换城市错误 元素错误',e)
+            self.err+=1
+            if self.err<=5:
+                self.click_btn()
+            else:
+                self.err=0
+                del self.driver.requests
+                self.getpage()
+        except Exception as e:
+            print('\n更换城市错误',e)
+            #删除本次请求
+            del self.driver.requests
+            #从头开始重新执行程序
+            self.getpage()
+        else:
+            #若无错误，执行下一步
+            self.err=0
+            self.getdata()           
+            
+            
+    
+    def getdata(self):
+        try:
+            #等待响应加载完成
+            self.predata = self.driver.wait_for_request('/international/search/api/search/batchSearch?.*', timeout=60)
+        
+            rb=dict(json.loads(self.predata.body).get('flightSegments')[0])
+        
+        except TimeoutException as e:
+            print('\获取数据错误',e)
+            #删除本次请求
+            del self.driver.requests
+            #从头开始重新执行程序
+            self.getpage()
+        else:
+            #检查数据获取正确性
+            if rb['departureCityName'] == self.city[0] and rb['arrivalCityName'] == self.city[1]:
+                print('城市获取正确')
+                #删除本次请求
+                del self.driver.requests
+                #若无错误，执行下一步
+                self.decode_data()
+            else:
+                #删除本次请求
+                del self.driver.requests
+                #重新更换城市
+                self.changecity()
+    
+    
+    
+    def decode_data(self):
+        try:
+            buf = io.BytesIO(self.predata.response.body)
+            gf = gzip.GzipFile(fileobj = buf)
+            self.dedata = gf.read().decode('UTF-8')
+            self.dedata=json.loads(self.dedata)
+        except:
+            print('重新获取数据')
+            self.getpage()
+        else:
+            #若无错误，执行下一步
+            self.check_data()
+            
+        
+        
+    def check_data(self):
+        try:
+            self.flightItineraryList=self.dedata['data']['flightItineraryList']
+            #倒序遍历,删除转机航班
+            for i in range(len(self.flightItineraryList)-1, -1, -1):
+                if self.flightItineraryList[i]['flightSegments'][0]['transferCount'] !=0:
+                    self.flightItineraryList.pop(i)
+            if len(self.flightItineraryList):
+                #存在直航航班，执行下一步
+                self.muti_process()
+            else:
+                print('不存在直航航班')
+                return 0
+        except:
+            print('不存在直航航班')
+            return 0        
+                      
+    
+    def muti_process(self):
+        processes = []
+
+        self.flights = pd.DataFrame()
+        self.prices = pd.DataFrame()
+        #处理航班信息
+        processes.append(threading.Thread(target=self.proc_flightSegments))
+        #处理票价信息
+        processes.append(threading.Thread(target=self.proc_priceList))
+
+        for pro in processes:
+            pro.start()
+        for pro in processes:
+            pro.join()
+        
+        #若无错误，执行下一步
+        self.mergedata()
+    
+    def proc_flightSegments(self):
+        for flightlist in self.flightItineraryList:
+            flightlist=flightlist['flightSegments'][0]['flightList']
+            flightUnitList=dict(flightlist[0])
+
+            
+            departureday=flightUnitList['departureDateTime'].split(' ')[0]
+            departuretime=flightUnitList['departureDateTime'].split(' ')[1]
+            
+            arrivalday=flightUnitList['arrivalDateTime'].split(' ')[0]
+            arrivaltime=flightUnitList['arrivalDateTime'].split(' ')[1]            
+            
+            #删除一些不重要的信息
+            dellist=['sequenceNo', 'marketAirlineCode',
+             'departureProvinceId','departureCityId','departureCityCode','departureAirportShortName','departureTerminal',
+             'arrivalProvinceId','arrivalCityId','arrivalCityCode','arrivalAirportShortName','arrivalTerminal',
+             'transferDuration','stopList','leakedVisaTagSwitch','trafficType','highLightPlaneNo','mealType',
+             'operateAirlineCode','arrivalDateTime','departureDateTime','operateFlightNo','operateAirlineName']
+            for value in dellist:
+                try:
+                    flightUnitList.pop(value)
+                except:
+                    continue
+            
+            #更新日期格式
+            flightUnitList.update({'departureday': departureday, 'departuretime': departuretime,
+                                   'arrivalday': arrivalday, 'arrivaltime': arrivaltime}) 
+            
+            
+            self.flights=pd.concat([self.flights,pd.DataFrame(flightUnitList,index=[0])],ignore_index=True)
+
+                          
+            
+    def proc_priceList(self):
+        for flightlist in self.flightItineraryList:
+            flightNo=flightlist['itineraryId'].split('_')[0]
+            priceList=flightlist['priceList']
+            
+            #经济舱，经济舱折扣
+            economy,economy_discount=[],[]
+            #商务舱，商务舱折扣
+            bussiness,bussiness_discount=[],[]
+            
+            for price in priceList:
+                adultPrice=price['adultPrice']
+                cabin=price['cabin']
+                priceUnitList=dict(price['priceUnitList'][0]['flightSeatList'][0])
+                discountRate=priceUnitList['discountRate']
+                #经济舱
+                if cabin=='Y':
+                    economy.append(adultPrice)
+                    economy_discount.append(discountRate)
+                 #商务舱
+                elif cabin=='C':
+                    bussiness.append(adultPrice)
+                    bussiness_discount.append(discountRate)
+            
+            if economy !=[]:
+                try:
+                    economy_origin=economy[economy_discount.index(1)]
+                except:
+                    economy_origin=int(max(economy)/max(economy_discount))
+            
+                if min(economy_discount) !=1:
+                    economy_low=min(economy)
+                    economy_cut=min(economy_discount)
+                else:
+                    economy_low=''
+                    economy_cut=''
+                
+            else:
+                economy_origin=''
+                economy_low=''
+                economy_cut=''
+            
+
+            if bussiness !=[]: 
+                try:
+                    bussiness_origin=bussiness[bussiness_discount.index(1)]
+                except:
+                    bussiness_origin=int(max(bussiness)/max(bussiness_discount))
+            
+                if min(bussiness_discount) !=1:
+                    bussiness_low=min(bussiness)
+                    bussiness_cut=min(bussiness_discount)
+                else:
+                    bussiness_low=''
+                    bussiness_cut=''
+                
+            else:
+                bussiness_origin=''
+                bussiness_low=''
+                bussiness_cut=''        
+        
+            price_info={'flightNo':flightNo,
+                    'economy_origin':economy_origin,'economy_low':economy_low,'economy_cut':economy_cut,
+                    'bussiness_origin':bussiness_origin,'bussiness_low':bussiness_low,'bussiness_cut':bussiness_cut}
+
+            #self.prices=self.prices.append(price_info,ignore_index=True)
+            self.prices=pd.concat([self.prices,pd.DataFrame(price_info,index=[0])],ignore_index=True)
+        
+   
+   
+    def mergedata(self):
+        try:
+            self.df = self.flights.merge(self.prices,on=['flightNo'])
+            
+            self.df['数据获取日期']=dt.now().strftime('%Y-%m-%d')
+            
+            #对pandas的columns进行重命名
+            order=['数据获取日期','航班号','航空公司',
+                   '出发日期','出发时间','到达日期','到达时间','飞行时长','出发国家','出发城市','出发机场','出发机场三字码',
+                   '到达国家','到达城市','到达机场','到达机场三字码','飞机型号','飞机尺寸','飞机型号三字码',
+                   '经济舱原价','经济舱最低价','经济舱折扣','商务舱原价','商务舱最低价','商务舱折扣',
+                   '到达准点率','停留次数']
+            
+            origin=['数据获取日期','flightNo','marketAirlineName',
+                    'departureday','departuretime','arrivalday','arrivaltime','duration',
+                    'departureCountryName','departureCityName','departureAirportName','departureAirportCode',
+                    'arrivalCountryName','arrivalCityName','arrivalAirportName','arrivalAirportCode',
+                    'aircraftName','aircraftSize','aircraftCode',
+                    'economy_origin','economy_low','economy_cut',
+                    'bussiness_origin','bussiness_low','bussiness_cut',
+                    'arrivalPunctuality','stopCount']
+            
+            columns=dict(zip(origin,order))
+
+            self.df=self.df.rename(columns=columns)
+              
+            self.df = self.df[order]
+            
+            
+            if not os.path.exists(self.date):
+                os.makedirs(self.date)      
+
+            filename=os.getcwd()+'\\'+self.date+'\\'+self.date+'-'+self.city[0]+'-'+self.city[1]+'.csv'
+
+            self.df.to_csv(filename,encoding='GB18030',index=False)
+            
+            print('\n数据爬取完成',filename) 
+        except Exception as e:
+            print('合并数据失败',e)
+
+
+    def demain(self,citys,citycode):
+        self.citycode=citycode
+        #设置出发日期
+        self.date=dt.now()+timedelta(days=7)
+        self.date=self.date.strftime('%Y-%m-%d')
+        
+        for city in citys:
+            self.city=city
+            
+            if citys.index(city)==0:
+                #第一次运行
+                self.getpage()
+            else:
+                #后续运行只需更换出发与目的地
+                self.changecity()
+        
+        #运行结束退出
+        self.driver.quit()
+
+
+
+if __name__ == '__main__':
+    citys=[]
+    cityname,citycode=getcitycode()
+    city=['上海','广州','深圳','北京']
+    ytic=list(reversed(city))
+    for m in city:
+        for n in ytic:
+            if m==n:
+                continue
+            else:
+                citys.append([m,n])
+    fly = FLIGHT()
+    fly.demain(citys,citycode)
+    print('\n程序运行完成！！！！')    
+    
--- a/version/ctrip_flights_scraper_V2.py
+++ b/version/ctrip_flights_scraper_V2.py
@ -0,0 +1,397 @@
+import io
+import os
+import gzip
+import time
+import json
+import threading
+import pandas as pd
+from seleniumwire import webdriver
+from datetime import datetime as dt,timedelta
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.common.exceptions import TimeoutException,StaleElementReferenceException,ElementNotInteractableException,ElementClickInterceptedException # 加载异常
+
+
+
+class FLIGHT(object):
+    def __init__(self):
+        """Start a Chrome session and open the Ctrip domestic flight page."""
+        # Location of the chromedriver executable.
+        self.chromeDriverPath = 'C:/Program Files/Google/Chrome/Application/chromedriver'
+        # Browser configuration object.
+        self.options = webdriver.ChromeOptions()
+        # Incognito window, then the two blink-feature switches, in this order.
+        for switch in ('--incognito',
+                       "--disable-blink-features",
+                       "--disable-blink-features=AutomationControlled"):
+            self.options.add_argument(switch)
+        # Hide the "controlled by automated software" banner.
+        self.options.add_experimental_option("excludeSwitches", ['enable-automation'])
+        self.driver = webdriver.Chrome(executable_path=self.chromeDriverPath,chrome_options=self.options)
+        # Page-load timeout threshold.
+        self.driver.set_page_load_timeout(300)
+        self.driver.maximize_window()
+        # Retry counter used by changecity().
+        self.err=0
+        # Go to the landing page.
+        self.driver.get('https://flights.ctrip.com/online/channel/domestic')
+
+          
+    
+    def getpage(self): 
+        """Set up a one-way search for ``self.date`` and hand over to ``changecity``.
+
+        Clicks the flight icon and the first ``radio-label`` entry, then keeps
+        opening the date picker until the date input's value equals
+        ``self.date``, and finally clicks the search button.
+
+        Any exception in that sequence prints a message, closes the current
+        window and calls ``getpage`` again. Otherwise the page is probed for an
+        element with id ``verification-code``: if it is found the method
+        sleeps 7200 seconds and retries; if the probe raises, ``changecity``
+        runs next.
+        """
+        try:
+            self.driver.find_element(By.CLASS_NAME,'pc_home-jipiao').click()# click the plane icon to return to the main view
+            self.driver.implicitly_wait(5) # seconds
+            self.driver.find_elements(By.CLASS_NAME,'radio-label')[0].click()# one-way trip
+            
+            # Repeat until the date input shows the wanted departure date.
+            while self.driver.find_elements(By.CSS_SELECTOR,"[aria-label=请选择日期]")[0].get_attribute("value") != self.date:
+                
+                self.driver.find_element(By.CLASS_NAME,'modifyDate.depart-date').click()# open the date picker
+                
+                for m in self.driver.find_elements(By.CLASS_NAME,'date-picker.date-picker-block'):
+                    
+                    # Skip month panels whose number differs from the MM part of self.date
+                    # (the last character of the panel text is dropped before parsing).
+                    if int(m.find_element(By.CLASS_NAME,'month').text[:-1]) != int(self.date[5:7]):
+                        continue
+                    
+                    for d in m.find_elements(By.CLASS_NAME,'date-d'):
+                        if int(d.text) == int(self.date[-2:]):
+                            d.click()
+                            # Leaves only the day loop; the month loop keeps going.
+                            break                    
+            
+            self.driver.find_element(By.CLASS_NAME,'search-btn').click()# search
+            
+        except:
+            print('页面连接失败')
+            # NOTE(review): the window is closed before the recursive retry, and
+            # the retry has no depth limit - confirm the driver is still usable here.
+            self.driver.close()
+            self.getpage()
+        else:
+            try:
+                # Check whether a verification code (captcha) element is present.
+                self.driver.find_element(By.ID,"verification-code")
+                print('等待2小时后重试')
+                time.sleep(7200)
+                self.getpage()
+            except:
+                # No verification code found: go on to the next step.
+                # NOTE(review): this bare except also catches anything raised by the
+                # recursive getpage() call above.
+                self.changecity()
+
+    def remove_btn(self):
+        """Remove the ``.notice-box`` element from the page.
+
+        Any failure is printed and otherwise ignored.
+        """
+        try:
+            self.driver.execute_script("$('.notice-box').remove();")
+        except Exception as exc:
+            print('防疫移除失败',exc)
+
+    
+    def changecity(self):
+        """Type ``self.city`` into the two city inputs and trigger a new search.
+
+        On success ``self.err`` is reset and ``getdata`` runs. Element-level
+        errors are retried through ``changecity`` itself up to five times,
+        after which (and for any other exception) the captured requests are
+        cleared and ``getpage`` restarts the whole flow.
+        """
+        
+        # Remove the notice banner first.
+        self.remove_btn()
+        
+        try:
+            # Locate the departure and destination inputs.
+            its=self.driver.find_elements(By.CLASS_NAME,'form-input-v3')
+            
+            # Retype the departure city until the input value contains it.
+            while self.city[0] not in its[0].get_attribute('value'):    
+                its[0].click()
+                time.sleep(0.5)
+                its[0].send_keys(Keys.CONTROL + 'a')
+                time.sleep(0.5)
+                its[0].send_keys(self.city[0])
+
+            time.sleep(0.5)
+
+            # Retype the destination city until the input value contains it.
+            while self.city[1] not in its[1].get_attribute('value'):
+                its[1].click()
+                time.sleep(0.5)
+                its[1].send_keys(Keys.CONTROL + 'a')
+                time.sleep(0.5)
+                its[1].send_keys(self.city[1])
+            
+            time.sleep(0.5)
+            try:
+                # Click the low-price reminder button; this stands in for pressing Enter.
+                self.driver.implicitly_wait(5) # seconds
+                self.driver.find_elements(By.CLASS_NAME,'low-price-remind')[0].click()
+            except IndexError as e:
+                print('\n更换城市错误 找不到元素',e)
+                # Fallback: send Enter to the destination input.
+                its[1].send_keys(Keys.ENTER)
+            
+            print('\n更换城市成功',self.city[0]+'-'+self.city[1])
+        # Element-level failures: bounded retry.
+        # NOTE(review): ElementClickInterceptedException is listed twice below.
+        except (IndexError,ElementNotInteractableException,StaleElementReferenceException,ElementClickInterceptedException,ElementClickInterceptedException) as e:
+            print('\n更换城市错误 元素错误',e)
+            self.err+=1
+            if self.err<=5:
+                self.changecity()
+            else:
+                self.err=0
+                del self.driver.requests
+                self.getpage()
+        except Exception as e:
+            print('\n更换城市错误',e)
+            # Drop the requests captured so far.
+            del self.driver.requests
+            # Restart the flow from the beginning.
+            self.getpage()
+        else:
+            # No error: reset the retry counter and continue.
+            self.err=0
+            self.getdata()           
+            
+            
+    
+    def getdata(self):
+        """Wait for the batchSearch request and check it matches ``self.city``.
+
+        The captured request is kept on ``self.predata``. When the city names
+        in its body equal ``self.city`` the response is decoded via
+        ``decode_data``; when they differ ``changecity`` is run again. A
+        timeout or an unreadable request body clears the captured requests
+        and restarts from ``getpage``.
+        """
+        try:
+            # Block until the search request has been captured.
+            self.predata = self.driver.wait_for_request('/international/search/api/search/batchSearch?.*', timeout=30)
+
+            rb=dict(json.loads(self.predata.body).get('flightSegments')[0])
+            # Compare the cities named in the request with the wanted pair.
+            matched=rb['departureCityName'] == self.city[0] and rb['arrivalCityName'] == self.city[1]
+
+        except (TimeoutException,ValueError,TypeError,AttributeError,IndexError,KeyError) as e:
+            print('\n获取数据错误',e)
+            # Drop the requests captured so far.
+            del self.driver.requests
+            # Restart the flow from the beginning.
+            self.getpage()
+        else:
+            if matched:
+                print('城市获取正确')
+                # Drop the captured requests; self.predata keeps the one we need.
+                del self.driver.requests
+                self.decode_data()
+            else:
+                del self.driver.requests
+                # Wrong route was searched: set the cities again.
+                self.changecity()
+    
+    
+    
+    def decode_data(self):
+        """Gunzip and JSON-decode the captured response into ``self.dedata``.
+
+        Reads ``self.predata.response.body``. If decompression or parsing
+        fails the flow restarts from ``getpage``; otherwise ``check_data``
+        runs next.
+        """
+        try:
+            with gzip.GzipFile(fileobj=io.BytesIO(self.predata.response.body)) as gf:
+                self.dedata=json.loads(gf.read().decode('UTF-8'))
+        except Exception:
+            # Deliberately broad, but no longer traps KeyboardInterrupt/SystemExit.
+            print('重新获取数据')
+            self.getpage()
+        else:
+            # No error: go on to the next step.
+            self.check_data()
+            
+        
+        
+    def check_data(self):
+        """Keep only direct flights and continue to ``muti_process`` if any remain.
+
+        Returns:
+            ``0`` when the decoded payload lacks the expected structure or
+            holds no itinerary with ``transferCount == 0``; otherwise ``None``
+            after ``muti_process`` has run.
+        """
+        try:
+            itineraries=self.dedata['data']['flightItineraryList']
+            direct=[item for item in itineraries
+                    if item['flightSegments'][0]['transferCount']==0]
+        except (KeyError,IndexError,TypeError):
+            print('不存在直航航班')
+            return 0
+
+        # Filter in place: self.dedata refers to this same list object.
+        itineraries[:]=direct
+        self.flightItineraryList=itineraries
+        if not direct:
+            print('不存在直航航班')
+            return 0
+        # Direct flights exist. This call sits outside the try so that a
+        # downstream failure is not reported as "no direct flights".
+        self.muti_process()
+                      
+    
+    def muti_process(self):
+        """Build ``self.flights`` and ``self.prices`` on two threads, then merge.
+
+        Both frames start empty; one thread runs ``proc_flightSegments`` and
+        the other ``proc_priceList``. ``mergedata`` is called once both have
+        been joined.
+        """
+        self.flights = pd.DataFrame()
+        self.prices = pd.DataFrame()
+
+        # First worker handles flight details, second handles fares.
+        workers = [threading.Thread(target=job)
+                   for job in (self.proc_flightSegments, self.proc_priceList)]
+        for worker in workers:
+            worker.start()
+        for worker in workers:
+            worker.join()
+
+        # Both frames are complete: go on to the next step.
+        self.mergedata()
+    
+    def proc_flightSegments(self):
+        """Append one row per itinerary to ``self.flights``.
+
+        Uses the first entry of the first segment's ``flightList``. The two
+        timestamp fields are split at the space into separate day and time
+        columns, and a fixed set of fields is discarded.
+        """
+        # Fields dropped from every row; the raw timestamps are among them
+        # because the split columns added below replace them.
+        dellist=('sequenceNo', 'marketAirlineCode',
+                 'departureProvinceId','departureCityId','departureCityCode','departureAirportShortName','departureTerminal',
+                 'arrivalProvinceId','arrivalCityId','arrivalCityCode','arrivalAirportShortName','arrivalTerminal',
+                 'transferDuration','stopList','leakedVisaTagSwitch','trafficType','highLightPlaneNo','mealType',
+                 'operateAirlineCode','arrivalDateTime','departureDateTime','operateFlightNo','operateAirlineName')
+
+        for itinerary in self.flightItineraryList:
+            flightUnitList=dict(itinerary['flightSegments'][0]['flightList'][0])
+
+            departure=flightUnitList['departureDateTime'].split(' ')
+            arrival=flightUnitList['arrivalDateTime'].split(' ')
+            departureday,departuretime=departure[0],departure[1]
+            arrivalday,arrivaltime=arrival[0],arrival[1]
+
+            for key in dellist:
+                # Absent keys are fine; nothing else is silenced.
+                flightUnitList.pop(key,None)
+
+            # Store the timestamps in the split format.
+            flightUnitList.update({'departureday': departureday, 'departuretime': departuretime,
+                                   'arrivalday': arrivalday, 'arrivaltime': arrivaltime})
+
+            self.flights=pd.concat([self.flights,pd.DataFrame(flightUnitList,index=[0])],ignore_index=True)
+
+                          
+            
+    def proc_priceList(self):
+        """Append one fare-summary row per itinerary to ``self.prices``.
+
+        For each itinerary the adult prices and discount rates are grouped by
+        cabin code (``'Y'`` economy, ``'C'`` business; other codes are
+        ignored) and reduced by ``_summarize_cabin``. The flight number is
+        the part of ``itineraryId`` before the first underscore.
+        """
+        for itinerary in self.flightItineraryList:
+            flightNo=itinerary['itineraryId'].split('_')[0]
+
+            # cabin code -> (adult prices, discount rates), kept in step.
+            fares={'Y':([],[]),'C':([],[])}
+            for price in itinerary['priceList']:
+                adultPrice=price['adultPrice']
+                cabin=price['cabin']
+                discountRate=price['priceUnitList'][0]['flightSeatList'][0]['discountRate']
+                if cabin in fares:
+                    fares[cabin][0].append(adultPrice)
+                    fares[cabin][1].append(discountRate)
+
+            economy_origin,economy_low,economy_cut=self._summarize_cabin(*fares['Y'])
+            bussiness_origin,bussiness_low,bussiness_cut=self._summarize_cabin(*fares['C'])
+
+            price_info={'flightNo':flightNo,
+                    'economy_origin':economy_origin,'economy_low':economy_low,'economy_cut':economy_cut,
+                    'bussiness_origin':bussiness_origin,'bussiness_low':bussiness_low,'bussiness_cut':bussiness_cut}
+
+            self.prices=pd.concat([self.prices,pd.DataFrame(price_info,index=[0])],ignore_index=True)
+
+    @staticmethod
+    def _summarize_cabin(prices,discounts):
+        """Return ``(origin, low, cut)`` for one cabin's fares.
+
+        ``origin`` is the price listed at discount rate 1 or, if there is
+        none, ``int(max(prices)/max(discounts))``. ``low`` and ``cut`` are the
+        smallest price and smallest rate, or ``''`` when the smallest rate is
+        1. All three are ``''`` when the cabin has no fares.
+        """
+        if not prices:
+            return '','',''
+        if 1 in discounts:
+            origin=prices[discounts.index(1)]
+        else:
+            origin=int(max(prices)/max(discounts))
+        lowest_rate=min(discounts)
+        if lowest_rate!=1:
+            return origin,min(prices),lowest_rate
+        return origin,'',''
+        
+   
+   
+    def mergedata(self):
+        """Join flights with fares, relabel the columns and write the CSV.
+
+        ``self.flights`` and ``self.prices`` are merged on ``flightNo``, a
+        column with today's date is added, the columns are renamed and
+        reordered, and the result is written (GB18030, no index) to
+        ``<cwd>/<self.date>/<self.date>-<from>-<to>.csv``. Any failure is
+        printed rather than raised.
+        """
+        try:
+            self.df = self.flights.merge(self.prices,on=['flightNo'])
+
+            self.df['数据获取日期']=dt.now().strftime('%Y-%m-%d')
+
+            # Output column labels, in output order.
+            order=['数据获取日期','航班号','航空公司',
+                   '出发日期','出发时间','到达日期','到达时间','飞行时长','出发国家','出发城市','出发机场','出发机场三字码',
+                   '到达国家','到达城市','到达机场','到达机场三字码','飞机型号','飞机尺寸','飞机型号三字码',
+                   '经济舱原价','经济舱最低价','经济舱折扣','商务舱原价','商务舱最低价','商务舱折扣',
+                   '到达准点率','停留次数']
+
+            # Source column names, position-for-position with ``order``.
+            origin=['数据获取日期','flightNo','marketAirlineName',
+                    'departureday','departuretime','arrivalday','arrivaltime','duration',
+                    'departureCountryName','departureCityName','departureAirportName','departureAirportCode',
+                    'arrivalCountryName','arrivalCityName','arrivalAirportName','arrivalAirportCode',
+                    'aircraftName','aircraftSize','aircraftCode',
+                    'economy_origin','economy_low','economy_cut',
+                    'bussiness_origin','bussiness_low','bussiness_cut',
+                    'arrivalPunctuality','stopCount']
+
+            self.df=self.df.rename(columns=dict(zip(origin,order)))
+            self.df = self.df[order]
+
+            # One directory per departure date, relative to the working directory.
+            os.makedirs(self.date,exist_ok=True)
+
+            # os.path.join keeps the file inside that directory on every platform.
+            filename=os.path.join(os.getcwd(),self.date,
+                                  self.date+'-'+self.city[0]+'-'+self.city[1]+'.csv')
+
+            self.df.to_csv(filename,encoding='GB18030',index=False)
+
+            print('\n数据爬取完成',filename)
+        except Exception as e:
+            print('合并数据失败',e)
+
+
+    def demain(self,citys):
+        """Scrape every city pair in ``citys`` for tomorrow's date.
+
+        Args:
+            citys: Sequence of ``[departure, arrival]`` pairs. Each pair is
+                stored on ``self.city`` before the page-driving methods run.
+        """
+        # Departure date: tomorrow, formatted as YYYY-MM-DD.
+        self.date=(dt.now()+timedelta(days=1)).strftime('%Y-%m-%d')
+
+        try:
+            for position,city in enumerate(citys):
+                self.city=city
+                if position==0:
+                    # First pair: drive the search page from the start.
+                    self.getpage()
+                else:
+                    # Later pairs only need the two city fields replaced,
+                    # even when a pair repeats the first one.
+                    self.changecity()
+        finally:
+            # Always shut the browser down, even if a pair raised.
+            self.driver.quit()
+
+
+
+if __name__ == '__main__':
+    city=['上海','广州','深圳','北京']
+    ytic=list(reversed(city))
+    # Build the city pairs: every ordered pair of distinct cities, outer
+    # loop in listed order and inner loop in reverse order.
+    citys=[[m,n] for m in city for n in ytic if m!=n]
+    fly = FLIGHT()
+    fly.demain(citys)
+    print('\n程序运行完成！！！！')
+    
--- a/version/基于request的航班历史票价爬取.py
+++ b/version/基于request的航班历史票价爬取.py
@ -0,0 +1,143 @@
+import requests
+import datetime
+import re
+import demjson
+import time
+import pandas as pd
+
+def create_assist_date(datestart = None,dateend = None):
+    """Return every date from ``datestart`` to ``dateend`` inclusive.
+
+    Args:
+        datestart: First date as ``'YYYY-MM-DD'``; defaults to ``'2020-01-01'``.
+        dateend: Last date as ``'YYYY-MM-DD'``; defaults to yesterday.
+
+    Returns:
+        List of ``'YYYY-MM-DD'`` strings in ascending order. The start date
+        is always present, even when ``dateend`` precedes it.
+    """
+    if datestart is None:
+        datestart = '2020-01-01'
+    if dateend is None:
+        dateend = (datetime.datetime.now()+datetime.timedelta(days=-1)).strftime('%Y-%m-%d')
+
+    # Parse both bounds into datetime objects.
+    first=datetime.datetime.strptime(datestart,'%Y-%m-%d')
+    last=datetime.datetime.strptime(dateend,'%Y-%m-%d')
+    # Number of one-day steps after the start date; never negative.
+    steps=max((last-first).days,0)
+    return [(first+datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
+            for offset in range(steps+1)]
+
+def getdata(citys,dateseries):
+    """Download historical fares for each route and pass them to ``proc_data``.
+
+    Args:
+        citys: Sequence of ``[departure, arrival]`` city-name pairs.
+        dateseries: Departure dates as ``'YYYY-MM-DD'`` strings; one POST
+            request is made per route and date.
+
+    A route is abandoned once dates without flights outnumber dates with
+    flights by more than 30. Routes that yield no rows are skipped instead
+    of being sent to ``proc_data``.
+    """
+    url='https://www.lsjpjg.com/getthis.php'
+
+    headers={
+        'Accept': 'application/json, text/javascript, */*; q=0.01',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'Host': 'www.lsjpjg.com',
+        'Origin': 'https://www.lsjpjg.com',
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4647.116 Safari/537.36',
+        'X-Requested-With': 'XMLHttpRequest'
+        }
+
+    for city in citys:
+        df=pd.DataFrame()
+        err=0
+
+        for date in dateseries:
+
+            data={'dep_dt': date,'dep_ct': city[0],'arr_ct': city[1]}
+            try:
+                # A timeout keeps one stalled connection from hanging the run.
+                res=requests.post(url, headers=headers,data=data,timeout=30)
+            except requests.RequestException as e:
+                # A network failure on one date should not abort the route.
+                print(city,'请求失败',date,e)
+                continue
+            # An empty JSON list (behind a BOM) means no flights on that date.
+            if res.text=='\ufeff[]' :
+                print(city,'无航班',date)
+                err+=1
+                # Give up on the route once the counter passes the threshold.
+                if err>30:
+                    break
+                continue
+            else:
+                err-=1
+                print(city,date)
+
+            res.encoding=res.apparent_encoding
+            # All forward slashes are stripped before decoding.
+            NewResponse = re.sub(r"/","",res.text)
+            try:
+                j=demjson.decode(NewResponse.encode('utf-8'))
+                temp=pd.DataFrame(j)
+                temp.drop('icon',axis=1,inplace=True)
+                temp['出发日期']=date
+            except Exception:
+                # Undecodable or unexpected payload: skip this date.
+                continue
+            # ignore_index keeps row labels unique across dates, which the
+            # label-based processing in proc_data depends on.
+            df=pd.concat([df,temp],ignore_index=True)
+            time.sleep(0.5)
+
+        if df.empty:
+            # Nothing was collected for this route, so nothing to process.
+            print(city,'无数据')
+            continue
+
+        filename=city[0]+'-'+city[1]
+        # Post-process the raw rows.
+        proc_data(filename,df,interval=8)
+    
+
+def proc_data(filename,df,interval=8):
+    """Save the raw fares, estimate full fares and write per-day averages.
+
+    Args:
+        filename: Base name. ``<filename>.csv`` receives the raw frame and
+            ``result-<filename>.csv`` the aggregated one.
+        df: Frame with ``discount``, ``price``, ``qry_dt`` and ``出发日期``
+            columns. It is modified in place (rows dropped, index reset,
+            two columns added).
+        interval: Rows whose departure date is more than this many days
+            after the query date are discarded.
+    """
+    # Save the raw data exactly as collected.
+    df.to_csv(filename+'.csv',encoding='GB18030')
+    if df.empty:
+        # Nothing to aggregate, and the columns used below may not exist.
+        return
+
+    # Row labels may repeat when the frame was concatenated per date;
+    # label-based access below needs every row addressable on its own.
+    df.reset_index(drop=True,inplace=True)
+    df['全票价']=0.0
+    df['日期差']=None
+    number=re.compile(r'\d+\.?\d*')
+
+    for i in list(df.index):
+        try:
+            label=df.at[i,'discount']
+            if '经济' not in label:
+                # Only economy fares are analysed.
+                df.drop(index=i,inplace=True)
+                continue
+            discounted='折' in label
+            full_fare='全价' in label or '经典' in label
+            if not (discounted or full_fare):
+                continue
+
+            # Discard rows queried too long before departure.
+            delta=(datetime.datetime.strptime(df.at[i,'出发日期'],'%Y-%m-%d')
+                   -datetime.datetime.strptime(df.at[i,'qry_dt'],'%Y-%m-%d'))
+            if delta.days >interval:
+                df.drop(index=i,inplace=True)
+                continue
+            df.loc[i,'日期差']=delta.days
+
+            if discounted:
+                # Derive the full fare from the discount figure in the label.
+                discount=float(number.findall(label)[0])
+                df.loc[i,'全票价']=df.at[i,'price']/discount*10
+            else:
+                # The listed price already is the full fare.
+                df.loc[i,'全票价']=df.at[i,'price']
+        except Exception:
+            # Unparseable row: discard it.
+            df.drop(index=i,inplace=True,errors='ignore')
+
+    avg_full_price=df[df['全票价']!=0].groupby(['出发日期'])[['全票价']].mean()
+    avg_price=df[df['全票价']!=df['price']].groupby(['出发日期'])[['price']].mean()
+    result=pd.concat([avg_price,avg_full_price],axis=1)
+
+    result['折扣']=result['price']/result['全票价']
+
+    # Save the processed data.
+    result.to_csv('result-'+filename+'.csv',encoding='GB18030')
+    
+ 
+    
+if __name__ == '__main__':
+    # Start and end dates are left as None so the helper applies its defaults.
+    dateseries=create_assist_date(datestart = None,dateend = None)
+
+    city=['上海','广州','深圳','北京']
+    ytic=list(reversed(city))
+    # Every ordered pair of distinct cities: outer loop in listed order,
+    # inner loop in reverse order.
+    citys=[[m,n] for m in city for n in ytic if m!=n]
+
+    getdata(citys,dateseries)
--- a/DataMaintenance/sonar-project.properties
+++ b/DataMaintenance/sonar-project.properties
@ -0,0 +1,17 @@
+# must be unique in a given SonarQube instance
+sonar.projectKey=clawer
+
+# --- optional properties ---
+
+# defaults to project key
+sonar.projectName=clawer
+# defaults to 'not provided'
+#sonar.projectVersion=1.0
+ 
+# Path is relative to the sonar-project.properties file. Defaults to .
+#sonar.sources=src,WebContent
+ 
+# Encoding of the source code. Defaults to the system's default encoding.
+sonar.sourceEncoding=UTF-8
+
+#sonar.java.binaries=target/classes/javabean,target/classes/servlet
--- a/DataMaintenance/stealth.min.js
+++ b/DataMaintenance/stealth.min.js
--- a/DataMaintenance/xlsx_output/南宁-天津.xlsx
+++ b/DataMaintenance/xlsx_output/南宁-天津.xlsx
--- a/DataMaintenance/xlsx_output/天津-南宁.xlsx
+++ b/DataMaintenance/xlsx_output/天津-南宁.xlsx
--- a/DataMaintenance/xlsx_output/天津-泉州.xlsx
+++ b/DataMaintenance/xlsx_output/天津-泉州.xlsx
--- a/DataMaintenance/xlsx_output/天津-贵阳.xlsx
+++ b/DataMaintenance/xlsx_output/天津-贵阳.xlsx
--- a/DataMaintenance/xlsx_output/天津-郑州.xlsx
+++ b/DataMaintenance/xlsx_output/天津-郑州.xlsx
--- a/DataMaintenance/xlsx_output/天津-长春.xlsx
+++ b/DataMaintenance/xlsx_output/天津-长春.xlsx
--- a/DataMaintenance/xlsx_output/泉州-天津.xlsx
+++ b/DataMaintenance/xlsx_output/泉州-天津.xlsx
--- a/DataMaintenance/xlsx_output/贵阳-天津.xlsx
+++ b/DataMaintenance/xlsx_output/贵阳-天津.xlsx
--- a/DataMaintenance/xlsx_output/长春-天津.xlsx
+++ b/DataMaintenance/xlsx_output/长春-天津.xlsx