diff --git a/DataMaintenance/.scannerwork/.sonar_lock b/DataMaintenance/.scannerwork/.sonar_lock new file mode 100644 index 0000000..e69de29 diff --git a/DataMaintenance/.scannerwork/report-task.txt b/DataMaintenance/.scannerwork/report-task.txt new file mode 100644 index 0000000..149eba8 --- /dev/null +++ b/DataMaintenance/.scannerwork/report-task.txt @@ -0,0 +1,6 @@ +projectKey=clawer +serverUrl=http://localhost:9000 +serverVersion=7.8.0.26217 +dashboardUrl=http://localhost:9000/dashboard?id=clawer +ceTaskId=AZMv5JVBnAUFl5pPDUTm +ceTaskUrl=http://localhost:9000/api/ce/task?id=AZMv5JVBnAUFl5pPDUTm diff --git a/DataMaintenance/LICENSE b/DataMaintenance/LICENSE new file mode 100644 index 0000000..37c472d --- /dev/null +++ b/DataMaintenance/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Suysker + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
def kill_driver():
    """Force-kill leftover selenium, chromium and chromedriver processes.

    For each process name, the matching rows of ``ps -ef`` are turned into
    ``kill -9 <pid>`` commands by awk and piped to ``sh``.  Runs through
    ``os.system``, so it relies on a POSIX shell with ``ps``, ``grep`` and
    ``awk`` available.  Exit statuses are ignored: having nothing to kill is
    not an error.
    """
    for process_name in ("selenium", "chromium", "chromedriver"):
        # The trailing space inside "kill -9 " is essential: awk concatenates
        # adjacent operands, so `"kill -9" $2` would emit "kill -91234",
        # which the shell rejects as an invalid signal specification.
        os.system(
            f"ps -ef | grep {process_name} | grep -v grep"
            """ | awk '{print "kill -9 " $2}' | sh"""
        )
def generate_flight_dates(n, begin_date, end_date, start_interval, days_interval):
    """Return the departure dates to crawl as ``YYYY-MM-DD`` strings.

    Args:
        n: Number of days to cover when no ``end_date`` is given; offsets
            ``0, days_interval, 2 * days_interval, ...`` below ``n`` are used.
        begin_date: First date as ``"YYYY-MM-DD"``, or a falsy value to start
            ``start_interval`` days after today.
        end_date: Last date (inclusive) as ``"YYYY-MM-DD"``, or a falsy value.
            When given, dates run from the first date up to ``end_date`` in
            steps of ``days_interval`` and ``n`` no longer limits the result.
        start_interval: Day offset from today used when ``begin_date`` is not
            set; ``0`` or ``None`` means today.
        days_interval: Step between consecutive dates, in days (>= 1).

    Returns:
        List of date strings in ascending order.  Empty when ``end_date`` is
        earlier than the first date.

    Raises:
        ValueError: If ``days_interval`` is smaller than 1 or a date string
            does not match ``%Y-%m-%d``.
    """
    if days_interval < 1:
        # A zero/negative step can never reach end_date.
        raise ValueError("days_interval must be a positive number of days")

    date_format = "%Y-%m-%d"

    if begin_date:
        first_day = dt.strptime(begin_date, date_format)
    else:
        # T+N relative to today; truncate to midnight so that comparisons
        # against end_date work on whole days.
        now = dt.now()
        first_day = dt(now.year, now.month, now.day) + timedelta(
            days=start_interval or 0
        )

    if end_date:
        last_day = dt.strptime(end_date, date_format)
        step = timedelta(days=days_interval)
        flight_dates = []
        current = first_day
        while current <= last_day:
            flight_dates.append(current.strftime(date_format))
            current += step
        return flight_dates

    return [
        (first_day + timedelta(days=offset)).strftime(date_format)
        for offset in range(0, n, days_interval)
    ]
self.driver.execute_script("document.querySelectorAll('.shareline').forEach(element => element.remove());") + ''' + # 使用JavaScript删除所有的
def check_verification_code(self):
    """Check the current page for a captcha / alert and recover if one is shown.

    A captcha is assumed when the page contains an element with id
    ``verification-code`` or class ``alert-title``.  In that case the driver
    is quit, the crawler sleeps ``crawl_interval * 100`` seconds, a new driver
    is created with ``init_driver()``, the error counter is reset, the proxy
    is switched when ``enable_proxy`` is set, the account index is advanced
    and the flow restarts from the home page via ``get_page(1)``.

    Returns:
        ``True`` when no captcha element was found (after ``remove_btn()``
        ran), ``False`` after a captcha was handled.  If an exception is
        raised, it is only printed and the method falls off the end, so the
        result is an implicit ``None`` (falsy for callers).
    """
    try:
        # Look for captcha elements; the original note says these would need manual handling
        if (len(self.driver.find_elements(By.ID, "verification-code"))+len(self.driver.find_elements(By.CLASS_NAME, "alert-title"))):
            print(
                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:验证码被触发verification-code/alert-title,等待{crawl_interval*100}后重试。'
            )
            self.driver.quit()
            time.sleep(crawl_interval*100)
            self.driver = init_driver()
            self.err = 0
            # Switch to another IPv6 address
            if enable_proxy:
                gen_proxy_servers.switch_proxy_server(ip_mode)
            # NOTE(review): nesting reconstructed from a whitespace-collapsed diff;
            # assumed to run regardless of enable_proxy — confirm against the real file
            self.switch_acc += 1
            self.get_page(1)
            return False
        else:
            # Remove notices / overlays from the page
            self.remove_btn()
            # No captcha element found: the page loaded without triggering verification
            return True
    except Exception as e:
        print(
            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:未知错误,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}'
        )
def get_page(self, reset_to_homepage=0):
    """Open the flight search page and run the initial search.

    Args:
        reset_to_homepage: When ``1``, the driver first navigates to
            ``https://flights.ctrip.com/online/channel/domestic``.

    When ``check_verification_code()`` passes, the method clicks the
    ``pc_home-jipiao`` entry, the first ``radio-label`` (one-way) and the
    ``search-btn``, then continues with ``change_city()``.

    NOTE(review): any exception leads to ``self.get_page(1)`` with no retry
    counter, so a persistent failure recurses without bound.
    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    diff — confirm against the real file.
    """
    next_stage_flag = False
    try:
        if reset_to_homepage == 1:
            # Go to the home page
            self.driver.get(
                "https://flights.ctrip.com/online/channel/domestic")

        # Check for notices and captcha
        if self.check_verification_code():
            WebDriverWait(self.driver, max_wait_time).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "pc_home-jipiao"))
            )
            # Click the plane icon to return to the main flight view
            ele = WebDriverWait(self.driver, max_wait_time).until(
                element_to_be_clickable(
                    self.driver.find_element(
                        By.CLASS_NAME, "pc_home-jipiao")
                )
            )
            ele.click()

            # One-way trip
            ele = WebDriverWait(self.driver, max_wait_time).until(
                element_to_be_clickable(
                    self.driver.find_elements(
                        By.CLASS_NAME, "radio-label")[0]
                )
            )
            ele.click()

            # Search
            ele = WebDriverWait(self.driver, max_wait_time).until(
                element_to_be_clickable(
                    self.driver.find_element(By.CLASS_NAME, "search-btn")
                )
            )
            ele.click()

            next_stage_flag = True
    except Exception as e:
        # Report the error type and message to give more debugging information
        print(
            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_page:页面加载或元素操作失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}'
        )

        # Save a screenshot of the failure
        if enable_screenshot:
            self.driver.save_screenshot(
                f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
            )

        # Try loading again, this time forcing a redirect to the home page
        self.get_page(1)
    else:
        if next_stage_flag:
            # Continue with the next stage
            self.change_city()
element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.send_keys(Keys.CONTROL + "a") + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.send_keys(self.city[0]) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换城市【0】-{self.driver.find_elements(By.CLASS_NAME,"form-input-v3")[0].get_attribute("value")}' + ) + + # 若目的地与目标值不符,则更改目的地 + while self.city[1] not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[1].get_attribute("value"): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.click() + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.send_keys(Keys.CONTROL + "a") + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.send_keys(self.city[1]) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换城市【1】-{self.driver.find_elements(By.CLASS_NAME,"form-input-v3")[1].get_attribute("value")}' + ) + + while ( + self.driver.find_elements(By.CSS_SELECTOR, "[aria-label=请选择日期]")[ + 0 + ].get_attribute("value") + != self.date + ): + # 点击日期选择 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element( + By.CLASS_NAME, "modifyDate.depart-date" + ) + ) + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) < int(self.date[:4]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + 
"in-date-picker.icon.next-ico.iconf-right", + )[1] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "year").text[:-1])}小于 {int(self.date[:4])} 向右点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) > int(self.date[:4]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.prev-ico.iconf-left", + )[0] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "year").text[:-1])}大于 {int(self.date[:4])} 向左点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) == int(self.date[:4]): + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "month") + .text[:-1] + ) > int(self.date[5:7]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.prev-ico.iconf-left", + )[0] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "month").text[:-1])}大于 {int(self.date[5:7])} 向左点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) == int(self.date[:4]): + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, 
"month") + .text[:-1] + ) < int(self.date[5:7]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.next-ico.iconf-right", + )[1] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "month").text[:-1])}小于 {int(self.date[5:7])} 向右点击' + ) + ele.click() + + for m in self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + ): + if int(m.find_element(By.CLASS_NAME, "year").text[:-1]) != int( + self.date[:4] + ): + continue + + if int(m.find_element(By.CLASS_NAME, "month").text[:-1]) != int( + self.date[5:7] + ): + continue + + for d in m.find_elements(By.CLASS_NAME, "date-d"): + if int(d.text) == int(self.date[-2:]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable(d) + ) + ele.click() + break + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期-{self.driver.find_elements(By.CSS_SELECTOR,"[aria-label=请选择日期]")[0].get_attribute("value")}' + ) + + while "(" not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[0].get_attribute("value"): + # Enter搜索 + # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1])) + # ele.send_keys(Keys.ENTER) + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.click() + + # 通过低价提醒按钮实现enter键换页 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "low-price-remind" + )[0] + ) + ) + ele.click() + + while "(" not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[1].get_attribute("value"): + # Enter搜索 + # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1])) + # 
ele.send_keys(Keys.ENTER) + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.click() + + # 通过低价提醒按钮实现enter键换页 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "low-price-remind" + )[0] + ) + ) + ele.click() + + next_stage_flag = True + + except Exception as e: + # 错误次数+1 + self.err += 1 + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:更换城市和日期失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + if self.err < max_retry_time: + if len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:检测到登录弹窗,需要登录' + ) + self.login() + # 重试 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:重试') + self.change_city() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 删除本次请求 + del self.driver.requests + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + if next_stage_flag: + # 若无错误,执行下一步 + self.get_data() + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:成功更换城市和日期,当前路线为:{self.city[0]}-{self.city[1]}') + + def get_data(self): + try: + # 等待响应加载完成 + self.predata = self.driver.wait_for_request( + "/international/search/api/search/batchSearch?.*", timeout=max_wait_time + ) + + rb = dict(json.loads(self.predata.body).get("flightSegments")[0]) + + except Exception as e: + # 错误次数+1 + self.err += 1 + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 
def decode_data(self):
    """Decode the captured search response into ``self.dedata``.

    The response body of ``self.predata`` is sniffed with python-magic.
    Gzip payloads are decompressed, plain JSON payloads are used as-is, and
    both are parsed with ``json.loads`` into ``self.dedata``.  Any other
    payload type raises ``ValueError`` internally, which is handled like every
    other decoding failure.

    On success the error counter is reset and ``check_data()`` runs.  On
    failure the error counter is incremented, captured requests are cleared
    and, while fewer than ``max_retry_time`` errors have occurred, the page is
    refreshed and ``get_data()`` is retried; once the limit is reached the
    counter is reset and the flow restarts from the home page.
    """
    try:
        body = self.predata.response.body

        # Use python-magic to detect the payload type from its content
        file_type = magic.Magic().from_buffer(body)

        if "gzip" in file_type:
            raw_text = gzip.decompress(body).decode("UTF-8")
        elif "JSON data" in file_type:
            # Uncompressed JSON: use the body directly.  Previously this
            # branch only printed it, leaving self.dedata stale or unset.
            raw_text = body.decode("UTF-8")
        else:
            # Never fall through to parsing data left over from an earlier
            # request; route unknown payloads into the retry path instead.
            raise ValueError(f"未知的压缩格式:{file_type}")

        self.dedata = json.loads(raw_text)

    except Exception as e:
        # One more failed attempt
        self.err += 1

        print(
            f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:数据解码失败,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}'
        )

        # Save a screenshot of the failure
        if enable_screenshot:
            self.driver.save_screenshot(
                f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png'
            )

        # Drop the requests captured so far
        del self.driver.requests

        if self.err < max_retry_time:
            # Refresh the page
            print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} decode_data:刷新页面')
            self.refresh_driver()

            # Check for notices and captcha, then retry
            if self.check_verification_code():
                self.get_data()

        # Retry budget exhausted
        if self.err >= max_retry_time:
            print(
                f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:重新尝试加载页面,这次指定需要重定向到首页'
            )

            # Reset the error counter
            self.err = 0

            # Start over, forcing a redirect to the home page
            self.get_page(1)
    else:
        # Reset the error counter
        self.err = 0

        # No error: continue with the next stage
        self.check_data()
错误次数【{self.err}-{max_retry_time}】,check_data:必须要登录才能查看数据,这次指定需要重定向到首页' + ) + # 重新尝试加载页面,这次指定需要重定向到首页 + self.login() + + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_data:刷新页面') + self.refresh_driver() + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + # 重置错误计数 + self.err = 0 + self.proc_flightSegments() + self.proc_priceList() + self.mergedata() + + def proc_flightSegments(self): + self.flights = pd.DataFrame() + + for flightlist in self.flightItineraryList: + flightlist = flightlist["flightSegments"][0]["flightList"] + flightUnitList = dict(flightlist[0]) + + departureday = flightUnitList["departureDateTime"].split(" ")[0] + departuretime = flightUnitList["departureDateTime"].split(" ")[1] + + arrivalday = flightUnitList["arrivalDateTime"].split(" ")[0] + arrivaltime = flightUnitList["arrivalDateTime"].split(" ")[1] + + if del_info: + # 删除一些不重要的信息 + dellist = [ + "sequenceNo", + "marketAirlineCode", + "departureProvinceId", + "departureCityId", + "departureCityCode", + "departureAirportShortName", + "departureTerminal", + "arrivalProvinceId", + "arrivalCityId", + "arrivalCityCode", + "arrivalAirportShortName", + "arrivalTerminal", + "transferDuration", + "stopList", + "leakedVisaTagSwitch", + "trafficType", + "highLightPlaneNo", + "mealType", + "operateAirlineCode", + "arrivalDateTime", + "departureDateTime", + "operateFlightNo", + "operateAirlineName", + ] + for value in dellist: + try: + flightUnitList.pop(value) + except: + continue + + # 更新日期格式 + flightUnitList.update( + { + "departureday": departureday, + "departuretime": departuretime, + "arrivalday": arrivalday, + "arrivaltime": arrivaltime, + } + ) + + self.flights = pd.concat( + [ + self.flights, + 
def proc_priceList(self):
    """Build ``self.prices`` with the cheapest fare per cabin for each itinerary.

    For every entry of ``self.flightItineraryList`` one row is appended with
    the flight number (the part of ``itineraryId`` before the first ``_``) and,
    for economy (cabin ``"Y"``) and business (cabin ``"C"``), the base price,
    tax, total and misery index of the fare with the lowest price + tax.
    The first such fare wins on ties.  Fields of a cabin without fares are
    empty strings; other cabins are ignored.
    """
    self.prices = pd.DataFrame()

    for itinerary in self.flightItineraryList:
        flight_no = itinerary["itineraryId"].split("_")[0]

        # column prefix -> (base price, tax, total, misery index) of the cheapest fare so far
        cheapest = {}
        for fare in itinerary["priceList"]:
            base = fare["adultPrice"]
            tax = fare["adultTax"]
            misery = fare["miseryIndex"]
            cabin = fare["cabin"]

            if cabin == "Y":
                label = "economy"
            elif cabin == "C":
                label = "bussiness"
            else:
                continue

            total = base + tax
            # Strict "<" keeps the first fare among equal totals
            if label not in cheapest or total < cheapest[label][2]:
                cheapest[label] = (base, tax, total, misery)

        row = {"flightNo": flight_no}
        for label in ("economy", "bussiness"):
            origin_price, tax_price, total_price, full_price = cheapest.get(
                label, ("", "", "", "")
            )
            row[f"{label}_origin"] = origin_price
            row[f"{label}_tax"] = tax_price
            row[f"{label}_total"] = total_price
            row[f"{label}_full"] = full_price

        self.prices = pd.concat(
            [self.prices, pd.DataFrame(row, index=[0])], ignore_index=True
        )
def interface_usable(interface_name, skip_check=False, ipv6_address='2400:3200::1', max_retries=3):
    """Report whether ``interface_name`` can reach ``ipv6_address`` with ping.

    Args:
        interface_name: Interface passed to ``ping -I``.
        skip_check: When true, no ping is sent and ``True`` is returned.
        ipv6_address: Address to ping.
        max_retries: Maximum number of ping attempts.

    Returns:
        ``True`` as soon as one ping exits with status 0 (or the check is
        skipped), ``False`` once every attempt has failed or timed out.
    """
    if skip_check:
        return True

    ping_command = ["ping", "-c", "1", "-I", interface_name, ipv6_address]
    attempts_made = 0
    while attempts_made < max_retries:
        try:
            completed = subprocess.run(
                ping_command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=5,
            )
        except subprocess.TimeoutExpired:
            print(f"Ping attempt {attempts_made + 1} of {max_retries} timed out. Retrying...")
        except subprocess.SubprocessError as e:
            # Any other subprocess-related failure
            print(f"An error occurred while trying to ping: {e}. Retrying...")
        else:
            if completed.returncode == 0:
                # Ping succeeded
                return True
        attempts_made += 1

    # Every attempt failed
    return False
iface_ipv6_dict: + proxy_switch_count += 1 + proxy_index = proxy_switch_count % len(iface_ipv6_dict) + selected_interface = list(iface_ipv6_dict.keys())[proxy_index] + ipv6_address = iface_ipv6_dict[selected_interface] + # 清空自定义链 + execute_ip6tables_command('ip6tables -t nat -F FAKE_IPV6_CHAIN') + # 添加SNAT规则 + execute_ip6tables_command(f'ip6tables -t nat -A FAKE_IPV6_CHAIN -j SNAT --to-source {ipv6_address}') + + print(f"Using interface: {selected_interface}, Connecting to: {ipv6_address}") + +def create_ipv6_addresses(n, base_interface='eth0', delete_interface=True): + sudo_cmd = ["sudo"] if not is_root() else [] + if delete_interface: + delete_ipv6_addresses(base_interface) + existing_interfaces = list(get_existing_interfaces(base_interface).keys()) + interfaces = [] + for i in range(1, n + 1): + interface_name = f"{base_interface}_{i}" + + # Check if the interface exists, if yes, delete it first + if interface_name in existing_interfaces: + if interface_usable(interface_name): + print(f"Interface {interface_name} already exists. 
Skipping creation.") + interfaces.append(interface_name) + continue + else: + subprocess.run(sudo_cmd + ["ip", "link", "delete", interface_name]) + + # Now add the interface + subprocess.run(sudo_cmd + ["ip", "link", "add", "link", base_interface, interface_name, "type", "macvlan", "mode", "bridge"]) + subprocess.run(sudo_cmd + ["ip", "link", "set", interface_name, "up"]) + #subprocess.run(sudo_cmd + ["dhclient", "-6", "-nw", interface_name]) + interfaces.append(interface_name) + return interfaces + +def delete_ipv6_addresses(base_interface='eth0'): + sudo_cmd = ["sudo"] if not is_root() else [] + existing_interfaces = list(get_existing_interfaces(base_interface).keys()) + + for interface_name in existing_interfaces: + subprocess.run(sudo_cmd + ["ip", "link", "delete", interface_name]) + +def stop_proxy_servers(base_interface='eth0', delete_interface=True): + # 删除流量重定向到自定义链 + execute_ip6tables_command('ip6tables -t nat -D POSTROUTING -j FAKE_IPV6_CHAIN') + # 删除自定义链 + execute_ip6tables_command('ip6tables -t nat -X FAKE_IPV6_CHAIN') + + if delete_interface: + print("正在关闭代理服务器...") + print("删除IPv6地址...") + delete_ipv6_addresses(base_interface) + print("代理服务器已关闭.") + else: + print("正在关闭代理服务器...") + print("代理服务器已关闭.") + +def start_proxy_servers(n, mode='normal', base_interface='eth0', delete_interface=True): + global iface_ipv6_dict + + interfaces = create_ipv6_addresses(n, base_interface, delete_interface) + #获取生成的接口及IP + iface_ipv6_dict = get_existing_interfaces(base_interface) + + if iface_ipv6_dict: + # 删除流量重定向到自定义链 + execute_ip6tables_command('ip6tables -t nat -D POSTROUTING -j FAKE_IPV6_CHAIN') + # 删除自定义链 + execute_ip6tables_command('ip6tables -t nat -X FAKE_IPV6_CHAIN') + + # 创建自定义链 + execute_ip6tables_command('ip6tables -t nat -N FAKE_IPV6_CHAIN') + # 流量重定向到自定义链 + execute_ip6tables_command(f'ip6tables -t nat -A POSTROUTING -o {base_interface} -j FAKE_IPV6_CHAIN') + + if mode == 'normal': + selected_interface = list(iface_ipv6_dict.keys())[0] + ipv6_address = 
iface_ipv6_dict[selected_interface] + # 添加SNAT规则 + execute_ip6tables_command(f'ip6tables -t nat -A FAKE_IPV6_CHAIN -j SNAT --to-source {ipv6_address}') + + print(f"Using interface: {selected_interface}, Connecting to: {ipv6_address}") + elif mode == 'random': + for index, (interface, ipv6_address) in enumerate(iface_ipv6_dict.items()): + adjusted_probability = 1/(len(iface_ipv6_dict)-index) + execute_ip6tables_command(f'ip6tables -t nat -A FAKE_IPV6_CHAIN -m statistic --mode random --probability {adjusted_probability} -j SNAT --to-source {ipv6_address}') \ No newline at end of file diff --git a/DataMaintenance/README.md b/DataMaintenance/README.md new file mode 100644 index 0000000..0e324d7 --- /dev/null +++ b/DataMaintenance/README.md @@ -0,0 +1,50 @@ +# Ctrip-Crawler + + + +## 概述 + +Ctrip-Crawler 是一个携程航班信息的专业爬虫工具,主要基于 Selenium 框架进行实现。 +request 方法访问携程 API 的方法,由于 IP 限制和 JS 逆向工程的挑战,该途径已不再适用。(报错) + +携程支持IPV6访问,因此可以通过生成大量IPV6规避 IP 限制。 + + + +## 主要特性 + +Selenium 自动化框架:与直接请求 API 的方法不同,该项目基于 Selenium,提供高度可定制和交互式的浏览器模拟。 + +灵活的错误处理机制:针对不同类型的异常(如超时、验证码出现、未知错误等),实施相应的处理策略,包括重试和人工干预。 + +IP限制解决方案:利用页面特性和用户模拟,规避了 IP 限制,提高了爬取稳定性。 + +数据校验与解析:对获取的数据进行严格的数据质量和完整性校验,包括 gzip 解压缩和 JSON 格式解析。 + +版本迭代与优化:V2版本解决了验证码问题;V3版本提高了系统的稳定性和可用性;V3.5版本增加了linux系统下多IPV6网口的生成与代理 + + + +## 文档和教程 + +详细的使用指南和开发文档可在以下博客中查看: + +[基于selenium的携程机票爬取程序](https://blog.suysker.xyz/archives/35) + +[基于selenium的携程机票爬取程序V2](https://blog.suysker.xyz/archives/139) + +[基于request的携程机票爬取程序](https://blog.suysker.xyz/archives/37) + +[基于request的航班历史票价爬取](https://blog.suysker.xyz/archives/36) + + + +## TO DO + +V4.0增加多线程分片运行…… + + + +## 贡献与反馈 + +如果你有更好的优化建议或发现任何 bug,请通过 Issues 或 Pull Requests 与我们交流。我们非常欢迎各种形式的贡献! 
diff --git a/DataMaintenance/csv_to_xlsx_converter.py b/DataMaintenance/csv_to_xlsx_converter.py new file mode 100644 index 0000000..5691e88 --- /dev/null +++ b/DataMaintenance/csv_to_xlsx_converter.py @@ -0,0 +1,73 @@ +import pandas as pd +import os +from datetime import datetime, timedelta + +def get_departure_destination(file_name): + name_without_extension = os.path.splitext(file_name)[0] + return name_without_extension + +def merge_csv_files(csv_files, output_xlsx): + all_dfs = [] + for csv_file in csv_files: + df = pd.read_csv(csv_file) + # 添加日期列 + date = os.path.basename(os.path.dirname(os.path.dirname(csv_file))) + df['出发日期'] = date + + # 选择指定的列 + selected_columns = [ + '航班号','出发城市','到达城市', '航空公司', '出发日期', '出发时间', '到达时间', + '中转信息', 'economy_origin', '经济舱餐食信息', '经济舱座椅间距', '出发延误时间' + ] + df = df[selected_columns] + + # 重命名 'economy_origin' 为 '票价' + df = df.rename(columns={'economy_origin': '票价'}) + + all_dfs.append(df) + + # 合并所有数据框 + merged_df = pd.concat(all_dfs, ignore_index=True) + + # 保存为Excel文件 + merged_df.to_excel(output_xlsx, index=False, engine='openpyxl') + +# 设置日期范围 +start_date = datetime(2024, 11, 12)# 起始日期 +end_date = datetime(2024, 11, 19)# 结束日期 +clawer_date = datetime(2024, 11, 12)# 爬虫日期 +# 设置输入和输出文件夹路径 +input_base_path = "./" +output_folder = "./xlsx_output" + +# 确保输出文件夹存在 +if not os.path.exists(output_folder): + os.makedirs(output_folder) + +# 用于存储同一始发地和目的地的CSV文件 +route_files = {} + +current_date = start_date +while current_date <= end_date: + folder_name = current_date.strftime("%Y-%m-%d") + folder_path = os.path.join(input_base_path, folder_name, clawer_date.strftime("%Y-%m-%d")) + + if os.path.exists(folder_path): + for file_name in os.listdir(folder_path): + if file_name.endswith('.csv'): + csv_path = os.path.join(folder_path, file_name) + route = get_departure_destination(file_name) + + if route not in route_files: + route_files[route] = [] + route_files[route].append(csv_path) + + current_date += timedelta(days=1) + +# 合并并保存每个路线的文件 +for 
route, files in route_files.items(): + output_xlsx = os.path.join(output_folder, f"{route}.xlsx") + merge_csv_files(files, output_xlsx) + print(f"已合并并保存路线: {route} -> {output_xlsx}") + +print("所有CSV文件已成功合并为XLSX文件,并筛选了指定的列") diff --git a/DataMaintenance/ctrip_flights_scraper_V3(undetected_chromedriver).py b/DataMaintenance/ctrip_flights_scraper_V3(undetected_chromedriver).py new file mode 100644 index 0000000..66282ae --- /dev/null +++ b/DataMaintenance/ctrip_flights_scraper_V3(undetected_chromedriver).py @@ -0,0 +1,1153 @@ +import magic +import io +import os +import gzip +import time +import json +import requests +import pandas as pd +from typing import Any +import seleniumwire.undetected_chromedriver as webdriver +from datetime import datetime as dt, timedelta +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + + +# 爬取的城市 +crawal_citys = ["上海", "香港", "东京"] + +# 爬取日期范围:起始日期。格式'2023-12-01' +begin_date = None + +# 爬取日期范围:结束日期。格式'2023-12-31' +end_date = None + +# 爬取T+N,即N天后 +start_interval = 1 + +# 爬取的日期 +crawal_days = 60 + +# 设置各城市爬取的时间间隔(单位:秒) +crawal_interval = 5 + +# 日期间隔 +days_interval = 1 + +# 设置页面加载的最长等待时间(单位:秒) +max_wait_time = 10 + +# 最大错误重试次数 +max_retry_time = 5 + +# 是否只抓取直飞信息(True: 只抓取直飞,False: 抓取所有航班) +direct_flight = True + +# 是否删除不重要的信息 +del_info = False + +# 是否重命名DataFrame的列名 +rename_col = True + +# 调试截图 +enable_screenshot = False + +# 允许登录(可能必须要登录才能获取数据) +login_allowed = True + +# 账号 +accounts = ['',''] + +# 密码 +passwords = ['',''] + +#利用stealth.min.js隐藏selenium特征 +stealth_js_path='./stealth.min.js' + +# 定义下载stealth.min.js的函数 +def download_stealth_js(file_path, url='https://raw.githubusercontent.com/requireCool/stealth.min.js/main/stealth.min.js'): + if not os.path.exists(file_path): + print(f"{file_path} not found, downloading...") + response = requests.get(url) + 
response.raise_for_status() # 确保请求成功 + with open(file_path, 'w') as file: + file.write(response.text) + print(f"{file_path} downloaded.") + else: + print(f"{file_path} already exists, no need to download.") + +def init_driver(): + # options = webdriver.ChromeOptions() # 创建一个配置对象 + options = webdriver.ChromeOptions() # 创建一个配置对象 + options.add_argument("--incognito") # 隐身模式(无痕模式) + # options.add_argument('--headless') # 启用无头模式 + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--disable-blink-features") + options.add_argument("--disable-blink-features=AutomationControlled") + options.add_argument("--disable-extensions") + options.add_argument("--pageLoadStrategy=eager") + options.add_argument("--disable-gpu") + options.add_argument("--disable-software-rasterizer") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--ignore-certificate-errors") + options.add_argument("--ignore-certificate-errors-spki-list") + options.add_argument("--ignore-ssl-errors") + # options.add_experimental_option("excludeSwitches", ["enable-automation"]) # 不显示正在受自动化软件控制的提示 + seleniumwireOptions: dict[str, Any] = {"verify_ssl": False} + # chromeDriverPath = 'C:/Program Files/Google/Chrome/Application/chromedriver' #chromedriver位置 + # options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.69") + # driver = webdriver.Chrome(executable_path=self.chromeDriverPath,chrome_options=self.options) + driver = webdriver.Chrome(options=options,seleniumwire_options=seleniumwireOptions) + + try: + download_stealth_js(stealth_js_path) + # 读取并注入stealth.min.js + with open(stealth_js_path, 'r') as file: + stealth_js = file.read() + driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": stealth_js}) + except Exception as e: + print(e) + + driver.maximize_window() + + return driver + + +def 
gen_citys(crawal_citys): + # 生成城市组合列表 + citys = [] + ytic = list(reversed(crawal_citys)) + for m in crawal_citys: + for n in ytic: + if m == n: + continue + else: + citys.append([m, n]) + return citys + + +def generate_flight_dates(n, begin_date, end_date, start_interval, days_interval): + flight_dates = [] + + if begin_date: + begin_date = dt.strptime(begin_date, "%Y-%m-%d") + elif start_interval: + begin_date = dt.now() + timedelta(days=start_interval) + + for i in range(0, n, days_interval): + flight_date = begin_date + timedelta(days=i) + + flight_dates.append(flight_date.strftime("%Y-%m-%d")) + + # 如果有结束日期,确保生成的日期不超过结束日期 + if end_date: + end_date = dt.strptime(end_date, "%Y-%m-%d") + flight_dates = [date for date in flight_dates if dt.strptime(date, "%Y-%m-%d") <= end_date] + # 继续生成日期直到达到或超过结束日期 + while dt.strptime(flight_dates[-1], "%Y-%m-%d") < end_date: + next_date = dt.strptime(flight_dates[-1], "%Y-%m-%d") + timedelta(days=days_interval) + if next_date <= end_date: + flight_dates.append(next_date.strftime("%Y-%m-%d")) + else: + break + + return flight_dates + + +# element_to_be_clickable 函数来替代 expected_conditions.element_to_be_clickable 或 expected_conditions.visibility_of_element_located + + +def element_to_be_clickable(element): + def check_clickable(driver): + try: + if element.is_enabled() and element.is_displayed(): + return element # 当条件满足时,返回元素本身 + else: + return False + except: + return False + + return check_clickable + + +class DataFetcher(object): + def __init__(self, driver): + self.driver = driver + self.date = None + self.city = None + self.err = 0 # 错误重试次数 + self.switch_acc = 0 #切换账户 + + def refresh_driver(self): + try: + self.driver.refresh() + except Exception as e: + # 错误次数+1 + self.err += 1 + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} refresh_driver:刷新页面失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + 
f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} refresh_driver:刷新页面') + self.refresh_driver() + + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,refresh_driver:不继续重试' + ) + + def remove_btn(self): + try: + #WebDriverWait(self.driver, max_wait_time).until(lambda d: d.execute_script('return typeof jQuery !== "undefined"')) + # 移除提醒 + self.driver.execute_script("document.querySelectorAll('.notice-box').forEach(element => element.remove());") + # 移除在线客服 + self.driver.execute_script("document.querySelectorAll('.shortcut, .shortcut-link').forEach(element => element.remove());") + # 移除分享链接 + self.driver.execute_script("document.querySelectorAll('.shareline').forEach(element => element.remove());") + ''' + # 使用JavaScript删除所有的
标签 + self.driver.execute_script(""" + var elements = document.getElementsByTagName('dl'); + while(elements.length > 0){ + elements[0].parentNode.removeChild(elements[0]); + } + """) + ''' + except Exception as e: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} remove_btn:提醒移除失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + def check_verification_code(self): + try: + # 检查是否有验证码元素,如果有,则需要人工处理 + if (len(self.driver.find_elements(By.ID, "verification-code"))+len(self.driver.find_elements(By.CLASS_NAME, "alert-title"))): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:验证码被触发verification-code/alert-title,等待{crawal_interval*100}后重试。' + ) + self.driver.quit() + time.sleep(crawal_interval*100) + self.driver = init_driver() + self.err = 0 + self.switch_acc += 1 + self.get_page(1) + return False + else: + # 移除注意事项 + self.remove_btn() + # 如果没有找到验证码元素,则说明页面加载成功,没有触发验证码 + return True + except Exception as e: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:未知错误,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + def login(self): + if login_allowed: + + account = accounts[self.switch_acc % len(accounts)] + password = passwords[self.switch_acc % len(passwords)] + + try: + if len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")) == 0: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:未弹出登录界面') + WebDriverWait(self.driver, max_wait_time).until(EC.presence_of_element_located((By.CLASS_NAME, "tl_nfes_home_header_login_wrapper_siwkn"))) + # 点击飞机图标,返回主界面 + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_element(By.CLASS_NAME, "tl_nfes_home_header_login_wrapper_siwkn"))) + ele.click() + #等待页面加载 + WebDriverWait(self.driver, max_wait_time).until(EC.presence_of_element_located((By.CLASS_NAME, "lg_loginwrap"))) + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:已经弹出登录界面') + + ele = WebDriverWait(self.driver, 
max_wait_time).until(element_to_be_clickable(self.driver.find_elements(By.CLASS_NAME, "r_input.bbz-js-iconable-input")[0])) + ele.send_keys(account) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:输入账户成功') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_element(By.CSS_SELECTOR, "div[data-testid='accountPanel'] input[data-testid='passwordInput']"))) + ele.send_keys(password) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:输入密码成功') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_element(By.CSS_SELECTOR, '[for="checkboxAgreementInput"]'))) + ele.click() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:勾选同意成功') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_elements(By.CLASS_NAME, "form_btn.form_btn--block")[0])) + ele.click() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:登录成功') + # 保存登录截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + time.sleep(crawal_interval*3) + except Exception as e: + # 错误次数+1 + self.err += 1 + # 用f字符串格式化错误类型和错误信息,提供更多的调试信息 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:页面加载或元素操作失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:刷新页面') + self.refresh_driver() + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.login() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,login:重新尝试加载页面,这次指定需要重定向到首页' + ) + + def get_page(self, reset_to_homepage=0): + next_stage_flag = False + try: + if reset_to_homepage == 1: + # 前往首页 + self.driver.get( + 
"https://flights.ctrip.com/online/channel/domestic") + + # 检查注意事项和验证码 + if self.check_verification_code(): + WebDriverWait(self.driver, max_wait_time).until( + EC.presence_of_element_located( + (By.CLASS_NAME, "pc_home-jipiao")) + ) + # 点击飞机图标,返回主界面 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element( + By.CLASS_NAME, "pc_home-jipiao") + ) + ) + ele.click() + + # 单程 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "radio-label")[0] + ) + ) + ele.click() + + # 搜索 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element(By.CLASS_NAME, "search-btn") + ) + ) + ele.click() + + next_stage_flag = True + except Exception as e: + # 用f字符串格式化错误类型和错误信息,提供更多的调试信息 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_page:页面加载或元素操作失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + if next_stage_flag: + # 继续下一步 + self.change_city() + + def change_city(self): + next_stage_flag = False + try: + # 等待页面完成加载 + WebDriverWait(self.driver, max_wait_time).until( + EC.presence_of_element_located( + (By.CLASS_NAME, "form-input-v3")) + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 若出发地与目标值不符,则更改出发地 + while self.city[0] not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[0].get_attribute("value"): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.click() + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.send_keys(Keys.CONTROL + 
"a") + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.send_keys(self.city[0]) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换城市【0】-{self.driver.find_elements(By.CLASS_NAME,"form-input-v3")[0].get_attribute("value")}' + ) + + # 若目的地与目标值不符,则更改目的地 + while self.city[1] not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[1].get_attribute("value"): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.click() + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.send_keys(Keys.CONTROL + "a") + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.send_keys(self.city[1]) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换城市【1】-{self.driver.find_elements(By.CLASS_NAME,"form-input-v3")[1].get_attribute("value")}' + ) + + while ( + self.driver.find_elements(By.CSS_SELECTOR, "[aria-label=请选择日期]")[ + 0 + ].get_attribute("value") + != self.date + ): + # 点击日期选择 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element( + By.CLASS_NAME, "modifyDate.depart-date" + ) + ) + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) < int(self.date[:4]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.next-ico.iconf-right", + )[1] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 
change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "year").text[:-1])}小于 {int(self.date[:4])} 向右点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) > int(self.date[:4]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.prev-ico.iconf-left", + )[0] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "year").text[:-1])}大于 {int(self.date[:4])} 向左点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) == int(self.date[:4]): + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "month") + .text[:-1] + ) > int(self.date[5:7]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.prev-ico.iconf-left", + )[0] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "month").text[:-1])}大于 {int(self.date[5:7])} 向左点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) == int(self.date[:4]): + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "month") + .text[:-1] + ) < int(self.date[5:7]): + ele = WebDriverWait(self.driver, max_wait_time).until( + 
element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.next-ico.iconf-right", + )[1] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "month").text[:-1])}小于 {int(self.date[5:7])} 向右点击' + ) + ele.click() + + for m in self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + ): + if int(m.find_element(By.CLASS_NAME, "year").text[:-1]) != int( + self.date[:4] + ): + continue + + if int(m.find_element(By.CLASS_NAME, "month").text[:-1]) != int( + self.date[5:7] + ): + continue + + for d in m.find_elements(By.CLASS_NAME, "date-d"): + if int(d.text) == int(self.date[-2:]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable(d) + ) + ele.click() + break + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期-{self.driver.find_elements(By.CSS_SELECTOR,"[aria-label=请选择日期]")[0].get_attribute("value")}' + ) + + while "(" not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[0].get_attribute("value"): + # Enter搜索 + # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1])) + # ele.send_keys(Keys.ENTER) + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.click() + + # 通过低价提醒按钮实现enter键换页 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "low-price-remind" + )[0] + ) + ) + ele.click() + + while "(" not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[1].get_attribute("value"): + # Enter搜索 + # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1])) + # ele.send_keys(Keys.ENTER) + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + 
self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.click() + + # 通过低价提醒按钮实现enter键换页 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "low-price-remind" + )[0] + ) + ) + ele.click() + + next_stage_flag = True + + except Exception as e: + # 错误次数+1 + self.err += 1 + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:更换城市和日期失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + if self.err < max_retry_time: + if len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:检测到登录弹窗,需要登录' + ) + self.login() + # 重试 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:重试') + self.change_city() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 删除本次请求 + del self.driver.requests + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + if next_stage_flag: + # 若无错误,执行下一步 + self.get_data() + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:成功更换城市和日期,当前路线为:{self.city[0]}-{self.city[1]}') + + def get_data(self): + try: + # 等待响应加载完成 + self.predata = self.driver.wait_for_request( + "/international/search/api/search/batchSearch?.*", timeout=max_wait_time + ) + + rb = dict(json.loads(self.predata.body).get("flightSegments")[0]) + + except Exception as e: + # 错误次数+1 + self.err += 1 + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:获取数据超时,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + 
self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 删除本次请求 + del self.driver.requests + + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_data:刷新页面') + self.refresh_driver() + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + + else: + # 删除本次请求 + del self.driver.requests + + # 检查数据获取正确性 + if ( + rb["departureCityName"] == self.city[0] + and rb["arrivalCityName"] == self.city[1] + and rb["departureDate"] == self.date + ): + print(f"get_data:城市匹配成功:出发地-{self.city[0]},目的地-{self.city[1]}") + + # 重置错误计数 + self.err = 0 + + # 若无错误,执行下一步 + self.decode_data() + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:刷新页面') + # 错误次数+1 + self.err += 1 + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 重新更换城市 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_data:重新更换城市:{rb["departureCityName"]}-{rb["arrivalCityName"]}-{rb["departureDate"]}' + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.change_city() + + def decode_data(self): + try: + # 使用python-magic库检查MIME类型 + mime = magic.Magic() + file_type = mime.from_buffer(self.predata.response.body) + + buf = io.BytesIO(self.predata.response.body) + + if "gzip" in file_type: + gf = gzip.GzipFile(fileobj=buf) + self.dedata = gf.read().decode("UTF-8") + elif "JSON data" in file_type: + print(buf.read().decode("UTF-8")) + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 未知的压缩格式:{file_type}') + + self.dedata = json.loads(self.dedata) + + except Exception as e: + # 错误次数+1 + self.err += 1 + + print( + 
f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:数据解码失败,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 删除本次请求 + del self.driver.requests + + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} decode_data:刷新页面') + self.refresh_driver() + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + # 重置错误计数 + self.err = 0 + + # 若无错误,执行下一步 + self.check_data() + + def check_data(self): + try: + self.flightItineraryList = self.dedata["data"]["flightItineraryList"] + # 倒序遍历,删除转机航班 + for i in range(len(self.flightItineraryList) - 1, -1, -1): + if ( + self.flightItineraryList[i]["flightSegments"][0]["transferCount"] + != 0 + ): + self.flightItineraryList.pop(i) + if len(self.flightItineraryList) == 0 and direct_flight: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 不存在直航航班:{self.city[0]}-{self.city[1]}') + # 重置错误计数 + self.err = 0 + return 0 + except Exception as e: + # 错误次数+1 + self.err += 1 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 数据检查出错:不存在航班,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}' + ) + print(self.dedata) + if self.err < max_retry_time: + if 'searchErrorInfo' in self.dedata["data"]: + # 重置错误计数 + self.err = 0 + return 0 + else: + if "'needUserLogin': True" in str(self.dedata["data"]): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:必须要登录才能查看数据,这次指定需要重定向到首页' + ) + # 重新尝试加载页面,这次指定需要重定向到首页 + self.login() + + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_data:刷新页面') + 
self.refresh_driver() + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + # 重置错误计数 + self.err = 0 + self.proc_flightSegments() + self.proc_priceList() + self.mergedata() + + def proc_flightSegments(self): + self.flights = pd.DataFrame() + + for flightlist in self.flightItineraryList: + flightlist = flightlist["flightSegments"][0]["flightList"] + flightUnitList = dict(flightlist[0]) + + departureday = flightUnitList["departureDateTime"].split(" ")[0] + departuretime = flightUnitList["departureDateTime"].split(" ")[1] + + arrivalday = flightUnitList["arrivalDateTime"].split(" ")[0] + arrivaltime = flightUnitList["arrivalDateTime"].split(" ")[1] + + if del_info: + # 删除一些不重要的信息 + dellist = [ + "sequenceNo", + "marketAirlineCode", + "departureProvinceId", + "departureCityId", + "departureCityCode", + "departureAirportShortName", + "departureTerminal", + "arrivalProvinceId", + "arrivalCityId", + "arrivalCityCode", + "arrivalAirportShortName", + "arrivalTerminal", + "transferDuration", + "stopList", + "leakedVisaTagSwitch", + "trafficType", + "highLightPlaneNo", + "mealType", + "operateAirlineCode", + "arrivalDateTime", + "departureDateTime", + "operateFlightNo", + "operateAirlineName", + ] + for value in dellist: + try: + flightUnitList.pop(value) + except: + continue + + # 更新日期格式 + flightUnitList.update( + { + "departureday": departureday, + "departuretime": departuretime, + "arrivalday": arrivalday, + "arrivaltime": arrivaltime, + } + ) + + self.flights = pd.concat( + [ + self.flights, + pd.DataFrame.from_dict(flightUnitList, orient="index").T, + ], + ignore_index=True, + ) + + def proc_priceList(self): + self.prices = pd.DataFrame() + + for flightlist in self.flightItineraryList: + flightNo = 
flightlist["itineraryId"].split("_")[0] + priceList = flightlist["priceList"] + + # 经济舱,经济舱折扣 + economy, economy_tax, economy_total, economy_full = [], [], [], [] + economy_origin_price, economy_tax_price, economy_total_price, economy_full_price = "", "", "", "" + # 商务舱,商务舱折扣 + bussiness, bussiness_tax, bussiness_total, bussiness_full = [], [], [], [] + bussiness_origin_price, bussiness_tax_price, bussiness_total_price, bussiness_full_price = "", "", "", "" + + for price in priceList: + adultPrice = price["adultPrice"] + adultTax = price["adultTax"] + miseryIndex = price["miseryIndex"] + cabin = price["cabin"] + + # 经济舱 + if cabin == "Y": + economy.append(adultPrice) + economy_tax.append(adultTax) + economy_full.append(miseryIndex) + economy_total.append(adultPrice+adultTax) + # 商务舱 + elif cabin == "C": + bussiness.append(adultPrice) + bussiness_tax.append(adultTax) + bussiness_full.append(miseryIndex) + bussiness_total.append(adultPrice+adultTax) + + # 初始化变量 + economy_min_index = None + bussiness_min_index = None + + if economy_total != []: + economy_total_price = min(economy_total) + economy_min_index = economy_total.index(economy_total_price) + + if bussiness_total != []: + bussiness_total_price = min(bussiness_total) + bussiness_min_index = bussiness_total.index(bussiness_total_price) + + if economy_min_index is not None: + economy_origin_price = economy[economy_min_index] + economy_tax_price = economy_tax[economy_min_index] + economy_full_price = economy_full[economy_min_index] + + if bussiness_min_index is not None: + bussiness_origin_price = bussiness[bussiness_min_index] + bussiness_tax_price = bussiness_tax[bussiness_min_index] + bussiness_full_price = bussiness_full[bussiness_min_index] + + price_info = { + "flightNo": flightNo, + "economy_origin": economy_origin_price, + "economy_tax": economy_tax_price, + "economy_total": economy_total_price, + "economy_full": economy_full_price, + "bussiness_origin": bussiness_origin_price, + "bussiness_tax": 
bussiness_tax_price, + "bussiness_total": bussiness_total_price, + "bussiness_full": bussiness_full_price, + } + + # self.prices=self.prices.append(price_info,ignore_index=True) + self.prices = pd.concat( + [self.prices, pd.DataFrame(price_info, index=[0])], ignore_index=True + ) + + def mergedata(self): + try: + self.df = self.flights.merge(self.prices, on=["flightNo"]) + + self.df["dateGetTime"] = dt.now().strftime("%Y-%m-%d") + + if rename_col: + # 对pandas的columns进行重命名 + order = [ + "数据获取日期", + "航班号", + "航空公司", + "出发日期", + "出发时间", + "到达日期", + "到达时间", + "飞行时长", + "出发国家", + "出发城市", + "出发机场", + "出发机场三字码", + "到达国家", + "到达城市", + "到达机场", + "到达机场三字码", + "飞机型号", + "飞机尺寸", + "飞机型号三字码", + "到达准点率", + "停留次数", + ] + + origin = [ + "dateGetTime", + "flightNo", + "marketAirlineName", + "departureday", + "departuretime", + "arrivalday", + "arrivaltime", + "duration", + "departureCountryName", + "departureCityName", + "departureAirportName", + "departureAirportCode", + "arrivalCountryName", + "arrivalCityName", + "arrivalAirportName", + "arrivalAirportCode", + "aircraftName", + "aircraftSize", + "aircraftCode", + "arrivalPunctuality", + "stopCount", + ] + + columns = dict(zip(origin, order)) + + self.df = self.df.rename(columns=columns) + + if del_info: + self.df = self.df[order] + + files_dir = os.path.join( + os.getcwd(), self.date, dt.now().strftime("%Y-%m-%d") + ) + + if not os.path.exists(files_dir): + os.makedirs(files_dir) + + filename = os.path.join( + files_dir, f"{self.city[0]}-{self.city[1]}.csv") + + self.df.to_csv(filename, encoding="UTF-8", index=False) + + print(f'\n{time.strftime("%Y-%m-%d_%H-%M-%S")} 数据爬取完成 {filename}\n') + + return 0 + + except Exception as e: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 合并数据失败 {str(e).split("Stacktrace:")[0]}') + + return 0 + + +if __name__ == "__main__": + + driver = init_driver() + + citys = gen_citys(crawal_citys) + + flight_dates = generate_flight_dates(crawal_days, begin_date, end_date, start_interval, days_interval) + 
+ Flight_DataFetcher = DataFetcher(driver) + + for city in citys: + Flight_DataFetcher.city = city + + for flight_date in flight_dates: + Flight_DataFetcher.date = flight_date + + if os.path.exists(os.path.join(os.getcwd(), flight_date, dt.now().strftime("%Y-%m-%d"), f"{city[0]}-{city[1]}.csv")): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 文件已存在:{os.path.join(os.getcwd(), flight_date, dt.now().strftime("%Y-%m-%d"), f"{city[0]}-{city[1]}.csv")}') + continue + elif ('http' not in Flight_DataFetcher.driver.current_url): + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前的URL是:{driver.current_url}') + # 初始化页面 + Flight_DataFetcher.get_page(1) + + else: + # 后续运行只需更换出发与目的地 + Flight_DataFetcher.change_city() + + time.sleep(crawal_interval) + + # 运行结束退出 + try: + driver = Flight_DataFetcher.driver + driver.quit() + except Exception as e: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} An error occurred while quitting the driver: {e}') + + print(f'\n{time.strftime("%Y-%m-%d_%H-%M-%S")} 程序运行完成!!!!') diff --git a/DataMaintenance/ctrip_flights_scraper_V3.py b/DataMaintenance/ctrip_flights_scraper_V3.py new file mode 100644 index 0000000..a0a45f5 --- /dev/null +++ b/DataMaintenance/ctrip_flights_scraper_V3.py @@ -0,0 +1,1397 @@ +import magic +import io +import os +import gzip +import time +import json +import requests +import pandas as pd +from seleniumwire import webdriver +from datetime import datetime as dt, timedelta +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import threading + +# 爬取的城市 +crawal_citys = ["天津", "郑州"] + +# 爬取日期范围:起始日期。格式'2023-12-01' +begin_date = '2024-11-12' + +# 爬取日期范围:结束日期。格式'2023-12-31' +end_date = '2024-11-20' + +# 爬取T+N,即N天后 +start_interval = 1 + +# 爬取的日期 +crawal_days = 60 + +# 设置各城市爬取的时间间隔(单位:秒) +crawal_interval = 5 + +# 日期间隔 +days_interval = 1 + +# 设置页面加载的最长等待时间(单位:秒) 
+max_wait_time = 10 + +# 最大错误重试次数 +max_retry_time = 5 + +# 是否只抓取直飞信息(True: 只抓取直飞,False: 抓取所有航班) +direct_flight = True + +# 是否抓取航班舒适信息(True: 抓取,False: 不抓取) +comft_flight = True + +# 是否删除不重要的信息 +del_info = False + +# 是否重命名DataFrame的列名 +rename_col = True + +# 调试截图 +enable_screenshot = False + +# 允许登录(可能必须要登录才能获取数据) +login_allowed = True + +# 账号 +accounts = ['',''] + +# 密码 +passwords = ['',''] + +#利用stealth.min.js隐藏selenium特征 +stealth_js_path='./stealth.min.js' + +# 定义下载stealth.min.js的函数 +def download_stealth_js(file_path, url='https://raw.githubusercontent.com/requireCool/stealth.min.js/main/stealth.min.js'): + if not os.path.exists(file_path): + print(f"{file_path} not found, downloading...") + response = requests.get(url) + response.raise_for_status() # 确保请求成功 + with open(file_path, 'w') as file: + file.write(response.text) + print(f"{file_path} downloaded.") + else: + print(f"{file_path} already exists, no need to download.") + +def init_driver(): + # options = webdriver.ChromeOptions() # 创建一个配置对象 + options = webdriver.EdgeOptions() # 创建一个配置对象 + options.add_argument("--incognito") # 隐身模式(无痕模式) + # options.add_argument('--headless') # 启用无头模式 + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--disable-blink-features") + options.add_argument("--disable-blink-features=AutomationControlled") + options.add_argument("--disable-extensions") + options.add_argument("--pageLoadStrategy=eager") + options.add_argument("--disable-gpu") + options.add_argument("--disable-software-rasterizer") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--ignore-certificate-errors") + options.add_argument("--ignore-certificate-errors-spki-list") + options.add_argument("--ignore-ssl-errors") + options.add_experimental_option("excludeSwitches", ["enable-automation"]) # 不显示正在受自动化软件控制的提示 + + # 如果需要指定Chrome驱动的路径,取消下面这行的注释并设置正确的路径 + # chromedriver_path = '/path/to/chromedriver' + + driver = 
webdriver.Chrome(options=options) # 改为Chrome,如果需要指定路径,可以加上executable_path参数 + + try: + download_stealth_js(stealth_js_path) + # 读取并注入stealth.min.js + with open(stealth_js_path, 'r') as file: + stealth_js = file.read() + driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": stealth_js}) + except Exception as e: + print(e) + + driver.maximize_window() + + return driver + + +def gen_citys(crawal_citys): + # 生成城市组合表 + citys = [] + ytic = list(reversed(crawal_citys)) + for m in crawal_citys: + for n in ytic: + if m == n: + continue + else: + citys.append([m, n]) + return citys + + +def generate_flight_dates(n, begin_date, end_date, start_interval, days_interval): + flight_dates = [] + + if begin_date: + begin_date = dt.strptime(begin_date, "%Y-%m-%d") + elif start_interval: + begin_date = dt.now() + timedelta(days=start_interval) + + for i in range(0, n, days_interval): + flight_date = begin_date + timedelta(days=i) + + flight_dates.append(flight_date.strftime("%Y-%m-%d")) + + # 如果有结束日期,确保生成的日期不超过结束日期 + if end_date: + end_date = dt.strptime(end_date, "%Y-%m-%d") + flight_dates = [date for date in flight_dates if dt.strptime(date, "%Y-%m-%d") <= end_date] + # 继续生成日期直到达到或超过结束日期 + while dt.strptime(flight_dates[-1], "%Y-%m-%d") < end_date: + next_date = dt.strptime(flight_dates[-1], "%Y-%m-%d") + timedelta(days=days_interval) + if next_date <= end_date: + flight_dates.append(next_date.strftime("%Y-%m-%d")) + else: + break + + return flight_dates + + +# element_to_be_clickable 函数来替代 expected_conditions.element_to_be_clickable 或 expected_conditions.visibility_of_element_located +def element_to_be_clickable(element): + def check_clickable(driver): + try: + if element.is_enabled() and element.is_displayed(): + return element # 当条件满足时,返回元素本身 + else: + return False + except: + return False + + return check_clickable + + +class DataFetcher(object): + def __init__(self, driver): + self.driver = driver + self.date = None + self.city = None + self.err = 0 # 
错误重试次数 + self.switch_acc = 0 #切换账户 + self.comfort_data = None # 航班舒适度信息 + + def refresh_driver(self): + try: + self.driver.refresh() + except Exception as e: + # 错误次数+1 + self.err += 1 + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} refresh_driver:刷新页面失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} refresh_driver:刷新页面') + self.refresh_driver() + + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,refresh_driver:不继续重试' + ) + + def remove_btn(self): + try: + #WebDriverWait(self.driver, max_wait_time).until(lambda d: d.execute_script('return typeof jQuery !== "undefined"')) + # 移除提醒 + self.driver.execute_script("document.querySelectorAll('.notice-box').forEach(element => element.remove());") + # 移除在线客服 + self.driver.execute_script("document.querySelectorAll('.shortcut, .shortcut-link').forEach(element => element.remove());") + # 移除分享链接 + self.driver.execute_script("document.querySelectorAll('.shareline').forEach(element => element.remove());") + ''' + # 使用JavaScript除有的
标签 + self.driver.execute_script(""" + var elements = document.getElementsByTagName('dl'); + while(elements.length > 0){ + elements[0].parentNode.removeChild(elements[0]); + } + """) + ''' + except Exception as e: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} remove_btn:提醒移除失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + def check_verification_code(self): + try: + # 检查是否有验证码元素,如果有,则需要人工处理 + if (len(self.driver.find_elements(By.ID, "verification-code")) + len(self.driver.find_elements(By.CLASS_NAME, "alert-title"))): + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:验证码被触发verification-code/alert-title,请手动完成验证。') + + user_input_completed = threading.Event() + # 等待用户手动处理验证码 + def wait_for_input(): + input("请完成验证码,然后按回车键继续...") + user_input_completed.set() + + input_thread = threading.Thread(target=wait_for_input) + input_thread.start() + + # 设置手动验证超时时间 + timeout_seconds = crawal_interval * 100 + + input_thread.join(timeout=timeout_seconds) + + if user_input_completed.is_set(): + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:验证码处理完成,继续执行。') + + # 等待页面加载完成 + WebDriverWait(self.driver, max_wait_time).until( + EC.presence_of_element_located((By.CLASS_NAME, "pc_home-jipiao")) + ) + + # 移除注意事项 + self.remove_btn() + return True + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code: 手动验证超时 {timeout_seconds} 秒') + self.driver.quit() + self.driver = init_driver() + self.err = 0 + self.switch_acc += 1 + self.get_page(1) + return False + else: + # 移除注意事项 + self.remove_btn() + # 如果没有找到验证码元素,则说明页面加载成功,没有触发验证码 + return True + except Exception as e: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_verification_code:未知错误,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + return False + + def login(self): + if login_allowed: + + account = accounts[self.switch_acc % len(accounts)] + password = passwords[self.switch_acc % len(passwords)] + + try: + if 
len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")) == 0: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:未弹出登录界面') + WebDriverWait(self.driver, max_wait_time).until(EC.presence_of_element_located((By.CLASS_NAME, "tl_nfes_home_header_login_wrapper_siwkn"))) + # 点击飞机图标,返回主界面 + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_element(By.CLASS_NAME, "tl_nfes_home_header_login_wrapper_siwkn"))) + ele.click() + #等待页面加 + WebDriverWait(self.driver, max_wait_time).until(EC.presence_of_element_located((By.CLASS_NAME, "lg_loginwrap"))) + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:已经弹出登录界面') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_elements(By.CLASS_NAME, "r_input.bbz-js-iconable-input")[0])) + ele.send_keys(account) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:输入账户成功') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_element(By.CSS_SELECTOR, "div[data-testid='accountPanel'] input[data-testid='passwordInput']"))) + ele.send_keys(password) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:输入密码成功') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_element(By.CSS_SELECTOR, '[for="checkboxAgreementInput"]'))) + ele.click() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:勾选同意成功') + + ele = WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(self.driver.find_elements(By.CLASS_NAME, "form_btn.form_btn--block")[0])) + ele.click() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:登录成功') + # 保存登录截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + time.sleep(crawal_interval*3) + except Exception as e: + # 错误次数+1 + self.err += 1 + # 用f字符串格式化错误类型和错误信息,提供更多的调试信息 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 
login:页面加载或元素操作失败,错误类型:{type(e).__name__}, 详细误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} login:刷新页面') + self.refresh_driver() + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.login() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,login:重新尝试加载页面,这次指定需要重定向到首页' + ) + + def get_page(self, reset_to_homepage=0): + next_stage_flag = False + try: + if reset_to_homepage == 1: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试前往首页...') + start_time = time.time() + # 前往首页 + self.driver.get( + "https://flights.ctrip.com/online/channel/domestic") + end_time = time.time() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 前往首页耗时: {end_time - start_time:.2f} 秒') + + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面 URL: {self.driver.current_url}') + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面标题: {self.driver.title}') + + # 检查注意事项和验证码 + if self.check_verification_code(): + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 等待页面加载完成...') + WebDriverWait(self.driver, max_wait_time).until( + EC.presence_of_element_located( + (By.CLASS_NAME, "pc_home-jipiao")) + ) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 页面加载完成') + + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试点击飞机图标...') + # 点击飞机图标,返回主界面 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element( + By.CLASS_NAME, "pc_home-jipiao") + ) + ) + ele.click() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 成功点击飞机图标') + + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试选择单程...') + # 单程 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "radio-label")[0] + ) + ) + ele.click() + 
print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 成功选择单程') + + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 尝试点击搜索按钮...') + # 搜索 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element(By.CLASS_NAME, "search-btn") + ) + ) + ele.click() + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 成功点击搜索按钮') + + next_stage_flag = True + except Exception as e: + # 用f字符串格式化错误类型和错误信息,提供更多的调试信息 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_page:页面加载或元素操作失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面 URL: {self.driver.current_url}') + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面标题: {self.driver.title}') + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前页面源代码: {self.driver.page_source[:500]}...') # 只打印前500个字符 + + # 保存错误截图 + if enable_screenshot: + screenshot_path = f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + self.driver.save_screenshot(screenshot_path) + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误截图已保存: {screenshot_path}') + + # 重新尝试加载页面,这次指定需要重定向到首页 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 重新尝试加载页面,这次指定需要重定向到首页') + self.get_page(1) + else: + if next_stage_flag: + # 继续下一步 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 页面加载成功,继续下一步') + self.change_city() + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 页面加载成功,但未能完成所有操作') + + def change_city(self): + next_stage_flag = False + try: + # 等待页面完成加载 + WebDriverWait(self.driver, max_wait_time).until( + EC.presence_of_element_located( + (By.CLASS_NAME, "form-input-v3")) + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 若出发地与目标值不符,则更改出发地 + while self.city[0] not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[0].get_attribute("value"): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.click() + ele = WebDriverWait(self.driver, 
max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.send_keys(Keys.CONTROL + "a") + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.send_keys(self.city[0]) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换城市【0】-{self.driver.find_elements(By.CLASS_NAME,"form-input-v3")[0].get_attribute("value")}' + ) + + # 若目的地与目标值不符,则更改目的地 + while self.city[1] not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[1].get_attribute("value"): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.click() + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.send_keys(Keys.CONTROL + "a") + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.send_keys(self.city[1]) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换城市【1】-{self.driver.find_elements(By.CLASS_NAME,"form-input-v3")[1].get_attribute("value")}' + ) + + while ( + self.driver.find_elements(By.CSS_SELECTOR, "[aria-label=请选择日期]")[ + 0 + ].get_attribute("value") + != self.date + ): + # 点击日期选择 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_element( + By.CLASS_NAME, "modifyDate.depart-date" + ) + ) + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) < int(self.date[:4]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + 
"in-date-picker.icon.next-ico.iconf-right", + )[1] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "year").text[:-1])}小于 {int(self.date[:4])} 向右点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) > int(self.date[:4]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.prev-ico.iconf-left", + )[0] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "year").text[:-1])}大于 {int(self.date[:4])} 向左点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) == int(self.date[:4]): + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[0] + .find_element(By.CLASS_NAME, "month") + .text[:-1] + ) > int(self.date[5:7]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.prev-ico.iconf-left", + )[0] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[0].find_element(By.CLASS_NAME, "month").text[:-1])}大于 {int(self.date[5:7])} 向左点击' + ) + ele.click() + + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, "year") + .text[:-1] + ) == int(self.date[:4]): + if int( + self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + )[1] + .find_element(By.CLASS_NAME, 
"month") + .text[:-1] + ) < int(self.date[5:7]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, + "in-date-picker.icon.next-ico.iconf-right", + )[1] + ) + ) + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期{int(self.driver.find_elements(By.CLASS_NAME, "date-picker.date-picker-block")[1].find_element(By.CLASS_NAME, "month").text[:-1])}小于 {int(self.date[5:7])} 向右点击' + ) + ele.click() + + for m in self.driver.find_elements( + By.CLASS_NAME, "date-picker.date-picker-block" + ): + if int(m.find_element(By.CLASS_NAME, "year").text[:-1]) != int( + self.date[:4] + ): + continue + + if int(m.find_element(By.CLASS_NAME, "month").text[:-1]) != int( + self.date[5:7] + ): + continue + + for d in m.find_elements(By.CLASS_NAME, "date-d"): + if int(d.text) == int(self.date[-2:]): + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable(d) + ) + ele.click() + break + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:更换日期-{self.driver.find_elements(By.CSS_SELECTOR,"[aria-label=请选择日期]")[0].get_attribute("value")}' + ) + + while "(" not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[0].get_attribute("value"): + # Enter搜索 + # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1])) + # ele.send_keys(Keys.ENTER) + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[0] + ) + ) + ele.click() + + # 通过低价提醒按钮实现enter键换页 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "low-price-remind" + )[0] + ) + ) + ele.click() + + while "(" not in self.driver.find_elements( + By.CLASS_NAME, "form-input-v3" + )[1].get_attribute("value"): + # Enter搜索 + # ele=WebDriverWait(self.driver, max_wait_time).until(element_to_be_clickable(its[1])) + # 
ele.send_keys(Keys.ENTER) + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "form-input-v3")[1] + ) + ) + ele.click() + + # 通过低价提醒按钮实现enter键换页 + ele = WebDriverWait(self.driver, max_wait_time).until( + element_to_be_clickable( + self.driver.find_elements( + By.CLASS_NAME, "low-price-remind" + )[0] + ) + ) + ele.click() + + next_stage_flag = True + + except Exception as e: + # 错误次数+1 + self.err += 1 + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:更换市和日期失败,错误类型:{type(e).__name__}, 详细错误信息:{str(e).split("Stacktrace:")[0]}' + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + if self.err < max_retry_time: + if len(self.driver.find_elements(By.CLASS_NAME, "lg_loginbox_modal")): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:检测到登录弹窗,需要登录' + ) + self.login() + # 重试 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:重试') + self.change_city() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,change_city:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 删除本次请求 + del self.driver.requests + + # 置错计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + if next_stage_flag: + # 若无错误,执行下一步 + self.get_data() + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} change_city:成功更换城市和日期,当前路线为:{self.city[0]}-{self.city[1]}') + + def get_data(self): + try: + # 等待响应加载完成 + self.predata = self.driver.wait_for_request( + "/international/search/api/search/batchSearch?.*", timeout=max_wait_time + ) + + if comft_flight: + # 捕获 getFlightComfort 数据 + self.comfort_data = self.capture_flight_comfort_data() + + rb = dict(json.loads(self.predata.body).get("flightSegments")[0]) + + except Exception as e: + # 错误次数+1 + self.err += 1 
+ + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:获取数据超时,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 删除本次请求 + del self.driver.requests + + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_data:刷新页面') + self.refresh_driver() + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + # 删除本次请求 + del self.driver.requests + + # 检查数据获取正确性 + if ( + rb["departureCityName"] == self.city[0] + and rb["arrivalCityName"] == self.city[1] + and rb["departureDate"] == self.date + ): + print(f"get_data:城市匹配成功:出发地-{self.city[0]},目的地-{self.city[1]}") + + # 重置错误计数 + self.err = 0 + + # 若无错误,执行下一步 + self.decode_data() + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,get_data:刷新页面') + # 错误次数+1 + self.err += 1 + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 重新更换城市 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} get_data:重新更换城市:{rb["departureCityName"]}-{rb["arrivalCityName"]}-{rb["departureDate"]}' + ) + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.change_city() + + def decode_data(self): + try: + # 使用python-magic库检查MIME类型 + mime = magic.Magic() + file_type = mime.from_buffer(self.predata.response.body) + + buf = io.BytesIO(self.predata.response.body) + + if "gzip" in file_type: + gf = gzip.GzipFile(fileobj=buf) + self.dedata = gf.read().decode("UTF-8") + elif "JSON data" in file_type: + 
print(buf.read().decode("UTF-8")) + else: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 未知的压缩格式:{file_type}') + + self.dedata = json.loads(self.dedata) + + except Exception as e: + # 错误次数+1 + self.err += 1 + + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:数据解码失败,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}' + ) + + # 保存错误截图 + if enable_screenshot: + self.driver.save_screenshot( + f'screenshot/screenshot_{time.strftime("%Y-%m-%d_%H-%M-%S")}.png' + ) + + # 删除本次请求 + del self.driver.requests + + if self.err < max_retry_time: + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} decode_data:刷新页面') + self.refresh_driver() + + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + # 判错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,decode_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + # 重置错误计数 + self.err = 0 + + # 若无错误,执行下一步 + self.check_data() + + def check_data(self): + try: + self.flightItineraryList = self.dedata["data"]["flightItineraryList"] + # 倒序遍历,删除转机航班 + for i in range(len(self.flightItineraryList) - 1, -1, -1): + if ( + self.flightItineraryList[i]["flightSegments"][0]["transferCount"] + != 0 + ): + self.flightItineraryList.pop(i) + if len(self.flightItineraryList) == 0 and direct_flight: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 不存在直航航班:{self.city[0]}-{self.city[1]}') + # 重置错误计数 + self.err = 0 + return 0 + except Exception as e: + # 错误次数+1 + self.err += 1 + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 数据检查出错:不存在航班,错误类型:{type(e).__name__}, 错误详细:{str(e).split("Stacktrace:")[0]}' + ) + print(self.dedata) + if self.err < max_retry_time: + if 'searchErrorInfo' in self.dedata["data"]: + # 重置错误计数 + self.err = 0 + return 0 + else: + if "'needUserLogin': True" in str(self.dedata["data"]): + print( + 
f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:必须要登录才能查看数据,这次指定需要重定向到首页' + ) + # 重新尝试加载页面,这次指定需要重定向到首页 + self.login() + + # 刷新页面 + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} check_data:刷新页面') + self.refresh_driver() + # 检查注意事项和验证码 + if self.check_verification_code(): + # 重试 + self.get_data() + # 判断错误次数 + if self.err >= max_retry_time: + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 错误次数【{self.err}-{max_retry_time}】,check_data:重新尝试加载页面,这次指定需要重定向到首页' + ) + + # 重置错误计数 + self.err = 0 + + # 重新尝试加载页面,这次指定需要重定向到首页 + self.get_page(1) + else: + # 重置错误计数 + self.err = 0 + self.proc_flightSegments() + self.proc_priceList() + self.mergedata() + + def proc_flightSegments(self): + self.flights = pd.DataFrame() + + for flightlist in self.flightItineraryList: + flightlist = flightlist["flightSegments"][0]["flightList"] + flightUnitList = dict(flightlist[0]) + + departureday = flightUnitList["departureDateTime"].split(" ")[0] + departuretime = flightUnitList["departureDateTime"].split(" ")[1] + + arrivalday = flightUnitList["arrivalDateTime"].split(" ")[0] + arrivaltime = flightUnitList["arrivalDateTime"].split(" ")[1] + + # 处理 stopList + if 'stopList' in flightUnitList and flightUnitList['stopList']: + stop_info = [] + for stop in flightUnitList['stopList']: + stop_info.append(f"{stop['cityName']}({stop['airportName']}, {stop['duration']}分钟)") + flightUnitList['stopInfo'] = ' -> '.join(stop_info) + else: + flightUnitList['stopInfo'] = '无中转' + + if del_info: + # 删除一些不重要的信息 + dellist = [ + "sequenceNo", + "marketAirlineCode", + "departureProvinceId", + "departureCityId", + "departureCityCode", + "departureAirportShortName", + "departureTerminal", + "arrivalProvinceId", + "arrivalCityId", + "arrivalCityCode", + "arrivalAirportShortName", + "arrivalTerminal", + "transferDuration", + "stopList", + "leakedVisaTagSwitch", + "trafficType", + "highLightPlaneNo", + "mealType", + "operateAirlineCode", + "arrivalDateTime", + "departureDateTime", + 
"operateFlightNo", + "operateAirlineName", + ] + for value in dellist: + flightUnitList.pop(value, None) + + # 更新日期格式 + flightUnitList.update( + { + "departureday": departureday, + "departuretime": departuretime, + "arrivalday": arrivalday, + "arrivaltime": arrivaltime, + } + ) + + self.flights = pd.concat( + [ + self.flights, + pd.DataFrame.from_dict(flightUnitList, orient="index").T, + ], + ignore_index=True, + ) + + def proc_priceList(self): + self.prices = pd.DataFrame() + + for flightlist in self.flightItineraryList: + flightNo = flightlist["itineraryId"].split("_")[0] + priceList = flightlist["priceList"] + + # 经济舱,经济舱折扣 + economy, economy_tax, economy_total, economy_full = [], [], [], [] + economy_origin_price, economy_tax_price, economy_total_price, economy_full_price = "", "", "", "" + # 商务舱,商务舱折扣 + bussiness, bussiness_tax, bussiness_total, bussiness_full = [], [], [], [] + bussiness_origin_price, bussiness_tax_price, bussiness_total_price, bussiness_full_price = "", "", "", "" + + for price in priceList: + # print("Price dictionary keys:", price.keys()) + # print("Full price dictionary:", json.dumps(price, indent=2)) + + adultPrice = price["adultPrice"] + childPrice = price.get("childPrice", adultPrice) # 如果没有childPrice,使用adultPrice + freeOilFeeAndTax = price["freeOilFeeAndTax"] + sortPrice = price.get("sortPrice", adultPrice) # 如果没有sortPrice,使用adultPrice + + # 估算税费(如果需要的话) + estimatedTax = sortPrice - adultPrice if not freeOilFeeAndTax else 0 + adultTax = price.get("adultTax", estimatedTax) # 如果没有adultTax,使用estimatedTax + + miseryIndex = price["miseryIndex"] + cabin = price["cabin"] + + # 经济舱 + if cabin == "Y": + economy.append(adultPrice) + economy_tax.append(adultTax) + economy_full.append(miseryIndex) + economy_total.append(adultPrice+adultTax) + # 商务舱 + elif cabin == "C": + bussiness.append(adultPrice) + bussiness_tax.append(adultTax) + bussiness_full.append(miseryIndex) + bussiness_total.append(adultPrice+adultTax) + + # 初始化变量 + economy_min_index = 
None + bussiness_min_index = None + + if economy_total != []: + economy_total_price = min(economy_total) + economy_min_index = economy_total.index(economy_total_price) + + if bussiness_total != []: + bussiness_total_price = min(bussiness_total) + bussiness_min_index = bussiness_total.index(bussiness_total_price) + + if economy_min_index is not None: + economy_origin_price = economy[economy_min_index] + economy_tax_price = economy_tax[economy_min_index] + economy_full_price = economy_full[economy_min_index] + + if bussiness_min_index is not None: + bussiness_origin_price = bussiness[bussiness_min_index] + bussiness_tax_price = bussiness_tax[bussiness_min_index] + bussiness_full_price = bussiness_full[bussiness_min_index] + + price_info = { + "flightNo": flightNo, + "economy_origin": economy_origin_price, + "economy_tax": economy_tax_price, + "economy_total": economy_total_price, + "economy_full": economy_full_price, + "bussiness_origin": bussiness_origin_price, + "bussiness_tax": bussiness_tax_price, + "bussiness_total": bussiness_total_price, + "bussiness_full": bussiness_full_price, + } + + # self.prices=self.prices.append(price_info,ignore_index=True) + self.prices = pd.concat( + [self.prices, pd.DataFrame(price_info, index=[0])], ignore_index=True + ) + + def mergedata(self): + try: + self.df = self.flights.merge(self.prices, on=["flightNo"]) + print(f"合并后的航班数据形状: {self.df.shape}") + print(f"合并后的航班数据列: {self.df.columns}") + + self.df["dateGetTime"] = dt.now().strftime("%Y-%m-%d") + + print(f"获取到的舒适度数据: {self.comfort_data}") + + # 数据的列名映射 + columns = { + "dateGetTime": "数据获取日期", + "flightNo": "航班号", + "marketAirlineName": "航空公司", + "departureday": "出发日期", + "departuretime": "出发时间", + "arrivalday": "到达日期", + "arrivaltime": "到达时间", + "duration": "飞行时长", + "departureCountryName": "出发国家", + "departureCityName": "出发城市", + "departureAirportName": "出发机场", + "departureAirportCode": "出发机场三字码", + "arrivalCountryName": "到达国家", + "arrivalCityName": "到达城市", + 
"arrivalAirportName": "到达机场", + "arrivalAirportCode": "到达机场三字码", + "aircraftName": "飞机型号", + "aircraftSize": "飞机尺寸", + "aircraftCode": "飞机型号三字码", + "arrivalPunctuality": "到达准点率", + "stopCount": "停留次数", + "stopInfo": "中转信息" + } + + # 定义舒适度数据的列名映射 + comfort_columns = { + 'departure_delay_time': '出发延误时间', + 'departure_bridge_rate': '出发廊桥率', + 'arrival_delay_time': '到达延误时间', + 'plane_type': '飞机类型', + 'plane_width': '飞机宽度', + 'plane_age': '飞机机龄', + 'Y_has_meal': '经济舱是否有餐食', + 'Y_seat_tilt': '经济舱座椅倾斜度', + 'Y_seat_width': '经济舱座椅宽度', + 'Y_seat_pitch': '经济舱座椅间距', + 'Y_meal_msg': '经济舱餐食信息', + 'Y_power': '经济舱电源', + 'C_has_meal': '商务舱是否有餐食', + 'C_seat_tilt': '商务舱座椅倾斜度', + 'C_seat_width': '商务舱座椅宽度', + 'C_seat_pitch': '商务舱座椅间距', + 'C_meal_msg': '商务舱餐食信息', + 'C_power': '商务舱电源', + } + + if self.comfort_data: + comfort_df = pd.DataFrame.from_dict(self.comfort_data, orient='index') + comfort_df.reset_index(inplace=True) + comfort_df.rename(columns={'index': 'flight_no'}, inplace=True) + + print(f"舒适度数据形状: {comfort_df.shape}") + print(f"舒适度数据列: {comfort_df.columns}") + print(f"舒适度数据前几行: \n{comfort_df.head()}") + + # 检查 operateFlightNo 列是否存在 + if 'operateFlightNo' in self.df.columns: + print(f"合并前的 operateFlightNo 唯一值: {self.df['operateFlightNo'].unique()}") + # 创建一个临时列来存储用于匹配的航班号 + self.df['match_flight_no'] = self.df['operateFlightNo'].fillna(self.df['flightNo']) + else: + print("警告: operateFlightNo 列不存在于数据中,将使用 flightNo 进行匹配") + self.df['match_flight_no'] = self.df['flightNo'] + + print(f"现有的列: {self.df.columns}") + print(f"合并前的 flight_no 唯一值: {comfort_df['flight_no'].unique()}") + + # 使用 left join 来合并数据 + self.df = self.df.merge(comfort_df, left_on='match_flight_no', right_on='flight_no', how='left') + + print(f"合并后的数据形状: {self.df.shape}") + print(f"合并后的数据列: {self.df.columns}") + + # 删除临时列和多余的flight_no列 + self.df.drop(['match_flight_no', 'flight_no'], axis=1, inplace=True, errors='ignore') + else: + # 如果没有舒适度数据,手动添加空列,保证数据结构一致性 + for col in comfort_columns.keys(): + self.df[col] = 
None # 添加缺失的舒适度列并填充为空值 + + if rename_col: + order = list(columns.values()) + # 对pandas的columns进行重命名 + columns.update(comfort_columns, errors='ignore') + + self.df = self.df.rename(columns=columns) + + if del_info: + # 使用 reindex 确保所有列都存在于最终的 DataFrame 中,不存在的列会被自动忽略 + self.df = self.df.reindex(columns=order, fill_value=None) + + files_dir = os.path.join( + os.getcwd(), self.date, dt.now().strftime("%Y-%m-%d") + ) + + if not os.path.exists(files_dir): + os.makedirs(files_dir) + + filename = os.path.join( + files_dir, f"{self.city[0]}-{self.city[1]}.csv") + + self.df.to_csv(filename, encoding="UTF-8", index=False) + + print(f'\n{time.strftime("%Y-%m-%d_%H-%M-%S")} 数据爬取完成 {filename}\n') + + return 0 + + except Exception as e: + print(f"合并数据失败 {str(e)}") + print(f"错误类型: {type(e).__name__}") + print(f"错误详情: {str(e)}") + import traceback + print(f"错误堆栈: {traceback.format_exc()}") + return 0 + + def capture_flight_comfort_data(self): + try: + # 滚动页面到底部以加载所有内容 + last_height = self.driver.execute_script("return document.body.scrollHeight") + while True: + # 分步滚动页面 + for i in range(10): # 将页面分成10步滚动 + scroll_height = last_height * (i + 1) / 3 + self.driver.execute_script(f"window.scrollTo(0, {scroll_height});") + time.sleep(0.5) # 每一小步等待0.5秒 + + # 等待页面加载 + time.sleep(3) # 滚动到底部后多等待3秒 + + # 计算新的滚动高度并与最后的滚动高度进行比较 + new_height = self.driver.execute_script("return document.body.scrollHeight") + if new_height == last_height: + break + last_height = new_height + + comfort_requests = self.driver.requests + comfort_data = {} + batch_comfort_found = False + getFlightComfort_requests_count = 0 + total_requests_count = len(comfort_requests) + + print(f"\n{time.strftime('%Y-%m-%d_%H-%M-%S')} 开始分析请求,总请求数:{total_requests_count}") + + for request in comfort_requests: + if "/search/api/flight/comfort/batchGetComfortTagList" in request.url: + batch_comfort_found = True + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 找到 batchGetComfortTagList 请求") + continue + + if 
"/search/api/flight/comfort/getFlightComfort" in request.url: + getFlightComfort_requests_count += 1 + print(f"\n{time.strftime('%Y-%m-%d_%H-%M-%S')} 捕获到第 {getFlightComfort_requests_count} 个 getFlightComfort 请求:") + print(f"URL: {request.url}") + + try: + payload = json.loads(request.body.decode('utf-8')) + flight_no = payload.get('flightNoList', ['Unknown'])[0] + print(f"请求的航班号: {flight_no}") + except Exception as e: + print(f"无法解析请求 payload: {str(e)}") + continue + + if request.response: + print(f"响应状态码: {request.response.status_code}") + body = request.response.body + if request.response.headers.get('Content-Encoding', '').lower() == 'gzip': + body = gzip.decompress(body) + + try: + json_data = json.loads(body.decode('utf-8')) + print(f"响应数据: {json.dumps(json_data, indent=2, ensure_ascii=False)[:500]}...") # 打印前500个字符 + if json_data['status'] == 0 and json_data['msg'] == 'success': + flight_comfort = json_data['data'] + + punctuality = flight_comfort['punctualityInfo'] + plane_info = flight_comfort['planeInfo'] + cabin_info = {cabin['cabin']: cabin for cabin in flight_comfort['cabinInfoList']} + + processed_data = { + 'departure_delay_time': punctuality.get("departureDelaytime", None), + 'departure_bridge_rate': punctuality.get("departureBridge", None), + 'arrival_delay_time': punctuality.get("arrivalDelaytime", None), + 'plane_type': plane_info.get("planeTypeName", None), + 'plane_width': plane_info.get("planeWidthCategory", None), + 'plane_age': plane_info.get("planeAge", None) + } + + for cabin_type in ['Y', 'C']: + if cabin_type in cabin_info: + cabin = cabin_info[cabin_type] + processed_data.update({ + f'{cabin_type}_has_meal': cabin['hasMeal'], + f'{cabin_type}_seat_tilt': cabin['seatTilt']['value'], + f'{cabin_type}_seat_width': cabin['seatWidth']['value'], + f'{cabin_type}_seat_pitch': cabin['seatPitch']['value'], + f'{cabin_type}_meal_msg': cabin['mealMsg'] + }) + if 'power' in cabin: + processed_data[f'{cabin_type}_power'] = cabin['power'] + + 
comfort_data[flight_no] = processed_data + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 成功提取航班 {flight_no} 的舒适度数据") + else: + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} getFlightComfort 响应状态异常: {json_data['status']}, {json_data['msg']}") + except Exception as e: + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 处理 getFlightComfort 响应时出错: {str(e)}") + else: + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} getFlightComfort 请求没有响应") + + print(f"\n{time.strftime('%Y-%m-%d_%H-%M-%S')} 请求分析完成") + print(f"总请求数: {total_requests_count}") + print(f"batchGetComfortTagList 请求是否找到: {batch_comfort_found}") + print(f"getFlightComfort 请求数: {getFlightComfort_requests_count}") + print(f"成功提取的舒适度数据数: {len(comfort_data)}") + + if comfort_data: + # 创建舒适度DataFrame + comfort_df = pd.DataFrame.from_dict(comfort_data, orient='index') + comfort_df.reset_index(inplace=True) + comfort_df.rename(columns={'index': 'flight_no'}, inplace=True) + + # 保存舒适度数据为CSV文件 + # save_dir = os.path.join(os.getcwd(), self.date, datetime.now().strftime("%Y-%m-%d")) + # os.makedirs(save_dir, exist_ok=True) + + # comfort_filename = os.path.join(save_dir, f"{self.city[0]}-{self.city[1]}_comfort.csv") + # comfort_df.to_csv(comfort_filename, encoding="UTF-8", index=False) + # print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 航班舒适度数据已保存到 {comfort_filename}") + + return comfort_data + else: + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 未捕获到任何 getFlightComfort 数据") + print("可能的原因:") + print("1. 网页没有加载完全") + print("2. 网站结构可能已经改变") + print("3. 网络连接问题") + print("4. 
请求被网站拦截或限制") + return None + + except Exception as e: + print(f"{time.strftime('%Y-%m-%d_%H-%M-%S')} 捕获 getFlightComfort 数据时出错:{str(e)}") + print(f"错误类型: {type(e).__name__}") + print(f"错误详情: {str(e)}") + import traceback + print(f"错误堆栈: {traceback.format_exc()}") + return None + + +if __name__ == "__main__": + + driver = init_driver() + + citys = gen_citys(crawal_citys) + + flight_dates = generate_flight_dates(crawal_days, begin_date, end_date, start_interval, days_interval) + + Flight_DataFetcher = DataFetcher(driver) + + for city in citys: + Flight_DataFetcher.city = city + + for flight_date in flight_dates: + Flight_DataFetcher.date = flight_date + + if os.path.exists(os.path.join(os.getcwd(), flight_date, dt.now().strftime("%Y-%m-%d"), f"{city[0]}-{city[1]}.csv")): + print( + f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 文件已存在:{os.path.join(os.getcwd(), flight_date, dt.now().strftime("%Y-%m-%d"), f"{city[0]}-{city[1]}.csv")}') + continue + elif ('http' not in Flight_DataFetcher.driver.current_url): + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} 当前的URL是:{driver.current_url}') + # 初始化页面 + Flight_DataFetcher.get_page(1) + + else: + # 后续运行只需更换出发与目的地 + Flight_DataFetcher.change_city() + + time.sleep(crawal_interval) + + # 运行结束退出 + try: + driver = Flight_DataFetcher.driver + driver.quit() + except Exception as e: + print(f'{time.strftime("%Y-%m-%d_%H-%M-%S")} An error occurred while quitting the driver: {e}') + + print(f'\n{time.strftime("%Y-%m-%d_%H-%M-%S")} 程序运行完成!!!!') diff --git a/DataMaintenance/db_import.py b/DataMaintenance/db_import.py new file mode 100644 index 0000000..6357291 --- /dev/null +++ b/DataMaintenance/db_import.py @@ -0,0 +1,90 @@ +import pandas as pd +import mysql.connector +from mysql.connector import Error +import os +from datetime import datetime, timedelta + +# 数据库连接配置 +db_config = { + 'host': '152.136.166.253', # 修改这里,去掉端口号 + 'port': 8989, # 单独指定端口号 + 'database': 'fly_ticket', + 'user': 'root', + 'password': 'Cauc@2024' +} + +def 
import_csv_to_db(file_path, cursor): + df = pd.read_csv(file_path) + for index, row in df.iterrows(): + sql = """INSERT INTO flight (f_n, f_s_p, f_a_p, f_s_a, f_a_a, f_s_t, f_a_t, f_Date, f_Delay, f_p, f_food, f_wide, f_depcode, f_dstcode) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + f_s_p = VALUES(f_s_p), + f_a_p = VALUES(f_a_p), + f_s_a = VALUES(f_s_a), + f_a_a = VALUES(f_a_a), + f_s_t = VALUES(f_s_t), + f_a_t = VALUES(f_a_t), + f_Delay = VALUES(f_Delay), + f_p = VALUES(f_p), + f_food = VALUES(f_food), + f_wide = VALUES(f_wide), + f_depcode = VALUES(f_depcode), + f_dstcode = VALUES(f_dstcode);""" + + values = ( + row['航班号'], + row['出发城市'], + row['到达城市'], + row['出发机场'], + row['到达机场'], + row['出发时间'], + row['到达时间'], + row['出发日期'], + row['出发延误时间'], + row['economy_origin'], + row['经济舱餐食信息'], + row['经济舱座椅间距'], + row['出发机场三字码'], + row['到达机场三字码'] + ) + + cursor.execute(sql, values) + +try: + # 连接到数据库 + conn = mysql.connector.connect(**db_config) + + if conn.is_connected(): + cursor = conn.cursor() + + # 设置日期范围 + start_date = datetime(2024, 11, 12) + end_date = datetime(2024, 11, 20) + current_date = start_date + + while current_date <= end_date: + folder_name = current_date.strftime("%Y-%m-%d") + folder_path = os.path.join("D:\college\SE2\Ctrip-Crawler-main\Ctrip-Crawler-withComfortInfo", folder_name, "2024-11-12") + + if os.path.exists(folder_path): + for file_name in os.listdir(folder_path): + if file_name.endswith('.csv'): + file_path = os.path.join(folder_path, file_name) + import_csv_to_db(file_path, cursor) + print(f"已导入文件: {file_path}") + + current_date += timedelta(days=1) + + # 提交更改 + conn.commit() + print("所有数据成功插入到数据库") + +except Error as e: + print(f"连接数据库时出错: {e}") + +finally: + if 'conn' in locals() and conn.is_connected(): + cursor.close() + conn.close() + print("数据库连接已关闭") diff --git a/DataMaintenance/history version/ctrip_flights_scraper.py b/DataMaintenance/history version/ctrip_flights_scraper.py new 
file mode 100644 index 0000000..b48d4db --- /dev/null +++ b/DataMaintenance/history version/ctrip_flights_scraper.py @@ -0,0 +1,412 @@ +import io +import os +import gzip +import time +import json +import random +import requests +import threading +import pandas as pd +from seleniumwire import webdriver +from datetime import datetime as dt,timedelta +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.common.exceptions import TimeoutException,StaleElementReferenceException,ElementNotInteractableException,ElementClickInterceptedException # 加载异常 + + +def getcitycode(): + cityname,code=[],[] + #采用携程的api接口 + city_url='https://flights.ctrip.com/online/api/poi/get?v='+str(random.random()) + headers={ + 'dnt':'1', + 'referer':'https://verify.ctrip.com/', + 'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36' + } + r=requests.get(city_url,headers=headers) + citys=json.loads(r.text).get('data') + for city in citys: + if city =='热门': + continue + for key in city: + try: + for k in citys[city][key]: + cityname.append(k['display']) + code.append(k['data']) + except: + continue + citycode=dict(zip(cityname,code)) + + return cityname,citycode + + + +class FLIGHT(object): + def __init__(self): + self.url = 'https://flights.ctrip.com/online/list/oneway' #携程机票查询页面 + self.chromeDriverPath = 'C:/Program Files/Google/Chrome/Application/chromedriver' #chromedriver位置 + self.options = webdriver.ChromeOptions() # 创建一个配置对象 + #self.options.add_argument('--incognito') # 隐身模式(无痕模式) + #self.options.add_argument('User-Agent=%s'%UserAgent().random) # 替换User-Agent + self.options.add_argument("--disable-blink-features") + self.options.add_argument("--disable-blink-features=AutomationControlled") + self.options.add_experimental_option("excludeSwitches", ['enable-automation'])# 不显示正在受自动化软件控制 + self.driver = 
webdriver.Chrome(executable_path=self.chromeDriverPath,chrome_options=self.options) + self.driver.maximize_window() + self.err=0#错误重试次数 + + + def getpage(self): + ##############获取地区码 + self.startcode=self.citycode[self.city[0]][-3:] + self.endcode=self.citycode[self.city[1]][-3:] + + ##############生成访问链接 + flights_url=self.url+'-'+self.startcode+'-'+self.endcode+'?&depdate='+self.date + print(flights_url) + ##############设置加载超时阈值 + self.driver.set_page_load_timeout(300) + try: + self.driver.get(flights_url) + except: + print('页面连接失败') + self.driver.close() + self.getpage() + else: + try: + ##############判断是否存在验证码 + self.driver.find_element(By.CLASS_NAME,"basic-alert.alert-giftinfo") + print('等待2小时后重试') + time.sleep(7200) + self.getpage() + except: + ##############不存在验证码,执行下一步 + self.remove_btn() + + def remove_btn(self): + try: + js_remove="$('.notice-box').remove();" + self.driver.execute_script(js_remove) + except Exception as e: + print('防疫移除失败',e) + else: + self.changecity() + + + + def changecity(self): + try: + #获取出发地与目的地元素位置 + its=self.driver.find_elements(By.CLASS_NAME,'form-input-v3') + + #若出发地与目标值不符,则更改出发地 + while self.city[0] not in its[0].get_attribute('value'): + its[0].click() + time.sleep(0.5) + its[0].send_keys(Keys.CONTROL + 'a') + time.sleep(0.5) + its[0].send_keys(self.city[0]) + + time.sleep(0.5) + + #若目的地与目标值不符,则更改目的地 + while self.city[1] not in its[1].get_attribute('value'): + its[1].click() + time.sleep(0.5) + its[1].send_keys(Keys.CONTROL + 'a') + time.sleep(0.5) + its[1].send_keys(self.city[1]) + + time.sleep(0.5) + try: + #通过低价提醒按钮实现enter键换页 + self.driver.implicitly_wait(5) # seconds + self.driver.find_elements(By.CLASS_NAME,'low-price-remind')[0].click() + except IndexError as e: + print('\n更换城市错误 找不到元素',e) + #以防万一 + its[1].send_keys(Keys.ENTER) + + print('\n更换城市成功',self.city[0]+'-'+self.city[1]) + except (ElementNotInteractableException,StaleElementReferenceException,ElementClickInterceptedException,ElementClickInterceptedException) as 
e: + print('\n更换城市错误 元素错误',e) + self.err+=1 + if self.err<=5: + self.click_btn() + else: + self.err=0 + del self.driver.requests + self.getpage() + except Exception as e: + print('\n更换城市错误',e) + #删除本次请求 + del self.driver.requests + #从头开始重新执行程序 + self.getpage() + else: + #若无错误,执行下一步 + self.err=0 + self.getdata() + + + + def getdata(self): + try: + #等待响应加载完成 + self.predata = self.driver.wait_for_request('/international/search/api/search/batchSearch?.*', timeout=60) + + rb=dict(json.loads(self.predata.body).get('flightSegments')[0]) + + except TimeoutException as e: + print('\获取数据错误',e) + #删除本次请求 + del self.driver.requests + #从头开始重新执行程序 + self.getpage() + else: + #检查数据获取正确性 + if rb['departureCityName'] == self.city[0] and rb['arrivalCityName'] == self.city[1]: + print('城市获取正确') + #删除本次请求 + del self.driver.requests + #若无错误,执行下一步 + self.decode_data() + else: + #删除本次请求 + del self.driver.requests + #重新更换城市 + self.changecity() + + + + def decode_data(self): + try: + buf = io.BytesIO(self.predata.response.body) + gf = gzip.GzipFile(fileobj = buf) + self.dedata = gf.read().decode('UTF-8') + self.dedata=json.loads(self.dedata) + except: + print('重新获取数据') + self.getpage() + else: + #若无错误,执行下一步 + self.check_data() + + + + def check_data(self): + try: + self.flightItineraryList=self.dedata['data']['flightItineraryList'] + #倒序遍历,删除转机航班 + for i in range(len(self.flightItineraryList)-1, -1, -1): + if self.flightItineraryList[i]['flightSegments'][0]['transferCount'] !=0: + self.flightItineraryList.pop(i) + if len(self.flightItineraryList): + #存在直航航班,执行下一步 + self.muti_process() + else: + print('不存在直航航班') + return 0 + except: + print('不存在直航航班') + return 0 + + + def muti_process(self): + processes = [] + + self.flights = pd.DataFrame() + self.prices = pd.DataFrame() + #处理航班信息 + processes.append(threading.Thread(target=self.proc_flightSegments)) + #处理票价信息 + processes.append(threading.Thread(target=self.proc_priceList)) + + for pro in processes: + pro.start() + for pro in processes: + 
pro.join() + + #若无错误,执行下一步 + self.mergedata() + + def proc_flightSegments(self): + for flightlist in self.flightItineraryList: + flightlist=flightlist['flightSegments'][0]['flightList'] + flightUnitList=dict(flightlist[0]) + + + departureday=flightUnitList['departureDateTime'].split(' ')[0] + departuretime=flightUnitList['departureDateTime'].split(' ')[1] + + arrivalday=flightUnitList['arrivalDateTime'].split(' ')[0] + arrivaltime=flightUnitList['arrivalDateTime'].split(' ')[1] + + #删除一些不重要的信息 + dellist=['sequenceNo', 'marketAirlineCode', + 'departureProvinceId','departureCityId','departureCityCode','departureAirportShortName','departureTerminal', + 'arrivalProvinceId','arrivalCityId','arrivalCityCode','arrivalAirportShortName','arrivalTerminal', + 'transferDuration','stopList','leakedVisaTagSwitch','trafficType','highLightPlaneNo','mealType', + 'operateAirlineCode','arrivalDateTime','departureDateTime','operateFlightNo','operateAirlineName'] + for value in dellist: + try: + flightUnitList.pop(value) + except: + continue + + #更新日期格式 + flightUnitList.update({'departureday': departureday, 'departuretime': departuretime, + 'arrivalday': arrivalday, 'arrivaltime': arrivaltime}) + + + self.flights=pd.concat([self.flights,pd.DataFrame(flightUnitList,index=[0])],ignore_index=True) + + + + def proc_priceList(self): + for flightlist in self.flightItineraryList: + flightNo=flightlist['itineraryId'].split('_')[0] + priceList=flightlist['priceList'] + + #经济舱,经济舱折扣 + economy,economy_discount=[],[] + #商务舱,商务舱折扣 + bussiness,bussiness_discount=[],[] + + for price in priceList: + adultPrice=price['adultPrice'] + cabin=price['cabin'] + priceUnitList=dict(price['priceUnitList'][0]['flightSeatList'][0]) + discountRate=priceUnitList['discountRate'] + #经济舱 + if cabin=='Y': + economy.append(adultPrice) + economy_discount.append(discountRate) + #商务舱 + elif cabin=='C': + bussiness.append(adultPrice) + bussiness_discount.append(discountRate) + + if economy !=[]: + try: + 
economy_origin=economy[economy_discount.index(1)] + except: + economy_origin=int(max(economy)/max(economy_discount)) + + if min(economy_discount) !=1: + economy_low=min(economy) + economy_cut=min(economy_discount) + else: + economy_low='' + economy_cut='' + + else: + economy_origin='' + economy_low='' + economy_cut='' + + + if bussiness !=[]: + try: + bussiness_origin=bussiness[bussiness_discount.index(1)] + except: + bussiness_origin=int(max(bussiness)/max(bussiness_discount)) + + if min(bussiness_discount) !=1: + bussiness_low=min(bussiness) + bussiness_cut=min(bussiness_discount) + else: + bussiness_low='' + bussiness_cut='' + + else: + bussiness_origin='' + bussiness_low='' + bussiness_cut='' + + price_info={'flightNo':flightNo, + 'economy_origin':economy_origin,'economy_low':economy_low,'economy_cut':economy_cut, + 'bussiness_origin':bussiness_origin,'bussiness_low':bussiness_low,'bussiness_cut':bussiness_cut} + + #self.prices=self.prices.append(price_info,ignore_index=True) + self.prices=pd.concat([self.prices,pd.DataFrame(price_info,index=[0])],ignore_index=True) + + + + def mergedata(self): + try: + self.df = self.flights.merge(self.prices,on=['flightNo']) + + self.df['数据获取日期']=dt.now().strftime('%Y-%m-%d') + + #对pandas的columns进行重命名 + order=['数据获取日期','航班号','航空公司', + '出发日期','出发时间','到达日期','到达时间','飞行时长','出发国家','出发城市','出发机场','出发机场三字码', + '到达国家','到达城市','到达机场','到达机场三字码','飞机型号','飞机尺寸','飞机型号三字码', + '经济舱原价','经济舱最低价','经济舱折扣','商务舱原价','商务舱最低价','商务舱折扣', + '到达准点率','停留次数'] + + origin=['数据获取日期','flightNo','marketAirlineName', + 'departureday','departuretime','arrivalday','arrivaltime','duration', + 'departureCountryName','departureCityName','departureAirportName','departureAirportCode', + 'arrivalCountryName','arrivalCityName','arrivalAirportName','arrivalAirportCode', + 'aircraftName','aircraftSize','aircraftCode', + 'economy_origin','economy_low','economy_cut', + 'bussiness_origin','bussiness_low','bussiness_cut', + 'arrivalPunctuality','stopCount'] + + 
columns=dict(zip(origin,order)) + + self.df=self.df.rename(columns=columns) + + self.df = self.df[order] + + + if not os.path.exists(self.date): + os.makedirs(self.date) + + filename=os.getcwd()+'\\'+self.date+'\\'+self.date+'-'+self.city[0]+'-'+self.city[1]+'.csv' + + self.df.to_csv(filename,encoding='GB18030',index=False) + + print('\n数据爬取完成',filename) + except Exception as e: + print('合并数据失败',e) + + + def demain(self,citys,citycode): + self.citycode=citycode + #设置出发日期 + self.date=dt.now()+timedelta(days=7) + self.date=self.date.strftime('%Y-%m-%d') + + for city in citys: + self.city=city + + if citys.index(city)==0: + #第一次运行 + self.getpage() + else: + #后续运行只需更换出发与目的地 + self.changecity() + + #运行结束退出 + self.driver.quit() + + + +if __name__ == '__main__': + citys=[] + cityname,citycode=getcitycode() + city=['上海','广州','深圳','北京'] + ytic=list(reversed(city)) + for m in city: + for n in ytic: + if m==n: + continue + else: + citys.append([m,n]) + fly = FLIGHT() + fly.demain(citys,citycode) + print('\n程序运行完成!!!!') + diff --git a/DataMaintenance/history version/ctrip_flights_scraper_V2.py b/DataMaintenance/history version/ctrip_flights_scraper_V2.py new file mode 100644 index 0000000..3af1fb0 --- /dev/null +++ b/DataMaintenance/history version/ctrip_flights_scraper_V2.py @@ -0,0 +1,397 @@ +import io +import os +import gzip +import time +import json +import threading +import pandas as pd +from seleniumwire import webdriver +from datetime import datetime as dt,timedelta +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.common.exceptions import TimeoutException,StaleElementReferenceException,ElementNotInteractableException,ElementClickInterceptedException # 加载异常 + + + +class FLIGHT(object): + def __init__(self): + self.chromeDriverPath = 'C:/Program Files/Google/Chrome/Application/chromedriver' #chromedriver位置 + self.options = webdriver.ChromeOptions() # 创建一个配置对象 + self.options.add_argument('--incognito') # 
隐身模式(无痕模式) + self.options.add_argument("--disable-blink-features") + self.options.add_argument("--disable-blink-features=AutomationControlled") + self.options.add_experimental_option("excludeSwitches", ['enable-automation'])# 不显示正在受自动化软件控制 + self.driver = webdriver.Chrome(executable_path=self.chromeDriverPath,chrome_options=self.options) + self.driver.set_page_load_timeout(300)#设置加载超时阈值 + self.driver.maximize_window() + self.err=0#错误重试次数 + #前往首页 + self.driver.get('https://flights.ctrip.com/online/channel/domestic') + + + + def getpage(self): + try: + self.driver.find_element(By.CLASS_NAME,'pc_home-jipiao').click()#点击飞机图标,返回主界面 + self.driver.implicitly_wait(5) # seconds + self.driver.find_elements(By.CLASS_NAME,'radio-label')[0].click()#单程 + + while self.driver.find_elements(By.CSS_SELECTOR,"[aria-label=请选择日期]")[0].get_attribute("value") != self.date: + + self.driver.find_element(By.CLASS_NAME,'modifyDate.depart-date').click()#点击日期选择 + + for m in self.driver.find_elements(By.CLASS_NAME,'date-picker.date-picker-block'): + + if int(m.find_element(By.CLASS_NAME,'month').text[:-1]) != int(self.date[5:7]): + continue + + for d in m.find_elements(By.CLASS_NAME,'date-d'): + if int(d.text) == int(self.date[-2:]): + d.click() + break + + self.driver.find_element(By.CLASS_NAME,'search-btn').click()#搜索 + + except: + print('页面连接失败') + self.driver.close() + self.getpage() + else: + try: + ##############判断是否存在验证码 + self.driver.find_element(By.ID,"verification-code") + print('等待2小时后重试') + time.sleep(7200) + self.getpage() + except: + ##############不存在验证码,执行下一步 + self.changecity() + + def remove_btn(self): + try: + js_remove="$('.notice-box').remove();" + self.driver.execute_script(js_remove) + except Exception as e: + print('防疫移除失败',e) + + + def changecity(self): + + #移除防疫提醒 + self.remove_btn() + + try: + #获取出发地与目的地元素位置 + its=self.driver.find_elements(By.CLASS_NAME,'form-input-v3') + + #若出发地与目标值不符,则更改出发地 + while self.city[0] not in its[0].get_attribute('value'): + its[0].click() + 
time.sleep(0.5) + its[0].send_keys(Keys.CONTROL + 'a') + time.sleep(0.5) + its[0].send_keys(self.city[0]) + + time.sleep(0.5) + + #若目的地与目标值不符,则更改目的地 + while self.city[1] not in its[1].get_attribute('value'): + its[1].click() + time.sleep(0.5) + its[1].send_keys(Keys.CONTROL + 'a') + time.sleep(0.5) + its[1].send_keys(self.city[1]) + + time.sleep(0.5) + try: + #通过低价提醒按钮实现enter键换页 + self.driver.implicitly_wait(5) # seconds + self.driver.find_elements(By.CLASS_NAME,'low-price-remind')[0].click() + except IndexError as e: + print('\n更换城市错误 找不到元素',e) + #以防万一 + its[1].send_keys(Keys.ENTER) + + print('\n更换城市成功',self.city[0]+'-'+self.city[1]) + #捕获错误 + except (IndexError,ElementNotInteractableException,StaleElementReferenceException,ElementClickInterceptedException,ElementClickInterceptedException) as e: + print('\n更换城市错误 元素错误',e) + self.err+=1 + if self.err<=5: + self.changecity() + else: + self.err=0 + del self.driver.requests + self.getpage() + except Exception as e: + print('\n更换城市错误',e) + #删除本次请求 + del self.driver.requests + #从头开始重新执行程序 + self.getpage() + else: + #若无错误,执行下一步 + self.err=0 + self.getdata() + + + + def getdata(self): + try: + #等待响应加载完成 + self.predata = self.driver.wait_for_request('/international/search/api/search/batchSearch?.*', timeout=30) + + rb=dict(json.loads(self.predata.body).get('flightSegments')[0]) + + except TimeoutException as e: + print('\获取数据错误',e) + #删除本次请求 + del self.driver.requests + #从头开始重新执行程序 + self.getpage() + else: + #检查数据获取正确性 + if rb['departureCityName'] == self.city[0] and rb['arrivalCityName'] == self.city[1]: + print('城市获取正确') + #删除本次请求 + del self.driver.requests + #若无错误,执行下一步 + self.decode_data() + else: + #删除本次请求 + del self.driver.requests + #重新更换城市 + self.changecity() + + + + def decode_data(self): + try: + buf = io.BytesIO(self.predata.response.body) + gf = gzip.GzipFile(fileobj = buf) + self.dedata = gf.read().decode('UTF-8') + self.dedata=json.loads(self.dedata) + except: + print('重新获取数据') + self.getpage() + else: + 
def check_data(self):
    """Keep only direct itineraries and pass them on to ``muti_process``.

    Reads ``self.dedata['data']['flightItineraryList']``, removes every
    itinerary whose first segment has a non-zero ``transferCount`` and
    stores the list in ``self.flightItineraryList``.

    Returns:
        ``0`` when the response is unusable, no direct flight is left, or
        processing failed; ``None`` after ``self.muti_process()`` succeeded.
    """
    try:
        self.flightItineraryList = self.dedata['data']['flightItineraryList']
        # Slice assignment filters in place, so ``self.dedata`` keeps
        # pointing at the same (now filtered) list.
        self.flightItineraryList[:] = [
            itinerary for itinerary in self.flightItineraryList
            if itinerary['flightSegments'][0]['transferCount'] == 0
        ]
    except (KeyError, IndexError, TypeError, AttributeError):
        # Response lacks the expected structure: treat as "nothing to scrape".
        print('不存在直航航班')
        return 0

    if not self.flightItineraryList:
        print('不存在直航航班')
        return 0

    # Handled separately so a processing failure is not reported as
    # "no direct flights"; still best-effort, the run goes on.
    try:
        self.muti_process()
    except Exception as e:
        print('处理航班数据失败', e)
        return 0
def proc_priceList(self):
    """Append one price-summary row per itinerary to ``self.prices``.

    For every entry of ``self.flightItineraryList`` the fares in
    ``priceList`` are grouped by cabin code (``'Y'`` economy, ``'C'``
    business; other cabins are ignored) and summarised as:

    * ``*_origin`` - list price: the fare whose ``discountRate`` is 1, or,
      when there is none, ``int(max fare / max discountRate)``
    * ``*_low`` / ``*_cut`` - cheapest fare and smallest discount rate, or
      ``''`` when no fare is discounted

    All three are ``''`` when the cabin has no fares.  ``flightNo`` is the
    part of ``itineraryId`` before the first underscore.
    """
    def summarize(fares, discounts):
        # Return (list price, lowest price, lowest discount) for one cabin.
        if not fares:
            return '', '', ''
        try:
            origin = fares[discounts.index(1)]
        except ValueError:
            # No undiscounted fare: estimate the list price.  A top discount
            # of 0 carries no information, so fall back to the highest fare
            # rather than dividing by zero.
            top_discount = max(discounts)
            origin = int(max(fares) / top_discount) if top_discount else max(fares)
        if min(discounts) != 1:
            return origin, min(fares), min(discounts)
        return origin, '', ''

    rows = []
    for itinerary in self.flightItineraryList:
        flight_no = itinerary['itineraryId'].split('_')[0]

        # cabin code -> (fares, discount rates)
        by_cabin = {'Y': ([], []), 'C': ([], [])}
        for price in itinerary['priceList']:
            bucket = by_cabin.get(price['cabin'])
            if bucket is None:
                continue
            bucket[0].append(price['adultPrice'])
            bucket[1].append(price['priceUnitList'][0]['flightSeatList'][0]['discountRate'])

        economy_origin, economy_low, economy_cut = summarize(*by_cabin['Y'])
        bussiness_origin, bussiness_low, bussiness_cut = summarize(*by_cabin['C'])

        rows.append({'flightNo': flight_no,
                     'economy_origin': economy_origin, 'economy_low': economy_low, 'economy_cut': economy_cut,
                     'bussiness_origin': bussiness_origin, 'bussiness_low': bussiness_low,
                     'bussiness_cut': bussiness_cut})

    # One concat for all rows instead of one per itinerary.
    if rows:
        self.prices = pd.concat([self.prices, pd.DataFrame(rows)], ignore_index=True)
def demain(self, citys):
    """Scrape every city pair in ``citys`` for tomorrow, then quit the browser.

    Args:
        citys: sequence of ``[departure, arrival]`` pairs.  The first pair
            goes through ``self.getpage()`` (full page setup); later pairs
            only swap the cities via ``self.changecity()``.

    ``self.date`` is set to tomorrow as ``YYYY-MM-DD`` and ``self.city`` to
    the pair currently being processed.
    """
    # Departure date: tomorrow.
    self.date = (dt.now() + timedelta(days=1)).strftime('%Y-%m-%d')

    try:
        # Decide "first run" by position, not by value lookup, so a pair
        # equal to the first one appearing later is not treated as first.
        for position, city in enumerate(citys):
            self.city = city
            if position == 0:
                self.getpage()
            else:
                self.changecity()
    finally:
        # Always release the browser, even when scraping raised.
        self.driver.quit()
def proc_data(filename,df,interval=8):
    """Save the raw fare table, derive full prices, and save daily averages.

    Args:
        filename: base name for the output files (no extension).
        df: fare rows with the columns ``discount`` (text label), ``price``,
            ``出发日期`` and ``qry_dt`` (both ``YYYY-MM-DD`` strings).  It is
            modified in place: re-indexed, filtered, and given the columns
            ``全票价`` and ``日期差``.
        interval: rows whose departure date is more than this many days
            after the query date are discarded.

    Writes ``<filename>.csv`` (raw input) and ``result-<filename>.csv``
    (per departure date: mean price, mean full price and their ratio),
    both encoded as GB18030.
    """
    # Save the raw data first, exactly as received.
    df.to_csv(filename+'.csv',encoding='GB18030')

    # The caller builds ``df`` with pd.concat, so index labels repeat; give
    # every row its own label so ``df[col][i]`` yields a scalar.
    df.reset_index(drop=True,inplace=True)

    # Float from the start: discounted full prices are fractional.
    df['全票价']=0.0
    df['日期差']=None

    number=re.compile(r'\d+\.?\d*')

    for i in df.index:
        try:
            label=df['discount'][i]

            # Only economy fares are of interest.
            if '经济' not in label:
                df.drop(index=i,inplace=True)
                continue

            discounted='折' in label
            full_fare='全价' in label or '经典' in label
            if not (discounted or full_fare):
                continue

            # Discard rows queried too long before departure.
            delta=(datetime.datetime.strptime(df['出发日期'][i],'%Y-%m-%d')
                   -datetime.datetime.strptime(df['qry_dt'][i],'%Y-%m-%d'))
            if delta.days>interval:
                df.drop(index=i,inplace=True)
                continue
            df.loc[i,'日期差']=delta.days

            if discounted:
                # Label carries the discount in tenths (e.g. "5折" = 50%).
                discount=float(number.findall(label)[0])
                df.loc[i,'全票价']=df['price'][i]/discount*10
            else:
                df.loc[i,'全票价']=df['price'][i]
        except (TypeError,ValueError,IndexError,KeyError,ZeroDivisionError):
            # Unparseable row: drop it and carry on.
            df.drop(index=i,inplace=True)

    avg_full_price=df[df['全票价']!=0].groupby(['出发日期'])[['全票价']].mean()
    avg_price=df[df['全票价']!=df['price']].groupby(['出发日期'])[['price']].mean()
    result=pd.concat([avg_price,avg_full_price],axis=1)

    result['折扣']=result['price']/result['全票价']

    # Save the processed data.
    result.to_csv('result-'+filename+'.csv',encoding='GB18030')
+ * Generated by: https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions + * Generated on: Mon, 09 Sep 2024 06:24:45 GMT + * License: MIT + */ +(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. 
Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, 
handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make 
oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:'utils => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, \'chrome\')`\n Object.defineProperty(window, \'chrome\', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We\'ll extend that later\n })\n }\n\n // That means we\'re running headful and don\'t need to mock anything\n if (\'app\' in window.chrome) {\n return // Nothing to do here\n }\n\n const makeError = {\n ErrorInInvocation: fn => {\n const err = new TypeError(`Error in invocation of app.${fn}()`)\n return utils.stripErrorWithAnchor(\n err,\n `at ${fn} (eval at `\n )\n }\n }\n\n // There\'s a some static data in that property which doesn\'t seem to change,\n // we should periodically check for updates: `JSON.stringify(window.app, null, 2)`\n const STATIC_DATA = JSON.parse(\n `\n{\n "isInstalled": false,\n "InstallState": {\n "DISABLED": 
"disabled",\n "INSTALLED": "installed",\n "NOT_INSTALLED": "not_installed"\n },\n "RunningState": {\n "CANNOT_RUN": "cannot_run",\n "READY_TO_RUN": "ready_to_run",\n "RUNNING": "running"\n }\n}\n `.trim()\n )\n\n window.chrome.app = {\n ...STATIC_DATA,\n\n get isInstalled() {\n return false\n },\n\n getDetails: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getDetails`)\n }\n return null\n },\n getIsInstalled: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getIsInstalled`)\n }\n return false\n },\n runningState: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`runningState`)\n }\n return \'cannot_run\'\n }\n }\n utils.patchToStringNested(window.chrome.app)\n }',_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the 
Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? 
utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return 
handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = 
val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"utils => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n if ('csi' in window.chrome) {\n return // Nothing to do here\n }\n\n // Check 
that the Navigation Timing API v1 is available, we need that\n if (!window.performance || !window.performance.timing) {\n return\n }\n\n const { timing } = window.performance\n\n window.chrome.csi = function() {\n return {\n onloadT: timing.domContentLoadedEventEnd,\n startE: timing.navigationStart,\n pageT: Date.now() - timing.navigationStart,\n tran: 15 // Transition type or something\n }\n }\n utils.patchToString(window.chrome.csi)\n }",_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found 
for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? 
utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return 
handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = 
val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts }) => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n if ('loadTimes' in window.chrome) {\n return // Nothing to do here\n 
}\n\n // Check that the Navigation Timing API v1 + v2 is available, we need that\n if (\n !window.performance ||\n !window.performance.timing ||\n !window.PerformancePaintTiming\n ) {\n return\n }\n\n const { performance } = window\n\n // Some stuff is not available on about:blank as it requires a navigation to occur,\n // let's harden the code to not fail then:\n const ntEntryFallback = {\n nextHopProtocol: 'h2',\n type: 'other'\n }\n\n // The API exposes some funky info regarding the connection\n const protocolInfo = {\n get connectionInfo() {\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ntEntry.nextHopProtocol\n },\n get npnNegotiatedProtocol() {\n // NPN is deprecated in favor of ALPN, but this implementation returns the\n // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n ? ntEntry.nextHopProtocol\n : 'unknown'\n },\n get navigationType() {\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ntEntry.type\n },\n get wasAlternateProtocolAvailable() {\n // The Alternate-Protocol header is deprecated in favor of Alt-Svc\n // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this\n // should always return false.\n return false\n },\n get wasFetchedViaSpdy() {\n // SPDY is deprecated in favor of HTTP/2, but this implementation returns\n // true for HTTP/2 or HTTP2+QUIC/39 as well.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n },\n get wasNpnNegotiated() {\n // NPN is deprecated in favor of ALPN, but this implementation returns true\n // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 
'hq'].includes(ntEntry.nextHopProtocol)\n }\n }\n\n const { timing } = window.performance\n\n // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3\n function toFixed(num, fixed) {\n var re = new RegExp('^-?\\\\d+(?:.\\\\d{0,' + (fixed || -1) + '})?')\n return num.toString().match(re)[0]\n }\n\n const timingInfo = {\n get firstPaintAfterLoadTime() {\n // This was never actually implemented and always returns 0.\n return 0\n },\n get requestTime() {\n return timing.navigationStart / 1000\n },\n get startLoadTime() {\n return timing.navigationStart / 1000\n },\n get commitLoadTime() {\n return timing.responseStart / 1000\n },\n get finishDocumentLoadTime() {\n return timing.domContentLoadedEventEnd / 1000\n },\n get finishLoadTime() {\n return timing.loadEventEnd / 1000\n },\n get firstPaintTime() {\n const fpEntry = performance.getEntriesByType('paint')[0] || {\n startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`)\n }\n return toFixed(\n (fpEntry.startTime + performance.timeOrigin) / 1000,\n 3\n )\n }\n }\n\n window.chrome.loadTimes = function() {\n return {\n ...protocolInfo,\n ...timingInfo\n }\n }\n utils.patchToString(window.chrome.loadTimes)\n }",_args:[{opts:{}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they 
throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? 
utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return 
handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = 
val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts, STATIC_DATA }) => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n const existsAlready = 'runtime' in window.chrome\n // 
`chrome.runtime` is only exposed on secure origins\n const isNotSecure = !window.location.protocol.startsWith('https')\n if (existsAlready || (isNotSecure && !opts.runOnInsecureOrigins)) {\n return // Nothing to do here\n }\n\n window.chrome.runtime = {\n // There's a bunch of static data in that property which doesn't seem to change,\n // we should periodically check for updates: `JSON.stringify(window.chrome.runtime, null, 2)`\n ...STATIC_DATA,\n // `chrome.runtime.id` is extension related and returns undefined in Chrome\n get id() {\n return undefined\n },\n // These two require more sophisticated mocks\n connect: null,\n sendMessage: null\n }\n\n const makeCustomRuntimeErrors = (preamble, method, extensionId) => ({\n NoMatchingSignature: new TypeError(\n preamble + `No matching signature.`\n ),\n MustSpecifyExtensionID: new TypeError(\n preamble +\n `${method} called from a webpage must specify an Extension ID (string) for its first argument.`\n ),\n InvalidExtensionID: new TypeError(\n preamble + `Invalid extension id: '${extensionId}'`\n )\n })\n\n // Valid Extension IDs are 32 characters in length and use the letter `a` to `p`:\n // https://source.chromium.org/chromium/chromium/src/+/master:components/crx_file/id_util.cc;drc=14a055ccb17e8c8d5d437fe080faba4c6f07beac;l=90\n const isValidExtensionID = str =>\n str.length === 32 && str.toLowerCase().match(/^[a-p]+$/)\n\n /** Mock `chrome.runtime.sendMessage` */\n const sendMessageHandler = {\n apply: function(target, ctx, args) {\n const [extensionId, options, responseCallback] = args || []\n\n // Define custom errors\n const errorPreamble = `Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback): `\n const Errors = makeCustomRuntimeErrors(\n errorPreamble,\n `chrome.runtime.sendMessage()`,\n extensionId\n )\n\n // Check if the call signature looks ok\n const noArguments = args.length === 0\n const tooManyArguments = 
args.length > 4\n const incorrectOptions = options && typeof options !== 'object'\n const incorrectResponseCallback =\n responseCallback && typeof responseCallback !== 'function'\n if (\n noArguments ||\n tooManyArguments ||\n incorrectOptions ||\n incorrectResponseCallback\n ) {\n throw Errors.NoMatchingSignature\n }\n\n // At least 2 arguments are required before we even validate the extension ID\n if (args.length < 2) {\n throw Errors.MustSpecifyExtensionID\n }\n\n // Now let's make sure we got a string as extension ID\n if (typeof extensionId !== 'string') {\n throw Errors.NoMatchingSignature\n }\n\n if (!isValidExtensionID(extensionId)) {\n throw Errors.InvalidExtensionID\n }\n\n return undefined // Normal behavior\n }\n }\n utils.mockWithProxy(\n window.chrome.runtime,\n 'sendMessage',\n function sendMessage() {},\n sendMessageHandler\n )\n\n /**\n * Mock `chrome.runtime.connect`\n *\n * @see https://developer.chrome.com/apps/runtime#method-connect\n */\n const connectHandler = {\n apply: function(target, ctx, args) {\n const [extensionId, connectInfo] = args || []\n\n // Define custom errors\n const errorPreamble = `Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): `\n const Errors = makeCustomRuntimeErrors(\n errorPreamble,\n `chrome.runtime.connect()`,\n extensionId\n )\n\n // Behavior differs a bit from sendMessage:\n const noArguments = args.length === 0\n const emptyStringArgument = args.length === 1 && extensionId === ''\n if (noArguments || emptyStringArgument) {\n throw Errors.MustSpecifyExtensionID\n }\n\n const tooManyArguments = args.length > 2\n const incorrectConnectInfoType =\n connectInfo && typeof connectInfo !== 'object'\n\n if (tooManyArguments || incorrectConnectInfoType) {\n throw Errors.NoMatchingSignature\n }\n\n const extensionIdIsString = typeof extensionId === 'string'\n if (extensionIdIsString && extensionId === '') {\n throw Errors.MustSpecifyExtensionID\n }\n if (extensionIdIsString 
&& !isValidExtensionID(extensionId)) {\n throw Errors.InvalidExtensionID\n }\n\n // There's another edge-case here: extensionId is optional so we might find a connectInfo object as first param, which we need to validate\n const validateConnectInfo = ci => {\n // More than a first param connectInfo as been provided\n if (args.length > 1) {\n throw Errors.NoMatchingSignature\n }\n // An empty connectInfo has been provided\n if (Object.keys(ci).length === 0) {\n throw Errors.MustSpecifyExtensionID\n }\n // Loop over all connectInfo props an check them\n Object.entries(ci).forEach(([k, v]) => {\n const isExpected = ['name', 'includeTlsChannelId'].includes(k)\n if (!isExpected) {\n throw new TypeError(\n errorPreamble + `Unexpected property: '${k}'.`\n )\n }\n const MismatchError = (propName, expected, found) =>\n TypeError(\n errorPreamble +\n `Error at property '${propName}': Invalid type: expected ${expected}, found ${found}.`\n )\n if (k === 'name' && typeof v !== 'string') {\n throw MismatchError(k, 'string', typeof v)\n }\n if (k === 'includeTlsChannelId' && typeof v !== 'boolean') {\n throw MismatchError(k, 'boolean', typeof v)\n }\n })\n }\n if (typeof extensionId === 'object') {\n validateConnectInfo(extensionId)\n throw Errors.MustSpecifyExtensionID\n }\n\n // Unfortunately even when the connect fails Chrome will return an object with methods we need to mock as well\n return utils.patchToStringNested(makeConnectResponse())\n }\n }\n utils.mockWithProxy(\n window.chrome.runtime,\n 'connect',\n function connect() {},\n connectHandler\n )\n\n function makeConnectResponse() {\n const onSomething = () => ({\n addListener: function addListener() {},\n dispatch: function dispatch() {},\n hasListener: function hasListener() {},\n hasListeners: function hasListeners() {\n return false\n },\n removeListener: function removeListener() {}\n })\n\n const response = {\n name: '',\n sender: undefined,\n disconnect: function disconnect() {},\n onDisconnect: onSomething(),\n 
onMessage: onSomething(),\n postMessage: function postMessage() {\n if (!arguments.length) {\n throw new TypeError(`Insufficient number of arguments.`)\n }\n throw new Error(`Attempting to use a disconnected port object`)\n }\n }\n return response\n }\n }",_args:[{opts:{runOnInsecureOrigins:!1},STATIC_DATA:{OnInstalledReason:{CHROME_UPDATE:"chrome_update",INSTALL:"install",SHARED_MODULE_UPDATE:"shared_module_update",UPDATE:"update"},OnRestartRequiredReason:{APP_UPDATE:"app_update",OS_UPDATE:"os_update",PERIODIC:"periodic"},PlatformArch:{ARM:"arm",ARM64:"arm64",MIPS:"mips",MIPS64:"mips64",X86_32:"x86-32",X86_64:"x86-64"},PlatformNaclArch:{ARM:"arm",MIPS:"mips",MIPS64:"mips64",X86_32:"x86-32",X86_64:"x86-64"},PlatformOs:{ANDROID:"android",CROS:"cros",LINUX:"linux",MAC:"mac",OPENBSD:"openbsd",WIN:"win"},RequestUpdateCheckStatus:{NO_UPDATE:"no_update",THROTTLED:"throttled",UPDATE_AVAILABLE:"update_available"}}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if 
(!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? 
utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return 
handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = 
val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"utils => {\n /**\n * Input might look funky, we need to normalize it so e.g. 
whitespace isn't an issue for our spoofing.\n *\n * @example\n * video/webm; codecs=\"vp8, vorbis\"\n * video/mp4; codecs=\"avc1.42E01E\"\n * audio/x-m4a;\n * audio/ogg; codecs=\"vorbis\"\n * @param {String} arg\n */\n const parseInput = arg => {\n const [mime, codecStr] = arg.trim().split(';')\n let codecs = []\n if (codecStr && codecStr.includes('codecs=\"')) {\n codecs = codecStr\n .trim()\n .replace(`codecs=\"`, '')\n .replace(`\"`, '')\n .trim()\n .split(',')\n .filter(x => !!x)\n .map(x => x.trim())\n }\n return {\n mime,\n codecStr,\n codecs\n }\n }\n\n const canPlayType = {\n // Intercept certain requests\n apply: function(target, ctx, args) {\n if (!args || !args.length) {\n return target.apply(ctx, args)\n }\n const { mime, codecs } = parseInput(args[0])\n // This specific mp4 codec is missing in Chromium\n if (mime === 'video/mp4') {\n if (codecs.includes('avc1.42E01E')) {\n return 'probably'\n }\n }\n // This mimetype is only supported if no codecs are specified\n if (mime === 'audio/x-m4a' && !codecs.length) {\n return 'maybe'\n }\n\n // This mimetype is only supported if no codecs are specified\n if (mime === 'audio/aac' && !codecs.length) {\n return 'probably'\n }\n // Everything else as usual\n return target.apply(ctx, args)\n }\n }\n\n /* global HTMLMediaElement */\n utils.replaceWithProxy(\n HTMLMediaElement.prototype,\n 'canPlayType',\n canPlayType\n )\n }",_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n 
}\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. 
[as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. 
value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, 
handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj 
= { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts }) => {\n utils.replaceGetterWithProxy(\n Object.getPrototypeOf(navigator),\n 'hardwareConcurrency',\n utils.makeHandler().getterValue(opts.hardwareConcurrency)\n )\n 
}",_args:[{opts:{hardwareConcurrency:4}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. 
[as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, 
handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make 
oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts }) => {\n const languages = opts.languages.length\n ? 
opts.languages\n : ['en-US', 'en']\n utils.replaceGetterWithProxy(\n Object.getPrototypeOf(navigator),\n 'languages',\n utils.makeHandler().getterValue(Object.freeze([...languages]))\n )\n }",_args:[{opts:{languages:[]}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. 
Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, 
handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make 
oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, opts) => {\n const isSecure = document.location.protocol.startsWith('https')\n\n // In headful on secure origins the permission should be \"default\", not \"denied\"\n if (isSecure) {\n utils.replaceGetterWithProxy(Notification, 'permission', {\n apply() {\n return 'default'\n }\n })\n }\n\n // Another weird behavior:\n // On insecure origins in headful the state is \"denied\",\n // whereas in headless it's \"prompt\"\n if (!isSecure) {\n const handler = {\n apply(target, ctx, args) {\n const param = (args || [])[0]\n\n const isNotifications =\n param && param.name && param.name === 'notifications'\n if (!isNotifications) {\n return utils.cache.Reflect.apply(...arguments)\n }\n\n return Promise.resolve(\n Object.setPrototypeOf(\n {\n state: 'denied',\n onchange: null\n },\n PermissionStatus.prototype\n )\n )\n }\n }\n // Note: Don't use `Object.getPrototypeOf` here\n utils.replaceWithProxy(Permissions.prototype, 'query', 
handler)\n }\n }",_args:[{}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. 
[as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, 
handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make 
oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { fns, data }) => {\n fns = utils.materializeFns(fns)\n\n // That means we're running headful\n const hasPlugins = 'plugins' in navigator && navigator.plugins.length\n if (hasPlugins) {\n return // nothing to do here\n }\n\n const mimeTypes = fns.generateMimeTypeArray(utils, fns)(data.mimeTypes)\n const plugins = fns.generatePluginArray(utils, fns)(data.plugins)\n\n // Plugin and MimeType cross-reference each other, let's do that now\n // Note: We're looping through `data.plugins` here, not the generated `plugins`\n for (const pluginData of data.plugins) {\n pluginData.__mimeTypes.forEach((type, index) => {\n plugins[pluginData.name][index] = mimeTypes[type]\n\n Object.defineProperty(plugins[pluginData.name], type, {\n value: mimeTypes[type],\n writable: false,\n enumerable: false, // Not enumerable\n configurable: true\n })\n Object.defineProperty(mimeTypes[type], 'enabledPlugin', {\n value:\n type === 'application/x-pnacl'\n 
? mimeTypes['application/x-nacl'].enabledPlugin // these reference the same plugin, so we need to re-use the Proxy in order to avoid leaks\n : new Proxy(plugins[pluginData.name], {}), // Prevent circular references\n writable: false,\n enumerable: false, // Important: `JSON.stringify(navigator.plugins)`\n configurable: true\n })\n })\n }\n\n const patchNavigator = (name, value) =>\n utils.replaceProperty(Object.getPrototypeOf(navigator), name, {\n get() {\n return value\n }\n })\n\n patchNavigator('mimeTypes', mimeTypes)\n patchNavigator('plugins', plugins)\n\n // All done\n }",_args:[{fns:{generateMimeTypeArray:"(utils, fns) => mimeTypesData => {\n return fns.generateMagicArray(utils, fns)(\n mimeTypesData,\n MimeTypeArray.prototype,\n MimeType.prototype,\n 'type'\n )\n}",generatePluginArray:"(utils, fns) => pluginsData => {\n return fns.generateMagicArray(utils, fns)(\n pluginsData,\n PluginArray.prototype,\n Plugin.prototype,\n 'name'\n )\n}",generateMagicArray:"(utils, fns) =>\n function(\n dataArray = [],\n proto = MimeTypeArray.prototype,\n itemProto = MimeType.prototype,\n itemMainProp = 'type'\n ) {\n // Quick helper to set props with the same descriptors vanilla is using\n const defineProp = (obj, prop, value) =>\n Object.defineProperty(obj, prop, {\n value,\n writable: false,\n enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)`\n configurable: true\n })\n\n // Loop over our fake data and construct items\n const makeItem = data => {\n const item = {}\n for (const prop of Object.keys(data)) {\n if (prop.startsWith('__')) {\n continue\n }\n defineProp(item, prop, data[prop])\n }\n return patchItem(item, data)\n }\n\n const patchItem = (item, data) => {\n let descriptor = Object.getOwnPropertyDescriptors(item)\n\n // Special case: Plugins have a magic length property which is not enumerable\n // e.g. 
`navigator.plugins[i].length` should always be the length of the assigned mimeTypes\n if (itemProto === Plugin.prototype) {\n descriptor = {\n ...descriptor,\n length: {\n value: data.__mimeTypes.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n }\n }\n\n // We need to spoof a specific `MimeType` or `Plugin` object\n const obj = Object.create(itemProto, descriptor)\n\n // Virtually all property keys are not enumerable in vanilla\n const blacklist = [...Object.keys(data), 'length', 'enabledPlugin']\n return new Proxy(obj, {\n ownKeys(target) {\n return Reflect.ownKeys(target).filter(k => !blacklist.includes(k))\n },\n getOwnPropertyDescriptor(target, prop) {\n if (blacklist.includes(prop)) {\n return undefined\n }\n return Reflect.getOwnPropertyDescriptor(target, prop)\n }\n })\n }\n\n const magicArray = []\n\n // Loop through our fake data and use that to create convincing entities\n dataArray.forEach(data => {\n magicArray.push(makeItem(data))\n })\n\n // Add direct property access based on types (e.g. 
`obj['application/pdf']`) afterwards\n magicArray.forEach(entry => {\n defineProp(magicArray, entry[itemMainProp], entry)\n })\n\n // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)`\n const magicArrayObj = Object.create(proto, {\n ...Object.getOwnPropertyDescriptors(magicArray),\n\n // There's one ugly quirk we unfortunately need to take care of:\n // The `MimeTypeArray` prototype has an enumerable `length` property,\n // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`.\n // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap.\n length: {\n value: magicArray.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n })\n\n // Generate our functional function mocks :-)\n const functionMocks = fns.generateFunctionMocks(utils)(\n proto,\n itemMainProp,\n magicArray\n )\n\n // We need to overlay our custom object with a JS Proxy\n const magicArrayObjProxy = new Proxy(magicArrayObj, {\n get(target, key = '') {\n // Redirect function calls to our custom proxied versions mocking the vanilla behavior\n if (key === 'item') {\n return functionMocks.item\n }\n if (key === 'namedItem') {\n return functionMocks.namedItem\n }\n if (proto === PluginArray.prototype && key === 'refresh') {\n return functionMocks.refresh\n }\n // Everything else can pass through as normal\n return utils.cache.Reflect.get(...arguments)\n },\n ownKeys(target) {\n // There are a couple of quirks where the original property demonstrates \"magical\" behavior that makes no sense\n // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length`\n // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly\n // For that reason we just completely 
fake the available property names based on our data to match what regular Chrome is doing\n // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing\n const keys = []\n const typeProps = magicArray.map(mt => mt[itemMainProp])\n typeProps.forEach((_, i) => keys.push(`${i}`))\n typeProps.forEach(propName => keys.push(propName))\n return keys\n },\n getOwnPropertyDescriptor(target, prop) {\n if (prop === 'length') {\n return undefined\n }\n return Reflect.getOwnPropertyDescriptor(target, prop)\n }\n })\n\n return magicArrayObjProxy\n }",generateFunctionMocks:"utils => (\n proto,\n itemMainProp,\n dataArray\n) => ({\n /** Returns the MimeType object with the specified index. */\n item: utils.createProxy(proto.item, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'item' on '${\n proto[Symbol.toStringTag]\n }': 1 argument required, but only 0 present.`\n )\n }\n // Special behavior alert:\n // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup\n // - If anything else than an integer (including as string) is provided it will return the first entry\n const isInteger = args[0] && Number.isInteger(Number(args[0])) // Cast potential string to number first, then check for integer\n // Note: Vanilla never returns `undefined`\n return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null\n }\n }),\n /** Returns the MimeType object with the specified name. */\n namedItem: utils.createProxy(proto.namedItem, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'namedItem' on '${\n proto[Symbol.toStringTag]\n }': 1 argument required, but only 0 present.`\n )\n }\n return dataArray.find(mt => mt[itemMainProp] === args[0]) || null // Not `undefined`!\n }\n }),\n /** Does nothing and shall return nothing */\n refresh: proto.refresh\n ? 
utils.createProxy(proto.refresh, {\n apply(target, ctx, args) {\n return undefined\n }\n })\n : undefined\n})"},data:{mimeTypes:[{type:"application/pdf",suffixes:"pdf",description:"",__pluginName:"Chrome PDF Viewer"},{type:"application/x-google-chrome-pdf",suffixes:"pdf",description:"Portable Document Format",__pluginName:"Chrome PDF Plugin"},{type:"application/x-nacl",suffixes:"",description:"Native Client Executable",__pluginName:"Native Client"},{type:"application/x-pnacl",suffixes:"",description:"Portable Native Client Executable",__pluginName:"Native Client"}],plugins:[{name:"Chrome PDF Plugin",filename:"internal-pdf-viewer",description:"Portable Document Format",__mimeTypes:["application/x-google-chrome-pdf"]},{name:"Chrome PDF Viewer",filename:"mhjfbmdgcfjbbpaeojofohoefgiehjai",description:"",__mimeTypes:["application/pdf"]},{name:"Native Client",filename:"internal-nacl-plugin",description:"",__mimeTypes:["application/x-nacl","application/x-pnacl"]}]}}]}),!1===navigator.webdriver||void 0===navigator.webdriver||delete Object.getPrototypeOf(navigator).webdriver,(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n 
return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? 
utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return 
handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = 
val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, opts) => {\n const getParameterProxyHandler = {\n apply: function(target, ctx, args) {\n const param = (args || [])[0]\n const result = utils.cache.Reflect.apply(target, ctx, args)\n // UNMASKED_VENDOR_WEBGL\n if (param === 37445) {\n return opts.vendor || 'Intel Inc.' 
// default in headless: Google Inc.\n }\n // UNMASKED_RENDERER_WEBGL\n if (param === 37446) {\n return opts.renderer || 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader\n }\n return result\n }\n }\n\n // There's more than one WebGL rendering context\n // https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility\n // To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)\n const addProxy = (obj, propName) => {\n utils.replaceWithProxy(obj, propName, getParameterProxyHandler)\n }\n // For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:\n addProxy(WebGLRenderingContext.prototype, 'getParameter')\n addProxy(WebGL2RenderingContext.prototype, 'getParameter')\n }",_args:[{}]}),(()=>{try{if(window.outerWidth&&window.outerHeight)return;const n=85;window.outerWidth=window.innerWidth,window.outerHeight=window.innerHeight+n}catch(n){}})(),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // 
Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? 
utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return 
handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = 
val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, opts) => {\n try {\n // Adds a contentWindow proxy to the provided iframe element\n const addContentWindowProxy = iframe => {\n const contentWindowProxy = {\n get(target, key) {\n // Now to the interesting part:\n // We actually make this thing behave like a regular iframe window,\n // by intercepting calls to e.g. `.self` and redirect it to the correct thing. 
:)\n // That makes it possible for these assertions to be correct:\n // iframe.contentWindow.self === window.top // must be false\n if (key === 'self') {\n return this\n }\n // iframe.contentWindow.frameElement === iframe // must be true\n if (key === 'frameElement') {\n return iframe\n }\n // Intercept iframe.contentWindow[0] to hide the property 0 added by the proxy.\n if (key === '0') {\n return undefined\n }\n return Reflect.get(target, key)\n }\n }\n\n if (!iframe.contentWindow) {\n const proxy = new Proxy(window, contentWindowProxy)\n Object.defineProperty(iframe, 'contentWindow', {\n get() {\n return proxy\n },\n set(newValue) {\n return newValue // contentWindow is immutable\n },\n enumerable: true,\n configurable: false\n })\n }\n }\n\n // Handles iframe element creation, augments `srcdoc` property so we can intercept further\n const handleIframeCreation = (target, thisArg, args) => {\n const iframe = target.apply(thisArg, args)\n\n // We need to keep the originals around\n const _iframe = iframe\n const _srcdoc = _iframe.srcdoc\n\n // Add hook for the srcdoc property\n // We need to be very surgical here to not break other iframes by accident\n Object.defineProperty(iframe, 'srcdoc', {\n configurable: true, // Important, so we can reset this later\n get: function() {\n return _srcdoc\n },\n set: function(newValue) {\n addContentWindowProxy(this)\n // Reset property, the hook is only needed once\n Object.defineProperty(iframe, 'srcdoc', {\n configurable: false,\n writable: false,\n value: _srcdoc\n })\n _iframe.srcdoc = newValue\n }\n })\n return iframe\n }\n\n // Adds a hook to intercept iframe creation events\n const addIframeCreationSniffer = () => {\n /* global document */\n const createElementHandler = {\n // Make toString() native\n get(target, key) {\n return Reflect.get(target, key)\n },\n apply: function(target, thisArg, args) {\n const isIframe =\n args && args.length && `${args[0]}`.toLowerCase() === 'iframe'\n if (!isIframe) {\n // Everything 
as usual\n return target.apply(thisArg, args)\n } else {\n return handleIframeCreation(target, thisArg, args)\n }\n }\n }\n // All this just due to iframes with srcdoc bug\n utils.replaceWithProxy(\n document,\n 'createElement',\n createElementHandler\n )\n }\n\n // Let's go\n addIframeCreationSniffer()\n } catch (err) {\n // console.warn(err)\n }\n }",_args:[]}); \ No newline at end of file diff --git a/DataMaintenance/xlsx_output/南宁-天津.xlsx b/DataMaintenance/xlsx_output/南宁-天津.xlsx new file mode 100644 index 0000000..b56a77d Binary files /dev/null and b/DataMaintenance/xlsx_output/南宁-天津.xlsx differ diff --git a/DataMaintenance/xlsx_output/天津-南宁.xlsx b/DataMaintenance/xlsx_output/天津-南宁.xlsx new file mode 100644 index 0000000..289c586 Binary files /dev/null and b/DataMaintenance/xlsx_output/天津-南宁.xlsx differ diff --git a/DataMaintenance/xlsx_output/天津-泉州.xlsx b/DataMaintenance/xlsx_output/天津-泉州.xlsx new file mode 100644 index 0000000..d6e8d2a Binary files /dev/null and b/DataMaintenance/xlsx_output/天津-泉州.xlsx differ diff --git a/DataMaintenance/xlsx_output/天津-贵阳.xlsx b/DataMaintenance/xlsx_output/天津-贵阳.xlsx new file mode 100644 index 0000000..0b407d3 Binary files /dev/null and b/DataMaintenance/xlsx_output/天津-贵阳.xlsx differ diff --git a/DataMaintenance/xlsx_output/天津-郑州.xlsx b/DataMaintenance/xlsx_output/天津-郑州.xlsx new file mode 100644 index 0000000..e8db69b Binary files /dev/null and b/DataMaintenance/xlsx_output/天津-郑州.xlsx differ diff --git a/DataMaintenance/xlsx_output/天津-长春.xlsx b/DataMaintenance/xlsx_output/天津-长春.xlsx new file mode 100644 index 0000000..53dac8b Binary files /dev/null and b/DataMaintenance/xlsx_output/天津-长春.xlsx differ diff --git a/DataMaintenance/xlsx_output/泉州-天津.xlsx b/DataMaintenance/xlsx_output/泉州-天津.xlsx new file mode 100644 index 0000000..47e2211 Binary files /dev/null and b/DataMaintenance/xlsx_output/泉州-天津.xlsx differ diff --git a/DataMaintenance/xlsx_output/贵阳-天津.xlsx b/DataMaintenance/xlsx_output/贵阳-天津.xlsx new file mode 100644 index 
0000000..7c5c9f1 Binary files /dev/null and b/DataMaintenance/xlsx_output/贵阳-天津.xlsx differ diff --git a/DataMaintenance/xlsx_output/长春-天津.xlsx b/DataMaintenance/xlsx_output/长春-天津.xlsx new file mode 100644 index 0000000..7da1325 Binary files /dev/null and b/DataMaintenance/xlsx_output/长春-天津.xlsx differ