import random
import threading
import time

from ClassDriver import ClassDriver
from ClassSearch import ClassSearch
from Common import get_movie_info, save_xls

# Number of worker threads; the original author notes that 16 gave the best
# throughput.
_WORKER_COUNT = 16

# File name passed to save_xls for the collected results.
_OUTPUT_FILE = '6.xls'


def main() -> None:
    """Crawl the requested number of result pages and save the results.

    Steps, in order:

    1. Create the browser driver and wait helper via ``ClassDriver``.
    2. Run ``Search.search`` to obtain the total page count and print it.
    3. Ask the user how many pages to fetch (capped at the total) and call
       ``Search.next_page`` for each page number starting at 1.
    4. Start ``_WORKER_COUNT`` threads running ``get_movie_info``, each with a
       randomly chosen entry of ``Driver.IPS``, and wait for all of them.
    5. Pass the shared result list to ``save_xls``.

    The driver is closed in a ``finally`` clause, so it is released even if
    any step after its creation raises.

    Raises:
        ValueError: if the page count typed by the user is not an integer.
    """
    Driver = ClassDriver()
    Search = ClassSearch()
    driver = Driver.get_Options()
    # Enter the try block immediately after the driver exists so that any
    # failure below (including get_WAIT) still closes it.
    try:
        WAIT = Driver.get_WAIT()
        # presumably filled by search()/next_page() and consumed by the
        # workers -- verify against ClassSearch and Common.get_movie_info
        data_queue = Search.data_queue
        result_queue = []

        total = Search.search(Driver.baseurl, driver, WAIT)
        print(f'一共有{total}页。')

        # Never request more pages than the search reported.
        num = min(int(input("输入需要获取的页数: ")), total)
        for page in range(1, num + 1):
            Search.next_page(page, driver, WAIT)
        # NOTE(review): the source's indentation was lost; this message is
        # assumed to be printed once after all pages, not once per page.
        print("获取成功")

        # Process the collected data.
        print("多线程爬取信息。。。")
        threads = []
        for _ in range(_WORKER_COUNT):
            # Pick a random proxy IP for this worker.
            proxy_ip = random.choice(Driver.IPS)
            worker = threading.Thread(
                target=get_movie_info,
                args=(data_queue, result_queue, proxy_ip),
            )
            worker.start()
            threads.append(worker)
        for worker in threads:
            worker.join()

        save_xls(result_queue, _OUTPUT_FILE)
    finally:
        # NOTE(review): if this is a Selenium WebDriver, close() only closes
        # the current window; quit() would also end the session -- confirm.
        driver.close()


if __name__ == "__main__":
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    main()
    print(f"用时{time.perf_counter() - start}秒")