|
|
|
@ -0,0 +1,48 @@
|
|
|
|
|
from ClassDriver import ClassDriver
|
|
|
|
|
from ClassSearch import ClassSearch
|
|
|
|
|
from Common import get_movie_info, save_xls
|
|
|
|
|
import threading, time
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
def main():
    """Crawl the requested number of result pages and export the movie data.

    Steps, in order:

    1. Build a ``ClassDriver`` and a ``ClassSearch`` and obtain the browser
       driver and wait helper from the former.
    2. Run ``Search.search``; its return value is treated as the total
       number of available pages and shown to the user.
    3. Ask the user how many pages to fetch (re-prompting until an integer
       is entered, capped at the total) and call ``Search.next_page`` for
       each page from 1 up to that number.
    4. Start 16 worker threads running ``get_movie_info`` over
       ``Search.data_queue``, each with a randomly chosen entry of
       ``Driver.IPS``, and wait for all of them.
    5. Pass the collected results to ``save_xls`` with the file name
       ``6.xls``.

    The driver is closed in a ``finally`` clause, so it is released even
    when any of the steps above raises.
    """
    Driver = ClassDriver()
    Search = ClassSearch()
    driver = Driver.get_Options()
    WAIT = Driver.get_WAIT()
    data_queue = Search.data_queue
    # Plain list shared by all workers; get_movie_info receives it as its
    # output container.
    result_queue = []
    try:
        total = Search.search(Driver.baseurl, driver, WAIT)
        print(f'一共有{total}页。')

        # Keep asking until the answer parses as an integer; previously a
        # stray non-numeric keystroke raised ValueError and aborted the run
        # after the search had already been performed.
        while True:
            answer = input("输入需要获取的页数: ")
            try:
                num = int(answer)
            except ValueError:
                print("请输入一个整数。")
                continue
            break

        # Never request more pages than the search reported.
        if num > total:
            num = total

        # A value below 1 yields an empty range, i.e. no page is fetched.
        for page in range(1, int(num + 1)):
            Search.next_page(page, driver, WAIT)
            print("获取成功")

        # Process the collected data.
        print("多线程爬取信息。。。")
        # The original author reports 16 threads as the most efficient count.
        worker_count = 16
        workers = []
        for _ in range(worker_count):
            # Pick a random proxy IP for this worker.
            proxy_ip = random.choice(Driver.IPS)
            worker = threading.Thread(
                target=get_movie_info,
                args=(data_queue, result_queue, proxy_ip),
            )
            worker.start()
            workers.append(worker)

        # Wait for every worker before exporting the results.
        for worker in workers:
            worker.join()

        save_xls(result_queue, '6.xls')
    finally:
        # NOTE(review): if this is a Selenium WebDriver, close() only closes
        # the current window; quit() may be the intended call - confirm.
        driver.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the crawl and report the wall-clock duration.
    began_at = time.time()
    main()
    elapsed = time.time() - began_at
    print(f"用时{elapsed}秒")
|