diff --git a/middlewares.py b/middlewares.py index ccd0724..9d69043 100644 --- a/middlewares.py +++ b/middlewares.py @@ -50,7 +50,7 @@ def precheck() -> bool: # 检查redis队列情况 return True elif check == 'q': print("Exit.") - exit() + return else: print("invalid input!") return True diff --git a/milkSpider.py b/milkSpider.py index 5eab9f9..548ce48 100644 --- a/milkSpider.py +++ b/milkSpider.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import os import time import middlewares @@ -42,6 +43,11 @@ def View(): def milkSpider(): + dir = 'Catalogues' + if os.path.exists(dir): + if len(os.listdir(dir)): + print("检测到缓存目录下已有数据,调用milkSpider将有可能导致数据重复和冗余。") + print("注意:调用milkSpider将启动selenium以及requests进程,因为爬取数据量较大,往往会占用较多时间,确定吗?[c]continue or [q]quit:", end = '') flag = str(input()) if flag == "q": diff --git a/pipelines.py b/pipelines.py index 488a866..6b8f10c 100644 --- a/pipelines.py +++ b/pipelines.py @@ -174,6 +174,11 @@ def write2csv(response, filename_csv): # 写入到csv文件 fd.write(itemString) try: + + dir = "Catalogues" + if not os.path.exists(dir): + os.mkdir(dir) + if os.path.exists(filename_csv): with open(filename_csv, 'a+', encoding = 'utf-8-sig') as fd: # 存在,文件尾追加 writer(fd)