|
|
|
@ -17,6 +17,7 @@ BASEURL = settings.BASEURL
|
|
|
|
|
FILENAME_CSV = settings.FILENAME_CSV
|
|
|
|
|
|
|
|
|
|
# Module-level lock; run() holds it around its progress prints and the write2csv call.
threadLock = threading.Lock()
|
|
|
|
|
threadlines = 16 # Use 16 threads by default; do not exceed 20
|
|
|
|
|
flag = 1 # Used to identify the main thread; also the default `flag` of mainThread
|
|
|
|
|
# Connection pool built from REDIS_HOST / REDIS_PORT / REDIS_PASSWORD.
# Per redis-py, decode_responses=True makes replies come back as str instead of bytes.
connection_pool = redis.ConnectionPool(host = REDIS_HOST, port = REDIS_PORT, password = REDIS_PASSWORD, decode_responses = True)
|
|
|
|
|
# Redis client that draws its connections from connection_pool.
redisconn = redis.Redis(connection_pool = connection_pool)
|
|
|
|
@ -77,14 +78,12 @@ class milkSpider(threading.Thread):
|
|
|
|
|
def run(self):
    """Download ``self.url`` and write the result to CSV under the shared lock.

    The page is fetched with ``downloader.getsource`` before the lock is
    taken, so only the progress output and the ``write2csv`` call are
    serialised through the module-level ``threadLock``. The fetched
    source is also kept on ``self.response``.
    """
    self.response = downloader.getsource(self.url)
    # Use the lock as a context manager so it is released even when
    # write2csv (or a print) raises; a bare acquire()/release() pair would
    # leave the lock held and block every other thread waiting on it.
    with threadLock:
        print("write2csv for '{}' was started.".format(self.url))
        write2csv(self.category, self.response)
        print("{} is done.".format(self.name))
|
|
|
|
|
|
|
|
|
|
def mainThread(threadlines = 16, flag = flag): # 线程数默认为3
|
|
|
|
|
def mainThread(threadlines = threadlines, flag = flag): # 线程数默认为3
|
|
|
|
|
try:
|
|
|
|
|
threads = []
|
|
|
|
|
for index in range(1, threadlines + 1):
|
|
|
|
@ -116,9 +115,6 @@ if __name__ == '__main__':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The following is for local testing
|
|
|
|
|
# Thin wrapper: forwards `response` unchanged to pipelines.print2console
# and returns nothing itself.
def print2console(response): # Print the response to the command line
|
|
|
|
|
pipelines.print2console(response)
|
|
|
|
|