|
|
@ -65,20 +65,21 @@ class milkSpider(threading.Thread):
|
|
|
|
def __init__(self, name, url):
    """Prepare a spider thread for a single URL.

    Args:
        name: Label stored on ``self.name`` (the thread's name); it is
            printed when the thread finishes.
        url: Address of the page to fetch. It is stored on ``self.url``
            and also passed to ``getCategory`` to derive ``self.category``.
    """
    threading.Thread.__init__(self)
    self.name = name
    # Must be the ``url`` parameter: ``url1`` is not defined in this scope
    # and would raise NameError as soon as a spider is constructed.
    self.url = url
    self.category = getCategory(url)
    # The page source is downloaded in run(), i.e. on the worker thread,
    # not here in the constructor. Start with an empty placeholder so the
    # attribute exists before the thread is started.
    self.response = ""
|
|
|
|
|
|
|
|
|
|
|
|
def run(self):
    """Fetch this spider's page and hand the result to ``write2csv``.

    The download is done before the lock is taken, so fetches issued by
    different threads are not serialized by ``threadLock``; only the
    ``write2csv`` call and the progress messages run while it is held.
    """
    self.response = downloader.getsource(self.url)
    # ``with`` releases the lock even if write2csv raises; a bare
    # acquire()/release() pair would leave the lock held forever and
    # block every other spider thread waiting on it.
    # Assumes threadLock is a threading.Lock/RLock (context manager) -- TODO confirm.
    with threadLock:
        print("write2csv for '{}' was started.".format(self.url))
        write2csv(self.category, self.response)
        print("{} is done.".format(self.name))
|
|
|
|
|
|
|
|
|
|
|
|
def mainThread(threadlines = 5, flag = flag): # 线程数默认为3
|
|
|
|
def mainThread(threadlines = 16, flag = flag): # 线程数默认为3
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
threads = []
|
|
|
|
threads = []
|
|
|
|
for index in range(1, threadlines + 1):
|
|
|
|
for index in range(1, threadlines + 1):
|
|
|
@ -106,7 +107,9 @@ def mainThread(threadlines = 5, flag = flag): # 线程数默认为3
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run the crawl only when precheck() passes, and
    # report how long mainThread() took.
    # NOTE(review): indentation is not recoverable from this view; both
    # print calls are assumed to sit inside the precheck() branch -- confirm.
    if precheck():
        start_time = time.time()
        mainThread()
        elapsed = time.time() - start_time
        # Message fixed: "secends" -> "seconds", with a space before the unit.
        print("Totally spend " + str(elapsed) + " seconds")
        print("done.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -134,3 +137,8 @@ def localtest(category): # 本地加载的源码测试
|
|
|
|
time.sleep(10)
|
|
|
|
time.sleep(10)
|
|
|
|
print("page " + str(page) + " sleep over at " + time.ctime())
|
|
|
|
print("page " + str(page) + " sleep over at " + time.ctime())
|
|
|
|
page += 1
|
|
|
|
page += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clearRedis():
    """Empty the Redis queue.

    Pops one entry at a time from the list ``REDIS_LISTNAME`` through
    ``redisconn.lpop`` until ``isNullRedis()`` reports true, then prints
    a confirmation message.
    """
    while True:
        if isNullRedis():
            break
        redisconn.lpop(REDIS_LISTNAME)
    print("Redis queue has cleared.")
|