|
|
|
@ -5,7 +5,6 @@ import settings
|
|
|
|
|
import pipelines
|
|
|
|
|
import downloader
|
|
|
|
|
import redis
|
|
|
|
|
import time
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
# 全局设定
|
|
|
|
@ -92,26 +91,3 @@ def mainThread():
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: only runs when this module is executed directly,
    # not when it is imported.
    clearRedis()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 以下是本地测试
|
|
|
|
|
def print2console(response):
    """Output *response* to the command line.

    Thin wrapper that forwards ``response`` unchanged to
    ``pipelines.print2console``; the delegate's return value is discarded.
    """
    pipelines.print2console(response)
|
|
|
|
|
|
|
|
|
|
def localtest(category, sleep_seconds=10):
    """Run a local test over the locally loaded page sources of one category.

    The list of files is obtained from ``settings.getfileList`` applied to
    ``settings.FILEPATH.get(category)``. Each file is loaded through
    ``pipelines.gethtml`` in ``"cache"`` mode and the result is handed to
    ``write2csv``. Progress lines are printed before and after every page.

    Args:
        category: Key looked up in ``settings.FILEPATH``.
            NOTE(review): an unknown key makes ``.get`` return ``None``,
            which is passed straight to ``settings.getfileList`` -- confirm
            that helper tolerates it.
        sleep_seconds: Pause after each processed page, in seconds.
            Defaults to 10, the interval that used to be hard-coded.
    """
    fileList = settings.getfileList(settings.FILEPATH.get(category))

    # enumerate(start=1) replaces the hand-maintained 1-based page counter.
    for page, filename in enumerate(fileList, start=1):
        print("↓↓↓↓↓↓↓↓↓↓\npage " + str(page) + " start at " + time.ctime())
        print("正在爬取第 " + str(page) + " 页: " + filename)

        # Original note: setting the gethtml mode once, here, is enough.
        response = pipelines.gethtml(filename, gethtml_mode="cache")
        write2csv(response)

        print("page " + str(page) + " sleep at " + time.ctime())
        time.sleep(sleep_seconds)
        print("page " + str(page) + " sleep over at " + time.ctime())
|
|
|
|
|
|
|
|
|
|