|
|
|
@ -18,13 +18,13 @@ FILENAME_CSV = settings.FILENAME_CSV
|
|
|
|
|
# Connection pool shared by this module's Redis client.
# decode_responses=True asks redis-py to return str replies instead of bytes.
connection_pool = redis.ConnectionPool(
    host=REDIS_HOST,
    port=REDIS_PORT,
    password=REDIS_PASSWORD,
    decode_responses=True,
)

# Module-level Redis client that draws its connections from connection_pool.
redisconn = redis.Redis(connection_pool=connection_pool)
|
|
|
|
|
|
|
|
|
|
def getCategory(url) -> str:
|
|
|
|
|
def getCategory(url) -> str:
    """Return the BASEURL key whose base URL is contained in *url*.

    Determines which category a fetched URL belongs to, so that the
    matching output file can be opened later on.

    Args:
        url: The URL string to classify.

    Returns:
        The key of the first BASEURL entry (in mapping order) whose value
        is a substring of *url*.

    Raises:
        SystemExit: With exit status 1 when no BASEURL entry matches. A
            diagnostic is printed to stdout first.
    """
    for category, base in BASEURL.items():
        if base in url:
            return category

    print("can't get a valid baseurl! Check your settings.BASEURL.")
    # Raise SystemExit directly instead of calling the site-provided exit()
    # helper: that helper is meant for the interactive shell and is absent
    # under `python -S` or in frozen builds. A non-zero status also lets the
    # calling shell see that the run failed.
    raise SystemExit(1)
|
|
|
|
|
|
|
|
|
|
def geturlList(baseurl) -> list:
|
|
|
|
|
def geturlList(baseurl) -> list: # 产生url条目对应页数序列,用于存入到redis数据库中
|
|
|
|
|
urlList = []
|
|
|
|
|
for i in range(1, 20, 2): # 爬取10页
|
|
|
|
|
url = baseurl + r"&page=" + str(i)
|
|
|
|
|