You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
2.2 KiB
77 lines
2.2 KiB
1 year ago
|
import sys
|
||
|
import asyncio
|
||
|
from datetime import datetime, timedelta
|
||
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||
|
from apscheduler.triggers.interval import IntervalTrigger
|
||
|
from apscheduler.triggers.date import DateTrigger
|
||
|
from aspider import aspider
|
||
|
from bustag.spider import bus_spider
|
||
|
from bustag.util import logger, APP_CONFIG
|
||
|
|
||
|
# Module-level handles shared by the functions below. Both stay None until
# start_scheduler() assigns the event loop and the scheduler it creates.
loop = None
scheduler = None
|
||
|
|
||
|
|
||
|
def download(loop, no_parse_links=False, urls=None):
    """
    Download and update data, then refresh the recommendations.

    Args:
        loop: event loop object, handed unchanged to ``aspider.download``.
        no_parse_links: forwarded to the spider as the ``no_parse_links``
            option. When truthy, ``count`` is ``len(urls)`` instead of the
            configured ``download.count``.
        urls: tuple - tuple of urls, passed to the spider as ``roots``.
            When empty or None a warning is logged and nothing is done.
    """
    print('start download')
    # reset sys.argv: keep only the first element (the program name).
    # NOTE(review): presumably so that downstream argument parsing does not
    # see this process's own command-line flags - confirm.
    sys.argv = sys.argv[:1]
    if not urls:
        logger.warning('no links to download')
        return
    count = APP_CONFIG['download.count']
    if no_parse_links:
        count = len(urls)
    # Work on a copy: APP_CONFIG.get() hands back the stored mapping itself
    # when the key exists, so updating it in place would write the per-call
    # values (roots, count, no_parse_links) into the shared configuration.
    extra_options = dict(APP_CONFIG.get('options', {}))
    extra_options.update(
        {'no_parse_links': no_parse_links, 'roots': urls, 'count': count})

    aspider.download(loop, extra_options)
    try:
        # Imported here rather than at module level, as in the original.
        import bustag.model.classifier as clf

        clf.recommend()
    except FileNotFoundError:
        print('还没有训练好的模型, 无法推荐')
|
||
|
|
||
|
|
||
|
def start_scheduler():
    """
    Create the event loop and scheduler, queue the download jobs and run.

    A new asyncio event loop and an ``AsyncIOScheduler`` bound to it are
    stored in the module-level ``loop`` and ``scheduler`` names. Two
    ``download`` jobs are added for the configured ``download.root_path``:
    one with a date trigger one second from now, and one with an interval
    trigger of ``download.interval`` seconds (1800 when the key is absent).
    The call then blocks in ``loop.run_forever()``.
    """
    global scheduler, loop

    period_seconds = int(APP_CONFIG.get('download.interval', 1800))

    loop = asyncio.new_event_loop()
    scheduler = AsyncIOScheduler(event_loop=loop)

    first_run_at = datetime.now() + timedelta(seconds=1)
    recurring = IntervalTrigger(seconds=period_seconds)
    at_startup = DateTrigger(run_date=first_run_at)

    job_args = (loop, False, (APP_CONFIG['download.root_path'],))
    # The one-off job is registered first so a download happens at server
    # start; the interval job then repeats it.
    for trigger in (at_startup, recurring):
        scheduler.add_job(download, trigger=trigger, args=job_args)

    scheduler.start()
    asyncio.set_event_loop(loop)
    loop.run_forever()
|
||
|
|
||
|
|
||
|
def add_download_job(urls):
    """
    Queue a one-off ``download`` job for the given urls.

    Args:
        urls: handed to ``add_job`` as the single extra positional
            argument for ``download``.
    """
    extra_args = (urls,)
    add_job(download, extra_args)
|
||
|
|
||
|
|
||
|
def add_job(job_func, args):
    """
    Add a one-off job to the scheduler, due ten seconds from now.

    The job is registered with the positional arguments
    ``(loop, True, *args)``, where ``loop`` is the module-level event loop.
    The module-level ``scheduler`` is used, so ``start_scheduler`` must
    have assigned it first (it is None until then).

    Args:
        job_func: callable to schedule.
        args: tuple or list of extra positional arguments appended after
            the two fixed leading ones.
    """
    # Normalise to a tuple so that a list (which APScheduler's own add_job
    # accepts) can be concatenated with the fixed leading arguments.
    job_args = (loop, True) + tuple(args)
    logger.debug(job_args)
    run_at = datetime.now() + timedelta(seconds=10)
    date_trigger = DateTrigger(run_date=run_at)
    scheduler.add_job(job_func, trigger=date_trigger, args=job_args)
|