You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

77 lines
2.2 KiB

1 year ago
import sys
import asyncio
from datetime import datetime, timedelta
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.date import DateTrigger
from aspider import aspider
from bustag.spider import bus_spider
from bustag.util import logger, APP_CONFIG
# Module-level scheduler handle; stays None until start_scheduler() assigns it.
scheduler = None
# Module-level event loop handle; stays None until start_scheduler() assigns it.
loop = None
def download(loop, no_parse_links=False, urls=None):
    """
    Download/update data, then run the classifier's recommend step.

    Args:
        loop: event loop object passed straight through to ``aspider.download``.
        no_parse_links: when true, the download count is ``len(urls)`` instead
            of the configured ``download.count``; the flag is also forwarded
            to the spider in its options.
        urls: tuple - tuple of urls used as the spider's ``roots``. When empty
            or None, a warning is logged and the function returns without
            downloading anything.
    """
    print('start download')
    # reset sys.argv
    sys.argv = sys.argv[:1]
    if not urls:
        logger.warning('no links to download')
        return
    count = APP_CONFIG['download.count']
    if no_parse_links:
        count = len(urls)
    # Merge into a fresh dict: updating the object returned by APP_CONFIG.get()
    # in place would write this run's roots/count back into the shared config.
    extra_options = dict(APP_CONFIG.get('options', {}))
    extra_options.update({
        'no_parse_links': no_parse_links,
        'roots': urls,
        'count': count,
    })
    aspider.download(loop, extra_options)
    try:
        # Imported here rather than at module top, as in the original code.
        import bustag.model.classifier as clf
        clf.recommend()
    except FileNotFoundError:
        # Message reads: "no trained model yet, cannot recommend".
        print('还没有训练好的模型, 无法推荐')
def start_scheduler():
    """
    Create the event loop and scheduler, register the download jobs and
    run the loop forever.

    Assigns the module-level ``scheduler`` and ``loop``. Two ``download`` jobs
    are registered for the configured root path: a one-off job one second
    from now, and a repeating job every ``download.interval`` seconds
    (1800 when the setting is absent). This call does not return, because it
    ends in ``loop.run_forever()``.
    """
    global scheduler, loop
    period_seconds = int(APP_CONFIG.get('download.interval', 1800))
    loop = asyncio.new_event_loop()
    scheduler = AsyncIOScheduler(event_loop=loop)

    first_run_at = datetime.now() + timedelta(seconds=1)
    periodic_trigger = IntervalTrigger(seconds=period_seconds)
    startup_trigger = DateTrigger(run_date=first_run_at)
    root_urls = (APP_CONFIG['download.root_path'],)
    job_args = (loop, False, root_urls)

    # The one-off job gives a download right at server start; the interval
    # job covers every later run.
    for trigger in (startup_trigger, periodic_trigger):
        scheduler.add_job(download, trigger=trigger, args=job_args)

    scheduler.start()
    asyncio.set_event_loop(loop)
    loop.run_forever()
def add_download_job(urls):
    """Schedule a ``download`` job for ``urls`` via ``add_job``."""
    # urls is wrapped in a 1-tuple so it is passed as one positional argument.
    extra_args = (urls,)
    add_job(download, extra_args)
def add_job(job_func, args):
    '''
    add a job to scheduler

    The job is registered with a date trigger set ten seconds from now.
    ``job_func`` is called with the module-level ``loop`` and ``True``
    followed by the items of ``args``.

    Args:
        job_func: callable to schedule.
        args: iterable of extra positional arguments (tuple, list, ...).

    Note:
        ``scheduler`` and ``loop`` are module globals that are None until
        start_scheduler() assigns them, so this must be called after that.
    '''
    # tuple(args) lets callers pass a list or other iterable; plain tuple
    # concatenation would raise TypeError for anything that is not a tuple.
    job_args = (loop, True) + tuple(args)
    logger.debug(job_args)
    run_at = datetime.now() + timedelta(seconds=10)
    date_trigger = DateTrigger(run_date=run_at)
    scheduler.add_job(job_func, trigger=date_trigger, args=job_args)