|
|
|
@ -1,106 +0,0 @@
|
|
|
|
|
'''
|
|
|
|
|
负责处理入库业务
|
|
|
|
|
'''
|
|
|
|
|
from Model import DataDB as db
|
|
|
|
|
from Controller import DataFetch
|
|
|
|
|
import logging
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
# Module-level scraper instance shared by newsAdd and yqAdd below.
scrawler = DataFetch.Scrawler()
|
|
|
|
|
|
|
|
|
|
'''入库业务'''
|
|
|
|
|
'''1.新闻入库 insertData(tableName,source,ctime,title,url):'''
|
|
|
|
|
# Scraping configuration for newsAdd, keyed by its ``id`` argument.
# Each entry: (source label, listing URL, date selector, title selector,
# link selector).
_NEWS_SOURCES = {
    '0': ("工大新闻", 'http://news.hut.edu.cn/',
          '.news-list .date',
          '.news-list .info h2',
          '.news-list .info a'),
    '1': ("北大新闻", 'https://news.pku.edu.cn/ttxw/index.htm',
          '.articleList01 .item-date',
          '.articleList01 .item-txt h3',
          '.articleList01 .item-txt h3 a'),
}

# Number of headline entries stored per call.
_NEWS_BATCH_SIZE = 5


def newsAdd(id):
    """Scrape the latest headlines of one news site and store them in 'news'.

    Args:
        id: Source selector as a string. '0' selects the HUT news listing,
            '1' selects the PKU news listing. (The name shadows the builtin
            ``id``; it is kept so existing keyword callers keep working.)

    Returns:
        True when exactly ``_NEWS_BATCH_SIZE`` rows were scraped and every
        ``db.insertData`` call reported success; False otherwise, including
        for an unrecognised ``id`` or when the page yields fewer entries.
    """
    from urllib.parse import urljoin

    # The isinstance guard keeps unhashable arguments from raising in the
    # dictionary lookup; they are simply treated as an unknown source.
    config = _NEWS_SOURCES.get(id) if isinstance(id, str) else None
    if config is None:
        return False

    source, url, date_selector, title_selector, link_selector = config

    # Fetch each column once; the page content does not depend on the row
    # index, so re-scraping it on every iteration only multiplies requests.
    dates = scrawler.reqbsGetText(url, cssselector=date_selector)
    titles = scrawler.reqbsGetText(url, cssselector=title_selector)
    hrefs = scrawler.reqbsGetHref(url, cssselector=link_selector)

    # zip stops at the shortest column, so a short page yields fewer rows
    # (and a False result) instead of an IndexError.
    rows = list(zip(dates, titles, hrefs))[:_NEWS_BATCH_SIZE]

    stored = 0
    for ctime, title, href in rows:
        # urljoin resolves relative, root-relative and absolute hrefs against
        # the listing URL; plain concatenation produced malformed links when
        # the base ends in a file name such as 'index.htm'.
        link = urljoin(url, href)
        # Keep inserting after a failure so the remaining rows are stored.
        if db.insertData('news', source, ctime, title, link):
            stored += 1

    return stored == _NEWS_BATCH_SIZE
|
|
|
|
|
|
|
|
|
|
'''2.疫情数据入库'''
|
|
|
|
|
def yqAdd(source):
    """Fetch epidemic totals from one provider and store them in 'yq'.

    Args:
        source: Provider selector as a string. '1' selects the Sina page,
            '0' selects the Tencent page.

    Returns:
        True if ``db.insertData`` reports success, False if it does not,
        and None when ``source`` is neither '1' nor '0'.
    """
    if source == '1':
        # Sina epidemic data.
        endpoint = 'http://zj.sina.com.cn/zt_d/zjyiqing/?hk=/*'
    elif source == '0':
        # Tencent epidemic data.
        # NOTE(review): this is the portal home page and is read with the
        # same JSON keys as the Sina feed -- confirm the endpoint.
        endpoint = 'https://www.qq.com/'
    else:
        return None

    payload = scrawler.reqbsGetJSON(url=endpoint)
    stats = payload['data']
    ctime = stats['mtime']
    cnumber = stats['gntotal']
    gnumber = stats['deathtotal']

    # Normalise whatever insertData returns to a strict True/False.
    return True if db.insertData('yq', source, ctime, cnumber, gnumber) else False
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
日志相关处理方法
|
|
|
|
|
'''
|
|
|
|
|
class MyLogHandler(logging.Handler, object):
    """Custom logging handler that writes each record through db.log2db."""

    def __init__(self):
        super(MyLogHandler, self).__init__()

    def emit(self, record):
        """Emit a record by passing it to ``db.log2db``.

        ``record`` is the log record object; its ``lineno``, ``funcName``
        and rendered message are passed on together with the current local
        timestamp. ``emit`` is the method a custom handler has to override.
        Any exception raised while storing the record is routed to
        ``handleError`` rather than propagated to the logging caller.
        """
        try:
            stamp = str(datetime.now())
            line_number = record.lineno
            function_name = record.funcName
            message = record.getMessage()
            db.log2db(stamp, line_number, function_name, message)
        except Exception:
            self.handleError(record)
|
|
|
|
|
|
|
|
|
|
def mylog():
    """Return the "test_logger" logger with one MyLogHandler attached.

    The logger level is DEBUG and the handler level is INFO, so only records
    at INFO or above reach the handler.

    Returns:
        The ``logging.Logger`` registered under the name "test_logger".
    """
    # Create (or look up) the logger.
    log = logging.getLogger("test_logger")
    log.setLevel(logging.DEBUG)

    # getLogger returns the same object for the same name, so attaching a new
    # handler on every call would make each record be handled once per call.
    # Attach the handler only when none of this type is present yet.
    if not any(isinstance(h, MyLogHandler) for h in log.handlers):
        # Create the handler.
        logHandler = MyLogHandler()
        logHandler.setLevel(logging.INFO)
        # Create the formatter and attach it to the handler.
        formats = logging.Formatter('%(asctime)s - %(name)s - %(lineno)d: %(message)s')
        logHandler.setFormatter(formats)
        # Attach the handler to the logger.
        log.addHandler(logHandler)

    return log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__=='__main__':
    # Manual smoke run. newsAdd takes a single selector string ('0' is the
    # HUT news source) and yqAdd expects '1' (Sina) or '0' (Tencent); the
    # previous calls passed display names, and an extra argument to newsAdd.
    print(newsAdd('0'))
    print(yqAdd('1'))
    # print(db.getAllData('yq'))
|