# NOTE(review): the three comment lines below were repository-page chrome
# (topic-limit notice, line/size counters) captured when this file was scraped
# from a Gitea-style web view — they are not part of the program.
# "You can not select more than 25 topics. Topics must start with a letter or
#  number, can include dashes ('-') and can be up to 35 characters long."
# "98 lines, 3.0 KiB"
# Standard library
import json
import re
import time

# Third-party
import requests
import pymysql
from bs4 import BeautifulSoup
# News item class
class InNews:
|
|
def __init__(self):
|
|
self.eventDescription=''
|
|
self.eventTime=''
|
|
self.eventUrl=''
|
|
self.siteName=''
|
|
self.Artical=''
|
|
|
|
def get_db_tuple(self):
|
|
return ((self.eventDescription,self.time(),self.eventUrl,self.siteName))
|
|
|
|
def delete_sql(self,con):
|
|
sql="""
|
|
delete from allNews
|
|
where eventDescription='%s' and eventTime = '%s' and eventUrl ='%s' and siteName='%s'
|
|
"""%(self.eventDescription,self.time(),self.eventUrl,self.siteName)
|
|
try:
|
|
cursor=con.cursor()
|
|
cursor.execute(sql)
|
|
except Exception as e:
|
|
print(e)
|
|
print('delete news failed.')
|
|
else:
|
|
print('delete news successfully')
|
|
|
|
def insert_sql(self,con):
|
|
self.delete_sql(con)
|
|
sql='''
|
|
insert into allNews(
|
|
eventDescription,eventTime,eventUrl,siteName)
|
|
values(%s,%s,%s,%s)
|
|
'''
|
|
try:
|
|
con.cursor().execute(sql,self.get_db_tuple())
|
|
con.commit()
|
|
except Exception as e:
|
|
print(e)
|
|
print('Insert news failed.')
|
|
else:
|
|
print('Inserted news successfully')
|
|
|
|
|
|
def printArtical(self):
|
|
req = requests.get(self.eventUrl)
|
|
content = req.content.decode('utf-8')
|
|
contentBs = BeautifulSoup(content, 'html.parser')
|
|
tag = contentBs.findAll('span', attrs={'class': 'bjh-p'})
|
|
artical = ''
|
|
for item in tag:
|
|
artical=artical+(' '+item.get_text())+'\n'
|
|
return artical
|
|
|
|
def time(self):
|
|
ts = float(self.eventTime)
|
|
localt = time.localtime(ts) # ???localtime??
|
|
timestr = time.strftime("%Y-%m-%d %H:%M:%S", localt) # ???
|
|
return(timestr)
|
|
|
|
def toString(self):
|
|
print('%s\n%s\n%s\n\n%s\n%s'%(self.eventDescription,self.eventUrl,self.time(),self.printArtical(),self.siteName))
|
|
|
|
# Data-scraping function
def getNews():
    """Fetch the current COVID-19 overseas-news list from Baidu opendata.

    The endpoint returns JSONP (``jsonp_...({...})``), so the JSON payload is
    cut out of the callback wrapper with a regex before parsing.

    Returns:
        list[InNews]: one item per news entry in the feed.
    """
    req = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879')
    content = req.content.decode('utf-8')
    # Fix: raw string for the regex — '\{' / '\}' are invalid escape
    # sequences in a plain string (SyntaxWarning on modern CPython).
    # Greedy .* grabs the outermost {...} JSON payload of the JSONP wrapper.
    conDic = re.findall(r'\{.*\}', content)[0]
    results = json.loads(conDic)['Result'][0]['DisplayData']['result']['items']

    allNews = []
    for news in results:
        inNews = InNews()
        inNews.eventDescription = news['eventDescription']
        inNews.eventTime = news['eventTime']
        inNews.eventUrl = news['eventUrl']
        inNews.siteName = news['siteName']
        allNews.append(inNews)
    return allNews
|
|
def printStart():
    """Fetch the news feed and dump every item to stdout, separated by a rule."""
    rule = '************' * 6
    for item in getNews():
        item.toString()
        print(rule)
|
def mysqlStart(con):
    """Fetch the news feed and write every item to MySQL through *con*."""
    for item in getNews():
        item.insert_sql(con)