# p9ew5o3q7/newsData.py — scrape Baidu's COVID-19 overseas news feed and store items in MySQL.
import requests
import json
from bs4 import BeautifulSoup
import re
import time
import pymysql
# News item class: one scraped entry plus its MySQL persistence helpers.
class InNews:
    """One scraped news item plus helpers to persist it to MySQL and print it.

    Attributes are assigned by the caller (see ``getNews``) before any of the
    database or printing methods are used:

    - eventDescription: headline/summary text
    - eventTime: unix timestamp in seconds, as a string
    - eventUrl: article URL
    - siteName: publishing site name
    - Artical: unused here; kept for backward compatibility
    """

    def __init__(self):
        self.eventDescription = ''
        self.eventTime = ''   # unix seconds, as a string (converted in time())
        self.eventUrl = ''
        self.siteName = ''
        self.Artical = ''     # not read by any method; preserved for callers

    def get_db_tuple(self):
        """Return (description, formatted time, url, site) for SQL parameters."""
        return (self.eventDescription, self.time(), self.eventUrl, self.siteName)

    def delete_sql(self, con):
        """Delete any existing row matching this item.

        Uses parameterized SQL: the original interpolated the values directly
        into the statement with %-formatting, which was vulnerable to SQL
        injection and broke on values containing quotes. This also makes the
        method consistent with insert_sql, which was already parameterized.
        """
        sql = """
        delete from allNews
        where eventDescription=%s and eventTime=%s and eventUrl=%s and siteName=%s
        """
        try:
            cursor = con.cursor()
            cursor.execute(sql, self.get_db_tuple())
        except Exception as e:
            print(e)
            print('delete news failed.')
        else:
            print('delete news successfully')

    def insert_sql(self, con):
        """Insert this item, first removing any duplicate row, then commit."""
        self.delete_sql(con)
        sql = '''
        insert into allNews(
        eventDescription,eventTime,eventUrl,siteName)
        values(%s,%s,%s,%s)
        '''
        try:
            con.cursor().execute(sql, self.get_db_tuple())
            con.commit()
        except Exception as e:
            print(e)
            print('Insert news failed.')
        else:
            print('Inserted news successfully')

    def printArtical(self):
        """Fetch eventUrl and return the article body text.

        Extracts the text of every <span class="bjh-p"> element (Baijiahao
        article paragraphs). Despite the name, this returns the text rather
        than printing it; toString() does the printing.
        """
        req = requests.get(self.eventUrl, timeout=30)  # avoid hanging forever
        content = req.content.decode('utf-8')
        contentBs = BeautifulSoup(content, 'html.parser')
        tags = contentBs.findAll('span', attrs={'class': 'bjh-p'})
        # join at C speed instead of quadratic += concatenation
        return ''.join(' ' + tag.get_text() + '\n' for tag in tags)

    def time(self):
        """Format self.eventTime (unix seconds) as 'YYYY-mm-dd HH:MM:SS' local time."""
        ts = float(self.eventTime)
        localt = time.localtime(ts)
        return time.strftime("%Y-%m-%d %H:%M:%S", localt)

    def toString(self):
        """Print a human-readable dump of this item (fetches the article body)."""
        print('%s\n%s\n%s\n\n%s\n%s' % (self.eventDescription, self.eventUrl,
                                        self.time(), self.printArtical(),
                                        self.siteName))
# Scraper: fetch the news feed and build InNews objects from it.
def getNews(url='https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879'):
    """Fetch the Baidu news feed and return a list of InNews objects.

    Args:
        url: feed endpoint; defaults to the original hard-coded Baidu
            opendata query (COVID-19 overseas news, JSONP response), so
        existing callers are unaffected.

    Returns:
        list of InNews with eventDescription/eventTime/eventUrl/siteName set.
    """
    req = requests.get(url, timeout=30)  # timeout so a dead endpoint can't hang the script
    content = req.content.decode('utf-8')
    # The endpoint returns JSONP; strip the callback wrapper to get the raw
    # JSON object. Raw string avoids the invalid '\{' escape warning.
    conDic = re.findall(r'\{.*\}', content)[0]
    results = json.loads(conDic)['Result'][0]['DisplayData']['result']['items']
    allNews = []
    for news in results:
        inNews = InNews()
        inNews.eventDescription = news['eventDescription']
        inNews.eventTime = news['eventTime']
        inNews.eventUrl = news['eventUrl']
        inNews.siteName = news['siteName']
        allNews.append(inNews)
    return allNews
def printStart():
    """Fetch every news item and print it, followed by a star-rule separator."""
    separator = '************' * 6
    for item in getNews():
        item.toString()
        print(separator)
def mysqlStart(con):
    """Fetch every news item and persist it through the given MySQL connection."""
    for item in getNews():
        item.insert_sql(con)