In [4]:
import datetime
import requests
from bs4 import BeautifulSoup
import re
import json
import pymysql

# 定义实体类
class Country:
    def __init__(self):
        self.countryName = ''
        self.currentConfirmedCount = 0 # 现有确诊病例数
        self.confirmedCount = 0 # 累计确诊
        self.confirmedCountRank = 0 # 累计确诊排名
        self.curedCount = 0  # 累计治愈
        self.deadCount = 0 # 累计死亡
        self.deadCountRank = 0 # 累计死亡排名
        self.deadRate = 0.0 # 死亡率
        self.deadRateRank = 0 # 死亡率排名
        self.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    def get_info_tuple(self):
        return (self.countryName, self.currentConfirmedCount, self.confirmedCount, self.confirmedCountRank, self.curedCount, self.deadCount, self.deadCountRank, self.deadRate, self.deadRateRank,self.updatedTime)
    def __str__(self):
        return 'countryName:%s,currentConfirmedCount:%d,confirmedCount:%d,\
confirmedCountRankt:%d,curedCount:%d,deadCount:%d,deadCountRank:%d,deadRate:%d,deadRateRank:%d,updatedTime:%s' % (self.countryName, self.currentConfirmedCount, self.confirmedCount, self.confirmedCountRank, self.curedCount, self.deadCount, self.deadCountRank, self.deadRate, self.deadRateRank,self.updatedTime)

class MyDB:
    def __init__(self, host, user, passwd, db):

        self.conn = pymysql.connect(host, user, passwd, db)
        self.cursor = self.conn.cursor()


        
    def get_country_list_tuple(self, all_country):
        info_tuple = []
        for item in all_country:
            info_tuple.append(item.get_info_tuple())
        return info_tuple
    
    # 保存数据
    def save_country_datas(self, all_country):
        date=datetime.datetime.now().strftime('%Y-%m-%d')
        sql='delete from country_daily_datas where updatedTime like "%s"'%(date+"%")
        try:
            self.cursor.executemany(sql)
            self.conn.commit()
        except Exception as e:
            print(e)
            
        sql = 'insert into country_daily_datas(countryName,currentConfirmedCount,\
confirmedCount,confirmedCountRank,curedCount,deadCount,deadCountRank,deadRate,deadRateRank,updateTime) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        res = self.get_country_list_tuple(all_country)
        
        print('+++ save_country_datas, data len: %d' % len(res))
        try:
            self.cursor.executemany(sql, res)
            self.conn.commit()
        except Exception as e:
            print(e)
        print('+++ save_country_datas is over.')
    def show_country_datas(self):
            self.cursor.execute('select * from country_daily_datas')
            
        
    def __del__(self):
        if self.conn is not None:
            self.conn.close()
            

def forign_data_search():
    db = MyDB('localhost', 'root', '20Z00t10x28_my','covid19')
    res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')# 爬取页面
    res = res.content.decode('utf-8') # 重新解码
    soup = BeautifulSoup(res, 'html.parser')# 构建soup对象
    tag = soup.find('script', attrs={'id':'getListByCountryTypeService2true'}) # Tag# 使用soup对象查找实时播报新闻标签
    tagStr = tag.string# 获取内容
    results = re.findall('\{"id".*?"showRank".*?\}', tagStr) # length: 34, [str, str, ....]# 使用正则表达式匹配
    all_country = []
    for item in results:
        country=Country() 
        obj = json.loads(item) # obj -> dict
        country.countryName = obj['provinceName']
        country.currentConfirmedCount = int(obj['currentConfirmedCount'])
        country.confirmedCount = int(obj['confirmedCount'])
        country.curedCount = int(obj['curedCount'])
        country.deadCount = int(obj['deadCount'])
        country.deadRate = float(obj['deadRate'])
        country.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        try:
            country.deadCountRank = int(obj['deadCountRank'])
            country.deadRateRank = int(obj['deadRateRank'])
            country.confirmedCountRank = int(obj['confirmedCountRank'])
        except KeyError:
            country.deadCountRank = 0
            country.deadRateRank = 0
            country.confirmedCountRank = 0
        finally:
            all_country.append(country)

    for item in all_country:
        print(item)   
        print('+++++++++++++++++++++++++' * 4)
    db.save_country_datas(all_country)
    
forign_data_search()

countryName:美国,currentConfirmedCount:3277590,confirmedCount:5197377,confirmedCountRankt:1,curedCount:1753760,deadCount:166027,deadCountRank:1,deadRate:3,deadRateRank:62,updatedTime:2020-08-13 22:04:21
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
countryName:巴西,currentConfirmedCount:751107,confirmedCount:3164785,confirmedCountRankt:2,curedCount:2309477,deadCount:104201,deadCountRank:2,deadRate:3,deadRateRank:59,updatedTime:2020-08-13 22:04:21
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
countryName:印度,currentConfirmedCount:653622,confirmedCount:2396637,confirmedCountRankt:3,curedCount:1695982,deadCount:47033,deadCountRank:4,deadRate:1,deadRateRank:102,updatedTime:2020-08-13 22:04:21
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
countryName:英国,currentConfirmedCount:266553,confirmedCount:313798,confirmedCountRankt:12,curedCo