diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..e7e9d11
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,2 @@
+# Default ignored files
+/workspace.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..65531ca
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..4cbb0cc
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/疫情数据采集分析及可视化.iml b/.idea/疫情数据采集分析及可视化.iml
new file mode 100644
index 0000000..a193443
--- /dev/null
+++ b/.idea/疫情数据采集分析及可视化.iml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/__pycache__/GrapSpider.cpython-36.pyc b/__pycache__/GrapSpider.cpython-36.pyc
new file mode 100644
index 0000000..66a25ea
Binary files /dev/null and b/__pycache__/GrapSpider.cpython-36.pyc differ
diff --git a/数据采集/GrapSpider.py b/数据采集/GrapSpider.py
new file mode 100644
index 0000000..78b5f27
--- /dev/null
+++ b/数据采集/GrapSpider.py
@@ -0,0 +1,126 @@
+import json
+import time
+
+import requests
+from bs4 import BeautifulSoup
+import re
+
+class Spider():
+    """Scrape news and epidemic statistics from the page at ``url``."""
+
+    def __init__(self,url):
+        # Target page.
+        self.url=url
+        self.headers={"user-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
+        # Scraped news items (news instances).
+        self.news=[]
+        # Page update time; grapNews stores it as "%Y-%m-%d %H:%M:%S".
+        self.modifytime=''
+
+    def _fetch_soup(self):
+        """Download self.url and return it parsed by BeautifulSoup."""
+        # The timeout keeps a stalled connection from hanging forever.
+        response=requests.get(self.url,headers=self.headers,timeout=10)
+        html=response.content.decode('utf-8')
+        return BeautifulSoup(html,"html.parser")
+
+    # Fetch the live news and the page update time.
+    def grapNews(self):
+        """Fill self.news and self.modifytime from the page.
+
+        Raises IndexError when an expected script payload is missing.
+        """
+        soup=self._fetch_soup()
+        tag=soup.find('script',attrs={'id':'getTimelineService1'})
+
+        info=re.findall(r'\[(.*?)\]',str(tag))
+        if not info:
+            raise IndexError("script 'getTimelineService1' has no news list")
+        payload=info[0]
+        # Message IDs.
+        ids=re.findall(r'"id":(.*?),',payload)
+        # Publication time as a timestamp; convert it before display.
+        pubDates=re.findall(r'"pubDate":(.*?),',payload)
+        # Publication time as display text.
+        pubDateStrs=re.findall(r'"pubDateStr":"(.*?)","title"',payload)
+        # Headlines.
+        titles=re.findall(r'"title":"(.*?)","summary"',payload)
+        # News text.
+        summarys=re.findall(r'"summary":"(.*?)","infoSource"',payload)
+        # Article URLs.
+        sourceUrls=re.findall(r'"sourceUrl":"(.*?)","provinceId"',payload)
+        # Store the records. zip stops at the shortest list, so a field
+        # missing from one entry cannot raise an out-of-range index.
+        for fields in zip(ids,pubDates,pubDateStrs,titles,summarys,sourceUrls):
+            # pubDate gets the timestamp; pubDateStr was passed twice before.
+            self.news.append(news(*fields))
+
+        # Extract the update time.
+        tag=soup.find('script',attrs={'id':'getListByCountryTypeService2true'})
+        # Match each record from "id" through its "showRank" field.
+        result=re.findall(r'(\{"id".*?"showRank":.*?\})',str(tag))
+        if not result:
+            raise IndexError("script 'getListByCountryTypeService2true' has no record")
+        # Parse the first record as JSON.
+        jsObj=json.loads(result[0])
+        # Divide by 1000 to get the seconds that time.localtime expects.
+        updateTimestamp=float(jsObj['modifyTime'])/1000
+        localt=time.localtime(updateTimestamp)
+        # Format for display.
+        timestr=time.strftime("%Y-%m-%d %H:%M:%S",localt)
+
+        self.modifytime=timestr
+        print(timestr)
+
+    # Fetch domestic epidemic figures.
+    def grapchina(self):
+        """Print each field scraped from the 'getAreaStat' script."""
+        # NOTE: debugging output only; nothing is stored or returned.
+        soup=self._fetch_soup()
+        # Locate the data.
+        tag=soup.find('script',attrs={'id':'getAreaStat'})
+        yiqinginfo=str(tag)
+
+        provinceNames=re.findall(r'"provinceName":"(.*?)",',yiqinginfo)
+        print(provinceNames)
+        print(len(provinceNames))
+        provinceShortName=re.findall(r'"provinceShortName":"(.*?)",',yiqinginfo)
+        print(provinceShortName)
+        currentConfirmedCounts=re.findall(r'"currentConfirmedCount":(.*?),"',yiqinginfo)
+        print(currentConfirmedCounts)
+        print(len(currentConfirmedCounts))
+        confirmedCounts=re.findall(r'"confirmedCount":(.*?),',yiqinginfo)
+        print(confirmedCounts)
+        suspectedCounts=re.findall(r'"suspectedCount":(.*?),',yiqinginfo)
+        print(suspectedCounts)
+        curedCounts=re.findall(r'"curedCount":(.*?),',yiqinginfo)
+        print(curedCounts)
+        deadCounts=re.findall(r'"deadCount":(.*?),',yiqinginfo)
+        print(deadCounts)
+        comments=re.findall(r'"comment":(.*?),',yiqinginfo)
+        print(comments)
+        locationIds=re.findall(r'"locationId":(.*?),',yiqinginfo)
+        print(locationIds)
+        statisticsDatas=re.findall(r'"statisticsData":"(.*?)",',yiqinginfo)
+        print(statisticsDatas)
+        cities=re.findall(r'"cities":(.*?)}',yiqinginfo)
+        print(cities)
+
+        print(tag)
+
+    # Fetch overseas epidemic figures.
+    def grapforign(self):
+        """Placeholder; not implemented yet."""
+        pass
+
+# The news class stores one live news item; all members are public.
+class news():
+    def __init__(self,id,pubDate,pubDateStr,title,summary,sourceUrl):
+        self.id=id
+        self.pubDate=pubDate
+        self.pubDateStr=pubDateStr
+        self.title=title
+        # "summay" was a typo; it stays as an alias for existing callers.
+        self.summary=self.summay=summary
+        self.sourceUrl=sourceUrl
+
diff --git a/数据采集/__pycache__/GrapSpider.cpython-36.pyc b/数据采集/__pycache__/GrapSpider.cpython-36.pyc
new file mode 100644
index 0000000..de08539
Binary files /dev/null and b/数据采集/__pycache__/GrapSpider.cpython-36.pyc differ
diff --git a/数据采集/grapdata.py b/数据采集/grapdata.py
new file mode 100644
index 0000000..d83fdb0
--- /dev/null
+++ b/数据采集/grapdata.py
@@ -0,0 +1,15 @@
+import requests
+from bs4 import BeautifulSoup
+import re
+from 数据采集.GrapSpider import Spider
+
+
+def main():
+    """Run the domestic-statistics scrape against the site's home page."""
+    # Home page of the site.
+    home_page="https://ncov.dxy.cn/ncovh5/view/pneumonia"
+    Spider(home_page).grapchina()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/数据采集/text.txt b/数据采集/text.txt
new file mode 100644
index 0000000..9ffeb88
--- /dev/null
+++ b/数据采集/text.txt
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/日志文件/第一天/杨旭东.txt b/日志文件/第一天/杨旭东.txt
deleted file mode 100644
index 1c81406..0000000
--- a/日志文件/第一天/杨旭东.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-从本地push到远程仓库时,git报认证错误,原因是本地认证的账号密码与educoder不匹配,需要从新认证。
-命令行输入命令git config --system --unset credential.helper
-会报permission denied错误,修改C:/Program Files/Git /mingw64/etc文件夹权限即可。
\ No newline at end of file