|
|
@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
|
|
"collapsed": true
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"# Web scraping section\n",
|
|
|
|
|
|
|
|
"import requests\n",
|
|
|
|
|
|
|
|
"import bs4\n",
|
|
|
|
|
|
|
|
"import time\n",
|
|
|
|
|
|
|
|
"import random\n",
|
|
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"ulist = []\n",
|
|
|
|
|
|
|
|
"url = 'https://www.shanghairanking.cn/rankings/bcur/202011'\n",
|
|
|
|
|
|
|
|
"headers = {\n",
|
|
|
|
|
|
|
|
" 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'}\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"response = requests.get(url=url, headers=headers)\n",
|
|
|
|
|
|
|
|
"response.encoding = \"utf-8\" # decode the response body as UTF-8\n",
|
|
|
|
|
|
|
|
"html = response.text\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"soup = bs4.BeautifulSoup(html, 'html.parser')\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"for tr in soup.find('tbody').children:\n",
|
|
|
|
|
|
|
|
" if isinstance(tr, bs4.element.Tag):\n",
|
|
|
|
|
|
|
|
" tds = tr.find_all('td') # equivalent to tds = tr('td')\n",
|
|
|
|
|
|
|
|
" name = tds[1].text\n",
|
|
|
|
|
|
|
|
" print(name)\n",
|
|
|
|
|
|
|
|
" score = tds[4].text\n",
|
|
|
|
|
|
|
|
" print(score)\n",
|
|
|
|
|
|
|
|
" pd1 = pd.DataFrame({'学校': name, '总分': score}, index=[0])\n",
|
|
|
|
|
|
|
|
" ulist.append(pd1)\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"second = random.randrange(3, 5)\n",
|
|
|
|
|
|
|
|
"time.sleep(second)\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"university2 = pd.concat(ulist[0:10])\n",
|
|
|
|
|
|
|
|
"university2.to_excel('university.xlsx', index=False)\n",
|
|
|
|
|
|
|
|
"# Visualization section\n",
|
|
|
|
|
|
|
|
"from pyecharts.charts import Bar # bar chart\n",
|
|
|
|
|
|
|
|
"import xlrd # xlrd reads Excel files (xlwt writes them); NOTE: xlrd >= 2.0 only opens .xls, not .xlsx\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
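|
"data = xlrd.open_workbook(r'D:\\数据可视化\\university.xlsx') # open the local Excel workbook; NOTE(review): the scraping code above writes 'university.xlsx' to the working directory, confirm this absolute path is the same file\n",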
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"table = data.sheets()[0] # first sheet of the workbook\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"names = [] # x-axis labels for the chart: university names\n",
|
|
|
|
|
|
|
|
"sources = [] # y-axis values for the chart: total scores\n",
|
|
|
|
|
|
|
|
"# Collect the name and score from every row after row 0 (presumably the header row)\n",
|
|
|
|
|
|
|
|
"for i in range(1, table.nrows):\n",
|
|
|
|
|
|
|
|
" a = table.row_values(i) # row i as a list of cell values\n",
|
|
|
|
|
|
|
|
" name = a[0] # first column: university name (appended to names below)\n",
|
|
|
|
|
|
|
|
" names.append(name)\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" source = a[1]\n",
|
|
|
|
|
|
|
|
" sources.append(source)\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"bar = Bar() # create a bar chart object\n",
|
|
|
|
|
|
|
|
"bar.add_xaxis(names) # set the x-axis data\n",
|
|
|
|
|
|
|
|
"bar.add_yaxis('总分', sources) # set the y-axis data and the series (legend) name\n",
|
|
|
|
|
|
|
|
"bar.render('柱形图.html') # write the bar chart to an HTML file"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
|
|
|
"version": "3.6.1"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
|
|
|
}
|