You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

92 lines
2.8 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Crawler section: download the ranking page, keep the leading rows,\n",
"# export them to Excel and draw a bar chart of the total scores.\n",
"import requests\n",
"import bs4\n",
"import pandas as pd\n",
"from pyecharts.charts import Bar  # bar chart\n",
"\n",
"url = 'https://www.shanghairanking.cn/rankings/bcur/202011'\n",
"headers = {\n",
"    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'}\n",
"TOP_N = 10  # number of leading table rows to export and plot\n",
"EXCEL_PATH = 'university.xlsx'  # relative path: written to the working directory\n",
"CHART_PATH = '柱形图.html'\n",
"\n",
"\n",
"def parse_score(text):\n",
"    \"\"\"Return ``text`` converted to float, or None when it is not a number.\"\"\"\n",
"    try:\n",
"        return float(text)\n",
"    except ValueError:\n",
"        return None\n",
"\n",
"\n",
"def parse_rankings(html):\n",
"    \"\"\"Extract one record per row of the first table body found in ``html``.\n",
"\n",
"    Each record is a dict with the keys '学校' (text of the second cell) and\n",
"    '总分' (the fifth cell parsed by ``parse_score``).\n",
"\n",
"    Raises:\n",
"        ValueError: if the page contains no table body at all.\n",
"    \"\"\"\n",
"    soup = bs4.BeautifulSoup(html, 'html.parser')\n",
"    tbody = soup.find('tbody')\n",
"    if tbody is None:\n",
"        raise ValueError('ranking table body not found in the page')\n",
"\n",
"    records = []\n",
"    for tr in tbody.children:\n",
"        # .children also yields bare text nodes; only tags can be table rows\n",
"        if not isinstance(tr, bs4.element.Tag):\n",
"            continue\n",
"        tds = tr.find_all('td')  # equivalent to tds = tr('td')\n",
"        if len(tds) < 5:\n",
"            continue  # too few cells to hold both the name and the score\n",
"        records.append({'学校': tds[1].text.strip(),\n",
"                        '总分': parse_score(tds[4].text)})\n",
"    return records\n",
"\n",
"\n",
"# A timeout keeps the cell from hanging forever; raise_for_status turns an\n",
"# HTTP error page into an exception instead of silently parsing it.\n",
"response = requests.get(url=url, headers=headers, timeout=10)\n",
"response.raise_for_status()\n",
"response.encoding = 'utf-8'  # decode the response body as UTF-8\n",
"ulist = parse_rankings(response.text)\n",
"\n",
"# Build the frame once from the records; the explicit columns keep the\n",
"# header row even when nothing was scraped.\n",
"top_rows = ulist[:TOP_N]\n",
"university2 = pd.DataFrame(top_rows, columns=['学校', '总分'])\n",
"university2.to_excel(EXCEL_PATH, index=False)\n",
"print(university2.to_string(index=False))\n",
"\n",
"# Visualization section: plot straight from the in-memory rows, so the chart\n",
"# does not depend on re-reading the Excel file from a machine-specific path.\n",
"names = [row['学校'] for row in top_rows]  # x axis: university names\n",
"sources = [row['总分'] for row in top_rows]  # y axis: total scores\n",
"\n",
"bar = Bar()  # create the bar chart object\n",
"bar.add_xaxis(names)  # set the x axis\n",
"bar.add_yaxis('总分', sources)  # set the y axis and the series name\n",
"bar.render(CHART_PATH)  # write the chart to an HTML file"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}