You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

92 lines
2.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#爬虫部分\n",
"import requests\n",
"import bs4\n",
"import time\n",
"import random\n",
"import pandas as pd\n",
"\n",
"ulist = []\n",
"url = 'https://www.shanghairanking.cn/rankings/bcur/202011'\n",
"headers = {\n",
" 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'}\n",
"\n",
"response = requests.get(url=url, headers=headers)\n",
"response.encoding = \"utf-8\" # 转化字符码\n",
"html = response.text\n",
"\n",
"soup = bs4.BeautifulSoup(html, 'html.parser')\n",
"\n",
"for tr in soup.find('tbody').children:\n",
" if isinstance(tr, bs4.element.Tag):\n",
" tds = tr.find_all('td') # 等价于 tds = tr('td')\n",
" name = tds[1].text\n",
" print(name)\n",
" score = tds[4].text\n",
" print(score)\n",
" pd1 = pd.DataFrame({'学校': name, '总分': score}, index=[0])\n",
" ulist.append(pd1)\n",
"\n",
"second = random.randrange(3, 5)\n",
"time.sleep(second)\n",
"\n",
"university2 = pd.concat(ulist[0:10])\n",
"university2.to_excel('university.xlsx', index=False)\n",
"#可视化部分\n",
"from pyecharts.charts import Bar # 柱状图\n",
"import xlrd # xlrd是对excel读取read xlwt是写入write\n",
"\n",
"data = xlrd.open_workbook(r'D:\\数据可视化\\university.xlsx') # 打开本地excel表格\n",
"\n",
"table = data.sheets()[0] # 拿出表格的第一个sheet\n",
"\n",
"names = [] # 画图用的x轴坐标是姓名\n",
"sources = [] # 画图用的y轴坐标成绩\n",
"# 循环输出每行内容\n",
"for i in range(1, table.nrows):\n",
" a = table.row_values(i) # 把第几行拿出来作为一个列表\n",
" name = a[0] # 姓名提取加入name列表\n",
" names.append(name)\n",
"\n",
" source = a[1]\n",
" sources.append(source)\n",
"\n",
"bar = Bar() # 创建一个柱状图对象\n",
"bar.add_xaxis(names) # 设置x轴\n",
"bar.add_yaxis('总分', sources) # 设置y轴和图标名\n",
"bar.render('柱形图.html') # 输出html文件来显示柱状图"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}