{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# University ranking: scrape and bar chart\n",
    "\n",
    "Downloads the ranking table from `RANKING_URL`, keeps the first `TOP_N` rows that have a numeric total score, saves them to `university.xlsx`, and renders a bar chart of the scores to `柱形图.html`.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel → Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import bs4\n",
    "import pandas as pd\n",
    "import requests\n",
    "from pyecharts.charts import Bar  # bar chart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune.\n",
    "RANKING_URL = 'https://www.shanghairanking.cn/rankings/bcur/202011'\n",
    "HEADERS = {\n",
    "    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'}\n",
    "REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get() may block forever\n",
    "TOP_N = 10  # number of leading table rows to save and plot\n",
    "\n",
    "NAME_COL = '学校'  # column holding the school name\n",
    "SCORE_COL = '总分'  # column holding the total score (also the chart series name)\n",
    "\n",
    "# Relative paths: the scraper writes EXCEL_PATH and the chart step reads the\n",
    "# very same file back, so the notebook works from any working directory.\n",
    "EXCEL_PATH = 'university.xlsx'\n",
    "CHART_PATH = '柱形图.html'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scrape the ranking table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_html(url, headers=HEADERS, timeout=REQUEST_TIMEOUT):\n",
    "    \"\"\"Download ``url`` and return the response body decoded as UTF-8.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: if the server answers with a 4xx/5xx status.\n",
    "        requests.Timeout: if no response arrives within ``timeout`` seconds.\n",
    "    \"\"\"\n",
    "    response = requests.get(url=url, headers=headers, timeout=timeout)\n",
    "    response.raise_for_status()  # fail here rather than parse an error page\n",
    "    response.encoding = 'utf-8'  # force the decoding used for response.text\n",
    "    return response.text\n",
    "\n",
    "\n",
    "def parse_ranking(html):\n",
    "    \"\"\"Build a DataFrame of school names and total scores from ``html``.\n",
    "\n",
    "    Reads the direct ``<tr>`` children of the first ``<tbody>``; the name is\n",
    "    taken from the 2nd ``<td>`` and the score from the 5th. Rows with fewer\n",
    "    than five cells are skipped. Scores that are not numeric become NaN.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the page has no ``<tbody>`` or no usable rows.\n",
    "    \"\"\"\n",
    "    soup = bs4.BeautifulSoup(html, 'html.parser')\n",
    "    tbody = soup.find('tbody')\n",
    "    if tbody is None:\n",
    "        raise ValueError('no <tbody> found in the page; the layout may have changed')\n",
    "\n",
    "    records = []\n",
    "    for row in tbody.find_all('tr', recursive=False):\n",
    "        cells = row.find_all('td')\n",
    "        if len(cells) < 5:\n",
    "            continue  # not a data row: name/score cells are missing\n",
    "        records.append({\n",
    "            # collapse newlines and repeated spaces inside the cell text\n",
    "            NAME_COL: ' '.join(cells[1].get_text().split()),\n",
    "            SCORE_COL: cells[4].get_text(strip=True),\n",
    "        })\n",
    "    if not records:\n",
    "        raise ValueError('the ranking table contains no data rows')\n",
    "\n",
    "    # One DataFrame built from all records instead of one frame per row.\n",
    "    ranking = pd.DataFrame(records, columns=[NAME_COL, SCORE_COL])\n",
    "    ranking[SCORE_COL] = pd.to_numeric(ranking[SCORE_COL], errors='coerce')\n",
    "    return ranking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ranking_all = parse_ranking(fetch_html(RANKING_URL))\n",
    "\n",
    "# Rows without a numeric score cannot be plotted, so drop them before\n",
    "# taking the first TOP_N rows of the table.\n",
    "ranking_top = (\n",
    "    ranking_all\n",
    "    .dropna(subset=[SCORE_COL])\n",
    "    .head(TOP_N)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "assert not ranking_top.empty, 'no rows with a numeric score were parsed'\n",
    "\n",
    "ranking_top.to_excel(EXCEL_PATH, index=False)\n",
    "ranking_top"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualise the saved ranking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the workbook written above. pd.read_excel selects a suitable engine\n",
    "# for .xlsx; calling xlrd directly fails on .xlsx files with xlrd >= 2.0.\n",
    "ranking_saved = pd.read_excel(EXCEL_PATH)\n",
    "\n",
    "school_names = ranking_saved[NAME_COL].astype(str).tolist()  # x axis: school names\n",
    "total_scores = ranking_saved[SCORE_COL].tolist()  # y axis: total scores\n",
    "\n",
    "bar = Bar()  # create the bar chart object\n",
    "bar.add_xaxis(school_names)  # set the x axis\n",
    "bar.add_yaxis(SCORE_COL, total_scores)  # set the y axis and the series name\n",
    "bar.render(CHART_PATH)  # write the chart to an HTML file"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}