diff --git a/第九章.ipynb b/第九章.ipynb new file mode 100644 index 0000000..ffe3889 --- /dev/null +++ b/第九章.ipynb @@ -0,0 +1,882 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "(unicode error) 'unicodeescape' codec can't decode bytes in position 5-6: truncated \\UXXXXXXXX escape (3135147787.py, line 2)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[13], line 2\u001b[1;36m\u001b[0m\n\u001b[1;33m 'C:\\15\\UserBehavior.csv'\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m (unicode error) 'unicodeescape' codec can't decode bytes in position 5-6: truncated \\UXXXXXXXX escape\n" + ] + } + ], + "source": [ + "\n", + "import pandas as pd\n", + "'C:\\15\\UserBehavior.csv'\n", + "info = pd.read_csv('C:\\15\\UserBehavior.csv',encoding='gbk')\n", + "info\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'info' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[9], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m info_excel \u001b[38;5;241m=\u001b[39m info\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m1000phone\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mexcel_test.xlsx\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m info_excel\n", + "\u001b[1;31mNameError\u001b[0m: name 'info' is not defined" + ] + } + ], + "source": [ + "info_excel = info.to_excel('E:\\\\1000phone\\\\excel_test.xlsx')\n", + "info_excel" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'E:\\\\千锋\\\\JSON.JSON'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[10], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m千锋\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mJSON.JSON\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m,encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf8\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;28;01mas\u001b[39;00m fp:\n\u001b[0;32m 3\u001b[0m json_data \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mload(fp)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m这是文件中的json数据:\u001b[39m\u001b[38;5;124m'\u001b[39m,json_data)\n", + "File \u001b[1;32mD:\\anc\\Lib\\site-packages\\IPython\\core\\interactiveshell.py:284\u001b[0m, in \u001b[0;36m_modified_open\u001b[1;34m(file, *args, **kwargs)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[0;32m 278\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 281\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 282\u001b[0m )\n\u001b[1;32m--> 284\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m io_open(file, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'E:\\\\千锋\\\\JSON.JSON'" + ] + } + ], + "source": [ + "import json\n", + "with open('E:\\\\千锋\\\\JSON.JSON','r',encoding='utf8')as fp:\n", + " json_data = json.load(fp)\n", + "print('这是文件中的json数据:',json_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import json\n", + "x = {'name':'小明','age':19}\n", + "filename = 'E:\\\\千锋\\\\JSON.JSON'\n", + "with open (filename,'w') as f:\n", + " json.dump(x,f)\n", + "with open('E:\\\\千锋\\\\JSON.JSON','r',encoding='utf8')as fp:\n", + " json_data = json.load(fp)\n", + "print('新写入的json数据:',json_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'MySQLdb'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# import pymysql.cursors\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mMySQLdb\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mMySQLdb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcursors\u001b[39;00m\n\u001b[0;32m 5\u001b[0m connect \u001b[38;5;241m=\u001b[39m MySQLdb\u001b[38;5;241m.\u001b[39mConnect(\n\u001b[0;32m 6\u001b[0m host\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlocalhost\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 7\u001b[0m port\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3306\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m charset\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf8\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 12\u001b[0m )\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'MySQLdb'" + ] + } + ], + "source": [ + "# import pymysql.cursors\n", + "import MySQLdb\n", + "import MySQLdb.cursors\n", + "\n", + "connect = MySQLdb.Connect(\n", + " host='localhost',\n", + " port=3306,\n", + " user='root',\n", + " passwd='123456',\n", + " db='bicycle',\n", + " charset='utf8'\n", + ")\n", + "cursor = connect.cursor()\n", + "\n", + "cur = connect.cursor(MySQLdb.cursors.DictCursor)\n", + "\n", + "sql = \"SELECT * FROM train order by rand() limit 15\"\n", + "cur.execute(sql)\n", + "results = cur.fetchall()\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[4], line 8\u001b[0m\n\u001b[0;32m 5\u001b[0m htmltext \u001b[38;5;241m=\u001b[39m r\u001b[38;5;241m.\u001b[39mtext\n\u001b[0;32m 6\u001b[0m \u001b[38;5;66;03m# print(htmltext)\u001b[39;00m\n\u001b[1;32m----> 8\u001b[0m html \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\u001b[39m\u001b[38;5;124m'\u001b[39m,htmltext,re\u001b[38;5;241m.\u001b[39mS)[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 9\u001b[0m htmlchubanshe \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m
\u001b[39m\u001b[38;5;124m'\u001b[39m,html,re\u001b[38;5;241m.\u001b[39mS)\n\u001b[0;32m 10\u001b[0m fh \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPythondemo\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPython-test\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPythonLX\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mchubanshe.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mIndexError\u001b[0m: list index out of range" + ] + } + ], + "source": [ + "import requests\n", + "import re\n", + " \n", + "r = requests.get(\"http://read.douban.com/provider/all\")\n", + "htmltext = r.text\n", + "# print(htmltext)\n", + " \n", + "html = re.findall(r'',htmltext,re.S)[0]\n", + "htmlchubanshe = re.findall(r'
',html,re.S)\n", + "fh = open(\"E:\\\\Pythondemo\\\\Python-test\\\\PythonLX\\\\chubanshe.txt\",\"w\")\n", + "for cbs in htmlchubanshe:\n", + " print(cbs)\n", + " fh.write(cbs+\"\\n\")\n", + "fh.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
订单编号总金额买家实际支付金额收货地址订单创建时间订单付款时间退款金额
01178.80.0上海2020-02-21 00:00:00NaN0.0
1221.021.0内蒙古自治区2020-02-20 23:59:542020-02-21 00:00:020.0
2337.00.0安徽省2020-02-20 23:59:35NaN0.0
34157.0157.0湖南省2020-02-20 23:58:342020-02-20 23:58:440.0
4564.80.0江苏省2020-02-20 23:57:042020-02-20 23:57:1164.8
56327.7148.9浙江省2020-02-20 23:56:392020-02-20 23:56:53178.8
67357.0357.0天津2020-02-20 23:56:362020-02-20 23:56:400.0
7853.053.0浙江省2020-02-20 23:56:122020-02-20 23:56:160.0
8943.00.0湖南省2020-02-20 23:54:532020-02-20 23:55:0443.0
910421.0421.0北京2020-02-20 23:54:282020-02-20 23:54:330.0
\n", + "
" + ], + "text/plain": [ + " 订单编号 总金额 买家实际支付金额 收货地址 订单创建时间 订单付款时间 \\\n", + "0 1 178.8 0.0 上海 2020-02-21 00:00:00 NaN \n", + "1 2 21.0 21.0 内蒙古自治区 2020-02-20 23:59:54 2020-02-21 00:00:02 \n", + "2 3 37.0 0.0 安徽省 2020-02-20 23:59:35 NaN \n", + "3 4 157.0 157.0 湖南省 2020-02-20 23:58:34 2020-02-20 23:58:44 \n", + "4 5 64.8 0.0 江苏省 2020-02-20 23:57:04 2020-02-20 23:57:11 \n", + "5 6 327.7 148.9 浙江省 2020-02-20 23:56:39 2020-02-20 23:56:53 \n", + "6 7 357.0 357.0 天津 2020-02-20 23:56:36 2020-02-20 23:56:40 \n", + "7 8 53.0 53.0 浙江省 2020-02-20 23:56:12 2020-02-20 23:56:16 \n", + "8 9 43.0 0.0 湖南省 2020-02-20 23:54:53 2020-02-20 23:55:04 \n", + "9 10 421.0 421.0 北京 2020-02-20 23:54:28 2020-02-20 23:54:33 \n", + "\n", + " 退款金额 \n", + "0 0.0 \n", + "1 0.0 \n", + "2 0.0 \n", + "3 0.0 \n", + "4 64.8 \n", + "5 178.8 \n", + "6 0.0 \n", + "7 0.0 \n", + "8 43.0 \n", + "9 0.0 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "data = pd.read_csv('tmall_order_report.csv')\n", + "data.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 28010 entries, 0 to 28009\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 订单编号 28010 non-null int64 \n", + " 1 总金额 28010 non-null float64\n", + " 2 买家实际支付金额 28010 non-null float64\n", + " 3 收货地址 28010 non-null object \n", + " 4 订单创建时间 28010 non-null object \n", + " 5 订单付款时间 24087 non-null object \n", + " 6 退款金额 28010 non-null float64\n", + "dtypes: float64(3), int64(1), object(3)\n", + "memory usage: 1.5+ MB\n" + ] + } + ], + "source": [ + "data.info()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'订单付款时间'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3621\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3620\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:136\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:163\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5198\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5206\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: '订单付款时间'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[1;32mIn [12]\u001b[0m, in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28msum\u001b[39m(\u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m订单付款时间\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39misnull())\n", + "File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\frame.py:3505\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 3504\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 3505\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m 3507\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", + "File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3623\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m-> 3623\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3624\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3625\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3626\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3627\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3628\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[1;31mKeyError\u001b[0m: '订单付款时间'" + ] + } + ], + "source": [ + "sum(data['订单付款时间'].isnull())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['订单编号', '总金额', '买家实际支付金额', '收货地址', '订单创建时间', '订单付款时间', '退款金额'], dtype='object')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.rename(columns={'收货地址 ': '收货地址', '订单付款时间 ':'订单付款时间'}, inplace=True)\n", + "data.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3923" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(data['订单付款时间'].isnull())" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n", + "0.14005712245626561\n" + ] + } + ], + "source": [ + "print(data[data['订单付款时间'].isnull() & data['买家实际支付金额']>0].size) # 查看缺失值是否为拍下订单但是未付款情况\n", + "print(sum(data['订单付款时间'].isnull()) / data.shape[0]) # 查看缺失值与整体数据的比例" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
订单编号总金额买家实际支付金额退款金额
count28010.00000028010.00000028010.00000028010.000000
mean14005.500000106.95325367.92171220.433271
std8085.9348561136.587094151.49343471.501963
min1.0000001.0000000.0000000.000000
25%7003.25000038.0000000.0000000.000000
50%14005.50000075.00000045.0000000.000000
75%21007.750000119.000000101.0000000.000000
max28010.000000188320.00000016065.0000003800.000000
\n", + "
" + ], + "text/plain": [ + " 订单编号 总金额 买家实际支付金额 退款金额\n", + "count 28010.000000 28010.000000 28010.000000 28010.000000\n", + "mean 14005.500000 106.953253 67.921712 20.433271\n", + "std 8085.934856 1136.587094 151.493434 71.501963\n", + "min 1.000000 1.000000 0.000000 0.000000\n", + "25% 7003.250000 38.000000 0.000000 0.000000\n", + "50% 14005.500000 75.000000 45.000000 0.000000\n", + "75% 21007.750000 119.000000 101.000000 0.000000\n", + "max 28010.000000 188320.000000 16065.000000 3800.000000" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
订单编号总金额买家实际支付金额收货地址订单创建时间订单付款时间退款金额
1925719258188320.00.0上海2020-02-24 19:35:06NaN0.0
\n", + "
" + ], + "text/plain": [ + " 订单编号 总金额 买家实际支付金额 收货地址 订单创建时间 订单付款时间 退款金额\n", + "19257 19258 188320.0 0.0 上海 2020-02-24 19:35:06 NaN 0.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['总金额'] > 175000]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data = data.drop(index=data[data['总金额'] > 17500].index)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
订单编号总金额买家实际支付金额退款金额
count28009.00000028009.00000028009.00000028009.000000
mean14005.312471100.23351867.92413720.434000
std8086.018294164.451538151.49559571.503135
min1.0000001.0000000.0000000.000000
25%7003.00000038.0000000.0000000.000000
50%14005.00000075.00000045.0000000.000000
75%21008.000000119.000000101.0000000.000000
max28010.00000016065.00000016065.0000003800.000000
\n", + "
" + ], + "text/plain": [ + " 订单编号 总金额 买家实际支付金额 退款金额\n", + "count 28009.000000 28009.000000 28009.000000 28009.000000\n", + "mean 14005.312471 100.233518 67.924137 20.434000\n", + "std 8086.018294 164.451538 151.495595 71.503135\n", + "min 1.000000 1.000000 0.000000 0.000000\n", + "25% 7003.000000 38.000000 0.000000 0.000000\n", + "50% 14005.000000 75.000000 45.000000 0.000000\n", + "75% 21008.000000 119.000000 101.000000 0.000000\n", + "max 28010.000000 16065.000000 16065.000000 3800.000000" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}