|
|
|
@ -0,0 +1,882 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "SyntaxError",
|
|
|
|
|
"evalue": "(unicode error) 'unicodeescape' codec can't decode bytes in position 5-6: truncated \\UXXXXXXXX escape (3135147787.py, line 2)",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;36m Cell \u001b[1;32mIn[13], line 2\u001b[1;36m\u001b[0m\n\u001b[1;33m 'C:\\15\\UserBehavior.csv'\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m (unicode error) 'unicodeescape' codec can't decode bytes in position 5-6: truncated \\UXXXXXXXX escape\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"# Raw string: in a normal literal '\\15' and '\\U' in this Windows path are parsed as escape sequences.\n",
|
|
|
|
|
"info = pd.read_csv(r'C:\\15\\UserBehavior.csv',encoding='gbk')\n",
|
|
|
|
|
"info\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "NameError",
|
|
|
|
|
"evalue": "name 'info' is not defined",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"Cell \u001b[1;32mIn[9], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m info_excel \u001b[38;5;241m=\u001b[39m info\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m1000phone\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mexcel_test.xlsx\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m info_excel\n",
|
|
|
|
|
"\u001b[1;31mNameError\u001b[0m: name 'info' is not defined"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# DataFrame.to_excel writes the file and returns None, so there is nothing to assign or display.\n",
|
|
|
|
|
"info.to_excel('E:\\\\1000phone\\\\excel_test.xlsx')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "FileNotFoundError",
|
|
|
|
|
"evalue": "[Errno 2] No such file or directory: 'E:\\\\千锋\\\\JSON.JSON'",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"Cell \u001b[1;32mIn[10], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m千锋\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mJSON.JSON\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m,encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf8\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;28;01mas\u001b[39;00m fp:\n\u001b[0;32m 3\u001b[0m json_data \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mload(fp)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m这是文件中的json数据:\u001b[39m\u001b[38;5;124m'\u001b[39m,json_data)\n",
|
|
|
|
|
"File \u001b[1;32mD:\\anc\\Lib\\site-packages\\IPython\\core\\interactiveshell.py:284\u001b[0m, in \u001b[0;36m_modified_open\u001b[1;34m(file, *args, **kwargs)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[0;32m 278\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 281\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 282\u001b[0m )\n\u001b[1;32m--> 284\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m io_open(file, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
|
|
|
|
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'E:\\\\千锋\\\\JSON.JSON'"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import json\n",
|
|
|
|
|
"with open('E:\\\\千锋\\\\JSON.JSON','r',encoding='utf8')as fp:\n",
|
|
|
|
|
" json_data = json.load(fp)\n",
|
|
|
|
|
"print('这是文件中的json数据:',json_data)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import json\n",
|
|
|
|
|
"x = {'name':'小明','age':19}\n",
|
|
|
|
|
"filename = 'E:\\\\千锋\\\\JSON.JSON'\n",
|
|
|
|
|
"# Write with the encoding the read-back expects rather than the platform default, and reuse filename.\n",
|
|
|
|
|
"with open(filename,'w',encoding='utf8') as f:\n",
|
|
|
|
|
"    json.dump(x,f)\n",
|
|
|
|
|
"with open(filename,'r',encoding='utf8')as fp:\n",
|
|
|
|
|
" json_data = json.load(fp)\n",
|
|
|
|
|
"print('新写入的json数据:',json_data)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "ModuleNotFoundError",
|
|
|
|
|
"evalue": "No module named 'MySQLdb'",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"Cell \u001b[1;32mIn[9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# import pymysql.cursors\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mMySQLdb\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mMySQLdb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcursors\u001b[39;00m\n\u001b[0;32m 5\u001b[0m connect \u001b[38;5;241m=\u001b[39m MySQLdb\u001b[38;5;241m.\u001b[39mConnect(\n\u001b[0;32m 6\u001b[0m host\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlocalhost\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 7\u001b[0m port\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3306\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m charset\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf8\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 12\u001b[0m )\n",
|
|
|
|
|
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'MySQLdb'"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# import pymysql.cursors\n",
|
|
|
|
|
"import MySQLdb\n",
|
|
|
|
|
"import MySQLdb.cursors\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"connect = MySQLdb.Connect(\n",
|
|
|
|
|
" host='localhost',\n",
|
|
|
|
|
" port=3306,\n",
|
|
|
|
|
" user='root',\n",
|
|
|
|
|
" passwd='123456',\n",
|
|
|
|
|
" db='bicycle',\n",
|
|
|
|
|
" charset='utf8'\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"cursor = connect.cursor()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"cur = connect.cursor(MySQLdb.cursors.DictCursor)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"sql = \"SELECT * FROM train order by rand() limit 15\"\n",
|
|
|
|
|
"cur.execute(sql)\n",
|
|
|
|
|
"results = cur.fetchall()\n",
|
|
|
|
|
"print(results)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 4,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "IndexError",
|
|
|
|
|
"evalue": "list index out of range",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"Cell \u001b[1;32mIn[4], line 8\u001b[0m\n\u001b[0;32m 5\u001b[0m htmltext \u001b[38;5;241m=\u001b[39m r\u001b[38;5;241m.\u001b[39mtext\n\u001b[0;32m 6\u001b[0m \u001b[38;5;66;03m# print(htmltext)\u001b[39;00m\n\u001b[1;32m----> 8\u001b[0m html \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<a href=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/provider/63687123/\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(.*?)</section>\u001b[39m\u001b[38;5;124m'\u001b[39m,htmltext,re\u001b[38;5;241m.\u001b[39mS)[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 9\u001b[0m htmlchubanshe \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<div class=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(.*?)</div>\u001b[39m\u001b[38;5;124m'\u001b[39m,html,re\u001b[38;5;241m.\u001b[39mS)\n\u001b[0;32m 10\u001b[0m fh \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPythondemo\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPython-test\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPythonLX\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mchubanshe.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
|
|
|
|
"\u001b[1;31mIndexError\u001b[0m: list index out of range"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import requests\n",
|
|
|
|
|
"import re\n",
|
|
|
|
|
" \n",
|
|
|
|
|
"r = requests.get(\"http://read.douban.com/provider/all\")\n",
|
|
|
|
|
"htmltext = r.text\n",
|
|
|
|
|
"# print(htmltext)\n",
|
|
|
|
|
" \n",
|
|
|
|
|
"sections = re.findall(r'<a href=\"/provider/63687123/\"(.*?)</section>',htmltext,re.S)\n",
|
|
|
|
|
"if not sections:\n",
|
|
|
|
|
"    # findall returns [] when the markup is absent; fail with a clear message instead of a bare index error.\n",
|
|
|
|
|
"    raise IndexError('provider section not found in response (HTTP %s)' % r.status_code)\n",
|
|
|
|
|
"html = sections[0]\n",
|
|
|
|
|
"htmlchubanshe = re.findall(r'<div class=\"name\"(.*?)</div>',html,re.S)\n",
|
|
|
|
|
"# The with-block closes the file even if a write fails; the explicit encoding avoids depending on the platform default.\n",
|
|
|
|
|
"with open(\"E:\\\\Pythondemo\\\\Python-test\\\\PythonLX\\\\chubanshe.txt\",\"w\",encoding=\"utf-8\") as fh:\n",
|
|
|
|
|
"    for cbs in htmlchubanshe:\n",
|
|
|
|
|
"        print(cbs)\n",
|
|
|
|
|
"        fh.write(cbs+\"\\n\")\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>订单编号</th>\n",
|
|
|
|
|
" <th>总金额</th>\n",
|
|
|
|
|
" <th>买家实际支付金额</th>\n",
|
|
|
|
|
" <th>收货地址</th>\n",
|
|
|
|
|
" <th>订单创建时间</th>\n",
|
|
|
|
|
" <th>订单付款时间</th>\n",
|
|
|
|
|
" <th>退款金额</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>178.8</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>上海</td>\n",
|
|
|
|
|
" <td>2020-02-21 00:00:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>21.0</td>\n",
|
|
|
|
|
" <td>21.0</td>\n",
|
|
|
|
|
" <td>内蒙古自治区</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:59:54</td>\n",
|
|
|
|
|
" <td>2020-02-21 00:00:02</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>37.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>安徽省</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:59:35</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>157.0</td>\n",
|
|
|
|
|
" <td>157.0</td>\n",
|
|
|
|
|
" <td>湖南省</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:58:34</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:58:44</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>64.8</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>江苏省</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:57:04</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:57:11</td>\n",
|
|
|
|
|
" <td>64.8</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>327.7</td>\n",
|
|
|
|
|
" <td>148.9</td>\n",
|
|
|
|
|
" <td>浙江省</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:56:39</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:56:53</td>\n",
|
|
|
|
|
" <td>178.8</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
" <td>357.0</td>\n",
|
|
|
|
|
" <td>357.0</td>\n",
|
|
|
|
|
" <td>天津</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:56:36</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:56:40</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>53.0</td>\n",
|
|
|
|
|
" <td>53.0</td>\n",
|
|
|
|
|
" <td>浙江省</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:56:12</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:56:16</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>43.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>湖南省</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:54:53</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:55:04</td>\n",
|
|
|
|
|
" <td>43.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
" <td>10</td>\n",
|
|
|
|
|
" <td>421.0</td>\n",
|
|
|
|
|
" <td>421.0</td>\n",
|
|
|
|
|
" <td>北京</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:54:28</td>\n",
|
|
|
|
|
" <td>2020-02-20 23:54:33</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" 订单编号 总金额 买家实际支付金额 收货地址 订单创建时间 订单付款时间 \\\n",
|
|
|
|
|
"0 1 178.8 0.0 上海 2020-02-21 00:00:00 NaN \n",
|
|
|
|
|
"1 2 21.0 21.0 内蒙古自治区 2020-02-20 23:59:54 2020-02-21 00:00:02 \n",
|
|
|
|
|
"2 3 37.0 0.0 安徽省 2020-02-20 23:59:35 NaN \n",
|
|
|
|
|
"3 4 157.0 157.0 湖南省 2020-02-20 23:58:34 2020-02-20 23:58:44 \n",
|
|
|
|
|
"4 5 64.8 0.0 江苏省 2020-02-20 23:57:04 2020-02-20 23:57:11 \n",
|
|
|
|
|
"5 6 327.7 148.9 浙江省 2020-02-20 23:56:39 2020-02-20 23:56:53 \n",
|
|
|
|
|
"6 7 357.0 357.0 天津 2020-02-20 23:56:36 2020-02-20 23:56:40 \n",
|
|
|
|
|
"7 8 53.0 53.0 浙江省 2020-02-20 23:56:12 2020-02-20 23:56:16 \n",
|
|
|
|
|
"8 9 43.0 0.0 湖南省 2020-02-20 23:54:53 2020-02-20 23:55:04 \n",
|
|
|
|
|
"9 10 421.0 421.0 北京 2020-02-20 23:54:28 2020-02-20 23:54:33 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" 退款金额 \n",
|
|
|
|
|
"0 0.0 \n",
|
|
|
|
|
"1 0.0 \n",
|
|
|
|
|
"2 0.0 \n",
|
|
|
|
|
"3 0.0 \n",
|
|
|
|
|
"4 64.8 \n",
|
|
|
|
|
"5 178.8 \n",
|
|
|
|
|
"6 0.0 \n",
|
|
|
|
|
"7 0.0 \n",
|
|
|
|
|
"8 43.0 \n",
|
|
|
|
|
"9 0.0 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"data = pd.read_csv('tmall_order_report.csv')\n",
|
|
|
|
|
"data.head(10)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 11,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 28010 entries, 0 to 28009\n",
|
|
|
|
|
"Data columns (total 7 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 订单编号 28010 non-null int64 \n",
|
|
|
|
|
" 1 总金额 28010 non-null float64\n",
|
|
|
|
|
" 2 买家实际支付金额 28010 non-null float64\n",
|
|
|
|
|
" 3 收货地址 28010 non-null object \n",
|
|
|
|
|
" 4 订单创建时间 28010 non-null object \n",
|
|
|
|
|
" 5 订单付款时间 24087 non-null object \n",
|
|
|
|
|
" 6 退款金额 28010 non-null float64\n",
|
|
|
|
|
"dtypes: float64(3), int64(1), object(3)\n",
|
|
|
|
|
"memory usage: 1.5+ MB\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data.info()\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 12,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "KeyError",
|
|
|
|
|
"evalue": "'订单付款时间'",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3621\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3620\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
|
|
|
|
|
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:136\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
|
|
|
|
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:163\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
|
|
|
|
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5198\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
|
|
|
|
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5206\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
|
|
|
|
"\u001b[1;31mKeyError\u001b[0m: '订单付款时间'",
|
|
|
|
|
"\nThe above exception was the direct cause of the following exception:\n",
|
|
|
|
|
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"Input \u001b[1;32mIn [12]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28msum\u001b[39m(\u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m订单付款时间\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39misnull())\n",
|
|
|
|
|
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\frame.py:3505\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 3504\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 3505\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m 3507\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
|
|
|
|
|
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3623\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m-> 3623\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3624\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3625\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3626\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3627\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3628\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
|
|
|
|
|
"\u001b[1;31mKeyError\u001b[0m: '订单付款时间'"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"sum(data['订单付款时间'].isnull())"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"Index(['订单编号', '总金额', '买家实际支付金额', '收货地址', '订单创建时间', '订单付款时间', '退款金额'], dtype='object')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data = data.rename(columns=str.strip)  # strip stray whitespace from every header; safe to re-run\n",
|
|
|
|
|
"data.columns"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"3923"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"sum(data['订单付款时间'].isnull())"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 15,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"0\n",
|
|
|
|
|
"0.14005712245626561\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"print(data[data['订单付款时间'].isnull() & (data['买家实际支付金额']>0)].size) # rows lacking a payment time yet showing a paid amount > 0; & binds tighter than >, hence the parentheses\n",
|
|
|
|
|
"print(sum(data['订单付款时间'].isnull()) / data.shape[0]) # share of rows whose payment time is missing"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"0"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data.duplicated().sum()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>订单编号</th>\n",
|
|
|
|
|
" <th>总金额</th>\n",
|
|
|
|
|
" <th>买家实际支付金额</th>\n",
|
|
|
|
|
" <th>退款金额</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>count</th>\n",
|
|
|
|
|
" <td>28010.000000</td>\n",
|
|
|
|
|
" <td>28010.000000</td>\n",
|
|
|
|
|
" <td>28010.000000</td>\n",
|
|
|
|
|
" <td>28010.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>mean</th>\n",
|
|
|
|
|
" <td>14005.500000</td>\n",
|
|
|
|
|
" <td>106.953253</td>\n",
|
|
|
|
|
" <td>67.921712</td>\n",
|
|
|
|
|
" <td>20.433271</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>std</th>\n",
|
|
|
|
|
" <td>8085.934856</td>\n",
|
|
|
|
|
" <td>1136.587094</td>\n",
|
|
|
|
|
" <td>151.493434</td>\n",
|
|
|
|
|
" <td>71.501963</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>min</th>\n",
|
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>25%</th>\n",
|
|
|
|
|
" <td>7003.250000</td>\n",
|
|
|
|
|
" <td>38.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>50%</th>\n",
|
|
|
|
|
" <td>14005.500000</td>\n",
|
|
|
|
|
" <td>75.000000</td>\n",
|
|
|
|
|
" <td>45.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>75%</th>\n",
|
|
|
|
|
" <td>21007.750000</td>\n",
|
|
|
|
|
" <td>119.000000</td>\n",
|
|
|
|
|
" <td>101.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>max</th>\n",
|
|
|
|
|
" <td>28010.000000</td>\n",
|
|
|
|
|
" <td>188320.000000</td>\n",
|
|
|
|
|
" <td>16065.000000</td>\n",
|
|
|
|
|
" <td>3800.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" 订单编号 总金额 买家实际支付金额 退款金额\n",
|
|
|
|
|
"count 28010.000000 28010.000000 28010.000000 28010.000000\n",
|
|
|
|
|
"mean 14005.500000 106.953253 67.921712 20.433271\n",
|
|
|
|
|
"std 8085.934856 1136.587094 151.493434 71.501963\n",
|
|
|
|
|
"min 1.000000 1.000000 0.000000 0.000000\n",
|
|
|
|
|
"25% 7003.250000 38.000000 0.000000 0.000000\n",
|
|
|
|
|
"50% 14005.500000 75.000000 45.000000 0.000000\n",
|
|
|
|
|
"75% 21007.750000 119.000000 101.000000 0.000000\n",
|
|
|
|
|
"max 28010.000000 188320.000000 16065.000000 3800.000000"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data.describe()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>订单编号</th>\n",
|
|
|
|
|
" <th>总金额</th>\n",
|
|
|
|
|
" <th>买家实际支付金额</th>\n",
|
|
|
|
|
" <th>收货地址</th>\n",
|
|
|
|
|
" <th>订单创建时间</th>\n",
|
|
|
|
|
" <th>订单付款时间</th>\n",
|
|
|
|
|
" <th>退款金额</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>19257</th>\n",
|
|
|
|
|
" <td>19258</td>\n",
|
|
|
|
|
" <td>188320.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>上海</td>\n",
|
|
|
|
|
" <td>2020-02-24 19:35:06</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" 订单编号 总金额 买家实际支付金额 收货地址 订单创建时间 订单付款时间 退款金额\n",
|
|
|
|
|
"19257 19258 188320.0 0.0 上海 2020-02-24 19:35:06 NaN 0.0"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data[data['总金额'] > 175000]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"data = data.drop(index=data[data['总金额'] > 175000].index)  # same cut-off as the outlier inspected in the previous cell"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>订单编号</th>\n",
|
|
|
|
|
" <th>总金额</th>\n",
|
|
|
|
|
" <th>买家实际支付金额</th>\n",
|
|
|
|
|
" <th>退款金额</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>count</th>\n",
|
|
|
|
|
" <td>28009.000000</td>\n",
|
|
|
|
|
" <td>28009.000000</td>\n",
|
|
|
|
|
" <td>28009.000000</td>\n",
|
|
|
|
|
" <td>28009.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>mean</th>\n",
|
|
|
|
|
" <td>14005.312471</td>\n",
|
|
|
|
|
" <td>100.233518</td>\n",
|
|
|
|
|
" <td>67.924137</td>\n",
|
|
|
|
|
" <td>20.434000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>std</th>\n",
|
|
|
|
|
" <td>8086.018294</td>\n",
|
|
|
|
|
" <td>164.451538</td>\n",
|
|
|
|
|
" <td>151.495595</td>\n",
|
|
|
|
|
" <td>71.503135</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>min</th>\n",
|
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>25%</th>\n",
|
|
|
|
|
" <td>7003.000000</td>\n",
|
|
|
|
|
" <td>38.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>50%</th>\n",
|
|
|
|
|
" <td>14005.000000</td>\n",
|
|
|
|
|
" <td>75.000000</td>\n",
|
|
|
|
|
" <td>45.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>75%</th>\n",
|
|
|
|
|
" <td>21008.000000</td>\n",
|
|
|
|
|
" <td>119.000000</td>\n",
|
|
|
|
|
" <td>101.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>max</th>\n",
|
|
|
|
|
" <td>28010.000000</td>\n",
|
|
|
|
|
" <td>16065.000000</td>\n",
|
|
|
|
|
" <td>16065.000000</td>\n",
|
|
|
|
|
" <td>3800.000000</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" 订单编号 总金额 买家实际支付金额 退款金额\n",
|
|
|
|
|
"count 28009.000000 28009.000000 28009.000000 28009.000000\n",
|
|
|
|
|
"mean 14005.312471 100.233518 67.924137 20.434000\n",
|
|
|
|
|
"std 8086.018294 164.451538 151.495595 71.503135\n",
|
|
|
|
|
"min 1.000000 1.000000 0.000000 0.000000\n",
|
|
|
|
|
"25% 7003.000000 38.000000 0.000000 0.000000\n",
|
|
|
|
|
"50% 14005.000000 75.000000 45.000000 0.000000\n",
|
|
|
|
|
"75% 21008.000000 119.000000 101.000000 0.000000\n",
|
|
|
|
|
"max 28010.000000 16065.000000 16065.000000 3800.000000"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data.describe()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"pycharm": {
|
|
|
|
|
"name": "#%%\n"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.11.3"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|