ADD file via upload

main
p63lvesyf 5 months ago
parent 88720f7439
commit 8db14b90ad

@ -0,0 +1,882 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "(unicode error) 'unicodeescape' codec can't decode bytes in position 5-6: truncated \\UXXXXXXXX escape (3135147787.py, line 2)",
"output_type": "error",
"traceback": [
"\u001b[1;36m Cell \u001b[1;32mIn[13], line 2\u001b[1;36m\u001b[0m\n\u001b[1;33m 'C:\\15\\UserBehavior.csv'\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m (unicode error) 'unicodeescape' codec can't decode bytes in position 5-6: truncated \\UXXXXXXXX escape\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Raw string: in a normal literal '\\15' is an octal escape and '\\U' starts a\n",
"# \\UXXXXXXXX escape, which is what raised the SyntaxError recorded for this cell.\n",
"csv_path = r'C:\\15\\UserBehavior.csv'\n",
"info = pd.read_csv(csv_path, encoding='gbk')\n",
"info\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'info' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[9], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m info_excel \u001b[38;5;241m=\u001b[39m info\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m1000phone\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mexcel_test.xlsx\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m info_excel\n",
"\u001b[1;31mNameError\u001b[0m: name 'info' is not defined"
]
}
],
"source": [
"# DataFrame.to_excel writes the workbook and returns None, so there is nothing\n",
"# useful to bind or display here; just perform the export.\n",
"info.to_excel('E:\\\\1000phone\\\\excel_test.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'E:\\\\千锋\\\\JSON.JSON'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[10], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m千锋\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mJSON.JSON\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m,encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf8\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;28;01mas\u001b[39;00m fp:\n\u001b[0;32m 3\u001b[0m json_data \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mload(fp)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m这是文件中的json数据\u001b[39m\u001b[38;5;124m'\u001b[39m,json_data)\n",
"File \u001b[1;32mD:\\anc\\Lib\\site-packages\\IPython\\core\\interactiveshell.py:284\u001b[0m, in \u001b[0;36m_modified_open\u001b[1;34m(file, *args, **kwargs)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[0;32m 278\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 281\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 282\u001b[0m )\n\u001b[1;32m--> 284\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m io_open(file, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'E:\\\\千锋\\\\JSON.JSON'"
]
}
],
"source": [
"import json\n",
"from pathlib import Path\n",
"\n",
"json_path = Path('E:\\\\千锋\\\\JSON.JSON')\n",
"# The file is only created by the following cell; on a fresh run it does not\n",
"# exist yet, so report that instead of failing with FileNotFoundError.\n",
"if json_path.exists():\n",
"    with json_path.open('r', encoding='utf8') as fp:\n",
"        json_data = json.load(fp)\n",
"    print('这是文件中的json数据', json_data)\n",
"else:\n",
"    print('JSON file not found:', json_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import json\n",
"\n",
"x = {'name':'小明','age':19}\n",
"filename = 'E:\\\\千锋\\\\JSON.JSON'\n",
"# Write with the same explicit encoding that the read below uses, instead of\n",
"# relying on the platform default encoding for the write side.\n",
"with open(filename, 'w', encoding='utf8') as f:\n",
"    json.dump(x, f)\n",
"# Reuse `filename` so the path is defined in one place only.\n",
"with open(filename, 'r', encoding='utf8') as fp:\n",
"    json_data = json.load(fp)\n",
"print('新写入的json数据',json_data)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'MySQLdb'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# import pymysql.cursors\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mMySQLdb\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mMySQLdb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcursors\u001b[39;00m\n\u001b[0;32m 5\u001b[0m connect \u001b[38;5;241m=\u001b[39m MySQLdb\u001b[38;5;241m.\u001b[39mConnect(\n\u001b[0;32m 6\u001b[0m host\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlocalhost\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 7\u001b[0m port\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3306\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m charset\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf8\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 12\u001b[0m )\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'MySQLdb'"
]
}
],
"source": [
"# import pymysql.cursors\n",
"import os\n",
"\n",
"import MySQLdb\n",
"import MySQLdb.cursors\n",
"\n",
"connect = MySQLdb.Connect(\n",
"    host='localhost',\n",
"    port=3306,\n",
"    user='root',\n",
"    # Prefer a password supplied through the environment; the literal is kept\n",
"    # only as a fallback so existing local setups keep working.\n",
"    passwd=os.environ.get('MYSQL_PASSWORD', '123456'),\n",
"    db='bicycle',\n",
"    charset='utf8'\n",
")\n",
"try:\n",
"    # Single cursor; the DictCursor class is passed explicitly as before.\n",
"    cur = connect.cursor(MySQLdb.cursors.DictCursor)\n",
"    try:\n",
"        sql = \"SELECT * FROM train order by rand() limit 15\"\n",
"        cur.execute(sql)\n",
"        results = cur.fetchall()\n",
"    finally:\n",
"        cur.close()\n",
"finally:\n",
"    # Release the connection even if the query fails.\n",
"    connect.close()\n",
"# The fetched rows are bound to `results`; printing `result` was a NameError.\n",
"print(results)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"ename": "IndexError",
"evalue": "list index out of range",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[4], line 8\u001b[0m\n\u001b[0;32m 5\u001b[0m htmltext \u001b[38;5;241m=\u001b[39m r\u001b[38;5;241m.\u001b[39mtext\n\u001b[0;32m 6\u001b[0m \u001b[38;5;66;03m# print(htmltext)\u001b[39;00m\n\u001b[1;32m----> 8\u001b[0m html \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<a href=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/provider/63687123/\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(.*?)</section>\u001b[39m\u001b[38;5;124m'\u001b[39m,htmltext,re\u001b[38;5;241m.\u001b[39mS)[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 9\u001b[0m htmlchubanshe \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<div class=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(.*?)</div>\u001b[39m\u001b[38;5;124m'\u001b[39m,html,re\u001b[38;5;241m.\u001b[39mS)\n\u001b[0;32m 10\u001b[0m fh \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mE:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPythondemo\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPython-test\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mPythonLX\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mchubanshe.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mIndexError\u001b[0m: list index out of range"
]
}
],
"source": [
"import requests\n",
"import re\n",
"\n",
"# A timeout keeps the cell from hanging forever if the site does not answer.\n",
"r = requests.get(\"http://read.douban.com/provider/all\", timeout=10)\n",
"htmltext = r.text\n",
"# print(htmltext)\n",
"\n",
"# findall returns [] when the page does not contain this anchor; indexing [0]\n",
"# unconditionally is what raised the IndexError recorded for this cell.\n",
"sections = re.findall(r'<a href=\"/provider/63687123/\"(.*?)</section>',htmltext,re.S)\n",
"if sections:\n",
"    html = sections[0]\n",
"    htmlchubanshe = re.findall(r'<div class=\"name\"(.*?)</div>',html,re.S)\n",
"    # The context manager closes the file even if a write fails.\n",
"    with open(\"E:\\\\Pythondemo\\\\Python-test\\\\PythonLX\\\\chubanshe.txt\",\"w\") as fh:\n",
"        for cbs in htmlchubanshe:\n",
"            print(cbs)\n",
"            fh.write(cbs+\"\\n\")\n",
"else:\n",
"    htmlchubanshe = []\n",
"    print('publisher section not found; HTTP status', r.status_code)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>订单编号</th>\n",
" <th>总金额</th>\n",
" <th>买家实际支付金额</th>\n",
" <th>收货地址</th>\n",
" <th>订单创建时间</th>\n",
" <th>订单付款时间</th>\n",
" <th>退款金额</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.8</td>\n",
" <td>0.0</td>\n",
" <td>上海</td>\n",
" <td>2020-02-21 00:00:00</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>21.0</td>\n",
" <td>21.0</td>\n",
" <td>内蒙古自治区</td>\n",
" <td>2020-02-20 23:59:54</td>\n",
" <td>2020-02-21 00:00:02</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>37.0</td>\n",
" <td>0.0</td>\n",
" <td>安徽省</td>\n",
" <td>2020-02-20 23:59:35</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>157.0</td>\n",
" <td>157.0</td>\n",
" <td>湖南省</td>\n",
" <td>2020-02-20 23:58:34</td>\n",
" <td>2020-02-20 23:58:44</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>64.8</td>\n",
" <td>0.0</td>\n",
" <td>江苏省</td>\n",
" <td>2020-02-20 23:57:04</td>\n",
" <td>2020-02-20 23:57:11</td>\n",
" <td>64.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>327.7</td>\n",
" <td>148.9</td>\n",
" <td>浙江省</td>\n",
" <td>2020-02-20 23:56:39</td>\n",
" <td>2020-02-20 23:56:53</td>\n",
" <td>178.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>357.0</td>\n",
" <td>357.0</td>\n",
" <td>天津</td>\n",
" <td>2020-02-20 23:56:36</td>\n",
" <td>2020-02-20 23:56:40</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>53.0</td>\n",
" <td>53.0</td>\n",
" <td>浙江省</td>\n",
" <td>2020-02-20 23:56:12</td>\n",
" <td>2020-02-20 23:56:16</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>43.0</td>\n",
" <td>0.0</td>\n",
" <td>湖南省</td>\n",
" <td>2020-02-20 23:54:53</td>\n",
" <td>2020-02-20 23:55:04</td>\n",
" <td>43.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>421.0</td>\n",
" <td>421.0</td>\n",
" <td>北京</td>\n",
" <td>2020-02-20 23:54:28</td>\n",
" <td>2020-02-20 23:54:33</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 订单编号 总金额 买家实际支付金额 收货地址 订单创建时间 订单付款时间 \\\n",
"0 1 178.8 0.0 上海 2020-02-21 00:00:00 NaN \n",
"1 2 21.0 21.0 内蒙古自治区 2020-02-20 23:59:54 2020-02-21 00:00:02 \n",
"2 3 37.0 0.0 安徽省 2020-02-20 23:59:35 NaN \n",
"3 4 157.0 157.0 湖南省 2020-02-20 23:58:34 2020-02-20 23:58:44 \n",
"4 5 64.8 0.0 江苏省 2020-02-20 23:57:04 2020-02-20 23:57:11 \n",
"5 6 327.7 148.9 浙江省 2020-02-20 23:56:39 2020-02-20 23:56:53 \n",
"6 7 357.0 357.0 天津 2020-02-20 23:56:36 2020-02-20 23:56:40 \n",
"7 8 53.0 53.0 浙江省 2020-02-20 23:56:12 2020-02-20 23:56:16 \n",
"8 9 43.0 0.0 湖南省 2020-02-20 23:54:53 2020-02-20 23:55:04 \n",
"9 10 421.0 421.0 北京 2020-02-20 23:54:28 2020-02-20 23:54:33 \n",
"\n",
" 退款金额 \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 64.8 \n",
"5 178.8 \n",
"6 0.0 \n",
"7 0.0 \n",
"8 43.0 \n",
"9 0.0 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Some headers in the CSV carry a trailing space ('收货地址 ', '订单付款时间 '),\n",
"# which makes data['订单付款时间'] raise KeyError; strip them once at load time.\n",
"data = pd.read_csv('tmall_order_report.csv').rename(columns=str.strip)\n",
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 28010 entries, 0 to 28009\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 订单编号 28010 non-null int64 \n",
" 1 总金额 28010 non-null float64\n",
" 2 买家实际支付金额 28010 non-null float64\n",
" 3 收货地址 28010 non-null object \n",
" 4 订单创建时间 28010 non-null object \n",
" 5 订单付款时间 24087 non-null object \n",
" 6 退款金额 28010 non-null float64\n",
"dtypes: float64(3), int64(1), object(3)\n",
"memory usage: 1.5+ MB\n"
]
}
],
"source": [
"data.info()\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"ename": "KeyError",
"evalue": "'订单付款时间'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3621\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3620\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:136\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:163\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5198\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5206\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mKeyError\u001b[0m: '订单付款时间'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"Input \u001b[1;32mIn [12]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28msum\u001b[39m(\u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m订单付款时间\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39misnull())\n",
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\frame.py:3505\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 3504\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 3505\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m 3507\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
"File \u001b[1;32mD:\\anconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3623\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m-> 3623\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3624\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3625\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3626\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3627\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3628\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
"\u001b[1;31mKeyError\u001b[0m: '订单付款时间'"
]
}
],
"source": [
"# Number of rows whose payment time is missing.\n",
"int(data['订单付款时间'].isnull().sum())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['订单编号', '总金额', '买家实际支付金额', '收货地址', '订单创建时间', '订单付款时间', '退款金额'], dtype='object')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Normalise the two headers that carry a trailing space.\n",
"header_fixes = {'收货地址 ': '收货地址', '订单付款时间 ':'订单付款时间'}\n",
"data = data.rename(columns=header_fixes)\n",
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"3923"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows whose payment time is missing.\n",
"int(data['订单付款时间'].isnull().sum())"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n",
"0.14005712245626561\n"
]
}
],
"source": [
"unpaid = data['订单付款时间'].isnull()\n",
"# Check whether any order without a payment time still has a positive paid amount.\n",
"# & binds tighter than >, so the comparison needs its own parentheses;\n",
"# .sum() on the mask counts rows (DataFrame.size would count rows * columns).\n",
"print((unpaid & (data['买家实际支付金额'] > 0)).sum())\n",
"# Share of rows with a missing payment time relative to the whole data set.\n",
"print(unpaid.sum() / data.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.duplicated().sum()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>订单编号</th>\n",
" <th>总金额</th>\n",
" <th>买家实际支付金额</th>\n",
" <th>退款金额</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>28010.000000</td>\n",
" <td>28010.000000</td>\n",
" <td>28010.000000</td>\n",
" <td>28010.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>14005.500000</td>\n",
" <td>106.953253</td>\n",
" <td>67.921712</td>\n",
" <td>20.433271</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>8085.934856</td>\n",
" <td>1136.587094</td>\n",
" <td>151.493434</td>\n",
" <td>71.501963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7003.250000</td>\n",
" <td>38.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>14005.500000</td>\n",
" <td>75.000000</td>\n",
" <td>45.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>21007.750000</td>\n",
" <td>119.000000</td>\n",
" <td>101.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>28010.000000</td>\n",
" <td>188320.000000</td>\n",
" <td>16065.000000</td>\n",
" <td>3800.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 订单编号 总金额 买家实际支付金额 退款金额\n",
"count 28010.000000 28010.000000 28010.000000 28010.000000\n",
"mean 14005.500000 106.953253 67.921712 20.433271\n",
"std 8085.934856 1136.587094 151.493434 71.501963\n",
"min 1.000000 1.000000 0.000000 0.000000\n",
"25% 7003.250000 38.000000 0.000000 0.000000\n",
"50% 14005.500000 75.000000 45.000000 0.000000\n",
"75% 21007.750000 119.000000 101.000000 0.000000\n",
"max 28010.000000 188320.000000 16065.000000 3800.000000"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>订单编号</th>\n",
" <th>总金额</th>\n",
" <th>买家实际支付金额</th>\n",
" <th>收货地址</th>\n",
" <th>订单创建时间</th>\n",
" <th>订单付款时间</th>\n",
" <th>退款金额</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>19257</th>\n",
" <td>19258</td>\n",
" <td>188320.0</td>\n",
" <td>0.0</td>\n",
" <td>上海</td>\n",
" <td>2020-02-24 19:35:06</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 订单编号 总金额 买家实际支付金额 收货地址 订单创建时间 订单付款时间 退款金额\n",
"19257 19258 188320.0 0.0 上海 2020-02-24 19:35:06 NaN 0.0"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[data['总金额'] > 175000]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Remove the orders whose 总金额 exceeds the cutoff.\n",
"# NOTE(review): the inspection cell just above filters on 175000 while this\n",
"# cutoff is 17500 -- confirm which value is intended.\n",
"outlier_rows = data['总金额'] > 17500\n",
"data = data.drop(index=data.loc[outlier_rows].index)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>订单编号</th>\n",
" <th>总金额</th>\n",
" <th>买家实际支付金额</th>\n",
" <th>退款金额</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>28009.000000</td>\n",
" <td>28009.000000</td>\n",
" <td>28009.000000</td>\n",
" <td>28009.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>14005.312471</td>\n",
" <td>100.233518</td>\n",
" <td>67.924137</td>\n",
" <td>20.434000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>8086.018294</td>\n",
" <td>164.451538</td>\n",
" <td>151.495595</td>\n",
" <td>71.503135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7003.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>14005.000000</td>\n",
" <td>75.000000</td>\n",
" <td>45.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>21008.000000</td>\n",
" <td>119.000000</td>\n",
" <td>101.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>28010.000000</td>\n",
" <td>16065.000000</td>\n",
" <td>16065.000000</td>\n",
" <td>3800.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 订单编号 总金额 买家实际支付金额 退款金额\n",
"count 28009.000000 28009.000000 28009.000000 28009.000000\n",
"mean 14005.312471 100.233518 67.924137 20.434000\n",
"std 8086.018294 164.451538 151.495595 71.503135\n",
"min 1.000000 1.000000 0.000000 0.000000\n",
"25% 7003.000000 38.000000 0.000000 0.000000\n",
"50% 14005.000000 75.000000 45.000000 0.000000\n",
"75% 21008.000000 119.000000 101.000000 0.000000\n",
"max 28010.000000 16065.000000 16065.000000 3800.000000"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading…
Cancel
Save