{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_866/224548020.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n", " user=pd.read_csv('../../data/sample/users.csv',sep='\\t',encoding='utf-8')\n" ] } ], "source": [ "# Load the tab-separated user table.\n", "# low_memory=False makes pandas infer each column's dtype from the whole file in\n", "# one pass; the default chunked parsing raised a DtypeWarning (mixed types in column 7).\n", "user = pd.read_csv('../../data/sample/users.csv', sep='\\t', encoding='utf-8', low_memory=False)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iduser_namegenderschool_idschool_namelocationlocation_cityoccupationidentitytechnical_titleedu_backgroundedu_entry_yeargidloginsgradeexperiencelast_login_on
01实践教学0.038304.0头歌教研中心湖南长沙国防科学技术大学0.0教授8.02021.00.0636164425084143682021-11-08 11:20:37
15尹刚0.0117.0国防科技大学湖南长沙国防科学技术大学2.0部门管理者8.02001.00.05617111904111202022-04-15 09:10:10
26王林春0.01618.0湖南工业职业技术学院湖南长沙NaN0.0讲师1.02021.00.0342871915743002022-04-09 10:17:54
37王老师0.0117.0国防科技大学湖南长沙国防科学技术大学0.0教授1.02022.00.011882210002022-04-15 09:26:34
410余跃0.0117.0国防科技大学湖南长沙国防科学技术大学0.0讲师0.00.00.023012001502022-04-09 16:05:54
\n", "
" ], "text/plain": [ " user_id user_name gender school_id school_name location location_city \\\n", "0 1 实践教学 0.0 38304.0 头歌教研中心 湖南 长沙 \n", "1 5 尹刚 0.0 117.0 国防科技大学 湖南 长沙 \n", "2 6 王林春 0.0 1618.0 湖南工业职业技术学院 湖南 长沙 \n", "3 7 王老师 0.0 117.0 国防科技大学 湖南 长沙 \n", "4 10 余跃 0.0 117.0 国防科技大学 湖南 长沙 \n", "\n", " occupation identity technical_title edu_background edu_entry_year gid \\\n", "0 国防科学技术大学 0.0 教授 8.0 2021.0 0.0 \n", "1 国防科学技术大学 2.0 部门管理者 8.0 2001.0 0.0 \n", "2 NaN 0.0 讲师 1.0 2021.0 0.0 \n", "3 国防科学技术大学 0.0 教授 1.0 2022.0 0.0 \n", "4 国防科学技术大学 0.0 讲师 0.0 0.0 0.0 \n", "\n", " logins grade experience last_login_on \n", "0 63616 442508 414368 2021-11-08 11:20:37 \n", "1 56171 11904 11120 2022-04-15 09:10:10 \n", "2 3428 71915 74300 2022-04-09 10:17:54 \n", "3 11882 2100 0 2022-04-15 09:26:34 \n", "4 2301 200 150 2022-04-09 16:05:54 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Rename the 'visits' column to 'logins'.\n", "# DataFrame.rename(..., inplace=True) returns None, so assign the returned frame\n", "# instead of combining inplace=True with an assignment. Keys that are absent are\n", "# ignored by rename, so re-running this cell is harmless.\n", "user = user.rename(columns={'visits': 'logins'})\n", "users_l = user  # same renamed frame, under the second name this cell defines" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iduser_namegenderschool_idschool_namelocationlocation_cityoccupationidentitytechnical_titleedu_backgroundedu_entry_yeargidloginsgradeexperiencelast_login_on
01实践教学0.038304.0头歌教研中心湖南长沙国防科学技术大学0.0教授8.02021.00.0636164425084143682021-11-08 11:20:37
15尹刚0.0117.0国防科技大学湖南长沙国防科学技术大学2.0部门管理者8.02001.00.05617111904111202022-04-15 09:10:10
26王林春0.01618.0湖南工业职业技术学院湖南长沙NaN0.0讲师1.02021.00.0342871915743002022-04-09 10:17:54
37王老师0.0117.0国防科技大学湖南长沙国防科学技术大学0.0教授1.02022.00.011882210002022-04-15 09:26:34
410余跃0.0117.0国防科技大学湖南长沙国防科学技术大学0.0讲师0.00.00.023012001502022-04-09 16:05:54
......................................................
474964826943李波老师2220.01581.0湖南科技大学湖南长沙NaN0.0讲师1.02023.00.0050002023-04-24 10:28:08
474965826944DWT1.0573.0吉林农业大学吉林长春NaN1.0NaN6.02020.01.005002023-04-19 15:04:24
474966826945沈晓绿-1.0117.0国防科技大学NaNNaNNaN1.0NaN0.00.0-1.0050002023-04-19 16:54:15
474967826946王子豪-1.03364.0湖南智擎科技有限公司NaNNaNNaN2.0工程师0.00.0-1.00002023-04-23 10:19:01
474968826947周平0.0117.0国防科技大学北京朝阳NaN1.0NaN6.02019.00.005002023-04-24 14:14:28
\n", "

474969 rows × 17 columns

\n", "
" ], "text/plain": [ " user_id user_name gender school_id school_name location \\\n", "0 1 实践教学 0.0 38304.0 头歌教研中心 湖南 \n", "1 5 尹刚 0.0 117.0 国防科技大学 湖南 \n", "2 6 王林春 0.0 1618.0 湖南工业职业技术学院 湖南 \n", "3 7 王老师 0.0 117.0 国防科技大学 湖南 \n", "4 10 余跃 0.0 117.0 国防科技大学 湖南 \n", "... ... ... ... ... ... ... \n", "474964 826943 李波老师222 0.0 1581.0 湖南科技大学 湖南 \n", "474965 826944 DWT 1.0 573.0 吉林农业大学 吉林 \n", "474966 826945 沈晓绿 -1.0 117.0 国防科技大学 NaN \n", "474967 826946 王子豪 -1.0 3364.0 湖南智擎科技有限公司 NaN \n", "474968 826947 周平 0.0 117.0 国防科技大学 北京 \n", "\n", " location_city occupation identity technical_title edu_background \\\n", "0 长沙 国防科学技术大学 0.0 教授 8.0 \n", "1 长沙 国防科学技术大学 2.0 部门管理者 8.0 \n", "2 长沙 NaN 0.0 讲师 1.0 \n", "3 长沙 国防科学技术大学 0.0 教授 1.0 \n", "4 长沙 国防科学技术大学 0.0 讲师 0.0 \n", "... ... ... ... ... ... \n", "474964 长沙 NaN 0.0 讲师 1.0 \n", "474965 长春 NaN 1.0 NaN 6.0 \n", "474966 NaN NaN 1.0 NaN 0.0 \n", "474967 NaN NaN 2.0 工程师 0.0 \n", "474968 朝阳 NaN 1.0 NaN 6.0 \n", "\n", " edu_entry_year gid logins grade experience last_login_on \n", "0 2021.0 0.0 63616 442508 414368 2021-11-08 11:20:37 \n", "1 2001.0 0.0 56171 11904 11120 2022-04-15 09:10:10 \n", "2 2021.0 0.0 3428 71915 74300 2022-04-09 10:17:54 \n", "3 2022.0 0.0 11882 2100 0 2022-04-15 09:26:34 \n", "4 0.0 0.0 2301 200 150 2022-04-09 16:05:54 \n", "... ... ... ... ... ... ... 
\n", "474964 2023.0 0.0 0 500 0 2023-04-24 10:28:08 \n", "474965 2020.0 1.0 0 50 0 2023-04-19 15:04:24 \n", "474966 0.0 -1.0 0 500 0 2023-04-19 16:54:15 \n", "474967 0.0 -1.0 0 0 0 2023-04-23 10:19:01 \n", "474968 2019.0 0.0 0 50 0 2023-04-24 14:14:28 \n", "\n", "[474969 rows x 17 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Write the frame back as tab-separated text with a header row and no index column.\n", "# NOTE: this targets the users.csv path under data/sample and overwrites that file.\n", "user.to_csv('../../data/sample/users.csv', sep='\\t', index=False, header=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "False\n" ] } ], "source": [ "# True if any value in 'experience' is missing.\n", "print(user['experience'].isna().any())" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Load the tab-separated shixun table (file name spelled 'merage' on disk).\n", "shixun = pd.read_csv('../../data/sample/shixun_merage_emb.csv', sep='\\t', encoding='utf-8')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
shixun_idvisitschallenges_countaverge_startask_passemb_0emb_1emb_2emb_3emb_4...emb_90emb_91emb_92emb_93emb_94emb_95emb_96emb_97emb_98emb_99
043125544.810.178059-0.2236610.4317240.222664-0.051939...1.2774930.0333920.0854300.3348100.1944890.4057300.223120-0.1629220.0205980.260939
149620834.800.029035-0.2330390.2115460.176656-0.227015...0.6886040.2572120.0234060.4628710.3644230.5557120.0186910.159427-0.310509-0.173528
250252764.80-0.0986170.150550-0.0533760.1371880.076380...0.2267770.3342040.3470670.3516060.1600540.212226-0.0422320.027391-0.1639520.083248
3511265134.900.1072600.0249690.0019650.326085-0.264945...0.8834070.031283-0.0685100.4832770.8567000.6854570.1322830.315131-0.675356-0.090122
453404344.900.143967-0.3258370.0681790.0157970.146450...0.6740010.2060880.1409780.5334510.2630600.296306-0.1136650.418402-0.3674880.065241
\n", "

5 rows × 105 columns

\n", "
" ], "text/plain": [ " shixun_id visits challenges_count averge_star task_pass emb_0 \\\n", "0 43 1255 4 4.8 1 0.178059 \n", "1 49 6208 3 4.8 0 0.029035 \n", "2 50 2527 6 4.8 0 -0.098617 \n", "3 51 12651 3 4.9 0 0.107260 \n", "4 53 4043 4 4.9 0 0.143967 \n", "\n", " emb_1 emb_2 emb_3 emb_4 ... emb_90 emb_91 emb_92 \\\n", "0 -0.223661 0.431724 0.222664 -0.051939 ... 1.277493 0.033392 0.085430 \n", "1 -0.233039 0.211546 0.176656 -0.227015 ... 0.688604 0.257212 0.023406 \n", "2 0.150550 -0.053376 0.137188 0.076380 ... 0.226777 0.334204 0.347067 \n", "3 0.024969 0.001965 0.326085 -0.264945 ... 0.883407 0.031283 -0.068510 \n", "4 -0.325837 0.068179 0.015797 0.146450 ... 0.674001 0.206088 0.140978 \n", "\n", " emb_93 emb_94 emb_95 emb_96 emb_97 emb_98 emb_99 \n", "0 0.334810 0.194489 0.405730 0.223120 -0.162922 0.020598 0.260939 \n", "1 0.462871 0.364423 0.555712 0.018691 0.159427 -0.310509 -0.173528 \n", "2 0.351606 0.160054 0.212226 -0.042232 0.027391 -0.163952 0.083248 \n", "3 0.483277 0.856700 0.685457 0.132283 0.315131 -0.675356 -0.090122 \n", "4 0.533451 0.263060 0.296306 -0.113665 0.418402 -0.367488 0.065241 \n", "\n", "[5 rows x 105 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "shixun.head()\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "False\n" ] } ], "source": [ "# True if any value in 'task_pass' is missing.\n", "print(shixun['task_pass'].isna().any())" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# First toy frame for the merge demo: four rows, columns a/b/c/d.\n", "list1 = [\n", "    [5, 2, 0, 4],\n", "    [2, 3, 4, 5],\n", "    [3, 6, 1, 9],\n", "    [4, 1, 0, 8],\n", "]\n", "name = ['a', 'b', 'c', 'd']\n", "df1 = pd.DataFrame(data=list1, columns=name)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Second toy frame: shares columns 'a' and 'd' with df1 (list1/name are rebound here).\n", "list1 = [\n", "    [5, 2, 0, 4],\n", "    [1, 1, 1, 5],\n", "    [1, 1, 1, 9],\n", "    [4, 1, 0, 8],\n", "]\n", 
"name = ['a', 'e', 'f', 'd']\n", "df2 = pd.DataFrame(data=list1, columns=name)\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd_xefd_y
05204204
14108108
\n", "
" ], "text/plain": [ " a b c d_x e f d_y\n", "0 5 2 0 4 2 0 4\n", "1 4 1 0 8 1 0 8" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join the two toy frames on key column 'a' (merge defaults to an inner join).\n", "# The non-key column 'd' exists in both inputs, so it comes back as d_x / d_y.\n", "df = df1.merge(df2, on=['a'])\n", "df" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the tab-separated subject table.\n", "subject = pd.read_csv('../../data/sample/subjects.csv', sep='\\t', encoding='utf-8')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
disciplines_iddisciplines_namesub_discipline_idsub_discipline_namesubject_idsubject_namevisitsstatuscreated_atupdated_at...initiative_passed_countinitiative_challenge_countinitiative_evaluate_countvideo_study_timeinitiative_video_study_timestudy_pdf_attachment_countinitiative_study_pdf_attachment_counttag_namesaverge_starcreated_at_ts
01程序设计语言2.0Java程序设计1C++程序设计206422017-07-17 11:08:112021-12-28 16:56:15...NaNNaNNaNNaNNaN0.0NaN运算符与表达式 循环 标识符、关键字 数据类型 数组 集合类 异常 多态 语法 时间 网络 ...0.01.500290e+09
\n", "

1 rows × 36 columns

\n", "
" ], "text/plain": [ " disciplines_id disciplines_name sub_discipline_id sub_discipline_name \\\n", "0 1 程序设计语言 2.0 Java程序设计 \n", "\n", " subject_id subject_name visits status created_at \\\n", "0 1 C++程序设计 2064 2 2017-07-17 11:08:11 \n", "\n", " updated_at ... initiative_passed_count \\\n", "0 2021-12-28 16:56:15 ... NaN \n", "\n", " initiative_challenge_count initiative_evaluate_count video_study_time \\\n", "0 NaN NaN NaN \n", "\n", " initiative_video_study_time study_pdf_attachment_count \\\n", "0 NaN 0.0 \n", "\n", " initiative_study_pdf_attachment_count \\\n", "0 NaN \n", "\n", " tag_names averge_star \\\n", "0 运算符与表达式 循环 标识符、关键字 数据类型 数组 集合类 异常 多态 语法 时间 网络 ... 0.0 \n", "\n", " created_at_ts \n", "0 1.500290e+09 \n", "\n", "[1 rows x 36 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "subject.head(1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1605 entries, 0 to 1604\n", "Data columns (total 36 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 disciplines_id 1605 non-null int64 \n", " 1 disciplines_name 431 non-null object \n", " 2 sub_discipline_id 431 non-null float64\n", " 3 sub_discipline_name 431 non-null object \n", " 4 subject_id 1605 non-null int64 \n", " 5 subject_name 1605 non-null object \n", " 6 visits 1605 non-null int64 \n", " 7 status 1605 non-null int64 \n", " 8 created_at 1605 non-null object \n", " 9 updated_at 1605 non-null object \n", " 10 stages_count 1605 non-null int64 \n", " 11 stage_shixuns_count 1605 non-null int64 \n", " 12 publish_time 1030 non-null object \n", " 13 homepage_show 1605 non-null int64 \n", " 14 repertoire_id 467 non-null float64\n", " 15 score_count 93 non-null float64\n", " 16 shixuns_count 1605 non-null int64 \n", " 17 study_count 1605 non-null float64\n", " 18 course_study_count 1605 non-null float64\n", " 19 
initiative_study 768 non-null float64\n", " 20 passed_count 1605 non-null float64\n", " 21 course_used_count 768 non-null float64\n", " 22 school_used_count 768 non-null float64\n", " 23 challenge_count 1605 non-null float64\n", " 24 evaluate_count 1605 non-null float64\n", " 25 initiative_school_used_count 768 non-null float64\n", " 26 initiative_passed_count 768 non-null float64\n", " 27 initiative_challenge_count 768 non-null float64\n", " 28 initiative_evaluate_count 768 non-null float64\n", " 29 video_study_time 768 non-null float64\n", " 30 initiative_video_study_time 768 non-null float64\n", " 31 study_pdf_attachment_count 1605 non-null float64\n", " 32 initiative_study_pdf_attachment_count 768 non-null float64\n", " 33 tag_names 431 non-null object \n", " 34 averge_star 1605 non-null float64\n", " 35 created_at_ts 1605 non-null float64\n", "dtypes: float64(21), int64(8), object(7)\n", "memory usage: 451.5+ KB\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_8261/2736341527.py:1: FutureWarning: null_counts is deprecated. 
Use show_counts instead\n", " subject.info(verbose=True, null_counts=True)\n" ] } ], "source": [ "# Per-column non-null counts and dtypes.\n", "# show_counts is the replacement for the deprecated null_counts keyword.\n", "subject.info(verbose=True, show_counts=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Columns whose missing values are replaced with 0.\n", "# ('averge_star' is the column's actual spelling in the data.)\n", "zero_fill_cols = [\n", "    \"study_count\",\n", "    \"course_study_count\",\n", "    \"passed_count\",\n", "    \"challenge_count\",\n", "    \"evaluate_count\",\n", "    \"study_pdf_attachment_count\",\n", "    \"averge_star\",\n", "]\n", "subject[zero_fill_cols] = subject[zero_fill_cols].fillna(value=0)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1605 entries, 0 to 1604\n", "Data columns (total 36 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 disciplines_id 1605 non-null int64 \n", " 1 disciplines_name 431 non-null object \n", " 2 sub_discipline_id 431 non-null float64\n", " 3 sub_discipline_name 431 non-null object \n", " 4 subject_id 1605 non-null int64 \n", " 5 subject_name 1605 non-null object \n", " 6 visits 1605 non-null int64 \n", " 7 status 1605 non-null int64 \n", " 8 created_at 1605 non-null object \n", " 9 updated_at 1605 non-null object \n", " 10 stages_count 1605 non-null int64 \n", " 11 stage_shixuns_count 1605 non-null int64 \n", " 12 publish_time 1030 non-null object \n", " 13 homepage_show 1605 non-null int64 \n", " 14 repertoire_id 467 non-null float64\n", " 15 score_count 93 non-null float64\n", " 16 shixuns_count 1605 non-null int64 \n", " 17 study_count 1605 non-null float64\n", " 18 course_study_count 1605 non-null float64\n", " 19 initiative_study 768 non-null float64\n", " 20 passed_count 1605 non-null 
float64\n", " 21 course_used_count 768 non-null float64\n", " 22 school_used_count 768 non-null float64\n", " 23 challenge_count 1605 non-null float64\n", " 24 evaluate_count 1605 non-null float64\n", " 25 initiative_school_used_count 768 non-null float64\n", " 26 initiative_passed_count 768 non-null float64\n", " 27 initiative_challenge_count 768 non-null float64\n", " 28 initiative_evaluate_count 768 non-null float64\n", " 29 video_study_time 768 non-null float64\n", " 30 initiative_video_study_time 768 non-null float64\n", " 31 study_pdf_attachment_count 1605 non-null float64\n", " 32 initiative_study_pdf_attachment_count 768 non-null float64\n", " 33 tag_names 431 non-null object \n", " 34 averge_star 1605 non-null float64\n", " 35 created_at_ts 1605 non-null float64\n", "dtypes: float64(21), int64(8), object(7)\n", "memory usage: 451.5+ KB\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_8261/2736341527.py:1: FutureWarning: null_counts is deprecated. Use show_counts instead\n", " subject.info(verbose=True, null_counts=True)\n" ] } ], "source": [ "# Per-column non-null counts and dtypes.\n", "# show_counts is the replacement for the deprecated null_counts keyword.\n", "subject.info(verbose=True, show_counts=True)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Write the frame back as tab-separated text with a header row and no index column.\n", "# NOTE: this targets the subjects.csv path under data/sample and overwrites that file.\n", "subject.to_csv('../../data/sample/subjects.csv',sep='\\t', index=False, header=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "mooc", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }