{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_15100/224548020.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" user=pd.read_csv('../../data/sample/users.csv',sep='\\t',encoding='utf-8')\n"
]
}
],
"source": [
"user=pd.read_csv('../../data/sample/users.csv',sep='\\t',encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" user_name | \n",
" gender | \n",
" school_id | \n",
" school_name | \n",
" location | \n",
" location_city | \n",
" occupation | \n",
" identity | \n",
" technical_title | \n",
" edu_background | \n",
" edu_entry_year | \n",
" gid | \n",
" logins | \n",
" grade | \n",
" experience | \n",
" last_login_on | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 实践教学 | \n",
" 0.0 | \n",
" 38304.0 | \n",
" 头歌教研中心 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 0.0 | \n",
" 教授 | \n",
" 8.0 | \n",
" 2021.0 | \n",
" 0.0 | \n",
" 63616 | \n",
" 442508 | \n",
" 414368 | \n",
" 2021-11-08 11:20:37 | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 尹刚 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 2.0 | \n",
" 部门管理者 | \n",
" 8.0 | \n",
" 2001.0 | \n",
" 0.0 | \n",
" 56171 | \n",
" 11904 | \n",
" 11120 | \n",
" 2022-04-15 09:10:10 | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" 王林春 | \n",
" 0.0 | \n",
" 1618.0 | \n",
" 湖南工业职业技术学院 | \n",
" 湖南 | \n",
" 长沙 | \n",
" NaN | \n",
" 0.0 | \n",
" 讲师 | \n",
" 1.0 | \n",
" 2021.0 | \n",
" 0.0 | \n",
" 3428 | \n",
" 71915 | \n",
" 74300 | \n",
" 2022-04-09 10:17:54 | \n",
"
\n",
" \n",
" 3 | \n",
" 7 | \n",
" 王老师 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 0.0 | \n",
" 教授 | \n",
" 1.0 | \n",
" 2022.0 | \n",
" 0.0 | \n",
" 11882 | \n",
" 2100 | \n",
" 0 | \n",
" 2022-04-15 09:26:34 | \n",
"
\n",
" \n",
" 4 | \n",
" 10 | \n",
" 余跃 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 0.0 | \n",
" 讲师 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2301 | \n",
" 200 | \n",
" 150 | \n",
" 2022-04-09 16:05:54 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id user_name gender school_id school_name location location_city \\\n",
"0 1 实践教学 0.0 38304.0 头歌教研中心 湖南 长沙 \n",
"1 5 尹刚 0.0 117.0 国防科技大学 湖南 长沙 \n",
"2 6 王林春 0.0 1618.0 湖南工业职业技术学院 湖南 长沙 \n",
"3 7 王老师 0.0 117.0 国防科技大学 湖南 长沙 \n",
"4 10 余跃 0.0 117.0 国防科技大学 湖南 长沙 \n",
"\n",
" occupation identity technical_title edu_background edu_entry_year gid \\\n",
"0 国防科学技术大学 0.0 教授 8.0 2021.0 0.0 \n",
"1 国防科学技术大学 2.0 部门管理者 8.0 2001.0 0.0 \n",
"2 NaN 0.0 讲师 1.0 2021.0 0.0 \n",
"3 国防科学技术大学 0.0 教授 1.0 2022.0 0.0 \n",
"4 国防科学技术大学 0.0 讲师 0.0 0.0 0.0 \n",
"\n",
" logins grade experience last_login_on \n",
"0 63616 442508 414368 2021-11-08 11:20:37 \n",
"1 56171 11904 11120 2022-04-15 09:10:10 \n",
"2 3428 71915 74300 2022-04-09 10:17:54 \n",
"3 11882 2100 0 2022-04-15 09:26:34 \n",
"4 2301 200 150 2022-04-09 16:05:54 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"users_l=user.rename(columns={'visits': 'logins'}, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" user_name | \n",
" gender | \n",
" school_id | \n",
" school_name | \n",
" location | \n",
" location_city | \n",
" occupation | \n",
" identity | \n",
" technical_title | \n",
" edu_background | \n",
" edu_entry_year | \n",
" gid | \n",
" logins | \n",
" grade | \n",
" experience | \n",
" last_login_on | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 实践教学 | \n",
" 0.0 | \n",
" 38304.0 | \n",
" 头歌教研中心 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 0.0 | \n",
" 教授 | \n",
" 8.0 | \n",
" 2021.0 | \n",
" 0.0 | \n",
" 63616 | \n",
" 442508 | \n",
" 414368 | \n",
" 2021-11-08 11:20:37 | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 尹刚 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 2.0 | \n",
" 部门管理者 | \n",
" 8.0 | \n",
" 2001.0 | \n",
" 0.0 | \n",
" 56171 | \n",
" 11904 | \n",
" 11120 | \n",
" 2022-04-15 09:10:10 | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" 王林春 | \n",
" 0.0 | \n",
" 1618.0 | \n",
" 湖南工业职业技术学院 | \n",
" 湖南 | \n",
" 长沙 | \n",
" NaN | \n",
" 0.0 | \n",
" 讲师 | \n",
" 1.0 | \n",
" 2021.0 | \n",
" 0.0 | \n",
" 3428 | \n",
" 71915 | \n",
" 74300 | \n",
" 2022-04-09 10:17:54 | \n",
"
\n",
" \n",
" 3 | \n",
" 7 | \n",
" 王老师 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 0.0 | \n",
" 教授 | \n",
" 1.0 | \n",
" 2022.0 | \n",
" 0.0 | \n",
" 11882 | \n",
" 2100 | \n",
" 0 | \n",
" 2022-04-15 09:26:34 | \n",
"
\n",
" \n",
" 4 | \n",
" 10 | \n",
" 余跃 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" 国防科学技术大学 | \n",
" 0.0 | \n",
" 讲师 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2301 | \n",
" 200 | \n",
" 150 | \n",
" 2022-04-09 16:05:54 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 474964 | \n",
" 826943 | \n",
" 李波老师222 | \n",
" 0.0 | \n",
" 1581.0 | \n",
" 湖南科技大学 | \n",
" 湖南 | \n",
" 长沙 | \n",
" NaN | \n",
" 0.0 | \n",
" 讲师 | \n",
" 1.0 | \n",
" 2023.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 500 | \n",
" 0 | \n",
" 2023-04-24 10:28:08 | \n",
"
\n",
" \n",
" 474965 | \n",
" 826944 | \n",
" DWT | \n",
" 1.0 | \n",
" 573.0 | \n",
" 吉林农业大学 | \n",
" 吉林 | \n",
" 长春 | \n",
" NaN | \n",
" 1.0 | \n",
" NaN | \n",
" 6.0 | \n",
" 2020.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 50 | \n",
" 0 | \n",
" 2023-04-19 15:04:24 | \n",
"
\n",
" \n",
" 474966 | \n",
" 826945 | \n",
" 沈晓绿 | \n",
" -1.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1.0 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.0 | \n",
" -1.0 | \n",
" 0 | \n",
" 500 | \n",
" 0 | \n",
" 2023-04-19 16:54:15 | \n",
"
\n",
" \n",
" 474967 | \n",
" 826946 | \n",
" 王子豪 | \n",
" -1.0 | \n",
" 3364.0 | \n",
" 湖南智擎科技有限公司 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2.0 | \n",
" 工程师 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -1.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 2023-04-23 10:19:01 | \n",
"
\n",
" \n",
" 474968 | \n",
" 826947 | \n",
" 周平 | \n",
" 0.0 | \n",
" 117.0 | \n",
" 国防科技大学 | \n",
" 北京 | \n",
" 朝阳 | \n",
" NaN | \n",
" 1.0 | \n",
" NaN | \n",
" 6.0 | \n",
" 2019.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 50 | \n",
" 0 | \n",
" 2023-04-24 14:14:28 | \n",
"
\n",
" \n",
"
\n",
"
474969 rows × 17 columns
\n",
"
"
],
"text/plain": [
" user_id user_name gender school_id school_name location \\\n",
"0 1 实践教学 0.0 38304.0 头歌教研中心 湖南 \n",
"1 5 尹刚 0.0 117.0 国防科技大学 湖南 \n",
"2 6 王林春 0.0 1618.0 湖南工业职业技术学院 湖南 \n",
"3 7 王老师 0.0 117.0 国防科技大学 湖南 \n",
"4 10 余跃 0.0 117.0 国防科技大学 湖南 \n",
"... ... ... ... ... ... ... \n",
"474964 826943 李波老师222 0.0 1581.0 湖南科技大学 湖南 \n",
"474965 826944 DWT 1.0 573.0 吉林农业大学 吉林 \n",
"474966 826945 沈晓绿 -1.0 117.0 国防科技大学 NaN \n",
"474967 826946 王子豪 -1.0 3364.0 湖南智擎科技有限公司 NaN \n",
"474968 826947 周平 0.0 117.0 国防科技大学 北京 \n",
"\n",
" location_city occupation identity technical_title edu_background \\\n",
"0 长沙 国防科学技术大学 0.0 教授 8.0 \n",
"1 长沙 国防科学技术大学 2.0 部门管理者 8.0 \n",
"2 长沙 NaN 0.0 讲师 1.0 \n",
"3 长沙 国防科学技术大学 0.0 教授 1.0 \n",
"4 长沙 国防科学技术大学 0.0 讲师 0.0 \n",
"... ... ... ... ... ... \n",
"474964 长沙 NaN 0.0 讲师 1.0 \n",
"474965 长春 NaN 1.0 NaN 6.0 \n",
"474966 NaN NaN 1.0 NaN 0.0 \n",
"474967 NaN NaN 2.0 工程师 0.0 \n",
"474968 朝阳 NaN 1.0 NaN 6.0 \n",
"\n",
" edu_entry_year gid logins grade experience last_login_on \n",
"0 2021.0 0.0 63616 442508 414368 2021-11-08 11:20:37 \n",
"1 2001.0 0.0 56171 11904 11120 2022-04-15 09:10:10 \n",
"2 2021.0 0.0 3428 71915 74300 2022-04-09 10:17:54 \n",
"3 2022.0 0.0 11882 2100 0 2022-04-15 09:26:34 \n",
"4 0.0 0.0 2301 200 150 2022-04-09 16:05:54 \n",
"... ... ... ... ... ... ... \n",
"474964 2023.0 0.0 0 500 0 2023-04-24 10:28:08 \n",
"474965 2020.0 1.0 0 50 0 2023-04-19 15:04:24 \n",
"474966 0.0 -1.0 0 500 0 2023-04-19 16:54:15 \n",
"474967 0.0 -1.0 0 0 0 2023-04-23 10:19:01 \n",
"474968 2019.0 0.0 0 50 0 2023-04-24 14:14:28 \n",
"\n",
"[474969 rows x 17 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"user.to_csv('../../data/sample/users.csv',sep='\\t', index=False, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n"
]
}
],
"source": [
"print(pd.isnull(user['experience'].values).any())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"shixun=pd.read_csv('../../data/sample/shixun_merage_emb.csv',sep='\\t',encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" shixun_id | \n",
" visits | \n",
" challenges_count | \n",
" averge_star | \n",
" task_pass | \n",
" emb_0 | \n",
" emb_1 | \n",
" emb_2 | \n",
" emb_3 | \n",
" emb_4 | \n",
" ... | \n",
" emb_90 | \n",
" emb_91 | \n",
" emb_92 | \n",
" emb_93 | \n",
" emb_94 | \n",
" emb_95 | \n",
" emb_96 | \n",
" emb_97 | \n",
" emb_98 | \n",
" emb_99 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 43 | \n",
" 1255 | \n",
" 4 | \n",
" 4.8 | \n",
" 1 | \n",
" 0.178059 | \n",
" -0.223661 | \n",
" 0.431724 | \n",
" 0.222664 | \n",
" -0.051939 | \n",
" ... | \n",
" 1.277493 | \n",
" 0.033392 | \n",
" 0.085430 | \n",
" 0.334810 | \n",
" 0.194489 | \n",
" 0.405730 | \n",
" 0.223120 | \n",
" -0.162922 | \n",
" 0.020598 | \n",
" 0.260939 | \n",
"
\n",
" \n",
" 1 | \n",
" 49 | \n",
" 6208 | \n",
" 3 | \n",
" 4.8 | \n",
" 0 | \n",
" 0.029035 | \n",
" -0.233039 | \n",
" 0.211546 | \n",
" 0.176656 | \n",
" -0.227015 | \n",
" ... | \n",
" 0.688604 | \n",
" 0.257212 | \n",
" 0.023406 | \n",
" 0.462871 | \n",
" 0.364423 | \n",
" 0.555712 | \n",
" 0.018691 | \n",
" 0.159427 | \n",
" -0.310509 | \n",
" -0.173528 | \n",
"
\n",
" \n",
" 2 | \n",
" 50 | \n",
" 2527 | \n",
" 6 | \n",
" 4.8 | \n",
" 0 | \n",
" -0.098617 | \n",
" 0.150550 | \n",
" -0.053376 | \n",
" 0.137188 | \n",
" 0.076380 | \n",
" ... | \n",
" 0.226777 | \n",
" 0.334204 | \n",
" 0.347067 | \n",
" 0.351606 | \n",
" 0.160054 | \n",
" 0.212226 | \n",
" -0.042232 | \n",
" 0.027391 | \n",
" -0.163952 | \n",
" 0.083248 | \n",
"
\n",
" \n",
" 3 | \n",
" 51 | \n",
" 12651 | \n",
" 3 | \n",
" 4.9 | \n",
" 0 | \n",
" 0.107260 | \n",
" 0.024969 | \n",
" 0.001965 | \n",
" 0.326085 | \n",
" -0.264945 | \n",
" ... | \n",
" 0.883407 | \n",
" 0.031283 | \n",
" -0.068510 | \n",
" 0.483277 | \n",
" 0.856700 | \n",
" 0.685457 | \n",
" 0.132283 | \n",
" 0.315131 | \n",
" -0.675356 | \n",
" -0.090122 | \n",
"
\n",
" \n",
" 4 | \n",
" 53 | \n",
" 4043 | \n",
" 4 | \n",
" 4.9 | \n",
" 0 | \n",
" 0.143967 | \n",
" -0.325837 | \n",
" 0.068179 | \n",
" 0.015797 | \n",
" 0.146450 | \n",
" ... | \n",
" 0.674001 | \n",
" 0.206088 | \n",
" 0.140978 | \n",
" 0.533451 | \n",
" 0.263060 | \n",
" 0.296306 | \n",
" -0.113665 | \n",
" 0.418402 | \n",
" -0.367488 | \n",
" 0.065241 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 105 columns
\n",
"
"
],
"text/plain": [
" shixun_id visits challenges_count averge_star task_pass emb_0 \\\n",
"0 43 1255 4 4.8 1 0.178059 \n",
"1 49 6208 3 4.8 0 0.029035 \n",
"2 50 2527 6 4.8 0 -0.098617 \n",
"3 51 12651 3 4.9 0 0.107260 \n",
"4 53 4043 4 4.9 0 0.143967 \n",
"\n",
" emb_1 emb_2 emb_3 emb_4 ... emb_90 emb_91 emb_92 \\\n",
"0 -0.223661 0.431724 0.222664 -0.051939 ... 1.277493 0.033392 0.085430 \n",
"1 -0.233039 0.211546 0.176656 -0.227015 ... 0.688604 0.257212 0.023406 \n",
"2 0.150550 -0.053376 0.137188 0.076380 ... 0.226777 0.334204 0.347067 \n",
"3 0.024969 0.001965 0.326085 -0.264945 ... 0.883407 0.031283 -0.068510 \n",
"4 -0.325837 0.068179 0.015797 0.146450 ... 0.674001 0.206088 0.140978 \n",
"\n",
" emb_93 emb_94 emb_95 emb_96 emb_97 emb_98 emb_99 \n",
"0 0.334810 0.194489 0.405730 0.223120 -0.162922 0.020598 0.260939 \n",
"1 0.462871 0.364423 0.555712 0.018691 0.159427 -0.310509 -0.173528 \n",
"2 0.351606 0.160054 0.212226 -0.042232 0.027391 -0.163952 0.083248 \n",
"3 0.483277 0.856700 0.685457 0.132283 0.315131 -0.675356 -0.090122 \n",
"4 0.533451 0.263060 0.296306 -0.113665 0.418402 -0.367488 0.065241 \n",
"\n",
"[5 rows x 105 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"shixun.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n"
]
}
],
"source": [
"print(pd.isnull(shixun['task_pass'].values).any())"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"list1 = [[5,2,0,4],[2,3,4,5],[3,6,1,9],[4,1,0,8]]\n",
"name=['a','b','c','d']\n",
"df1=pd.DataFrame(list1,columns=name)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"list1 = [[5,2,0,4],[1,1,1,5],[1,1,1,9],[4,1,0,8]]\n",
"name=['a','e','f','d']\n",
"df2=pd.DataFrame(list1,columns=name)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d_x | \n",
" e | \n",
" f | \n",
" d_y | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 2 | \n",
" 0 | \n",
" 4 | \n",
" 2 | \n",
" 0 | \n",
" 4 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
" 0 | \n",
" 8 | \n",
" 1 | \n",
" 0 | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d_x e f d_y\n",
"0 5 2 0 4 2 0 4\n",
"1 4 1 0 8 1 0 8"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df=pd.merge(df1,df2,on=['a'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"shixun=pd.read_csv('../../data/sample/shixun_merage_emb.csv',sep='\\t',encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" shixun_id | \n",
" visits | \n",
" challenges_count | \n",
" averge_star | \n",
" task_pass | \n",
" emb_0 | \n",
" emb_1 | \n",
" emb_2 | \n",
" emb_3 | \n",
" emb_4 | \n",
" ... | \n",
" emb_90 | \n",
" emb_91 | \n",
" emb_92 | \n",
" emb_93 | \n",
" emb_94 | \n",
" emb_95 | \n",
" emb_96 | \n",
" emb_97 | \n",
" emb_98 | \n",
" emb_99 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 43 | \n",
" 1255 | \n",
" 4 | \n",
" 4.8 | \n",
" 1 | \n",
" 0.178059 | \n",
" -0.223661 | \n",
" 0.431724 | \n",
" 0.222664 | \n",
" -0.051939 | \n",
" ... | \n",
" 1.277493 | \n",
" 0.033392 | \n",
" 0.085430 | \n",
" 0.334810 | \n",
" 0.194489 | \n",
" 0.405730 | \n",
" 0.223120 | \n",
" -0.162922 | \n",
" 0.020598 | \n",
" 0.260939 | \n",
"
\n",
" \n",
" 1 | \n",
" 49 | \n",
" 6208 | \n",
" 3 | \n",
" 4.8 | \n",
" 0 | \n",
" 0.029035 | \n",
" -0.233039 | \n",
" 0.211546 | \n",
" 0.176656 | \n",
" -0.227015 | \n",
" ... | \n",
" 0.688604 | \n",
" 0.257212 | \n",
" 0.023406 | \n",
" 0.462871 | \n",
" 0.364423 | \n",
" 0.555712 | \n",
" 0.018691 | \n",
" 0.159427 | \n",
" -0.310509 | \n",
" -0.173528 | \n",
"
\n",
" \n",
" 2 | \n",
" 50 | \n",
" 2527 | \n",
" 6 | \n",
" 4.8 | \n",
" 0 | \n",
" -0.098617 | \n",
" 0.150550 | \n",
" -0.053376 | \n",
" 0.137188 | \n",
" 0.076380 | \n",
" ... | \n",
" 0.226777 | \n",
" 0.334204 | \n",
" 0.347067 | \n",
" 0.351606 | \n",
" 0.160054 | \n",
" 0.212226 | \n",
" -0.042232 | \n",
" 0.027391 | \n",
" -0.163952 | \n",
" 0.083248 | \n",
"
\n",
" \n",
" 3 | \n",
" 51 | \n",
" 12651 | \n",
" 3 | \n",
" 4.9 | \n",
" 0 | \n",
" 0.107260 | \n",
" 0.024969 | \n",
" 0.001965 | \n",
" 0.326085 | \n",
" -0.264945 | \n",
" ... | \n",
" 0.883407 | \n",
" 0.031283 | \n",
" -0.068510 | \n",
" 0.483277 | \n",
" 0.856700 | \n",
" 0.685457 | \n",
" 0.132283 | \n",
" 0.315131 | \n",
" -0.675356 | \n",
" -0.090122 | \n",
"
\n",
" \n",
" 4 | \n",
" 53 | \n",
" 4043 | \n",
" 4 | \n",
" 4.9 | \n",
" 0 | \n",
" 0.143967 | \n",
" -0.325837 | \n",
" 0.068179 | \n",
" 0.015797 | \n",
" 0.146450 | \n",
" ... | \n",
" 0.674001 | \n",
" 0.206088 | \n",
" 0.140978 | \n",
" 0.533451 | \n",
" 0.263060 | \n",
" 0.296306 | \n",
" -0.113665 | \n",
" 0.418402 | \n",
" -0.367488 | \n",
" 0.065241 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 105 columns
\n",
"
"
],
"text/plain": [
" shixun_id visits challenges_count averge_star task_pass emb_0 \\\n",
"0 43 1255 4 4.8 1 0.178059 \n",
"1 49 6208 3 4.8 0 0.029035 \n",
"2 50 2527 6 4.8 0 -0.098617 \n",
"3 51 12651 3 4.9 0 0.107260 \n",
"4 53 4043 4 4.9 0 0.143967 \n",
"\n",
" emb_1 emb_2 emb_3 emb_4 ... emb_90 emb_91 emb_92 \\\n",
"0 -0.223661 0.431724 0.222664 -0.051939 ... 1.277493 0.033392 0.085430 \n",
"1 -0.233039 0.211546 0.176656 -0.227015 ... 0.688604 0.257212 0.023406 \n",
"2 0.150550 -0.053376 0.137188 0.076380 ... 0.226777 0.334204 0.347067 \n",
"3 0.024969 0.001965 0.326085 -0.264945 ... 0.883407 0.031283 -0.068510 \n",
"4 -0.325837 0.068179 0.015797 0.146450 ... 0.674001 0.206088 0.140978 \n",
"\n",
" emb_93 emb_94 emb_95 emb_96 emb_97 emb_98 emb_99 \n",
"0 0.334810 0.194489 0.405730 0.223120 -0.162922 0.020598 0.260939 \n",
"1 0.462871 0.364423 0.555712 0.018691 0.159427 -0.310509 -0.173528 \n",
"2 0.351606 0.160054 0.212226 -0.042232 0.027391 -0.163952 0.083248 \n",
"3 0.483277 0.856700 0.685457 0.132283 0.315131 -0.675356 -0.090122 \n",
"4 0.533451 0.263060 0.296306 -0.113665 0.418402 -0.367488 0.065241 \n",
"\n",
"[5 rows x 105 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"shixun.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"item_emb_df = pd.read_csv('../../data/sample/shixun_merage_emb.csv', sep='\\t', encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"item_emb_cols = [x for x in item_emb_df.columns if 'shixun_id' not in x]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['visits',\n",
" 'challenges_count',\n",
" 'averge_star',\n",
" 'task_pass',\n",
" 'emb_0',\n",
" 'emb_1',\n",
" 'emb_2',\n",
" 'emb_3',\n",
" 'emb_4',\n",
" 'emb_5',\n",
" 'emb_6',\n",
" 'emb_7',\n",
" 'emb_8',\n",
" 'emb_9',\n",
" 'emb_10',\n",
" 'emb_11',\n",
" 'emb_12',\n",
" 'emb_13',\n",
" 'emb_14',\n",
" 'emb_15',\n",
" 'emb_16',\n",
" 'emb_17',\n",
" 'emb_18',\n",
" 'emb_19',\n",
" 'emb_20',\n",
" 'emb_21',\n",
" 'emb_22',\n",
" 'emb_23',\n",
" 'emb_24',\n",
" 'emb_25',\n",
" 'emb_26',\n",
" 'emb_27',\n",
" 'emb_28',\n",
" 'emb_29',\n",
" 'emb_30',\n",
" 'emb_31',\n",
" 'emb_32',\n",
" 'emb_33',\n",
" 'emb_34',\n",
" 'emb_35',\n",
" 'emb_36',\n",
" 'emb_37',\n",
" 'emb_38',\n",
" 'emb_39',\n",
" 'emb_40',\n",
" 'emb_41',\n",
" 'emb_42',\n",
" 'emb_43',\n",
" 'emb_44',\n",
" 'emb_45',\n",
" 'emb_46',\n",
" 'emb_47',\n",
" 'emb_48',\n",
" 'emb_49',\n",
" 'emb_50',\n",
" 'emb_51',\n",
" 'emb_52',\n",
" 'emb_53',\n",
" 'emb_54',\n",
" 'emb_55',\n",
" 'emb_56',\n",
" 'emb_57',\n",
" 'emb_58',\n",
" 'emb_59',\n",
" 'emb_60',\n",
" 'emb_61',\n",
" 'emb_62',\n",
" 'emb_63',\n",
" 'emb_64',\n",
" 'emb_65',\n",
" 'emb_66',\n",
" 'emb_67',\n",
" 'emb_68',\n",
" 'emb_69',\n",
" 'emb_70',\n",
" 'emb_71',\n",
" 'emb_72',\n",
" 'emb_73',\n",
" 'emb_74',\n",
" 'emb_75',\n",
" 'emb_76',\n",
" 'emb_77',\n",
" 'emb_78',\n",
" 'emb_79',\n",
" 'emb_80',\n",
" 'emb_81',\n",
" 'emb_82',\n",
" 'emb_83',\n",
" 'emb_84',\n",
" 'emb_85',\n",
" 'emb_86',\n",
" 'emb_87',\n",
" 'emb_88',\n",
" 'emb_89',\n",
" 'emb_90',\n",
" 'emb_91',\n",
" 'emb_92',\n",
" 'emb_93',\n",
" 'emb_94',\n",
" 'emb_95',\n",
" 'emb_96',\n",
" 'emb_97',\n",
" 'emb_98',\n",
" 'emb_99']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"item_emb_cols"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mooc",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}