You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3077 lines
93 KiB
3077 lines
93 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_shixun=pd.read_csv('knowledge_forest_data/sample/stage_shixuns.csv',sep='\\t')\n",
|
|
"stage_att=pd.read_csv('knowledge_forest_data/sample/stage_attachments.csv',sep='\\t')\n",
|
|
"stage_video=pd.read_csv('knowledge_forest_data/sample/stage_videos.csv',sep='\\t')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++控制结构实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>80</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++数组实训</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge\n",
|
|
"0 2316 60 Shixun C&C++基本输入输出 输入输出\n",
|
|
"1 2316 67 Shixun C&C++表达式语句实训 语句实训\n",
|
|
"2 2316 71 Shixun C&C++控制结构实训 控制结构实训\n",
|
|
"3 2316 76 Shixun 函数实训 函数实训\n",
|
|
"4 2317 80 Shixun C&C++数组实训 数组实训"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_shixun.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_knowledge=stage_shixun[['stage_id','knowledge']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id knowledge\n",
|
|
"0 2316 输入输出\n",
|
|
"1 2316 语句实训\n",
|
|
"2 2316 控制结构实训\n",
|
|
"3 2316 函数实训\n",
|
|
"4 2317 数组实训"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_knowledge.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"112"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(stage_knowledge)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"new_stage_knowledge=stage_knowledge.groupby(['stage_id'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"new_stage_knowledge=new_stage_knowledge['knowledge'].apply(lambda x: \"$\".join(dict.fromkeys(x.str.cat(sep='$').split('$')))).reset_index()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>控制结构实训$语句实训$函数实训$输入输出</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>数组实训$指针实训$C&C++线性表实训$结构实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2318</td>\n",
|
|
" <td>文件实训$面向过程编程练习</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2319</td>\n",
|
|
" <td>C++面向对象构造函数与析构函数$C++面向对象类和对象的创建和使用</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2320</td>\n",
|
|
" <td>C++面向对象的继承与派生$C++面向对象的多态性与</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id knowledge\n",
|
|
"0 2316 控制结构实训$语句实训$函数实训$输入输出\n",
|
|
"1 2317 数组实训$指针实训$C&C++线性表实训$结构实训\n",
|
|
"2 2318 文件实训$面向过程编程练习\n",
|
|
"3 2319 C++面向对象构造函数与析构函数$C++面向对象类和对象的创建和使用\n",
|
|
"4 2320 C++面向对象的继承与派生$C++面向对象的多态性与"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"new_stage_knowledge.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"new_stage_knowledge[\"knowledge\"] = new_stage_knowledge[\"knowledge\"].apply(lambda x: x.split(\"$\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>[控制结构实训, 语句实训, 函数实训, 输入输出]</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>[数组实训, 指针实训, C&C++线性表实训, 结构实训]</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2318</td>\n",
|
|
" <td>[文件实训, 面向过程编程练习]</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2319</td>\n",
|
|
" <td>[C++面向对象构造函数与析构函数, C++面向对象类和对象的创建和使用]</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2320</td>\n",
|
|
" <td>[C++面向对象的继承与派生, C++面向对象的多态性与]</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id knowledge\n",
|
|
"0 2316 [控制结构实训, 语句实训, 函数实训, 输入输出]\n",
|
|
"1 2317 [数组实训, 指针实训, C&C++线性表实训, 结构实训]\n",
|
|
"2 2318 [文件实训, 面向过程编程练习]\n",
|
|
"3 2319 [C++面向对象构造函数与析构函数, C++面向对象类和对象的创建和使用]\n",
|
|
"4 2320 [C++面向对象的继承与派生, C++面向对象的多态性与]"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"new_stage_knowledge.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"newskg=new_stage_knowledge.explode(\"knowledge\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"110"
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(newskg)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"newskg['knowledge_id']=0"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id knowledge knowledge_id\n",
|
|
"0 2316 控制结构实训 0\n",
|
|
"0 2316 语句实训 0\n",
|
|
"0 2316 函数实训 0\n",
|
|
"0 2316 输入输出 0\n",
|
|
"1 2317 数组实训 0"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"newskg.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"newskg.reset_index(inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"newskg=newskg[['stage_id', 'knowledge', 'knowledge_id']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id knowledge knowledge_id\n",
|
|
"0 2316 控制结构实训 0\n",
|
|
"1 2316 语句实训 0\n",
|
|
"2 2316 函数实训 0\n",
|
|
"3 2316 输入输出 0\n",
|
|
"4 2317 数组实训 0"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"newskg.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"newskg=newskg.assign(knowledge_id=list(range(len(newskg))))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>4</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id knowledge knowledge_id\n",
|
|
"0 2316 控制结构实训 0\n",
|
|
"1 2316 语句实训 1\n",
|
|
"2 2316 函数实训 2\n",
|
|
"3 2316 输入输出 3\n",
|
|
"4 2317 数组实训 4"
|
|
]
|
|
},
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"newskg.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++控制结构实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>80</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++数组实训</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge\n",
|
|
"0 2316 60 Shixun C&C++基本输入输出 输入输出\n",
|
|
"1 2316 67 Shixun C&C++表达式语句实训 语句实训\n",
|
|
"2 2316 71 Shixun C&C++控制结构实训 控制结构实训\n",
|
|
"3 2316 76 Shixun 函数实训 函数实训\n",
|
|
"4 2317 80 Shixun C&C++数组实训 数组实训"
|
|
]
|
|
},
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_shixun.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_shixun=pd.merge(stage_shixun,newskg,on=\"stage_id\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge_x</th>\n",
|
|
" <th>knowledge_y</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge_x knowledge_y \\\n",
|
|
"0 2316 60 Shixun C&C++基本输入输出 输入输出 控制结构实训 \n",
|
|
"1 2316 60 Shixun C&C++基本输入输出 输入输出 语句实训 \n",
|
|
"2 2316 60 Shixun C&C++基本输入输出 输入输出 函数实训 \n",
|
|
"3 2316 60 Shixun C&C++基本输入输出 输入输出 输入输出 \n",
|
|
"4 2316 67 Shixun C&C++表达式语句实训 语句实训 控制结构实训 \n",
|
|
"\n",
|
|
" knowledge_id \n",
|
|
"0 0 \n",
|
|
"1 1 \n",
|
|
"2 2 \n",
|
|
"3 3 \n",
|
|
"4 0 "
|
|
]
|
|
},
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_shixun.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_shixun=stage_shixun.query('knowledge_x == knowledge_y')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_shixun = stage_shixun[['stage_id',\"item_id\",'item_type','item_name','knowledge_x','knowledge_id']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge_x</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++控制结构实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>80</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++数组实训</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>4</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>99</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++指针实训</td>\n",
|
|
" <td>指针实训</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++结构实训</td>\n",
|
|
" <td>结构实训</td>\n",
|
|
" <td>7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>101</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++线性表实训</td>\n",
|
|
" <td>C&C++线性表实训</td>\n",
|
|
" <td>6</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>33</th>\n",
|
|
" <td>2319</td>\n",
|
|
" <td>1381</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C++ 面向对象 - 类和对象的创建和使用</td>\n",
|
|
" <td>C++面向对象类和对象的创建和使用</td>\n",
|
|
" <td>11</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>34</th>\n",
|
|
" <td>2319</td>\n",
|
|
" <td>1423</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C++ 面向对象 - 构造函数与析构函数</td>\n",
|
|
" <td>C++面向对象构造函数与析构函数</td>\n",
|
|
" <td>10</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>36</th>\n",
|
|
" <td>2320</td>\n",
|
|
" <td>1454</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C++ 面向对象 - 类的继承与派生</td>\n",
|
|
" <td>C++面向对象的继承与派生</td>\n",
|
|
" <td>12</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>39</th>\n",
|
|
" <td>2320</td>\n",
|
|
" <td>1481</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C++ 面向对象 - 类的多态性与虚函数</td>\n",
|
|
" <td>C++面向对象的多态性与</td>\n",
|
|
" <td>13</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>43</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>8431</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>软件危机</td>\n",
|
|
" <td>软件危机</td>\n",
|
|
" <td>19</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>49</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>8464</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>走进软件工程</td>\n",
|
|
" <td>走进软件工程</td>\n",
|
|
" <td>20</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>52</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>8561</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>SWEBOK 与软件过程</td>\n",
|
|
" <td>SWEBOK软件过程</td>\n",
|
|
" <td>18</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>55</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>8562</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>主要的软件过程模型</td>\n",
|
|
" <td>的软件过程模型</td>\n",
|
|
" <td>16</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>61</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>8563</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>敏捷软件过程</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" <td>17</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>65</th>\n",
|
|
" <td>2429</td>\n",
|
|
" <td>8710</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>可行性分析的任务及报告</td>\n",
|
|
" <td>可行性分析的任务及报告</td>\n",
|
|
" <td>21</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>66</th>\n",
|
|
" <td>2544</td>\n",
|
|
" <td>8426</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>软件设计</td>\n",
|
|
" <td>软件设计</td>\n",
|
|
" <td>22</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>67</th>\n",
|
|
" <td>2545</td>\n",
|
|
" <td>8699</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>结构化分析与设计</td>\n",
|
|
" <td>结构化分析与设计</td>\n",
|
|
" <td>23</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge_x \\\n",
|
|
"3 2316 60 Shixun C&C++基本输入输出 输入输出 \n",
|
|
"5 2316 67 Shixun C&C++表达式语句实训 语句实训 \n",
|
|
"8 2316 71 Shixun C&C++控制结构实训 控制结构实训 \n",
|
|
"14 2316 76 Shixun 函数实训 函数实训 \n",
|
|
"16 2317 80 Shixun C&C++数组实训 数组实训 \n",
|
|
"21 2317 99 Shixun C&C++指针实训 指针实训 \n",
|
|
"27 2317 100 Shixun C&C++结构实训 结构实训 \n",
|
|
"30 2317 101 Shixun C&C++线性表实训 C&C++线性表实训 \n",
|
|
"33 2319 1381 Shixun C++ 面向对象 - 类和对象的创建和使用 C++面向对象类和对象的创建和使用 \n",
|
|
"34 2319 1423 Shixun C++ 面向对象 - 构造函数与析构函数 C++面向对象构造函数与析构函数 \n",
|
|
"36 2320 1454 Shixun C++ 面向对象 - 类的继承与派生 C++面向对象的继承与派生 \n",
|
|
"39 2320 1481 Shixun C++ 面向对象 - 类的多态性与虚函数 C++面向对象的多态性与 \n",
|
|
"43 2428 8431 Shixun 软件危机 软件危机 \n",
|
|
"49 2428 8464 Shixun 走进软件工程 走进软件工程 \n",
|
|
"52 2428 8561 Shixun SWEBOK 与软件过程 SWEBOK软件过程 \n",
|
|
"55 2428 8562 Shixun 主要的软件过程模型 的软件过程模型 \n",
|
|
"61 2428 8563 Shixun 敏捷软件过程 软件过程 \n",
|
|
"65 2429 8710 Shixun 可行性分析的任务及报告 可行性分析的任务及报告 \n",
|
|
"66 2544 8426 Shixun 软件设计 软件设计 \n",
|
|
"67 2545 8699 Shixun 结构化分析与设计 结构化分析与设计 \n",
|
|
"\n",
|
|
" knowledge_id \n",
|
|
"3 3 \n",
|
|
"5 1 \n",
|
|
"8 0 \n",
|
|
"14 2 \n",
|
|
"16 4 \n",
|
|
"21 5 \n",
|
|
"27 7 \n",
|
|
"30 6 \n",
|
|
"33 11 \n",
|
|
"34 10 \n",
|
|
"36 12 \n",
|
|
"39 13 \n",
|
|
"43 19 \n",
|
|
"49 20 \n",
|
|
"52 18 \n",
|
|
"55 16 \n",
|
|
"61 17 \n",
|
|
"65 21 \n",
|
|
"66 22 \n",
|
|
"67 23 "
|
|
]
|
|
},
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_shixun.head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_shixun.rename(columns={'knowledge_x':'knowledge'},inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++控制结构实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>80</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++数组实训</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>4</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"3 2316 60 Shixun C&C++基本输入输出 输入输出 3\n",
|
|
"5 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n",
|
|
"8 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n",
|
|
"14 2316 76 Shixun 函数实训 函数实训 2\n",
|
|
"16 2317 80 Shixun C&C++数组实训 数组实训 4"
|
|
]
|
|
},
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_shixun.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_shixun = stage_shixun[[\"knowledge_id\",\"item_id\"]]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_shixun=kg_shixun.drop_duplicates()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_shixun.rename(columns={'item_id':'shixun_id'},inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_shixun['relation']='知识点实训'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_shixun.to_csv('knowledge_forest_data/sample/relation/kg_shixun.csv',sep='\\t',index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++控制结构实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>80</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++数组实训</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>4</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"3 2316 60 Shixun C&C++基本输入输出 输入输出 3\n",
|
|
"5 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n",
|
|
"8 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n",
|
|
"14 2316 76 Shixun 函数实训 函数实训 2\n",
|
|
"16 2317 80 Shixun C&C++数组实训 数组实训 4"
|
|
]
|
|
},
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_shixun.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>13581</td>\n",
|
|
" <td>2245589</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_02线性表.pptx</td>\n",
|
|
" <td>数据结构与算法线性表</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>13582</td>\n",
|
|
" <td>2245627</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
|
|
" <td>数据结构与算法和队列</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245691</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
|
|
" <td>数据结构与算法树二叉树</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245690</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
|
|
" <td>数据结构与算法字典结构</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>2245777</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_08排序.pptx</td>\n",
|
|
" <td>数据结构与算法排序</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge\n",
|
|
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表\n",
|
|
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列\n",
|
|
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树\n",
|
|
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构\n",
|
|
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序"
|
|
]
|
|
},
|
|
"execution_count": 35,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_att.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_kg=stage_shixun"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>525</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>54462</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>选择排序</td>\n",
|
|
" <td>选择排序</td>\n",
|
|
" <td>104</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>530</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>36229</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>查找</td>\n",
|
|
" <td>查找</td>\n",
|
|
" <td>106</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>531</th>\n",
|
|
" <td>13587</td>\n",
|
|
" <td>16175</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>图——课上课后练</td>\n",
|
|
" <td>图课上课后练</td>\n",
|
|
" <td>107</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>532</th>\n",
|
|
" <td>13588</td>\n",
|
|
" <td>23187</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>散列——实验及提升训练</td>\n",
|
|
" <td>散列实验及提升训练</td>\n",
|
|
" <td>108</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>535</th>\n",
|
|
" <td>13588</td>\n",
|
|
" <td>27244</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>集合——课上练</td>\n",
|
|
" <td>集合课练</td>\n",
|
|
" <td>109</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"525 13586 54462 Shixun 选择排序 选择排序 104\n",
|
|
"530 13586 36229 Shixun 查找 查找 106\n",
|
|
"531 13587 16175 Shixun 图——课上课后练 图课上课后练 107\n",
|
|
"532 13588 23187 Shixun 散列——实验及提升训练 散列实验及提升训练 108\n",
|
|
"535 13588 27244 Shixun 集合——课上练 集合课练 109"
|
|
]
|
|
},
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_kg.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>525</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>54462</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>选择排序</td>\n",
|
|
" <td>选择排序</td>\n",
|
|
" <td>104</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>530</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>36229</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>查找</td>\n",
|
|
" <td>查找</td>\n",
|
|
" <td>106</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>531</th>\n",
|
|
" <td>13587</td>\n",
|
|
" <td>16175</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>图——课上课后练</td>\n",
|
|
" <td>图课上课后练</td>\n",
|
|
" <td>107</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>532</th>\n",
|
|
" <td>13588</td>\n",
|
|
" <td>23187</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>散列——实验及提升训练</td>\n",
|
|
" <td>散列实验及提升训练</td>\n",
|
|
" <td>108</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>535</th>\n",
|
|
" <td>13588</td>\n",
|
|
" <td>27244</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>集合——课上练</td>\n",
|
|
" <td>集合课练</td>\n",
|
|
" <td>109</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"525 13586 54462 Shixun 选择排序 选择排序 104\n",
|
|
"530 13586 36229 Shixun 查找 查找 106\n",
|
|
"531 13587 16175 Shixun 图——课上课后练 图课上课后练 107\n",
|
|
"532 13588 23187 Shixun 散列——实验及提升训练 散列实验及提升训练 108\n",
|
|
"535 13588 27244 Shixun 集合——课上练 集合课练 109"
|
|
]
|
|
},
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_kg.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>13581</td>\n",
|
|
" <td>2245589</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_02线性表.pptx</td>\n",
|
|
" <td>数据结构与算法线性表</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>13582</td>\n",
|
|
" <td>2245627</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
|
|
" <td>数据结构与算法和队列</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245691</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
|
|
" <td>数据结构与算法树二叉树</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245690</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
|
|
" <td>数据结构与算法字典结构</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>2245777</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_08排序.pptx</td>\n",
|
|
" <td>数据结构与算法排序</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge\n",
|
|
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表\n",
|
|
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列\n",
|
|
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树\n",
|
|
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构\n",
|
|
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序"
|
|
]
|
|
},
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_att.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_att['knowledge_id']=0"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>13581</td>\n",
|
|
" <td>2245589</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_02线性表.pptx</td>\n",
|
|
" <td>数据结构与算法线性表</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>13582</td>\n",
|
|
" <td>2245627</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
|
|
" <td>数据结构与算法和队列</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245691</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
|
|
" <td>数据结构与算法树二叉树</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245690</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
|
|
" <td>数据结构与算法字典结构</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>2245777</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_08排序.pptx</td>\n",
|
|
" <td>数据结构与算法排序</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge \\\n",
|
|
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表 \n",
|
|
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列 \n",
|
|
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n",
|
|
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n",
|
|
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n",
|
|
"\n",
|
|
" knowledge_id \n",
|
|
"0 0 \n",
|
|
"1 0 \n",
|
|
"2 0 \n",
|
|
"3 0 \n",
|
|
"4 0 "
|
|
]
|
|
},
|
|
"execution_count": 41,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_att.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/tmp/ipykernel_32602/3546850827.py:9: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" stage_att['knowledge_id'][i] = len(stage_kg)+1\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Give every attachment row a knowledge_id and fold it into the running stage_kg table.\n",
"# Rows are handled one at a time on purpose: each attachment is added to stage_kg\n",
"# before the next lookup, so a later attachment can match an earlier one.\n",
"for i in range(len(stage_att)):\n",
"    # Rows already in stage_kg with the same stage and the same knowledge text.\n",
"    same_stage = stage_kg['stage_id'] == stage_att.at[i, 'stage_id']\n",
"    same_knowledge = stage_kg['knowledge'].isin([stage_att.at[i, 'knowledge']])\n",
"    matched_ids = stage_kg.loc[same_stage & same_knowledge, 'knowledge_id']\n",
"    if len(matched_ids) > 0:\n",
"        # Reuse the id of the first match; several rows may share stage and knowledge.\n",
"        stage_att.at[i, 'knowledge_id'] = int(matched_ids.iloc[0])\n",
"    else:\n",
"        # No match: derive a fresh id from the current size of stage_kg.\n",
"        stage_att.at[i, 'knowledge_id'] = len(stage_kg) + 1\n",
"    # .at writes into stage_att itself (no chained-assignment copy), and pd.concat\n",
"    # replaces DataFrame.append, which was removed in pandas 2.0.\n",
"    stage_kg = pd.concat([stage_kg, stage_att.iloc[i:i+1]], ignore_index=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>13581</td>\n",
|
|
" <td>2245589</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_02线性表.pptx</td>\n",
|
|
" <td>数据结构与算法线性表</td>\n",
|
|
" <td>113</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>13582</td>\n",
|
|
" <td>2245627</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
|
|
" <td>数据结构与算法和队列</td>\n",
|
|
" <td>114</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245691</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
|
|
" <td>数据结构与算法树二叉树</td>\n",
|
|
" <td>115</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245690</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
|
|
" <td>数据结构与算法字典结构</td>\n",
|
|
" <td>116</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>2245777</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_08排序.pptx</td>\n",
|
|
" <td>数据结构与算法排序</td>\n",
|
|
" <td>117</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge \\\n",
|
|
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表 \n",
|
|
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列 \n",
|
|
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n",
|
|
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n",
|
|
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n",
|
|
"\n",
|
|
" knowledge_id \n",
|
|
"0 113 \n",
|
|
"1 114 \n",
|
|
"2 115 \n",
|
|
"3 116 \n",
|
|
"4 117 "
|
|
]
|
|
},
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_att.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_att=stage_att[[\"knowledge_id\",\"item_id\"]]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 45,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_att=kg_att.drop_duplicates()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 46,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_att.rename(columns={'item_id':'attachment_id'},inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 47,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_att['relation']='知识点课件'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 48,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" <th>attachment_id</th>\n",
|
|
" <th>relation</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>113</td>\n",
|
|
" <td>2245589</td>\n",
|
|
" <td>知识点课件</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>114</td>\n",
|
|
" <td>2245627</td>\n",
|
|
" <td>知识点课件</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>115</td>\n",
|
|
" <td>2245691</td>\n",
|
|
" <td>知识点课件</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>116</td>\n",
|
|
" <td>2245690</td>\n",
|
|
" <td>知识点课件</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>117</td>\n",
|
|
" <td>2245777</td>\n",
|
|
" <td>知识点课件</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" knowledge_id attachment_id relation\n",
|
|
"0 113 2245589 知识点课件\n",
|
|
"1 114 2245627 知识点课件\n",
|
|
"2 115 2245691 知识点课件\n",
|
|
"3 116 2245690 知识点课件\n",
|
|
"4 117 2245777 知识点课件"
|
|
]
|
|
},
|
|
"execution_count": 48,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"kg_att.head() "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 49,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_att.to_csv('knowledge_forest_data/sample/relation/kg_att.csv',sep='\\t',index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 50,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>114</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245691</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
|
|
" <td>数据结构与算法树二叉树</td>\n",
|
|
" <td>115</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>115</th>\n",
|
|
" <td>13583</td>\n",
|
|
" <td>2245690</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
|
|
" <td>数据结构与算法字典结构</td>\n",
|
|
" <td>116</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>116</th>\n",
|
|
" <td>13586</td>\n",
|
|
" <td>2245777</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_08排序.pptx</td>\n",
|
|
" <td>数据结构与算法排序</td>\n",
|
|
" <td>117</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>117</th>\n",
|
|
" <td>13587</td>\n",
|
|
" <td>2245802</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_06图.pptx</td>\n",
|
|
" <td>数据结构与算法图</td>\n",
|
|
" <td>118</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>118</th>\n",
|
|
" <td>13588</td>\n",
|
|
" <td>2245824</td>\n",
|
|
" <td>Attachment</td>\n",
|
|
" <td>数据结构与算法_07集合和字典.pptx</td>\n",
|
|
" <td>数据结构与算法集合和字典</td>\n",
|
|
" <td>119</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge \\\n",
|
|
"114 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n",
|
|
"115 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n",
|
|
"116 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n",
|
|
"117 13587 2245802 Attachment 数据结构与算法_06图.pptx 数据结构与算法图 \n",
|
|
"118 13588 2245824 Attachment 数据结构与算法_07集合和字典.pptx 数据结构与算法集合和字典 \n",
|
|
"\n",
|
|
" knowledge_id \n",
|
|
"114 115 \n",
|
|
"115 116 \n",
|
|
"116 117 \n",
|
|
"117 118 \n",
|
|
"118 119 "
|
|
]
|
|
},
|
|
"execution_count": 50,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_kg.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 51,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1015</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件工程引言和软件的概念</td>\n",
|
|
" <td>软件工程引言和软件的概念</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1016</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件的基本概念</td>\n",
|
|
" <td>软件的概念</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1017</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1018</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>RUP简介</td>\n",
|
|
" <td>RUP简介</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1019</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>敏捷的过程</td>\n",
|
|
" <td>的过程</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge\n",
|
|
"0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念\n",
|
|
"1 2428 1016 VideoItem 软件的基本概念 软件的概念\n",
|
|
"2 2428 1017 VideoItem 软件过程 软件过程\n",
|
|
"3 2428 1018 VideoItem RUP简介 RUP简介\n",
|
|
"4 2428 1019 VideoItem 敏捷的过程 的过程"
|
|
]
|
|
},
|
|
"execution_count": 51,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_video.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 52,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_video['knowledge_id']=0"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 53,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1015</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件工程引言和软件的概念</td>\n",
|
|
" <td>软件工程引言和软件的概念</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1016</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件的基本概念</td>\n",
|
|
" <td>软件的概念</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1017</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1018</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>RUP简介</td>\n",
|
|
" <td>RUP简介</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1019</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>敏捷的过程</td>\n",
|
|
" <td>的过程</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念 0\n",
|
|
"1 2428 1016 VideoItem 软件的基本概念 软件的概念 0\n",
|
|
"2 2428 1017 VideoItem 软件过程 软件过程 0\n",
|
|
"3 2428 1018 VideoItem RUP简介 RUP简介 0\n",
|
|
"4 2428 1019 VideoItem 敏捷的过程 的过程 0"
|
|
]
|
|
},
|
|
"execution_count": 53,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_video.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 54,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/tmp/ipykernel_32602/3125594964.py:9: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" stage_video['knowledge_id'][i] = len(stage_kg)+1\n",
|
|
"/tmp/ipykernel_32602/3125594964.py:6: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" stage_video['knowledge_id'][i] = int(skg['knowledge_id'])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Give every video row a knowledge_id and fold it into the running stage_kg table.\n",
"# Rows are handled one at a time on purpose: each video is added to stage_kg\n",
"# before the next lookup, so a later video can match an earlier one.\n",
"for i in range(len(stage_video)):\n",
"    # Rows already in stage_kg with the same stage and the same knowledge text.\n",
"    same_stage = stage_kg['stage_id'] == stage_video.at[i, 'stage_id']\n",
"    same_knowledge = stage_kg['knowledge'].isin([stage_video.at[i, 'knowledge']])\n",
"    matched_ids = stage_kg.loc[same_stage & same_knowledge, 'knowledge_id']\n",
"    if len(matched_ids) > 0:\n",
"        # Reuse the id of the first match; several rows may share stage and knowledge.\n",
"        stage_video.at[i, 'knowledge_id'] = int(matched_ids.iloc[0])\n",
"    else:\n",
"        # No match: derive a fresh id from the current size of stage_kg.\n",
"        stage_video.at[i, 'knowledge_id'] = len(stage_kg) + 1\n",
"    # .at writes into stage_video itself (no chained-assignment copy), and pd.concat\n",
"    # replaces DataFrame.append, which was removed in pandas 2.0.\n",
"    stage_kg = pd.concat([stage_kg, stage_video.iloc[i:i+1]], ignore_index=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 55,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1015</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件工程引言和软件的概念</td>\n",
|
|
" <td>软件工程引言和软件的概念</td>\n",
|
|
" <td>120</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1016</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件的基本概念</td>\n",
|
|
" <td>软件的概念</td>\n",
|
|
" <td>121</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1017</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" <td>软件过程</td>\n",
|
|
" <td>17</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1018</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>RUP简介</td>\n",
|
|
" <td>RUP简介</td>\n",
|
|
" <td>123</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2428</td>\n",
|
|
" <td>1019</td>\n",
|
|
" <td>VideoItem</td>\n",
|
|
" <td>敏捷的过程</td>\n",
|
|
" <td>的过程</td>\n",
|
|
" <td>124</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念 120\n",
|
|
"1 2428 1016 VideoItem 软件的基本概念 软件的概念 121\n",
|
|
"2 2428 1017 VideoItem 软件过程 软件过程 17\n",
|
|
"3 2428 1018 VideoItem RUP简介 RUP简介 123\n",
|
|
"4 2428 1019 VideoItem 敏捷的过程 的过程 124"
|
|
]
|
|
},
|
|
"execution_count": 55,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_video.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 56,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_video=stage_video[[\"knowledge_id\",\"item_id\"]]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 57,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_video = kg_video.drop_duplicates()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 58,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_video.rename(columns={'item_id':'video_item_id'},inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 59,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_video['relation']='知识点视频'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 60,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" <th>video_item_id</th>\n",
|
|
" <th>relation</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>120</td>\n",
|
|
" <td>1015</td>\n",
|
|
" <td>知识点视频</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>121</td>\n",
|
|
" <td>1016</td>\n",
|
|
" <td>知识点视频</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>17</td>\n",
|
|
" <td>1017</td>\n",
|
|
" <td>知识点视频</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>123</td>\n",
|
|
" <td>1018</td>\n",
|
|
" <td>知识点视频</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>124</td>\n",
|
|
" <td>1019</td>\n",
|
|
" <td>知识点视频</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" knowledge_id video_item_id relation\n",
|
|
"0 120 1015 知识点视频\n",
|
|
"1 121 1016 知识点视频\n",
|
|
"2 17 1017 知识点视频\n",
|
|
"3 123 1018 知识点视频\n",
|
|
"4 124 1019 知识点视频"
|
|
]
|
|
},
|
|
"execution_count": 60,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"kg_video.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 61,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kg_video.to_csv('knowledge_forest_data/sample/relation/kg_video.csv',sep='\\t',index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 62,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>stage_id</th>\n",
|
|
" <th>item_id</th>\n",
|
|
" <th>item_type</th>\n",
|
|
" <th>item_name</th>\n",
|
|
" <th>knowledge</th>\n",
|
|
" <th>knowledge_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>60</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++基本输入输出</td>\n",
|
|
" <td>输入输出</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>67</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++表达式语句实训</td>\n",
|
|
" <td>语句实训</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++控制结构实训</td>\n",
|
|
" <td>控制结构实训</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2316</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>函数实训</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2317</td>\n",
|
|
" <td>80</td>\n",
|
|
" <td>Shixun</td>\n",
|
|
" <td>C&C++数组实训</td>\n",
|
|
" <td>数组实训</td>\n",
|
|
" <td>4</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" stage_id item_id item_type item_name knowledge knowledge_id\n",
|
|
"0 2316 60 Shixun C&C++基本输入输出 输入输出 3\n",
|
|
"1 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n",
|
|
"2 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n",
|
|
"3 2316 76 Shixun 函数实训 函数实训 2\n",
|
|
"4 2317 80 Shixun C&C++数组实训 数组实训 4"
|
|
]
|
|
},
|
|
"execution_count": 62,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"stage_kg.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"knowledge=stage_kg[['knowledge_id','knowledge']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 64,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"knowledge = knowledge.drop_duplicates()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 65,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"knowledge.to_csv('knowledge_forest_data/sample/entity/knowledge.csv',sep='\\t',index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 66,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_kg = stage_kg[['stage_id','knowledge_id']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 67,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_kg = stage_kg.drop_duplicates()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 68,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_kg['relation'] = '章节知识点'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 69,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stage_kg.to_csv('knowledge_forest_data/sample/relation/stage_kg.csv',sep='\\t',index=False)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "mooc",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.2"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|