You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3077 lines
93 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"stage_shixun=pd.read_csv('knowledge_forest_data/sample/stage_shixuns.csv',sep='\\t')\n",
"stage_att=pd.read_csv('knowledge_forest_data/sample/stage_attachments.csv',sep='\\t')\n",
"stage_video=pd.read_csv('knowledge_forest_data/sample/stage_videos.csv',sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>71</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++控制结构实训</td>\n",
" <td>控制结构实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>76</td>\n",
" <td>Shixun</td>\n",
" <td>函数实训</td>\n",
" <td>函数实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2317</td>\n",
" <td>80</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++数组实训</td>\n",
" <td>数组实训</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge\n",
"0 2316 60 Shixun C&C++基本输入输出 输入输出\n",
"1 2316 67 Shixun C&C++表达式语句实训 语句实训\n",
"2 2316 71 Shixun C&C++控制结构实训 控制结构实训\n",
"3 2316 76 Shixun 函数实训 函数实训\n",
"4 2317 80 Shixun C&C++数组实训 数组实训"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_shixun.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"stage_knowledge=stage_shixun[['stage_id','knowledge']]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>输入输出</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>语句实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>控制结构实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>函数实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2317</td>\n",
" <td>数组实训</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id knowledge\n",
"0 2316 输入输出\n",
"1 2316 语句实训\n",
"2 2316 控制结构实训\n",
"3 2316 函数实训\n",
"4 2317 数组实训"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_knowledge.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"112"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(stage_knowledge)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"new_stage_knowledge=stage_knowledge.groupby(['stage_id'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"new_stage_knowledge=new_stage_knowledge['knowledge'].apply(lambda x: \"$\".join(list(set(x.str.cat(sep='$').split('$'))))).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>控制结构实训$语句实训$函数实训$输入输出</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2317</td>\n",
" <td>数组实训$指针实训$C&amp;C++线性表实训$结构实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2318</td>\n",
" <td>文件实训$面向过程编程练习</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2319</td>\n",
" <td>C++面向对象构造函数与析构函数$C++面向对象类和对象的创建和使用</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2320</td>\n",
" <td>C++面向对象的继承与派生$C++面向对象的多态性与</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id knowledge\n",
"0 2316 控制结构实训$语句实训$函数实训$输入输出\n",
"1 2317 数组实训$指针实训$C&C++线性表实训$结构实训\n",
"2 2318 文件实训$面向过程编程练习\n",
"3 2319 C++面向对象构造函数与析构函数$C++面向对象类和对象的创建和使用\n",
"4 2320 C++面向对象的继承与派生$C++面向对象的多态性与"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_stage_knowledge.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"new_stage_knowledge[\"knowledge\"] = new_stage_knowledge[\"knowledge\"].apply(lambda x: x.split(\"$\"))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>[控制结构实训, 语句实训, 函数实训, 输入输出]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2317</td>\n",
" <td>[数组实训, 指针实训, C&amp;C++线性表实训, 结构实训]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2318</td>\n",
" <td>[文件实训, 面向过程编程练习]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2319</td>\n",
" <td>[C++面向对象构造函数与析构函数, C++面向对象类和对象的创建和使用]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2320</td>\n",
" <td>[C++面向对象的继承与派生, C++面向对象的多态性与]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id knowledge\n",
"0 2316 [控制结构实训, 语句实训, 函数实训, 输入输出]\n",
"1 2317 [数组实训, 指针实训, C&C++线性表实训, 结构实训]\n",
"2 2318 [文件实训, 面向过程编程练习]\n",
"3 2319 [C++面向对象构造函数与析构函数, C++面向对象类和对象的创建和使用]\n",
"4 2320 [C++面向对象的继承与派生, C++面向对象的多态性与]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_stage_knowledge.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"newskg=new_stage_knowledge.explode(\"knowledge\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"110"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(newskg)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"newskg['knowledge_id']=0"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>语句实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>函数实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>输入输出</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2317</td>\n",
" <td>数组实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id knowledge knowledge_id\n",
"0 2316 控制结构实训 0\n",
"0 2316 语句实训 0\n",
"0 2316 函数实训 0\n",
"0 2316 输入输出 0\n",
"1 2317 数组实训 0"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"newskg.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"newskg.reset_index(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"newskg=newskg[['stage_id', 'knowledge', 'knowledge_id']]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>语句实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>函数实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>输入输出</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2317</td>\n",
" <td>数组实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id knowledge knowledge_id\n",
"0 2316 控制结构实训 0\n",
"1 2316 语句实训 0\n",
"2 2316 函数实训 0\n",
"3 2316 输入输出 0\n",
"4 2317 数组实训 0"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"newskg.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_32602/4216485099.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" newskg['knowledge_id'][i]=i\n"
]
}
],
"source": [
"for i in range(len(newskg)):\n",
" newskg['knowledge_id'][i]=i"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>语句实训</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>函数实训</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>输入输出</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2317</td>\n",
" <td>数组实训</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id knowledge knowledge_id\n",
"0 2316 控制结构实训 0\n",
"1 2316 语句实训 1\n",
"2 2316 函数实训 2\n",
"3 2316 输入输出 3\n",
"4 2317 数组实训 4"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"newskg.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>71</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++控制结构实训</td>\n",
" <td>控制结构实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>76</td>\n",
" <td>Shixun</td>\n",
" <td>函数实训</td>\n",
" <td>函数实训</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2317</td>\n",
" <td>80</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++数组实训</td>\n",
" <td>数组实训</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge\n",
"0 2316 60 Shixun C&C++基本输入输出 输入输出\n",
"1 2316 67 Shixun C&C++表达式语句实训 语句实训\n",
"2 2316 71 Shixun C&C++控制结构实训 控制结构实训\n",
"3 2316 76 Shixun 函数实训 函数实训\n",
"4 2317 80 Shixun C&C++数组实训 数组实训"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_shixun.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"stage_shixun=pd.merge(stage_shixun,newskg,on=\"stage_id\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge_x</th>\n",
" <th>knowledge_y</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>语句实训</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>函数实训</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge_x knowledge_y \\\n",
"0 2316 60 Shixun C&C++基本输入输出 输入输出 控制结构实训 \n",
"1 2316 60 Shixun C&C++基本输入输出 输入输出 语句实训 \n",
"2 2316 60 Shixun C&C++基本输入输出 输入输出 函数实训 \n",
"3 2316 60 Shixun C&C++基本输入输出 输入输出 输入输出 \n",
"4 2316 67 Shixun C&C++表达式语句实训 语句实训 控制结构实训 \n",
"\n",
" knowledge_id \n",
"0 0 \n",
"1 1 \n",
"2 2 \n",
"3 3 \n",
"4 0 "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_shixun.head()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"stage_shixun=stage_shixun.query('knowledge_x == knowledge_y')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"stage_shixun = stage_shixun[['stage_id',\"item_id\",'item_type','item_name','knowledge_x','knowledge_id']]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge_x</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2316</td>\n",
" <td>71</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++控制结构实训</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>2316</td>\n",
" <td>76</td>\n",
" <td>Shixun</td>\n",
" <td>函数实训</td>\n",
" <td>函数实训</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>2317</td>\n",
" <td>80</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++数组实训</td>\n",
" <td>数组实训</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>2317</td>\n",
" <td>99</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++指针实训</td>\n",
" <td>指针实训</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>2317</td>\n",
" <td>100</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++结构实训</td>\n",
" <td>结构实训</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>2317</td>\n",
" <td>101</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++线性表实训</td>\n",
" <td>C&amp;C++线性表实训</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>2319</td>\n",
" <td>1381</td>\n",
" <td>Shixun</td>\n",
" <td>C++ 面向对象 - 类和对象的创建和使用</td>\n",
" <td>C++面向对象类和对象的创建和使用</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>2319</td>\n",
" <td>1423</td>\n",
" <td>Shixun</td>\n",
" <td>C++ 面向对象 - 构造函数与析构函数</td>\n",
" <td>C++面向对象构造函数与析构函数</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>2320</td>\n",
" <td>1454</td>\n",
" <td>Shixun</td>\n",
" <td>C++ 面向对象 - 类的继承与派生</td>\n",
" <td>C++面向对象的继承与派生</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>2320</td>\n",
" <td>1481</td>\n",
" <td>Shixun</td>\n",
" <td>C++ 面向对象 - 类的多态性与虚函数</td>\n",
" <td>C++面向对象的多态性与</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>2428</td>\n",
" <td>8431</td>\n",
" <td>Shixun</td>\n",
" <td>软件危机</td>\n",
" <td>软件危机</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>2428</td>\n",
" <td>8464</td>\n",
" <td>Shixun</td>\n",
" <td>走进软件工程</td>\n",
" <td>走进软件工程</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>2428</td>\n",
" <td>8561</td>\n",
" <td>Shixun</td>\n",
" <td>SWEBOK 与软件过程</td>\n",
" <td>SWEBOK软件过程</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>2428</td>\n",
" <td>8562</td>\n",
" <td>Shixun</td>\n",
" <td>主要的软件过程模型</td>\n",
" <td>的软件过程模型</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>2428</td>\n",
" <td>8563</td>\n",
" <td>Shixun</td>\n",
" <td>敏捷软件过程</td>\n",
" <td>软件过程</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>2429</td>\n",
" <td>8710</td>\n",
" <td>Shixun</td>\n",
" <td>可行性分析的任务及报告</td>\n",
" <td>可行性分析的任务及报告</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>2544</td>\n",
" <td>8426</td>\n",
" <td>Shixun</td>\n",
" <td>软件设计</td>\n",
" <td>软件设计</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>2545</td>\n",
" <td>8699</td>\n",
" <td>Shixun</td>\n",
" <td>结构化分析与设计</td>\n",
" <td>结构化分析与设计</td>\n",
" <td>23</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge_x \\\n",
"3 2316 60 Shixun C&C++基本输入输出 输入输出 \n",
"5 2316 67 Shixun C&C++表达式语句实训 语句实训 \n",
"8 2316 71 Shixun C&C++控制结构实训 控制结构实训 \n",
"14 2316 76 Shixun 函数实训 函数实训 \n",
"16 2317 80 Shixun C&C++数组实训 数组实训 \n",
"21 2317 99 Shixun C&C++指针实训 指针实训 \n",
"27 2317 100 Shixun C&C++结构实训 结构实训 \n",
"30 2317 101 Shixun C&C++线性表实训 C&C++线性表实训 \n",
"33 2319 1381 Shixun C++ 面向对象 - 类和对象的创建和使用 C++面向对象类和对象的创建和使用 \n",
"34 2319 1423 Shixun C++ 面向对象 - 构造函数与析构函数 C++面向对象构造函数与析构函数 \n",
"36 2320 1454 Shixun C++ 面向对象 - 类的继承与派生 C++面向对象的继承与派生 \n",
"39 2320 1481 Shixun C++ 面向对象 - 类的多态性与虚函数 C++面向对象的多态性与 \n",
"43 2428 8431 Shixun 软件危机 软件危机 \n",
"49 2428 8464 Shixun 走进软件工程 走进软件工程 \n",
"52 2428 8561 Shixun SWEBOK 与软件过程 SWEBOK软件过程 \n",
"55 2428 8562 Shixun 主要的软件过程模型 的软件过程模型 \n",
"61 2428 8563 Shixun 敏捷软件过程 软件过程 \n",
"65 2429 8710 Shixun 可行性分析的任务及报告 可行性分析的任务及报告 \n",
"66 2544 8426 Shixun 软件设计 软件设计 \n",
"67 2545 8699 Shixun 结构化分析与设计 结构化分析与设计 \n",
"\n",
" knowledge_id \n",
"3 3 \n",
"5 1 \n",
"8 0 \n",
"14 2 \n",
"16 4 \n",
"21 5 \n",
"27 7 \n",
"30 6 \n",
"33 11 \n",
"34 10 \n",
"36 12 \n",
"39 13 \n",
"43 19 \n",
"49 20 \n",
"52 18 \n",
"55 16 \n",
"61 17 \n",
"65 21 \n",
"66 22 \n",
"67 23 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_shixun.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"stage_shixun.rename(columns={'knowledge_x':'knowledge'},inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2316</td>\n",
" <td>71</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++控制结构实训</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>2316</td>\n",
" <td>76</td>\n",
" <td>Shixun</td>\n",
" <td>函数实训</td>\n",
" <td>函数实训</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>2317</td>\n",
" <td>80</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++数组实训</td>\n",
" <td>数组实训</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"3 2316 60 Shixun C&C++基本输入输出 输入输出 3\n",
"5 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n",
"8 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n",
"14 2316 76 Shixun 函数实训 函数实训 2\n",
"16 2317 80 Shixun C&C++数组实训 数组实训 4"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_shixun.head()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"kg_shixun = stage_shixun[[\"knowledge_id\",\"item_id\"]]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"kg_shixun=kg_shixun.drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"kg_shixun.rename(columns={'item_id':'shixun_id'},inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"kg_shixun['relation']='知识点实训'"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"kg_shixun.to_csv('knowledge_forest_data/sample/relation/kg_shixun.csv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2316</td>\n",
" <td>71</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++控制结构实训</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>2316</td>\n",
" <td>76</td>\n",
" <td>Shixun</td>\n",
" <td>函数实训</td>\n",
" <td>函数实训</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>2317</td>\n",
" <td>80</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++数组实训</td>\n",
" <td>数组实训</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"3 2316 60 Shixun C&C++基本输入输出 输入输出 3\n",
"5 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n",
"8 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n",
"14 2316 76 Shixun 函数实训 函数实训 2\n",
"16 2317 80 Shixun C&C++数组实训 数组实训 4"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_shixun.head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13581</td>\n",
" <td>2245589</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_02线性表.pptx</td>\n",
" <td>数据结构与算法线性表</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13582</td>\n",
" <td>2245627</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
" <td>数据结构与算法和队列</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13583</td>\n",
" <td>2245691</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
" <td>数据结构与算法树二叉树</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13583</td>\n",
" <td>2245690</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
" <td>数据结构与算法字典结构</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13586</td>\n",
" <td>2245777</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_08排序.pptx</td>\n",
" <td>数据结构与算法排序</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge\n",
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表\n",
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列\n",
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树\n",
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构\n",
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_att.head()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"stage_kg=stage_shixun"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>525</th>\n",
" <td>13586</td>\n",
" <td>54462</td>\n",
" <td>Shixun</td>\n",
" <td>选择排序</td>\n",
" <td>选择排序</td>\n",
" <td>104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>530</th>\n",
" <td>13586</td>\n",
" <td>36229</td>\n",
" <td>Shixun</td>\n",
" <td>查找</td>\n",
" <td>查找</td>\n",
" <td>106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>531</th>\n",
" <td>13587</td>\n",
" <td>16175</td>\n",
" <td>Shixun</td>\n",
" <td>图——课上课后练</td>\n",
" <td>图课上课后练</td>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>532</th>\n",
" <td>13588</td>\n",
" <td>23187</td>\n",
" <td>Shixun</td>\n",
" <td>散列——实验及提升训练</td>\n",
" <td>散列实验及提升训练</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>535</th>\n",
" <td>13588</td>\n",
" <td>27244</td>\n",
" <td>Shixun</td>\n",
" <td>集合——课上练</td>\n",
" <td>集合课练</td>\n",
" <td>109</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"525 13586 54462 Shixun 选择排序 选择排序 104\n",
"530 13586 36229 Shixun 查找 查找 106\n",
"531 13587 16175 Shixun 图——课上课后练 图课上课后练 107\n",
"532 13588 23187 Shixun 散列——实验及提升训练 散列实验及提升训练 108\n",
"535 13588 27244 Shixun 集合——课上练 集合课练 109"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_kg.tail()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>525</th>\n",
" <td>13586</td>\n",
" <td>54462</td>\n",
" <td>Shixun</td>\n",
" <td>选择排序</td>\n",
" <td>选择排序</td>\n",
" <td>104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>530</th>\n",
" <td>13586</td>\n",
" <td>36229</td>\n",
" <td>Shixun</td>\n",
" <td>查找</td>\n",
" <td>查找</td>\n",
" <td>106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>531</th>\n",
" <td>13587</td>\n",
" <td>16175</td>\n",
" <td>Shixun</td>\n",
" <td>图——课上课后练</td>\n",
" <td>图课上课后练</td>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>532</th>\n",
" <td>13588</td>\n",
" <td>23187</td>\n",
" <td>Shixun</td>\n",
" <td>散列——实验及提升训练</td>\n",
" <td>散列实验及提升训练</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>535</th>\n",
" <td>13588</td>\n",
" <td>27244</td>\n",
" <td>Shixun</td>\n",
" <td>集合——课上练</td>\n",
" <td>集合课练</td>\n",
" <td>109</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"525 13586 54462 Shixun 选择排序 选择排序 104\n",
"530 13586 36229 Shixun 查找 查找 106\n",
"531 13587 16175 Shixun 图——课上课后练 图课上课后练 107\n",
"532 13588 23187 Shixun 散列——实验及提升训练 散列实验及提升训练 108\n",
"535 13588 27244 Shixun 集合——课上练 集合课练 109"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_kg.tail()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13581</td>\n",
" <td>2245589</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_02线性表.pptx</td>\n",
" <td>数据结构与算法线性表</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13582</td>\n",
" <td>2245627</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
" <td>数据结构与算法和队列</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13583</td>\n",
" <td>2245691</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
" <td>数据结构与算法树二叉树</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13583</td>\n",
" <td>2245690</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
" <td>数据结构与算法字典结构</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13586</td>\n",
" <td>2245777</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_08排序.pptx</td>\n",
" <td>数据结构与算法排序</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge\n",
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表\n",
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列\n",
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树\n",
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构\n",
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_att.head()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"stage_att['knowledge_id']=0"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13581</td>\n",
" <td>2245589</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_02线性表.pptx</td>\n",
" <td>数据结构与算法线性表</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13582</td>\n",
" <td>2245627</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
" <td>数据结构与算法和队列</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13583</td>\n",
" <td>2245691</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
" <td>数据结构与算法树二叉树</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13583</td>\n",
" <td>2245690</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
" <td>数据结构与算法字典结构</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13586</td>\n",
" <td>2245777</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_08排序.pptx</td>\n",
" <td>数据结构与算法排序</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge \\\n",
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表 \n",
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列 \n",
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n",
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n",
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n",
"\n",
" knowledge_id \n",
"0 0 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_att.head()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_32602/3546850827.py:9: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" stage_att['knowledge_id'][i] = len(stage_kg)+1\n"
]
}
],
"source": [
"for i in range(len(stage_att)):\n",
" skg=stage_kg[stage_kg[\"stage_id\"]==stage_att['stage_id'][i]]\n",
" skg = skg[skg['knowledge'].isin([stage_att['knowledge'][i]])]\n",
" try:\n",
" if len(skg) > 0:\n",
" stage_att['knowledge_id'][i] = int(skg['knowledge_id'])\n",
" stage_kg=stage_kg.append(stage_att[i:i+1],ignore_index=True)\n",
" else:\n",
" stage_att['knowledge_id'][i] = len(stage_kg)+1\n",
" stage_kg=stage_kg.append(stage_att[i:i+1],ignore_index=True)\n",
" except:\n",
" stage_att['knowledge_id'][i] = int(skg[:1]['knowledge_id'])\n",
" stage_kg=stage_kg.append(stage_att[i:i+1],ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13581</td>\n",
" <td>2245589</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_02线性表.pptx</td>\n",
" <td>数据结构与算法线性表</td>\n",
" <td>113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13582</td>\n",
" <td>2245627</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_03栈和队列.pptx</td>\n",
" <td>数据结构与算法和队列</td>\n",
" <td>114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13583</td>\n",
" <td>2245691</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
" <td>数据结构与算法树二叉树</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13583</td>\n",
" <td>2245690</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
" <td>数据结构与算法字典结构</td>\n",
" <td>116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13586</td>\n",
" <td>2245777</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_08排序.pptx</td>\n",
" <td>数据结构与算法排序</td>\n",
" <td>117</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge \\\n",
"0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表 \n",
"1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列 \n",
"2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n",
"3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n",
"4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n",
"\n",
" knowledge_id \n",
"0 113 \n",
"1 114 \n",
"2 115 \n",
"3 116 \n",
"4 117 "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_att.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"kg_att=stage_att[[\"knowledge_id\",\"item_id\"]]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"kg_att=kg_att.drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"kg_att.rename(columns={'item_id':'attachment_id'},inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"kg_att['relation']='知识点课件'"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>knowledge_id</th>\n",
" <th>attachment_id</th>\n",
" <th>relation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>113</td>\n",
" <td>2245589</td>\n",
" <td>知识点课件</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>114</td>\n",
" <td>2245627</td>\n",
" <td>知识点课件</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>115</td>\n",
" <td>2245691</td>\n",
" <td>知识点课件</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>116</td>\n",
" <td>2245690</td>\n",
" <td>知识点课件</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>117</td>\n",
" <td>2245777</td>\n",
" <td>知识点课件</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" knowledge_id attachment_id relation\n",
"0 113 2245589 知识点课件\n",
"1 114 2245627 知识点课件\n",
"2 115 2245691 知识点课件\n",
"3 116 2245690 知识点课件\n",
"4 117 2245777 知识点课件"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kg_att.head() "
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"kg_att.to_csv('knowledge_forest_data/sample/relation/kg_att.csv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>114</th>\n",
" <td>13583</td>\n",
" <td>2245691</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_04树与二叉树.pptx</td>\n",
" <td>数据结构与算法树二叉树</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>13583</td>\n",
" <td>2245690</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_05高级字典结构.pptx</td>\n",
" <td>数据结构与算法字典结构</td>\n",
" <td>116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>13586</td>\n",
" <td>2245777</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_08排序.pptx</td>\n",
" <td>数据结构与算法排序</td>\n",
" <td>117</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>13587</td>\n",
" <td>2245802</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_06图.pptx</td>\n",
" <td>数据结构与算法图</td>\n",
" <td>118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>13588</td>\n",
" <td>2245824</td>\n",
" <td>Attachment</td>\n",
" <td>数据结构与算法_07集合和字典.pptx</td>\n",
" <td>数据结构与算法集合和字典</td>\n",
" <td>119</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge \\\n",
"114 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n",
"115 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n",
"116 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n",
"117 13587 2245802 Attachment 数据结构与算法_06图.pptx 数据结构与算法图 \n",
"118 13588 2245824 Attachment 数据结构与算法_07集合和字典.pptx 数据结构与算法集合和字典 \n",
"\n",
" knowledge_id \n",
"114 115 \n",
"115 116 \n",
"116 117 \n",
"117 118 \n",
"118 119 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_kg.tail()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2428</td>\n",
" <td>1015</td>\n",
" <td>VideoItem</td>\n",
" <td>软件工程引言和软件的概念</td>\n",
" <td>软件工程引言和软件的概念</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2428</td>\n",
" <td>1016</td>\n",
" <td>VideoItem</td>\n",
" <td>软件的基本概念</td>\n",
" <td>软件的概念</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2428</td>\n",
" <td>1017</td>\n",
" <td>VideoItem</td>\n",
" <td>软件过程</td>\n",
" <td>软件过程</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2428</td>\n",
" <td>1018</td>\n",
" <td>VideoItem</td>\n",
" <td>RUP简介</td>\n",
" <td>RUP简介</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2428</td>\n",
" <td>1019</td>\n",
" <td>VideoItem</td>\n",
" <td>敏捷的过程</td>\n",
" <td>的过程</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge\n",
"0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念\n",
"1 2428 1016 VideoItem 软件的基本概念 软件的概念\n",
"2 2428 1017 VideoItem 软件过程 软件过程\n",
"3 2428 1018 VideoItem RUP简介 RUP简介\n",
"4 2428 1019 VideoItem 敏捷的过程 的过程"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_video.head()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"stage_video['knowledge_id']=0"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2428</td>\n",
" <td>1015</td>\n",
" <td>VideoItem</td>\n",
" <td>软件工程引言和软件的概念</td>\n",
" <td>软件工程引言和软件的概念</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2428</td>\n",
" <td>1016</td>\n",
" <td>VideoItem</td>\n",
" <td>软件的基本概念</td>\n",
" <td>软件的概念</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2428</td>\n",
" <td>1017</td>\n",
" <td>VideoItem</td>\n",
" <td>软件过程</td>\n",
" <td>软件过程</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2428</td>\n",
" <td>1018</td>\n",
" <td>VideoItem</td>\n",
" <td>RUP简介</td>\n",
" <td>RUP简介</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2428</td>\n",
" <td>1019</td>\n",
" <td>VideoItem</td>\n",
" <td>敏捷的过程</td>\n",
" <td>的过程</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念 0\n",
"1 2428 1016 VideoItem 软件的基本概念 软件的概念 0\n",
"2 2428 1017 VideoItem 软件过程 软件过程 0\n",
"3 2428 1018 VideoItem RUP简介 RUP简介 0\n",
"4 2428 1019 VideoItem 敏捷的过程 的过程 0"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_video.head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_32602/3125594964.py:9: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" stage_video['knowledge_id'][i] = len(stage_kg)+1\n",
"/tmp/ipykernel_32602/3125594964.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" stage_video['knowledge_id'][i] = int(skg['knowledge_id'])\n"
]
}
],
"source": [
"for i in range(len(stage_video)):\n",
" skg = stage_kg[stage_kg[\"stage_id\"]==stage_video['stage_id'][i]]\n",
" skg = skg[skg['knowledge'].isin([stage_video['knowledge'][i]])]\n",
" try:\n",
" if len(skg) > 0:\n",
" stage_video['knowledge_id'][i] = int(skg['knowledge_id'])\n",
" stage_kg=stage_kg.append(stage_video[i:i+1],ignore_index=True)\n",
" else:\n",
" stage_video['knowledge_id'][i] = len(stage_kg)+1\n",
" stage_kg=stage_kg.append(stage_video[i:i+1],ignore_index=True)\n",
" except:\n",
" stage_video['knowledge_id'][i] = int(skg[:1]['knowledge_id'])\n",
" stage_kg=stage_kg.append(stage_video[i:i+1],ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2428</td>\n",
" <td>1015</td>\n",
" <td>VideoItem</td>\n",
" <td>软件工程引言和软件的概念</td>\n",
" <td>软件工程引言和软件的概念</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2428</td>\n",
" <td>1016</td>\n",
" <td>VideoItem</td>\n",
" <td>软件的基本概念</td>\n",
" <td>软件的概念</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2428</td>\n",
" <td>1017</td>\n",
" <td>VideoItem</td>\n",
" <td>软件过程</td>\n",
" <td>软件过程</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2428</td>\n",
" <td>1018</td>\n",
" <td>VideoItem</td>\n",
" <td>RUP简介</td>\n",
" <td>RUP简介</td>\n",
" <td>123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2428</td>\n",
" <td>1019</td>\n",
" <td>VideoItem</td>\n",
" <td>敏捷的过程</td>\n",
" <td>的过程</td>\n",
" <td>124</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念 120\n",
"1 2428 1016 VideoItem 软件的基本概念 软件的概念 121\n",
"2 2428 1017 VideoItem 软件过程 软件过程 17\n",
"3 2428 1018 VideoItem RUP简介 RUP简介 123\n",
"4 2428 1019 VideoItem 敏捷的过程 的过程 124"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_video.head()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"kg_video=stage_video[[\"knowledge_id\",\"item_id\"]]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"kg_video = kg_video.drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"kg_video.rename(columns={'item_id':'video_item_id'},inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"kg_video['relation']='知识点视频'"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>knowledge_id</th>\n",
" <th>video_item_id</th>\n",
" <th>relation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>120</td>\n",
" <td>1015</td>\n",
" <td>知识点视频</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>121</td>\n",
" <td>1016</td>\n",
" <td>知识点视频</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17</td>\n",
" <td>1017</td>\n",
" <td>知识点视频</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>123</td>\n",
" <td>1018</td>\n",
" <td>知识点视频</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>124</td>\n",
" <td>1019</td>\n",
" <td>知识点视频</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" knowledge_id video_item_id relation\n",
"0 120 1015 知识点视频\n",
"1 121 1016 知识点视频\n",
"2 17 1017 知识点视频\n",
"3 123 1018 知识点视频\n",
"4 124 1019 知识点视频"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kg_video.head()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"kg_video.to_csv('knowledge_forest_data/sample/relation/kg_video.csv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stage_id</th>\n",
" <th>item_id</th>\n",
" <th>item_type</th>\n",
" <th>item_name</th>\n",
" <th>knowledge</th>\n",
" <th>knowledge_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2316</td>\n",
" <td>60</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++基本输入输出</td>\n",
" <td>输入输出</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2316</td>\n",
" <td>67</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++表达式语句实训</td>\n",
" <td>语句实训</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2316</td>\n",
" <td>71</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++控制结构实训</td>\n",
" <td>控制结构实训</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2316</td>\n",
" <td>76</td>\n",
" <td>Shixun</td>\n",
" <td>函数实训</td>\n",
" <td>函数实训</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2317</td>\n",
" <td>80</td>\n",
" <td>Shixun</td>\n",
" <td>C&amp;C++数组实训</td>\n",
" <td>数组实训</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stage_id item_id item_type item_name knowledge knowledge_id\n",
"0 2316 60 Shixun C&C++基本输入输出 输入输出 3\n",
"1 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n",
"2 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n",
"3 2316 76 Shixun 函数实训 函数实训 2\n",
"4 2317 80 Shixun C&C++数组实训 数组实训 4"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_kg.head()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"knowledge=stage_kg[['knowledge_id','knowledge']]"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"knowledge = knowledge.drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"knowledge.to_csv('knowledge_forest_data/sample/entity/knowledge.csv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"stage_kg = stage_kg[['stage_id','knowledge_id']]"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"stage_kg = stage_kg.drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"stage_kg['relation'] = '章节知识点'"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"stage_kg.to_csv('knowledge_forest_data/sample/relation/stage_kg.csv',sep='\\t',index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mooc",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}