{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "stage_shixun=pd.read_csv('knowledge_forest_data/sample/stage_shixuns.csv',sep='\\t')\n", "stage_att=pd.read_csv('knowledge_forest_data/sample/stage_attachments.csv',sep='\\t')\n", "stage_video=pd.read_csv('knowledge_forest_data/sample/stage_videos.csv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge
0231660ShixunC&C++基本输入输出输入输出
1231667ShixunC&C++表达式语句实训语句实训
2231671ShixunC&C++控制结构实训控制结构实训
3231676Shixun函数实训函数实训
4231780ShixunC&C++数组实训数组实训
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge\n", "0 2316 60 Shixun C&C++基本输入输出 输入输出\n", "1 2316 67 Shixun C&C++表达式语句实训 语句实训\n", "2 2316 71 Shixun C&C++控制结构实训 控制结构实训\n", "3 2316 76 Shixun 函数实训 函数实训\n", "4 2317 80 Shixun C&C++数组实训 数组实训" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_shixun.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "stage_knowledge=stage_shixun[['stage_id','knowledge']]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_idknowledge
02316输入输出
12316语句实训
22316控制结构实训
32316函数实训
42317数组实训
\n", "
" ], "text/plain": [ " stage_id knowledge\n", "0 2316 输入输出\n", "1 2316 语句实训\n", "2 2316 控制结构实训\n", "3 2316 函数实训\n", "4 2317 数组实训" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_knowledge.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "112" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(stage_knowledge)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "new_stage_knowledge=stage_knowledge.groupby(['stage_id'])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Collapse each stage's knowledge points into one '$'-joined string without duplicates.\n", "# dict.fromkeys keeps first-seen order, so the row order (and the knowledge_id numbering\n", "# later derived from it) is identical on every run; set() order changes with string\n", "# hash randomisation. Grouping from stage_knowledge directly keeps this cell re-runnable.\n", "new_stage_knowledge=stage_knowledge.groupby(['stage_id'])['knowledge'].apply(lambda x: \"$\".join(dict.fromkeys(x.str.cat(sep='$').split('$')))).reset_index()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_idknowledge
02316控制结构实训$语句实训$函数实训$输入输出
12317数组实训$指针实训$C&C++线性表实训$结构实训
22318文件实训$面向过程编程练习
32319C++面向对象构造函数与析构函数$C++面向对象类和对象的创建和使用
42320C++面向对象的继承与派生$C++面向对象的多态性与
\n", "
" ], "text/plain": [ " stage_id knowledge\n", "0 2316 控制结构实训$语句实训$函数实训$输入输出\n", "1 2317 数组实训$指针实训$C&C++线性表实训$结构实训\n", "2 2318 文件实训$面向过程编程练习\n", "3 2319 C++面向对象构造函数与析构函数$C++面向对象类和对象的创建和使用\n", "4 2320 C++面向对象的继承与派生$C++面向对象的多态性与" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_stage_knowledge.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "new_stage_knowledge[\"knowledge\"] = new_stage_knowledge[\"knowledge\"].apply(lambda x: x.split(\"$\"))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_idknowledge
02316[控制结构实训, 语句实训, 函数实训, 输入输出]
12317[数组实训, 指针实训, C&C++线性表实训, 结构实训]
22318[文件实训, 面向过程编程练习]
32319[C++面向对象构造函数与析构函数, C++面向对象类和对象的创建和使用]
42320[C++面向对象的继承与派生, C++面向对象的多态性与]
\n", "
" ], "text/plain": [ " stage_id knowledge\n", "0 2316 [控制结构实训, 语句实训, 函数实训, 输入输出]\n", "1 2317 [数组实训, 指针实训, C&C++线性表实训, 结构实训]\n", "2 2318 [文件实训, 面向过程编程练习]\n", "3 2319 [C++面向对象构造函数与析构函数, C++面向对象类和对象的创建和使用]\n", "4 2320 [C++面向对象的继承与派生, C++面向对象的多态性与]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_stage_knowledge.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "newskg=new_stage_knowledge.explode(\"knowledge\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "110" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(newskg)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "newskg['knowledge_id']=0" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_idknowledgeknowledge_id
02316控制结构实训0
02316语句实训0
02316函数实训0
02316输入输出0
12317数组实训0
\n", "
" ], "text/plain": [ " stage_id knowledge knowledge_id\n", "0 2316 控制结构实训 0\n", "0 2316 语句实训 0\n", "0 2316 函数实训 0\n", "0 2316 输入输出 0\n", "1 2317 数组实训 0" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "newskg.head()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "newskg.reset_index(inplace=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "newskg=newskg[['stage_id', 'knowledge', 'knowledge_id']]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_idknowledgeknowledge_id
02316控制结构实训0
12316语句实训0
22316函数实训0
32316输入输出0
42317数组实训0
\n", "
" ], "text/plain": [ " stage_id knowledge knowledge_id\n", "0 2316 控制结构实训 0\n", "1 2316 语句实训 0\n", "2 2316 函数实训 0\n", "3 2316 输入输出 0\n", "4 2317 数组实训 0" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "newskg.head()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# Number the (stage_id, knowledge) rows 0..n-1 with a single column assignment.\n", "# The previous per-row newskg['knowledge_id'][i]=i was chained indexing: it raised\n", "# SettingWithCopyWarning and does not update the frame under pandas Copy-on-Write.\n", "newskg=newskg.assign(knowledge_id=range(len(newskg)))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_idknowledgeknowledge_id
02316控制结构实训0
12316语句实训1
22316函数实训2
32316输入输出3
42317数组实训4
\n", "
" ], "text/plain": [ " stage_id knowledge knowledge_id\n", "0 2316 控制结构实训 0\n", "1 2316 语句实训 1\n", "2 2316 函数实训 2\n", "3 2316 输入输出 3\n", "4 2317 数组实训 4" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "newskg.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge
0231660ShixunC&C++基本输入输出输入输出
1231667ShixunC&C++表达式语句实训语句实训
2231671ShixunC&C++控制结构实训控制结构实训
3231676Shixun函数实训函数实训
4231780ShixunC&C++数组实训数组实训
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge\n", "0 2316 60 Shixun C&C++基本输入输出 输入输出\n", "1 2316 67 Shixun C&C++表达式语句实训 语句实训\n", "2 2316 71 Shixun C&C++控制结构实训 控制结构实训\n", "3 2316 76 Shixun 函数实训 函数实训\n", "4 2317 80 Shixun C&C++数组实训 数组实训" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_shixun.head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "stage_shixun=pd.merge(stage_shixun,newskg,on=\"stage_id\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge_xknowledge_yknowledge_id
0231660ShixunC&C++基本输入输出输入输出控制结构实训0
1231660ShixunC&C++基本输入输出输入输出语句实训1
2231660ShixunC&C++基本输入输出输入输出函数实训2
3231660ShixunC&C++基本输入输出输入输出输入输出3
4231667ShixunC&C++表达式语句实训语句实训控制结构实训0
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge_x knowledge_y \\\n", "0 2316 60 Shixun C&C++基本输入输出 输入输出 控制结构实训 \n", "1 2316 60 Shixun C&C++基本输入输出 输入输出 语句实训 \n", "2 2316 60 Shixun C&C++基本输入输出 输入输出 函数实训 \n", "3 2316 60 Shixun C&C++基本输入输出 输入输出 输入输出 \n", "4 2316 67 Shixun C&C++表达式语句实训 语句实训 控制结构实训 \n", "\n", " knowledge_id \n", "0 0 \n", "1 1 \n", "2 2 \n", "3 3 \n", "4 0 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_shixun.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "stage_shixun=stage_shixun.query('knowledge_x == knowledge_y')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "stage_shixun = stage_shixun[['stage_id',\"item_id\",'item_type','item_name','knowledge_x','knowledge_id']]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge_xknowledge_id
3231660ShixunC&C++基本输入输出输入输出3
5231667ShixunC&C++表达式语句实训语句实训1
8231671ShixunC&C++控制结构实训控制结构实训0
14231676Shixun函数实训函数实训2
16231780ShixunC&C++数组实训数组实训4
21231799ShixunC&C++指针实训指针实训5
272317100ShixunC&C++结构实训结构实训7
302317101ShixunC&C++线性表实训C&C++线性表实训6
3323191381ShixunC++ 面向对象 - 类和对象的创建和使用C++面向对象类和对象的创建和使用11
3423191423ShixunC++ 面向对象 - 构造函数与析构函数C++面向对象构造函数与析构函数10
3623201454ShixunC++ 面向对象 - 类的继承与派生C++面向对象的继承与派生12
3923201481ShixunC++ 面向对象 - 类的多态性与虚函数C++面向对象的多态性与13
4324288431Shixun软件危机软件危机19
4924288464Shixun走进软件工程走进软件工程20
5224288561ShixunSWEBOK 与软件过程SWEBOK软件过程18
5524288562Shixun主要的软件过程模型的软件过程模型16
6124288563Shixun敏捷软件过程软件过程17
6524298710Shixun可行性分析的任务及报告可行性分析的任务及报告21
6625448426Shixun软件设计软件设计22
6725458699Shixun结构化分析与设计结构化分析与设计23
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge_x \\\n", "3 2316 60 Shixun C&C++基本输入输出 输入输出 \n", "5 2316 67 Shixun C&C++表达式语句实训 语句实训 \n", "8 2316 71 Shixun C&C++控制结构实训 控制结构实训 \n", "14 2316 76 Shixun 函数实训 函数实训 \n", "16 2317 80 Shixun C&C++数组实训 数组实训 \n", "21 2317 99 Shixun C&C++指针实训 指针实训 \n", "27 2317 100 Shixun C&C++结构实训 结构实训 \n", "30 2317 101 Shixun C&C++线性表实训 C&C++线性表实训 \n", "33 2319 1381 Shixun C++ 面向对象 - 类和对象的创建和使用 C++面向对象类和对象的创建和使用 \n", "34 2319 1423 Shixun C++ 面向对象 - 构造函数与析构函数 C++面向对象构造函数与析构函数 \n", "36 2320 1454 Shixun C++ 面向对象 - 类的继承与派生 C++面向对象的继承与派生 \n", "39 2320 1481 Shixun C++ 面向对象 - 类的多态性与虚函数 C++面向对象的多态性与 \n", "43 2428 8431 Shixun 软件危机 软件危机 \n", "49 2428 8464 Shixun 走进软件工程 走进软件工程 \n", "52 2428 8561 Shixun SWEBOK 与软件过程 SWEBOK软件过程 \n", "55 2428 8562 Shixun 主要的软件过程模型 的软件过程模型 \n", "61 2428 8563 Shixun 敏捷软件过程 软件过程 \n", "65 2429 8710 Shixun 可行性分析的任务及报告 可行性分析的任务及报告 \n", "66 2544 8426 Shixun 软件设计 软件设计 \n", "67 2545 8699 Shixun 结构化分析与设计 结构化分析与设计 \n", "\n", " knowledge_id \n", "3 3 \n", "5 1 \n", "8 0 \n", "14 2 \n", "16 4 \n", "21 5 \n", "27 7 \n", "30 6 \n", "33 11 \n", "34 10 \n", "36 12 \n", "39 13 \n", "43 19 \n", "49 20 \n", "52 18 \n", "55 16 \n", "61 17 \n", "65 21 \n", "66 22 \n", "67 23 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_shixun.head(20)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "stage_shixun.rename(columns={'knowledge_x':'knowledge'},inplace=True)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
3231660ShixunC&C++基本输入输出输入输出3
5231667ShixunC&C++表达式语句实训语句实训1
8231671ShixunC&C++控制结构实训控制结构实训0
14231676Shixun函数实训函数实训2
16231780ShixunC&C++数组实训数组实训4
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "3 2316 60 Shixun C&C++基本输入输出 输入输出 3\n", "5 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n", "8 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n", "14 2316 76 Shixun 函数实训 函数实训 2\n", "16 2317 80 Shixun C&C++数组实训 数组实训 4" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_shixun.head()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# Knowledge point -> shixun edge table: start from the two id columns.\n", "kg_shixun = stage_shixun.loc[:, ['knowledge_id', 'item_id']]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Keep one row per (knowledge_id, item_id) pair.\n", "kg_shixun = kg_shixun.drop_duplicates(keep='first')" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# Name the item column after the item type it holds.\n", "kg_shixun = kg_shixun.rename(columns={'item_id': 'shixun_id'})" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# Constant relation label for every edge in this table.\n", "kg_shixun = kg_shixun.assign(relation='知识点实训')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# Export as a tab-separated file without the index column.\n", "kg_shixun.to_csv('knowledge_forest_data/sample/relation/kg_shixun.csv', sep='\\t', index=False)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
3231660ShixunC&C++基本输入输出输入输出3
5231667ShixunC&C++表达式语句实训语句实训1
8231671ShixunC&C++控制结构实训控制结构实训0
14231676Shixun函数实训函数实训2
16231780ShixunC&C++数组实训数组实训4
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "3 2316 60 Shixun C&C++基本输入输出 输入输出 3\n", "5 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n", "8 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n", "14 2316 76 Shixun 函数实训 函数实训 2\n", "16 2317 80 Shixun C&C++数组实训 数组实训 4" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_shixun.head()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge
0135812245589Attachment数据结构与算法_02线性表.pptx数据结构与算法线性表
1135822245627Attachment数据结构与算法_03栈和队列.pptx数据结构与算法和队列
2135832245691Attachment数据结构与算法_04树与二叉树.pptx数据结构与算法树二叉树
3135832245690Attachment数据结构与算法_05高级字典结构.pptx数据结构与算法字典结构
4135862245777Attachment数据结构与算法_08排序.pptx数据结构与算法排序
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge\n", "0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表\n", "1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列\n", "2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树\n", "3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构\n", "4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_att.head()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "stage_kg=stage_shixun" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
5251358654462Shixun选择排序选择排序104
5301358636229Shixun查找查找106
5311358716175Shixun图——课上课后练图课上课后练107
5321358823187Shixun散列——实验及提升训练散列实验及提升训练108
5351358827244Shixun集合——课上练集合课练109
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "525 13586 54462 Shixun 选择排序 选择排序 104\n", "530 13586 36229 Shixun 查找 查找 106\n", "531 13587 16175 Shixun 图——课上课后练 图课上课后练 107\n", "532 13588 23187 Shixun 散列——实验及提升训练 散列实验及提升训练 108\n", "535 13588 27244 Shixun 集合——课上练 集合课练 109" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_kg.tail()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
5251358654462Shixun选择排序选择排序104
5301358636229Shixun查找查找106
5311358716175Shixun图——课上课后练图课上课后练107
5321358823187Shixun散列——实验及提升训练散列实验及提升训练108
5351358827244Shixun集合——课上练集合课练109
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "525 13586 54462 Shixun 选择排序 选择排序 104\n", "530 13586 36229 Shixun 查找 查找 106\n", "531 13587 16175 Shixun 图——课上课后练 图课上课后练 107\n", "532 13588 23187 Shixun 散列——实验及提升训练 散列实验及提升训练 108\n", "535 13588 27244 Shixun 集合——课上练 集合课练 109" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_kg.tail()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge
0135812245589Attachment数据结构与算法_02线性表.pptx数据结构与算法线性表
1135822245627Attachment数据结构与算法_03栈和队列.pptx数据结构与算法和队列
2135832245691Attachment数据结构与算法_04树与二叉树.pptx数据结构与算法树二叉树
3135832245690Attachment数据结构与算法_05高级字典结构.pptx数据结构与算法字典结构
4135862245777Attachment数据结构与算法_08排序.pptx数据结构与算法排序
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge\n", "0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表\n", "1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列\n", "2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树\n", "3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构\n", "4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_att.head()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "stage_att['knowledge_id']=0" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
0135812245589Attachment数据结构与算法_02线性表.pptx数据结构与算法线性表0
1135822245627Attachment数据结构与算法_03栈和队列.pptx数据结构与算法和队列0
2135832245691Attachment数据结构与算法_04树与二叉树.pptx数据结构与算法树二叉树0
3135832245690Attachment数据结构与算法_05高级字典结构.pptx数据结构与算法字典结构0
4135862245777Attachment数据结构与算法_08排序.pptx数据结构与算法排序0
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge \\\n", "0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表 \n", "1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列 \n", "2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n", "3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n", "4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n", "\n", " knowledge_id \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_att.head()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "# Give every attachment a knowledge_id, then add the attachments to stage_kg.\n", "# An attachment reuses the id of the first stage_kg row with the same\n", "# (stage_id, knowledge); otherwise it gets len(stage_kg) + 1, where the length\n", "# counts the attachments already added (the numbering the old row-by-row loop\n", "# produced). This avoids chained assignment, the bare except and DataFrame.append,\n", "# which pandas 2.0 removed.\n", "known_ids = {}\n", "for sid, name, kid in zip(stage_kg['stage_id'].tolist(), stage_kg['knowledge'].tolist(), stage_kg['knowledge_id'].tolist()):\n", "    known_ids.setdefault((sid, name), int(kid))  # first matching row wins\n", "kg_rows = len(stage_kg)\n", "att_ids = []\n", "for key in zip(stage_att['stage_id'].tolist(), stage_att['knowledge'].tolist()):\n", "    if key not in known_ids:\n", "        known_ids[key] = kg_rows + 1\n", "    att_ids.append(known_ids[key])\n", "    kg_rows += 1  # every attachment becomes one more stage_kg row\n", "stage_att['knowledge_id'] = att_ids\n", "stage_kg = pd.concat([stage_kg, stage_att], ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
0135812245589Attachment数据结构与算法_02线性表.pptx数据结构与算法线性表113
1135822245627Attachment数据结构与算法_03栈和队列.pptx数据结构与算法和队列114
2135832245691Attachment数据结构与算法_04树与二叉树.pptx数据结构与算法树二叉树115
3135832245690Attachment数据结构与算法_05高级字典结构.pptx数据结构与算法字典结构116
4135862245777Attachment数据结构与算法_08排序.pptx数据结构与算法排序117
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge \\\n", "0 13581 2245589 Attachment 数据结构与算法_02线性表.pptx 数据结构与算法线性表 \n", "1 13582 2245627 Attachment 数据结构与算法_03栈和队列.pptx 数据结构与算法和队列 \n", "2 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n", "3 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n", "4 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n", "\n", " knowledge_id \n", "0 113 \n", "1 114 \n", "2 115 \n", "3 116 \n", "4 117 " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_att.head()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "# Knowledge point -> attachment edge table: start from the two id columns.\n", "kg_att = stage_att.loc[:, ['knowledge_id', 'item_id']]" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "# Keep one row per (knowledge_id, item_id) pair.\n", "kg_att = kg_att.drop_duplicates(keep='first')" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# Name the item column after the item type it holds.\n", "kg_att = kg_att.rename(columns={'item_id': 'attachment_id'})" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# Constant relation label for every edge in this table.\n", "kg_att = kg_att.assign(relation='知识点课件')" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
knowledge_idattachment_idrelation
01132245589知识点课件
11142245627知识点课件
21152245691知识点课件
31162245690知识点课件
41172245777知识点课件
\n", "
" ], "text/plain": [ " knowledge_id attachment_id relation\n", "0 113 2245589 知识点课件\n", "1 114 2245627 知识点课件\n", "2 115 2245691 知识点课件\n", "3 116 2245690 知识点课件\n", "4 117 2245777 知识点课件" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kg_att.head() " ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "kg_att.to_csv('knowledge_forest_data/sample/relation/kg_att.csv',sep='\\t',index=False)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
114135832245691Attachment数据结构与算法_04树与二叉树.pptx数据结构与算法树二叉树115
115135832245690Attachment数据结构与算法_05高级字典结构.pptx数据结构与算法字典结构116
116135862245777Attachment数据结构与算法_08排序.pptx数据结构与算法排序117
117135872245802Attachment数据结构与算法_06图.pptx数据结构与算法图118
118135882245824Attachment数据结构与算法_07集合和字典.pptx数据结构与算法集合和字典119
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge \\\n", "114 13583 2245691 Attachment 数据结构与算法_04树与二叉树.pptx 数据结构与算法树二叉树 \n", "115 13583 2245690 Attachment 数据结构与算法_05高级字典结构.pptx 数据结构与算法字典结构 \n", "116 13586 2245777 Attachment 数据结构与算法_08排序.pptx 数据结构与算法排序 \n", "117 13587 2245802 Attachment 数据结构与算法_06图.pptx 数据结构与算法图 \n", "118 13588 2245824 Attachment 数据结构与算法_07集合和字典.pptx 数据结构与算法集合和字典 \n", "\n", " knowledge_id \n", "114 115 \n", "115 116 \n", "116 117 \n", "117 118 \n", "118 119 " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_kg.tail()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledge
024281015VideoItem软件工程引言和软件的概念软件工程引言和软件的概念
124281016VideoItem软件的基本概念软件的概念
224281017VideoItem软件过程软件过程
324281018VideoItemRUP简介RUP简介
424281019VideoItem敏捷的过程的过程
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge\n", "0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念\n", "1 2428 1016 VideoItem 软件的基本概念 软件的概念\n", "2 2428 1017 VideoItem 软件过程 软件过程\n", "3 2428 1018 VideoItem RUP简介 RUP简介\n", "4 2428 1019 VideoItem 敏捷的过程 的过程" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_video.head()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "stage_video['knowledge_id']=0" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
024281015VideoItem软件工程引言和软件的概念软件工程引言和软件的概念0
124281016VideoItem软件的基本概念软件的概念0
224281017VideoItem软件过程软件过程0
324281018VideoItemRUP简介RUP简介0
424281019VideoItem敏捷的过程的过程0
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念 0\n", "1 2428 1016 VideoItem 软件的基本概念 软件的概念 0\n", "2 2428 1017 VideoItem 软件过程 软件过程 0\n", "3 2428 1018 VideoItem RUP简介 RUP简介 0\n", "4 2428 1019 VideoItem 敏捷的过程 的过程 0" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_video.head()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "# Assign a knowledge_id to every video row, reusing the id already recorded in\n", "# stage_kg for the same (stage_id, knowledge) pair when there is one.\n", "kg_id_by_key = {}\n", "for stage_id, name, kg_id in zip(stage_kg['stage_id'], stage_kg['knowledge'], stage_kg['knowledge_id']):\n", "    # setdefault keeps the first id seen for a pair (same first-match rule as before).\n", "    kg_id_by_key.setdefault((stage_id, name), int(kg_id))\n", "\n", "# Every video row ends up appended to stage_kg, so an unseen pair receives\n", "# (rows accumulated so far) + 1 as its new id.\n", "row_count = len(stage_kg)\n", "video_kg_ids = []\n", "for stage_id, name in zip(stage_video['stage_id'], stage_video['knowledge']):\n", "    video_kg_ids.append(kg_id_by_key.setdefault((stage_id, name), row_count + 1))\n", "    row_count += 1\n", "\n", "# A single column assignment and a single concat replace the chained indexing\n", "# (SettingWithCopyWarning) and DataFrame.append, which was removed in pandas 2.0.\n", "stage_video['knowledge_id'] = video_kg_ids\n", "stage_kg = pd.concat([stage_kg, stage_video], ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
024281015VideoItem软件工程引言和软件的概念软件工程引言和软件的概念120
124281016VideoItem软件的基本概念软件的概念121
224281017VideoItem软件过程软件过程17
324281018VideoItemRUP简介RUP简介123
424281019VideoItem敏捷的过程的过程124
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "0 2428 1015 VideoItem 软件工程引言和软件的概念 软件工程引言和软件的概念 120\n", "1 2428 1016 VideoItem 软件的基本概念 软件的概念 121\n", "2 2428 1017 VideoItem 软件过程 软件过程 17\n", "3 2428 1018 VideoItem RUP简介 RUP简介 123\n", "4 2428 1019 VideoItem 敏捷的过程 的过程 124" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_video.head()" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "kg_video=stage_video[[\"knowledge_id\",\"item_id\"]]" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "kg_video = kg_video.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "kg_video.rename(columns={'item_id':'video_item_id'},inplace=True)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "kg_video['relation']='知识点视频'" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
knowledge_idvideo_item_idrelation
01201015知识点视频
11211016知识点视频
2171017知识点视频
31231018知识点视频
41241019知识点视频
\n", "
" ], "text/plain": [ " knowledge_id video_item_id relation\n", "0 120 1015 知识点视频\n", "1 121 1016 知识点视频\n", "2 17 1017 知识点视频\n", "3 123 1018 知识点视频\n", "4 124 1019 知识点视频" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kg_video.head()" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "kg_video.to_csv('knowledge_forest_data/sample/relation/kg_video.csv',sep='\\t',index=False)" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stage_iditem_iditem_typeitem_nameknowledgeknowledge_id
0231660ShixunC&C++基本输入输出输入输出3
1231667ShixunC&C++表达式语句实训语句实训1
2231671ShixunC&C++控制结构实训控制结构实训0
3231676Shixun函数实训函数实训2
4231780ShixunC&C++数组实训数组实训4
\n", "
" ], "text/plain": [ " stage_id item_id item_type item_name knowledge knowledge_id\n", "0 2316 60 Shixun C&C++基本输入输出 输入输出 3\n", "1 2316 67 Shixun C&C++表达式语句实训 语句实训 1\n", "2 2316 71 Shixun C&C++控制结构实训 控制结构实训 0\n", "3 2316 76 Shixun 函数实训 函数实训 2\n", "4 2317 80 Shixun C&C++数组实训 数组实训 4" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stage_kg.head()" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "knowledge=stage_kg[['knowledge_id','knowledge']]" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "knowledge = knowledge.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "knowledge.to_csv('knowledge_forest_data/sample/entity/knowledge.csv',sep='\\t',index=False)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "stage_kg = stage_kg[['stage_id','knowledge_id']]" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "stage_kg = stage_kg.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "stage_kg['relation'] = '章节知识点'" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "stage_kg.to_csv('knowledge_forest_data/sample/relation/stage_kg.csv',sep='\\t',index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "mooc", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }