{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"subjects = pd.read_csv(\"knowledge_forest_data/sample/entity/subjects.csv\",sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"subjects['link']=''"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_31398/2253921258.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" subjects['link'][i] = 'https://www.educoder.net/paths/'+str(subjects['subject_id'][i])\n"
]
}
],
"source": [
"for i in range(len(subjects)):\n",
" subjects['link'][i] = 'https://www.educoder.net/paths/'+str(subjects['subject_id'][i])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" subject_name | \n",
" description | \n",
" visits | \n",
" created_at | \n",
" updated_at | \n",
" learning_notes | \n",
" stages_count | \n",
" stage_shixuns_count | \n",
" shixuns_count | \n",
" excellent | \n",
" student_count | \n",
" participant_count | \n",
" link | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 863 | \n",
" C++程序设计 | \n",
" C 和 C++ 是历史最悠久、最受欢迎的程序设计语言。C++ 是 C 语言的面向对象扩展,是... | \n",
" 35156 | \n",
" 2020-02-03 15:10:32 | \n",
" 2023-01-04 17:36:28 | \n",
" 本课程既适合 C&C++ 的初学者,也适合 C&C++ 程序员参考和巩固。高校学生、软件开发... | \n",
" 6 | \n",
" 0 | \n",
" 16 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" https://www.educoder.net/paths/863 | \n",
"
\n",
" \n",
" 1 | \n",
" 952 | \n",
" 软件工程 | \n",
" 随着计算机应用需求的不断增长,软件的规模也越来越大,然而软件开发的生产率远远跟不上计算机应用... | \n",
" 19470 | \n",
" 2020-02-10 19:11:19 | \n",
" 2023-06-25 10:52:03 | \n",
" **适用对象:**\\n本课程作为软件工程方向的理论学习与实践性课程,主要介绍软件工程的概念、... | \n",
" 12 | \n",
" 0 | \n",
" 61 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" https://www.educoder.net/paths/952 | \n",
"
\n",
" \n",
" 2 | \n",
" 1792 | \n",
" 大学计算机基础 —— 计算思维 | \n",
" 人要成功融入社会所必备的思维能力,是由其所处时代能够获得的工具决定的。计算机是信息社会的必备... | \n",
" 279743 | \n",
" 2020-05-25 09:54:16 | \n",
" 2023-07-27 09:30:28 | \n",
" \\n本实践课程适用于作为大学一年级《大学计算机基础》或同类课程的线上配套实验,也适合于计算机... | \n",
" 9 | \n",
" 0 | \n",
" 42 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" https://www.educoder.net/paths/1792 | \n",
"
\n",
" \n",
" 3 | \n",
" 2567 | \n",
" 数据结构与算法实践课程 | \n",
" 本课程是《数据结构》的课内实验课程,共提供了十三个实训作业,每个实训作业设置2-10道... | \n",
" 5089 | \n",
" 2020-12-11 16:32:11 | \n",
" 2023-03-14 17:48:18 | \n",
" 1.使用C和C++编程均可。\\n2.个人独立完成,代码提交后经过查重检测才能得到分数。 | \n",
" 7 | \n",
" 0 | \n",
" 13 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" https://www.educoder.net/paths/2567 | \n",
"
\n",
" \n",
" 4 | \n",
" 4642 | \n",
" 数据结构与算法 | \n",
" 本课程是计算机类专业的专业基础必修课。课程系统地介绍了基本数据结构知识、算法设计与分析方法,... | \n",
" 25164 | \n",
" 2021-11-02 14:17:03 | \n",
" 2023-07-27 10:14:44 | \n",
" 1.理解计算机内部数据对象的表示和特性,理解线性表、树、图等常见数据逻辑结构、存储结构表示及... | \n",
" 6 | \n",
" 0 | \n",
" 83 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" https://www.educoder.net/paths/4642 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id subject_name \\\n",
"0 863 C++程序设计 \n",
"1 952 软件工程 \n",
"2 1792 大学计算机基础 —— 计算思维 \n",
"3 2567 数据结构与算法实践课程 \n",
"4 4642 数据结构与算法 \n",
"\n",
" description visits \\\n",
"0 C 和 C++ 是历史最悠久、最受欢迎的程序设计语言。C++ 是 C 语言的面向对象扩展,是... 35156 \n",
"1 随着计算机应用需求的不断增长,软件的规模也越来越大,然而软件开发的生产率远远跟不上计算机应用... 19470 \n",
"2 人要成功融入社会所必备的思维能力,是由其所处时代能够获得的工具决定的。计算机是信息社会的必备... 279743 \n",
"3 本课程是《数据结构》的课内实验课程,共提供了十三个实训作业,每个实训作业设置2-10道... 5089 \n",
"4 本课程是计算机类专业的专业基础必修课。课程系统地介绍了基本数据结构知识、算法设计与分析方法,... 25164 \n",
"\n",
" created_at updated_at \\\n",
"0 2020-02-03 15:10:32 2023-01-04 17:36:28 \n",
"1 2020-02-10 19:11:19 2023-06-25 10:52:03 \n",
"2 2020-05-25 09:54:16 2023-07-27 09:30:28 \n",
"3 2020-12-11 16:32:11 2023-03-14 17:48:18 \n",
"4 2021-11-02 14:17:03 2023-07-27 10:14:44 \n",
"\n",
" learning_notes stages_count \\\n",
"0 本课程既适合 C&C++ 的初学者,也适合 C&C++ 程序员参考和巩固。高校学生、软件开发... 6 \n",
"1 **适用对象:**\\n本课程作为软件工程方向的理论学习与实践性课程,主要介绍软件工程的概念、... 12 \n",
"2 \\n本实践课程适用于作为大学一年级《大学计算机基础》或同类课程的线上配套实验,也适合于计算机... 9 \n",
"3 1.使用C和C++编程均可。\\n2.个人独立完成,代码提交后经过查重检测才能得到分数。 7 \n",
"4 1.理解计算机内部数据对象的表示和特性,理解线性表、树、图等常见数据逻辑结构、存储结构表示及... 6 \n",
"\n",
" stage_shixuns_count shixuns_count excellent student_count \\\n",
"0 0 16 0 0 \n",
"1 0 61 0 0 \n",
"2 0 42 1 0 \n",
"3 0 13 0 0 \n",
"4 0 83 1 0 \n",
"\n",
" participant_count link \n",
"0 0 https://www.educoder.net/paths/863 \n",
"1 0 https://www.educoder.net/paths/952 \n",
"2 0 https://www.educoder.net/paths/1792 \n",
"3 0 https://www.educoder.net/paths/2567 \n",
"4 0 https://www.educoder.net/paths/4642 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subjects.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"subjects.to_csv(\"knowledge_forest_data/sample/entity/subjects.csv\",sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"videos = pd.read_csv(\"knowledge_forest_data/sample/entity/video_items.csv\",sep='\\t')\n",
"stages = pd.read_csv(\"knowledge_forest_data/sample/relation/stage_video_item.csv\",sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"videos['link']=''"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" video_name | \n",
" description | \n",
" averge_star | \n",
" study_video_items_count | \n",
" link | \n",
" created_at | \n",
" updated_at | \n",
" cover_url | \n",
" file_url | \n",
" play_url | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程引言和软件的概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 100 | \n",
" | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" https://video.educoder.net/92adea2926a64d5a839... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2438bbe3-17c776a... | \n",
"
\n",
" \n",
" 1 | \n",
" 1016 | \n",
" 软件的基本概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 38 | \n",
" | \n",
" 2021-10-13 10:13:24 | \n",
" 2021-10-13 10:13:24 | \n",
" https://video.educoder.net/accb4f6c336e443f9bd... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/7e5e624-17c776cc... | \n",
"
\n",
" \n",
" 2 | \n",
" 1017 | \n",
" 软件过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 36 | \n",
" | \n",
" 2021-10-13 10:13:55 | \n",
" 2021-10-13 10:13:55 | \n",
" https://video.educoder.net/ddb2e6f026474e7186a... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/4818be33-17c776d... | \n",
"
\n",
" \n",
" 3 | \n",
" 1018 | \n",
" RUP简介 | \n",
" NaN | \n",
" 5.0 | \n",
" 28 | \n",
" | \n",
" 2021-10-13 10:14:30 | \n",
" 2021-10-13 10:14:30 | \n",
" https://video.educoder.net/393ab26db5114dd39b3... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2f93bc38-17c776d... | \n",
"
\n",
" \n",
" 4 | \n",
" 1019 | \n",
" 敏捷的过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 34 | \n",
" | \n",
" 2021-10-13 10:14:56 | \n",
" 2021-10-13 10:14:56 | \n",
" https://video.educoder.net/912575d6bc384a43a0d... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/414bbec2-17c776e... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id video_name description averge_star \\\n",
"0 1015 软件工程引言和软件的概念 NaN 5.0 \n",
"1 1016 软件的基本概念 NaN 5.0 \n",
"2 1017 软件过程 NaN 5.0 \n",
"3 1018 RUP简介 NaN 5.0 \n",
"4 1019 敏捷的过程 NaN 5.0 \n",
"\n",
" study_video_items_count link created_at updated_at \\\n",
"0 100 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"1 38 2021-10-13 10:13:24 2021-10-13 10:13:24 \n",
"2 36 2021-10-13 10:13:55 2021-10-13 10:13:55 \n",
"3 28 2021-10-13 10:14:30 2021-10-13 10:14:30 \n",
"4 34 2021-10-13 10:14:56 2021-10-13 10:14:56 \n",
"\n",
" cover_url \\\n",
"0 https://video.educoder.net/92adea2926a64d5a839... \n",
"1 https://video.educoder.net/accb4f6c336e443f9bd... \n",
"2 https://video.educoder.net/ddb2e6f026474e7186a... \n",
"3 https://video.educoder.net/393ab26db5114dd39b3... \n",
"4 https://video.educoder.net/912575d6bc384a43a0d... \n",
"\n",
" file_url \\\n",
"0 https://outin-396971199eed11e991a100163e1c7426... \n",
"1 https://outin-396971199eed11e991a100163e1c7426... \n",
"2 https://outin-396971199eed11e991a100163e1c7426... \n",
"3 https://outin-396971199eed11e991a100163e1c7426... \n",
"4 https://outin-396971199eed11e991a100163e1c7426... \n",
"\n",
" play_url \n",
"0 https://video.educoder.net/sv/2438bbe3-17c776a... \n",
"1 https://video.educoder.net/sv/7e5e624-17c776cc... \n",
"2 https://video.educoder.net/sv/4818be33-17c776d... \n",
"3 https://video.educoder.net/sv/2f93bc38-17c776d... \n",
"4 https://video.educoder.net/sv/414bbec2-17c776e... "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"videos.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"stage_video=pd.merge(videos,stages,on='video_item_id')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" video_name | \n",
" description | \n",
" averge_star | \n",
" study_video_items_count | \n",
" link | \n",
" created_at_x | \n",
" updated_at_x | \n",
" cover_url | \n",
" file_url | \n",
" play_url | \n",
" stage_item_id | \n",
" subject_id | \n",
" stage_id | \n",
" item_type | \n",
" created_at_y | \n",
" updated_at_y | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程引言和软件的概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 100 | \n",
" | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" https://video.educoder.net/92adea2926a64d5a839... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2438bbe3-17c776a... | \n",
" 78896 | \n",
" 952 | \n",
" 2428 | \n",
" VideoItem | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
"
\n",
" \n",
" 1 | \n",
" 1016 | \n",
" 软件的基本概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 38 | \n",
" | \n",
" 2021-10-13 10:13:24 | \n",
" 2021-10-13 10:13:24 | \n",
" https://video.educoder.net/accb4f6c336e443f9bd... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/7e5e624-17c776cc... | \n",
" 78898 | \n",
" 952 | \n",
" 2428 | \n",
" VideoItem | \n",
" 2021-10-13 10:13:24 | \n",
" 2021-10-13 10:13:24 | \n",
"
\n",
" \n",
" 2 | \n",
" 1017 | \n",
" 软件过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 36 | \n",
" | \n",
" 2021-10-13 10:13:55 | \n",
" 2021-10-13 10:13:55 | \n",
" https://video.educoder.net/ddb2e6f026474e7186a... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/4818be33-17c776d... | \n",
" 78899 | \n",
" 952 | \n",
" 2428 | \n",
" VideoItem | \n",
" 2021-10-13 10:13:55 | \n",
" 2021-10-13 10:13:55 | \n",
"
\n",
" \n",
" 3 | \n",
" 1018 | \n",
" RUP简介 | \n",
" NaN | \n",
" 5.0 | \n",
" 28 | \n",
" | \n",
" 2021-10-13 10:14:30 | \n",
" 2021-10-13 10:14:30 | \n",
" https://video.educoder.net/393ab26db5114dd39b3... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2f93bc38-17c776d... | \n",
" 78900 | \n",
" 952 | \n",
" 2428 | \n",
" VideoItem | \n",
" 2021-10-13 10:14:30 | \n",
" 2021-10-13 10:14:30 | \n",
"
\n",
" \n",
" 4 | \n",
" 1019 | \n",
" 敏捷的过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 34 | \n",
" | \n",
" 2021-10-13 10:14:56 | \n",
" 2021-10-13 10:14:56 | \n",
" https://video.educoder.net/912575d6bc384a43a0d... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/414bbec2-17c776e... | \n",
" 78901 | \n",
" 952 | \n",
" 2428 | \n",
" VideoItem | \n",
" 2021-10-13 10:14:56 | \n",
" 2021-10-13 10:14:56 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id video_name description averge_star \\\n",
"0 1015 软件工程引言和软件的概念 NaN 5.0 \n",
"1 1016 软件的基本概念 NaN 5.0 \n",
"2 1017 软件过程 NaN 5.0 \n",
"3 1018 RUP简介 NaN 5.0 \n",
"4 1019 敏捷的过程 NaN 5.0 \n",
"\n",
" study_video_items_count link created_at_x updated_at_x \\\n",
"0 100 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"1 38 2021-10-13 10:13:24 2021-10-13 10:13:24 \n",
"2 36 2021-10-13 10:13:55 2021-10-13 10:13:55 \n",
"3 28 2021-10-13 10:14:30 2021-10-13 10:14:30 \n",
"4 34 2021-10-13 10:14:56 2021-10-13 10:14:56 \n",
"\n",
" cover_url \\\n",
"0 https://video.educoder.net/92adea2926a64d5a839... \n",
"1 https://video.educoder.net/accb4f6c336e443f9bd... \n",
"2 https://video.educoder.net/ddb2e6f026474e7186a... \n",
"3 https://video.educoder.net/393ab26db5114dd39b3... \n",
"4 https://video.educoder.net/912575d6bc384a43a0d... \n",
"\n",
" file_url \\\n",
"0 https://outin-396971199eed11e991a100163e1c7426... \n",
"1 https://outin-396971199eed11e991a100163e1c7426... \n",
"2 https://outin-396971199eed11e991a100163e1c7426... \n",
"3 https://outin-396971199eed11e991a100163e1c7426... \n",
"4 https://outin-396971199eed11e991a100163e1c7426... \n",
"\n",
" play_url stage_item_id \\\n",
"0 https://video.educoder.net/sv/2438bbe3-17c776a... 78896 \n",
"1 https://video.educoder.net/sv/7e5e624-17c776cc... 78898 \n",
"2 https://video.educoder.net/sv/4818be33-17c776d... 78899 \n",
"3 https://video.educoder.net/sv/2f93bc38-17c776d... 78900 \n",
"4 https://video.educoder.net/sv/414bbec2-17c776e... 78901 \n",
"\n",
" subject_id stage_id item_type created_at_y updated_at_y \n",
"0 952 2428 VideoItem 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"1 952 2428 VideoItem 2021-10-13 10:13:24 2021-10-13 10:13:24 \n",
"2 952 2428 VideoItem 2021-10-13 10:13:55 2021-10-13 10:13:55 \n",
"3 952 2428 VideoItem 2021-10-13 10:14:30 2021-10-13 10:14:30 \n",
"4 952 2428 VideoItem 2021-10-13 10:14:56 2021-10-13 10:14:56 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_video.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_31398/3011727954.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" stage_video['link'][i] = 'https://www.educoder.net/video/'+str(stage_video['video_item_id'][i])+'?subject_id='+str(stage_video['subject_id'][i])\n"
]
}
],
"source": [
"for i in range(len(stage_video)):\n",
" stage_video['link'][i] = 'https://www.educoder.net/video/'+str(stage_video['video_item_id'][i])+'?subject_id='+str(stage_video['subject_id'][i])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://www.educoder.net/video/1017?subject_id=952\n"
]
}
],
"source": [
"print(stage_video['link'][2])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['video_item_id', 'video_name', 'description', 'averge_star',\n",
" 'study_video_items_count', 'link', 'created_at_x', 'updated_at_x',\n",
" 'cover_url', 'file_url', 'play_url', 'stage_item_id', 'subject_id',\n",
" 'stage_id', 'item_type', 'created_at_y', 'updated_at_y'],\n",
" dtype='object')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage_video.columns"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"video_items = stage_video[['video_item_id','video_name','description','averge_star','study_video_items_count','link','created_at_x','updated_at_x','cover_url', 'file_url', 'play_url']]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/anaconda3/envs/mooc/lib/python3.9/site-packages/pandas/core/frame.py:5039: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" return super().rename(\n"
]
}
],
"source": [
"video_items.rename(columns={'created_at_x':'created_at','updated_at_x':'updated_at'},inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" video_name | \n",
" description | \n",
" averge_star | \n",
" study_video_items_count | \n",
" link | \n",
" created_at | \n",
" updated_at | \n",
" cover_url | \n",
" file_url | \n",
" play_url | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程引言和软件的概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 100 | \n",
" https://www.educoder.net/video/1015?subject_id... | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" https://video.educoder.net/92adea2926a64d5a839... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2438bbe3-17c776a... | \n",
"
\n",
" \n",
" 1 | \n",
" 1016 | \n",
" 软件的基本概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 38 | \n",
" https://www.educoder.net/video/1016?subject_id... | \n",
" 2021-10-13 10:13:24 | \n",
" 2021-10-13 10:13:24 | \n",
" https://video.educoder.net/accb4f6c336e443f9bd... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/7e5e624-17c776cc... | \n",
"
\n",
" \n",
" 2 | \n",
" 1017 | \n",
" 软件过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 36 | \n",
" https://www.educoder.net/video/1017?subject_id... | \n",
" 2021-10-13 10:13:55 | \n",
" 2021-10-13 10:13:55 | \n",
" https://video.educoder.net/ddb2e6f026474e7186a... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/4818be33-17c776d... | \n",
"
\n",
" \n",
" 3 | \n",
" 1018 | \n",
" RUP简介 | \n",
" NaN | \n",
" 5.0 | \n",
" 28 | \n",
" https://www.educoder.net/video/1018?subject_id... | \n",
" 2021-10-13 10:14:30 | \n",
" 2021-10-13 10:14:30 | \n",
" https://video.educoder.net/393ab26db5114dd39b3... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2f93bc38-17c776d... | \n",
"
\n",
" \n",
" 4 | \n",
" 1019 | \n",
" 敏捷的过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 34 | \n",
" https://www.educoder.net/video/1019?subject_id... | \n",
" 2021-10-13 10:14:56 | \n",
" 2021-10-13 10:14:56 | \n",
" https://video.educoder.net/912575d6bc384a43a0d... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/414bbec2-17c776e... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id video_name description averge_star \\\n",
"0 1015 软件工程引言和软件的概念 NaN 5.0 \n",
"1 1016 软件的基本概念 NaN 5.0 \n",
"2 1017 软件过程 NaN 5.0 \n",
"3 1018 RUP简介 NaN 5.0 \n",
"4 1019 敏捷的过程 NaN 5.0 \n",
"\n",
" study_video_items_count link \\\n",
"0 100 https://www.educoder.net/video/1015?subject_id... \n",
"1 38 https://www.educoder.net/video/1016?subject_id... \n",
"2 36 https://www.educoder.net/video/1017?subject_id... \n",
"3 28 https://www.educoder.net/video/1018?subject_id... \n",
"4 34 https://www.educoder.net/video/1019?subject_id... \n",
"\n",
" created_at updated_at \\\n",
"0 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"1 2021-10-13 10:13:24 2021-10-13 10:13:24 \n",
"2 2021-10-13 10:13:55 2021-10-13 10:13:55 \n",
"3 2021-10-13 10:14:30 2021-10-13 10:14:30 \n",
"4 2021-10-13 10:14:56 2021-10-13 10:14:56 \n",
"\n",
" cover_url \\\n",
"0 https://video.educoder.net/92adea2926a64d5a839... \n",
"1 https://video.educoder.net/accb4f6c336e443f9bd... \n",
"2 https://video.educoder.net/ddb2e6f026474e7186a... \n",
"3 https://video.educoder.net/393ab26db5114dd39b3... \n",
"4 https://video.educoder.net/912575d6bc384a43a0d... \n",
"\n",
" file_url \\\n",
"0 https://outin-396971199eed11e991a100163e1c7426... \n",
"1 https://outin-396971199eed11e991a100163e1c7426... \n",
"2 https://outin-396971199eed11e991a100163e1c7426... \n",
"3 https://outin-396971199eed11e991a100163e1c7426... \n",
"4 https://outin-396971199eed11e991a100163e1c7426... \n",
"\n",
" play_url \n",
"0 https://video.educoder.net/sv/2438bbe3-17c776a... \n",
"1 https://video.educoder.net/sv/7e5e624-17c776cc... \n",
"2 https://video.educoder.net/sv/4818be33-17c776d... \n",
"3 https://video.educoder.net/sv/2f93bc38-17c776d... \n",
"4 https://video.educoder.net/sv/414bbec2-17c776e... "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"video_items.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"video_items.to_csv('knowledge_forest_data/sample/entity/video_items.csv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"video_items=pd.read_csv('knowledge_forest_data/sample/entity/video_items.csv',sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" video_name | \n",
" description | \n",
" averge_star | \n",
" study_video_items_count | \n",
" link | \n",
" created_at | \n",
" updated_at | \n",
" cover_url | \n",
" file_url | \n",
" play_url | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程引言和软件的概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 100 | \n",
" https://www.educoder.net/video/1015?subject_id... | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" https://video.educoder.net/92adea2926a64d5a839... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2438bbe3-17c776a... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id video_name description averge_star \\\n",
"0 1015 软件工程引言和软件的概念 NaN 5.0 \n",
"\n",
" study_video_items_count link \\\n",
"0 100 https://www.educoder.net/video/1015?subject_id... \n",
"\n",
" created_at updated_at \\\n",
"0 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"\n",
" cover_url \\\n",
"0 https://video.educoder.net/92adea2926a64d5a839... \n",
"\n",
" file_url \\\n",
"0 https://outin-396971199eed11e991a100163e1c7426... \n",
"\n",
" play_url \n",
"0 https://video.educoder.net/sv/2438bbe3-17c776a... "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"video_items.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"stage_video_item=pd.read_csv('knowledge_forest_data/sample/relation/stage_video_item.csv',sep='\\t')\n",
"subject=pd.read_csv('knowledge_forest_data/sample/entity/subjects.csv',sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"subject_video=pd.merge(stage_video_item,subject,on='subject_id',how='left')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" stage_item_id | \n",
" subject_id | \n",
" stage_id | \n",
" video_item_id | \n",
" item_type | \n",
" created_at_x | \n",
" updated_at_x | \n",
" subject_name | \n",
" description | \n",
" visits | \n",
" created_at_y | \n",
" updated_at_y | \n",
" learning_notes | \n",
" stages_count | \n",
" stage_shixuns_count | \n",
" shixuns_count | \n",
" excellent | \n",
" student_count | \n",
" participant_count | \n",
" link | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 78896 | \n",
" 952 | \n",
" 2428 | \n",
" 1015 | \n",
" VideoItem | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" 软件工程 | \n",
" 随着计算机应用需求的不断增长,软件的规模也越来越大,然而软件开发的生产率远远跟不上计算机应用... | \n",
" 19470 | \n",
" 2020-02-10 19:11:19 | \n",
" 2023-06-25 10:52:03 | \n",
" **适用对象:**\\n本课程作为软件工程方向的理论学习与实践性课程,主要介绍软件工程的概念、... | \n",
" 12 | \n",
" 0 | \n",
" 61 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" https://www.educoder.net/paths/952 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stage_item_id subject_id stage_id video_item_id item_type \\\n",
"0 78896 952 2428 1015 VideoItem \n",
"\n",
" created_at_x updated_at_x subject_name \\\n",
"0 2021-10-13 10:12:09 2021-10-13 10:12:09 软件工程 \n",
"\n",
" description visits \\\n",
"0 随着计算机应用需求的不断增长,软件的规模也越来越大,然而软件开发的生产率远远跟不上计算机应用... 19470 \n",
"\n",
" created_at_y updated_at_y \\\n",
"0 2020-02-10 19:11:19 2023-06-25 10:52:03 \n",
"\n",
" learning_notes stages_count \\\n",
"0 **适用对象:**\\n本课程作为软件工程方向的理论学习与实践性课程,主要介绍软件工程的概念、... 12 \n",
"\n",
" stage_shixuns_count shixuns_count excellent student_count \\\n",
"0 0 61 0 0 \n",
"\n",
" participant_count link \n",
"0 0 https://www.educoder.net/paths/952 "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subject_video.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"subject_video=subject_video[['video_item_id','subject_name']]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" subject_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程 | \n",
"
\n",
" \n",
" 1 | \n",
" 1016 | \n",
" 软件工程 | \n",
"
\n",
" \n",
" 2 | \n",
" 1017 | \n",
" 软件工程 | \n",
"
\n",
" \n",
" 3 | \n",
" 1018 | \n",
" 软件工程 | \n",
"
\n",
" \n",
" 4 | \n",
" 1019 | \n",
" 软件工程 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id subject_name\n",
"0 1015 软件工程\n",
"1 1016 软件工程\n",
"2 1017 软件工程\n",
"3 1018 软件工程\n",
"4 1019 软件工程"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subject_video.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" video_name | \n",
" description | \n",
" averge_star | \n",
" study_video_items_count | \n",
" link | \n",
" created_at | \n",
" updated_at | \n",
" cover_url | \n",
" file_url | \n",
" play_url | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程引言和软件的概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 100 | \n",
" https://www.educoder.net/video/1015?subject_id... | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" https://video.educoder.net/92adea2926a64d5a839... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2438bbe3-17c776a... | \n",
"
\n",
" \n",
" 1 | \n",
" 1016 | \n",
" 软件的基本概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 38 | \n",
" https://www.educoder.net/video/1016?subject_id... | \n",
" 2021-10-13 10:13:24 | \n",
" 2021-10-13 10:13:24 | \n",
" https://video.educoder.net/accb4f6c336e443f9bd... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/7e5e624-17c776cc... | \n",
"
\n",
" \n",
" 2 | \n",
" 1017 | \n",
" 软件过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 36 | \n",
" https://www.educoder.net/video/1017?subject_id... | \n",
" 2021-10-13 10:13:55 | \n",
" 2021-10-13 10:13:55 | \n",
" https://video.educoder.net/ddb2e6f026474e7186a... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/4818be33-17c776d... | \n",
"
\n",
" \n",
" 3 | \n",
" 1018 | \n",
" RUP简介 | \n",
" NaN | \n",
" 5.0 | \n",
" 28 | \n",
" https://www.educoder.net/video/1018?subject_id... | \n",
" 2021-10-13 10:14:30 | \n",
" 2021-10-13 10:14:30 | \n",
" https://video.educoder.net/393ab26db5114dd39b3... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2f93bc38-17c776d... | \n",
"
\n",
" \n",
" 4 | \n",
" 1019 | \n",
" 敏捷的过程 | \n",
" NaN | \n",
" 5.0 | \n",
" 34 | \n",
" https://www.educoder.net/video/1019?subject_id... | \n",
" 2021-10-13 10:14:56 | \n",
" 2021-10-13 10:14:56 | \n",
" https://video.educoder.net/912575d6bc384a43a0d... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/414bbec2-17c776e... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id video_name description averge_star \\\n",
"0 1015 软件工程引言和软件的概念 NaN 5.0 \n",
"1 1016 软件的基本概念 NaN 5.0 \n",
"2 1017 软件过程 NaN 5.0 \n",
"3 1018 RUP简介 NaN 5.0 \n",
"4 1019 敏捷的过程 NaN 5.0 \n",
"\n",
" study_video_items_count link \\\n",
"0 100 https://www.educoder.net/video/1015?subject_id... \n",
"1 38 https://www.educoder.net/video/1016?subject_id... \n",
"2 36 https://www.educoder.net/video/1017?subject_id... \n",
"3 28 https://www.educoder.net/video/1018?subject_id... \n",
"4 34 https://www.educoder.net/video/1019?subject_id... \n",
"\n",
" created_at updated_at \\\n",
"0 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"1 2021-10-13 10:13:24 2021-10-13 10:13:24 \n",
"2 2021-10-13 10:13:55 2021-10-13 10:13:55 \n",
"3 2021-10-13 10:14:30 2021-10-13 10:14:30 \n",
"4 2021-10-13 10:14:56 2021-10-13 10:14:56 \n",
"\n",
" cover_url \\\n",
"0 https://video.educoder.net/92adea2926a64d5a839... \n",
"1 https://video.educoder.net/accb4f6c336e443f9bd... \n",
"2 https://video.educoder.net/ddb2e6f026474e7186a... \n",
"3 https://video.educoder.net/393ab26db5114dd39b3... \n",
"4 https://video.educoder.net/912575d6bc384a43a0d... \n",
"\n",
" file_url \\\n",
"0 https://outin-396971199eed11e991a100163e1c7426... \n",
"1 https://outin-396971199eed11e991a100163e1c7426... \n",
"2 https://outin-396971199eed11e991a100163e1c7426... \n",
"3 https://outin-396971199eed11e991a100163e1c7426... \n",
"4 https://outin-396971199eed11e991a100163e1c7426... \n",
"\n",
" play_url \n",
"0 https://video.educoder.net/sv/2438bbe3-17c776a... \n",
"1 https://video.educoder.net/sv/7e5e624-17c776cc... \n",
"2 https://video.educoder.net/sv/4818be33-17c776d... \n",
"3 https://video.educoder.net/sv/2f93bc38-17c776d... \n",
"4 https://video.educoder.net/sv/414bbec2-17c776e... "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"video_items.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"video_item=pd.merge(video_items,subject_video,on='video_item_id',how='left')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" video_item_id | \n",
" video_name | \n",
" description | \n",
" averge_star | \n",
" study_video_items_count | \n",
" link | \n",
" created_at | \n",
" updated_at | \n",
" cover_url | \n",
" file_url | \n",
" play_url | \n",
" subject_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1015 | \n",
" 软件工程引言和软件的概念 | \n",
" NaN | \n",
" 5.0 | \n",
" 100 | \n",
" https://www.educoder.net/video/1015?subject_id... | \n",
" 2021-10-13 10:12:09 | \n",
" 2021-10-13 10:12:09 | \n",
" https://video.educoder.net/92adea2926a64d5a839... | \n",
" https://outin-396971199eed11e991a100163e1c7426... | \n",
" https://video.educoder.net/sv/2438bbe3-17c776a... | \n",
" 软件工程 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" video_item_id video_name description averge_star \\\n",
"0 1015 软件工程引言和软件的概念 NaN 5.0 \n",
"\n",
" study_video_items_count link \\\n",
"0 100 https://www.educoder.net/video/1015?subject_id... \n",
"\n",
" created_at updated_at \\\n",
"0 2021-10-13 10:12:09 2021-10-13 10:12:09 \n",
"\n",
" cover_url \\\n",
"0 https://video.educoder.net/92adea2926a64d5a839... \n",
"\n",
" file_url \\\n",
"0 https://outin-396971199eed11e991a100163e1c7426... \n",
"\n",
" play_url subject_name \n",
"0 https://video.educoder.net/sv/2438bbe3-17c776a... 软件工程 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"video_item.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"video_item.to_csv('knowledge_forest_data/sample/entity/video_items.csv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"courses = pd.read_csv(\"knowledge_forest_data/sample/entity/courses.csv\",sep='\\t',low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"courses[\"link\"]=''"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_31398/959543430.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" courses['link'][i] = 'https://forge.educoder.net/classrooms/{}/announcement'.format(courses['identifier'][i])\n"
]
}
],
"source": [
"for i in range(len(courses)):\n",
" courses['link'][i] = 'https://forge.educoder.net/classrooms/{}/announcement'.format(courses['identifier'][i])"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"courses1=courses.drop(labels='identifier', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"courses1.to_csv(\"knowledge_forest_data/sample/entity/courses.csv\",sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"shixuns=pd.read_csv(\"knowledge_forest_data/sample/entity/shixuns.csv\",sep='\\t',low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"shixuns[\"link\"]=''"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_31398/1676815638.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" shixuns['link'][i] = \"https://forge.educoder.net/shixuns/{}/challenges\".format(shixuns['identifier'][i])\n"
]
}
],
"source": [
"for i in range(len(shixuns)):\n",
" shixuns['link'][i] = \"https://forge.educoder.net/shixuns/{}/challenges\".format(shixuns['identifier'][i])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"shixuns1=shixuns.drop(labels='identifier', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"shixuns1.to_csv(\"knowledge_forest_data/sample/entity/shixuns.csv\",sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mooc",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}