You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

263 lines
11 KiB

# 导入需要的工具包
from py2neo import Graph, Node, Relationship, NodeMatcher, Subgraph
import pandas as pd
import numpy as np
import os
import sys
sys.path.append(os.getcwd())
from config import neo4j_url,neo4j_username,neo4j_password
from config import kg_data
# Connect to the Neo4j database using the URL and credentials imported from config.
graph = Graph(neo4j_url, auth=(neo4j_username,neo4j_password) )
graph.delete_all() # Delete all data in this database; the imports below then run against an emptied graph
## Import entity tables
# Import courseware entities: one node per row of attachments.csv.
attachments = pd.read_csv(kg_data + 'entity/attachments.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
attachments.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
attachment_fields = (
    "attachment_id", "filename", "filesize", "downloads",
    "description", "created_on", "attachtype", "link",
)
for row in range(len(attachments)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(attachments.iloc[row, col])
        for col, field in enumerate(attachment_fields)
    }
    graph.create(Node("课件", **properties))
print("课件实体导入成功!")
# Import challenge (level) entities: one node per row of challenges.csv.
challenges = pd.read_csv(kg_data + 'entity/challenges.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
challenges.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
challenge_fields = (
    "challenge_id", "challenge_name", "created_at", "updated_at",
    "status", "position", "task_pass", "score",
    "visits", "challenge_tags_count", "challenge_tag",
)
for row in range(len(challenges)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(challenges.iloc[row, col])
        for col, field in enumerate(challenge_fields)
    }
    graph.create(Node("关卡", **properties))
print("关卡实体导入成功!")
# Import teaching-classroom entities: one node per row of courses.csv.
courses = pd.read_csv(kg_data + 'entity/courses.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
courses.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
course_fields = (
    "course_id", "course_name", "created_at", "updated_at",
    "description", "status", "attachmenttype", "visits",
    "is_end", "end_date", "members_count", "homework_commons_count",
    "course_groups_count", "exercises_count", "link",
)
for row in range(len(courses)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(courses.iloc[row, col])
        for col, field in enumerate(course_fields)
    }
    graph.create(Node("教学课堂", **properties))
print("教学课堂实体导入成功!")
# Import exam entities: one node per row of exercises.csv.
exercises = pd.read_csv(kg_data + 'entity/exercises.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
exercises.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
exercise_fields = (
    "exercise_id", "exercise_name", "exercise_description",
    "exercise_status", "created_at", "updated_at",
)
for row in range(len(exercises)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(exercises.iloc[row, col])
        for col, field in enumerate(exercise_fields)
    }
    graph.create(Node("考试", **properties))
print("考试实体导入成功!")
# Import homework entities: one node per row of homeworks.csv.
homeworks = pd.read_csv(kg_data + 'entity/homeworks.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
homeworks.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
homework_fields = (
    "homework_id", "homework_name", "description",
    "homework_type", "created_at", "updated_at",
)
for row in range(len(homeworks)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(homeworks.iloc[row, col])
        for col, field in enumerate(homework_fields)
    }
    graph.create(Node("作业", **properties))
print("作业实体导入成功!")
# Import knowledge-point entities: one node per row of knowledge.csv.
# This file is read with pandas' default separator (no sep argument).
knowledge = pd.read_csv(kg_data + 'entity/knowledge.csv')
# Missing cells become the literal string 'NaN' before being stored.
knowledge.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
knowledge_fields = ("knowledge_id", "knowledge")
for row in range(len(knowledge)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(knowledge.iloc[row, col])
        for col, field in enumerate(knowledge_fields)
    }
    graph.create(Node("知识点", **properties))
print("知识点实体导入成功!")
# Import practical-training (shixun) entities: one node per row of shixuns.csv.
shixuns = pd.read_csv(kg_data + 'entity/shixuns.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
shixuns.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
shixun_fields = (
    "shixun_id", "shixun_name", "teacher_name", "school_name",
    "created_at", "updated_at", "visits", "language",
    "myshixuns_count", "challenges_count", "averge_star", "users_count",
    "initiative_study_num", "spoc_study_num", "shixun_tag", "description",
    "propaedeutics", "link",
)
for row in range(len(shixuns)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(shixuns.iloc[row, col])
        for col, field in enumerate(shixun_fields)
    }
    graph.create(Node("实训", **properties))
print("实训实体导入成功!")
# Import chapter (stage) entities: one node per row of stages.csv.
# This file is read with pandas' default separator (no sep argument).
stages = pd.read_csv(kg_data + 'entity/stages.csv')
# Missing cells become the literal string 'NaN' before being stored.
stages.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
stage_fields = (
    "stage_id", "stage_name", "description", "shixuns_count",
    "created_at", "updated_at", "stage_sort",
)
for row in range(len(stages)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(stages.iloc[row, col])
        for col, field in enumerate(stage_fields)
    }
    graph.create(Node("章节", **properties))
print("章节实体导入成功!")
# Import practice-course (subject) entities: one node per row of subjects.csv.
subjects = pd.read_csv(kg_data + 'entity/subjects.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
subjects.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
subject_fields = (
    "subject_id", "subject_name", "description", "visits",
    "created_at", "updated_at", "learning_notes", "stages_count",
    "stage_shixuns_count", "shixuns_count", "excellent", "student_count",
    "participant_count", "link",
)
for row in range(len(subjects)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(subjects.iloc[row, col])
        for col, field in enumerate(subject_fields)
    }
    graph.create(Node("实践课程", **properties))
print("实践课程实体导入成功!")
# Import teaching-video entities: one node per row of video_items.csv.
video_items = pd.read_csv(kg_data + 'entity/video_items.csv', sep='\t')
# Missing cells become the literal string 'NaN' before being stored.
video_items.fillna('NaN', inplace=True)
# Node property names, listed in CSV column order (column 0 first).
video_item_fields = (
    "video_item_id", "video_name", "description", "averge_star",
    "study_video_items_count", "link", "created_at", "updated_at",
    "cover_url", "file_url", "play_url", "subject_name",
)
for row in range(len(video_items)):
    # Every cell is converted to a string property on the node.
    properties = {
        field: str(video_items.iloc[row, col])
        for col, field in enumerate(video_item_fields)
    }
    graph.create(Node("教学视频", **properties))
print("教学视频实体导入成功!")
# Import relationship tables
# Create a relationship between two nodes
def create_relationship(graph, label1, attr1, name1, label2, attr2, name2, r_name):
    """Create a relationship of type ``r_name`` between two existing nodes.

    Both endpoints are looked up with ``match_node``; the relationship runs
    from the first node to the second.

    Note the argument naming: ``attr1``/``attr2`` are the property *values*
    to search for, while ``name1``/``name2`` are the property *names* those
    values are compared against.

    Args:
        graph: Graph handle used for the lookups and for creating the
            relationship.
        label1: Label of the start node.
        attr1: Property value identifying the start node.
        name1: Property name that ``attr1`` is matched against.
        label2: Label of the end node.
        attr2: Property value identifying the end node.
        name2: Property name that ``attr2`` is matched against.
        r_name: Relationship type.

    Returns:
        True when the relationship was created, False when either endpoint
        could not be found (nothing is created in that case).
    """
    start_node = match_node(graph, label1, attr1, name1)
    if start_node is None:
        # No start node: skip the second lookup, nothing can be created.
        return False
    end_node = match_node(graph, label2, attr2, name2)
    if end_node is None:
        return False
    graph.create(Relationship(start_node, r_name, end_node))
    # Report success explicitly so callers can tell it apart from the
    # "endpoint missing" case by truthiness.
    return True
# Look up a node
def match_node(graph, label, attrs, name):
    """Return the first node with ``label`` whose property ``name`` equals ``attrs``.

    Despite the parameter names, ``attrs`` is the property *value* to search
    for and ``name`` is the property *name* it is compared against.

    Args:
        graph: Graph handle to search.
        label: Node label to restrict the match to.
        attrs: Property value to look for.
        name: Name of the property to compare.

    Returns:
        The first matching node as returned by py2neo's ``first()``; callers
        in this file treat ``None`` as "not found".
    """
    # Pass the condition as a keyword property instead of splicing the value
    # into a hand-built predicate string: py2neo then escapes the value, so
    # ids containing quotes or backslashes can no longer break or alter the
    # generated Cypher.
    return NodeMatcher(graph).match(label, **{name: attrs}).first()
# Import classroom -> exam relationships from course_exercise.csv.
ce = pd.read_csv(kg_data + 'relation/course_exercise.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "教学课堂", "course_id"
# End node: label and the property used to find it.
label2, name2 = "考试", "exercise_id"
for row in range(len(ce)):
    course_id = str(ce.iloc[row, 0])
    exercise_id = str(ce.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, course_id, name1,
        label2, exercise_id, name2,
        ce.iloc[row, 2],
    )
print("课堂考试关系导入成功!")
# Import classroom -> practice-course relationships from course_subject.csv.
cs = pd.read_csv(kg_data + 'relation/course_subject.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "教学课堂", "course_id"
# End node: label and the property used to find it.
label2, name2 = "实践课程", "subject_id"
for row in range(len(cs)):
    # In this file column 0 is the subject id and column 1 the course id.
    subject_id = str(cs.iloc[row, 0])
    course_id = str(cs.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, course_id, name1,
        label2, subject_id, name2,
        cs.iloc[row, 2],
    )
print("课堂课程关系导入成功!")
# Import knowledge-point -> courseware relationships from kg_att.csv.
ka = pd.read_csv(kg_data + 'relation/kg_att.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "知识点", "knowledge_id"
# End node: label and the property used to find it.
label2, name2 = "课件", "attachment_id"
for row in range(len(ka)):
    knowledge_id = str(ka.iloc[row, 0])
    attachment_id = str(ka.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, knowledge_id, name1,
        label2, attachment_id, name2,
        ka.iloc[row, 2],
    )
print("知识点课件关系导入成功!")
# Import knowledge-point -> practical-training relationships from kg_shixun.csv.
ks = pd.read_csv(kg_data + 'relation/kg_shixun.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "知识点", "knowledge_id"
# End node: label and the property used to find it.
label2, name2 = "实训", "shixun_id"
for row in range(len(ks)):
    knowledge_id = str(ks.iloc[row, 0])
    shixun_id = str(ks.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, knowledge_id, name1,
        label2, shixun_id, name2,
        ks.iloc[row, 2],
    )
print("知识点实训关系导入成功!")
# Import knowledge-point -> teaching-video relationships from kg_video.csv.
kv = pd.read_csv(kg_data + 'relation/kg_video.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "知识点", "knowledge_id"
# End node: label and the property used to find it.
label2, name2 = "教学视频", "video_item_id"
for row in range(len(kv)):
    knowledge_id = str(kv.iloc[row, 0])
    video_item_id = str(kv.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, knowledge_id, name1,
        label2, video_item_id, name2,
        kv.iloc[row, 2],
    )
print("知识点视频关系导入成功!")
# Import practical-training -> challenge relationships from shixun_challenge.csv.
sc = pd.read_csv(kg_data + 'relation/shixun_challenge.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "实训", "shixun_id"
# End node: label and the property used to find it.
label2, name2 = "关卡", "challenge_id"
for row in range(len(sc)):
    # In this file column 0 is the challenge id and column 1 the shixun id.
    challenge_id = str(sc.iloc[row, 0])
    shixun_id = str(sc.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, shixun_id, name1,
        label2, challenge_id, name2,
        sc.iloc[row, 2],
    )
print("实训关卡关系导入成功!")
# Import practical-training -> homework relationships from shixun_homework.csv.
sh = pd.read_csv(kg_data + 'relation/shixun_homework.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "实训", "shixun_id"
# End node: label and the property used to find it.
label2, name2 = "作业", "homework_id"
for row in range(len(sh)):
    shixun_id = str(sh.iloc[row, 0])
    homework_id = str(sh.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, shixun_id, name1,
        label2, homework_id, name2,
        sh.iloc[row, 2],
    )
print("实训作业关系导入成功!")
# Import chapter -> knowledge-point relationships from stage_kg.csv.
sk = pd.read_csv(kg_data + 'relation/stage_kg.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "章节", "stage_id"
# End node: label and the property used to find it.
label2, name2 = "知识点", "knowledge_id"
for row in range(len(sk)):
    stage_id = str(sk.iloc[row, 0])
    knowledge_id = str(sk.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, stage_id, name1,
        label2, knowledge_id, name2,
        sk.iloc[row, 2],
    )
print("章节知识点关系导入成功!")
# Import practice-course -> chapter relationships from subject_stage.csv.
ss = pd.read_csv(kg_data + 'relation/subject_stage.csv', sep='\t')
# Start node: label and the property used to find it.
label1, name1 = "实践课程", "subject_id"
# End node: label and the property used to find it.
label2, name2 = "章节", "stage_id"
for row in range(len(ss)):
    subject_id = str(ss.iloc[row, 0])
    stage_id = str(ss.iloc[row, 1])
    # Column 2 supplies the relationship type.
    reValue = create_relationship(
        graph,
        label1, subject_id, name1,
        label2, stage_id, name2,
        ss.iloc[row, 2],
    )
print("课程章节关系导入成功!")