# Import the required packages
from py2neo import Graph, Node, Relationship, NodeMatcher, Subgraph
import pandas as pd
import numpy as np
import os
import sys

# Make ``config.py`` located in the current working directory importable.
sys.path.append(os.getcwd())
from config import neo4j_url,neo4j_username,neo4j_password
from config import kg_data

# Connect to the Neo4j database
graph = Graph(neo4j_url, auth=(neo4j_username, neo4j_password))
graph.delete_all()  # delete ALL data currently stored in this database


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _import_entities(graph, csv_path, label, fields, sep=','):
    """Create one node labelled *label* per row of the CSV at *csv_path*.

    The first ``len(fields)`` columns of the file are mapped, in order, onto
    the property names in *fields*.  Missing cells are replaced with the
    string ``'NaN'`` and every value is stored as a string.

    Raises IndexError if a row has fewer columns than *fields*.
    """
    frame = pd.read_csv(csv_path, sep=sep)
    frame = frame.fillna('NaN')
    # itertuples keeps each column's own dtype (unlike iterrows) and avoids
    # the cost of a separate .iloc lookup for every single cell.
    for row in frame.itertuples(index=False, name=None):
        properties = {field: str(row[col]) for col, field in enumerate(fields)}
        graph.create(Node(label, **properties))


# Look up a single node
def match_node(graph, label, attrs, name):
    """Return the first node labelled *label* whose property *name* equals *attrs*.

    Note the argument order: *attrs* is the property VALUE and *name* is the
    property KEY.  Returns None when no node matches.

    The lookup uses py2neo's property-equality match instead of a
    hand-assembled WHERE string, so values containing quotes or backslashes
    cannot break or alter the query.
    """
    matcher = NodeMatcher(graph)
    return matcher.match(label, **{name: attrs}).first()


# Create a relationship between two nodes
def create_relationship(graph, label1, attr1, name1, label2, attr2, name2, r_name):
    """Create the relationship ``(start)-[r_name]->(end)`` between two existing nodes.

    The start node is the node labelled *label1* whose property *name1*
    equals *attr1*; the end node is found the same way from *label2*,
    *name2* and *attr2*.

    Returns False (creating nothing) when either node cannot be found,
    True once the relationship has been created.
    """
    value1 = match_node(graph, label1, attr1, name1)
    value2 = match_node(graph, label2, attr2, name2)
    if value1 is None or value2 is None:
        return False
    graph.create(Relationship(value1, r_name, value2))
    return True


def _import_relations(graph, csv_path, label1, name1, col1, label2, name2, col2):
    """Create one relationship per row of the tab-separated CSV at *csv_path*.

    Column *col1* holds the id of the start node (label *label1*, property
    *name1*), column *col2* the id of the end node (label *label2*, property
    *name2*), and column 2 the relationship type.  Rows whose start or end
    node does not exist are skipped.
    """
    frame = pd.read_csv(csv_path, sep='\t')
    for row in frame.itertuples(index=False, name=None):
        create_relationship(
            graph,
            label1, str(row[col1]), name1,
            label2, str(row[col2]), name2,
            row[2],
        )


# ---------------------------------------------------------------------------
# Import the entity tables
# ---------------------------------------------------------------------------

# (file name, separator, node label, property names in column order, message)
_ENTITY_TABLES = [
    # courseware (attachments)
    ('attachments.csv', '\t', "课件",
     ['attachment_id', 'filename', 'filesize', 'downloads', 'description',
      'created_on', 'attachtype', 'link'],
     "课件实体导入成功!"),
    # challenges
    ('challenges.csv', '\t', "关卡",
     ['challenge_id', 'challenge_name', 'created_at', 'updated_at', 'status',
      'position', 'task_pass', 'score', 'visits', 'challenge_tags_count',
      'challenge_tag'],
     "关卡实体导入成功!"),
    # teaching classrooms (courses)
    ('courses.csv', '\t', "教学课堂",
     ['course_id', 'course_name', 'created_at', 'updated_at', 'description',
      'status', 'attachmenttype', 'visits', 'is_end', 'end_date',
      'members_count', 'homework_commons_count', 'course_groups_count',
      'exercises_count', 'link'],
     "教学课堂实体导入成功!"),
    # exams (exercises)
    ('exercises.csv', '\t', "考试",
     ['exercise_id', 'exercise_name', 'exercise_description',
      'exercise_status', 'created_at', 'updated_at'],
     "考试实体导入成功!"),
    # homeworks
    ('homeworks.csv', '\t', "作业",
     ['homework_id', 'homework_name', 'description', 'homework_type',
      'created_at', 'updated_at'],
     "作业实体导入成功!"),
    # knowledge points (read with the default comma separator)
    ('knowledge.csv', ',', "知识点",
     ['knowledge_id', 'knowledge'],
     "知识点实体导入成功!"),
    # practical trainings (shixuns)
    ('shixuns.csv', '\t', "实训",
     ['shixun_id', 'shixun_name', 'teacher_name', 'school_name',
      'created_at', 'updated_at', 'visits', 'language', 'myshixuns_count',
      'challenges_count', 'averge_star', 'users_count',
      'initiative_study_num', 'spoc_study_num', 'shixun_tag', 'description',
      'propaedeutics', 'link'],
     "实训实体导入成功!"),
    # chapters (stages; read with the default comma separator)
    ('stages.csv', ',', "章节",
     ['stage_id', 'stage_name', 'description', 'shixuns_count', 'created_at',
      'updated_at', 'stage_sort'],
     "章节实体导入成功!"),
    # practical courses (subjects)
    ('subjects.csv', '\t', "实践课程",
     ['subject_id', 'subject_name', 'description', 'visits', 'created_at',
      'updated_at', 'learning_notes', 'stages_count', 'stage_shixuns_count',
      'shixuns_count', 'excellent', 'student_count', 'participant_count',
      'link'],
     "实践课程实体导入成功!"),
    # teaching videos
    ('video_items.csv', '\t', "教学视频",
     ['video_item_id', 'video_name', 'description', 'averge_star',
      'study_video_items_count', 'link', 'created_at', 'updated_at',
      'cover_url', 'file_url', 'play_url', 'subject_name'],
     "教学视频实体导入成功!"),
]

for _file, _sep, _label, _fields, _message in _ENTITY_TABLES:
    _import_entities(graph, kg_data + 'entity/' + _file, _label, _fields, sep=_sep)
    print(_message)


# ---------------------------------------------------------------------------
# Import the relation tables
# ---------------------------------------------------------------------------

# (file name,
#  start label, start id property, start id column,
#  end label,   end id property,   end id column,
#  message)
# The id column order differs between files, hence the explicit indices.
_RELATION_TABLES = [
    # classroom -> exam
    ('course_exercise.csv',
     "教学课堂", "course_id", 0, "考试", "exercise_id", 1,
     "课堂考试关系导入成功!"),
    # classroom -> practical course (file stores subject_id first)
    ('course_subject.csv',
     "教学课堂", "course_id", 1, "实践课程", "subject_id", 0,
     "课堂课程关系导入成功!"),
    # knowledge point -> courseware
    ('kg_att.csv',
     "知识点", "knowledge_id", 0, "课件", "attachment_id", 1,
     "知识点课件关系导入成功!"),
    # knowledge point -> practical training
    ('kg_shixun.csv',
     "知识点", "knowledge_id", 0, "实训", "shixun_id", 1,
     "知识点实训关系导入成功!"),
    # knowledge point -> teaching video
    ('kg_video.csv',
     "知识点", "knowledge_id", 0, "教学视频", "video_item_id", 1,
     "知识点视频关系导入成功!"),
    # practical training -> challenge (file stores challenge_id first)
    ('shixun_challenge.csv',
     "实训", "shixun_id", 1, "关卡", "challenge_id", 0,
     "实训关卡关系导入成功!"),
    # practical training -> homework
    ('shixun_homework.csv',
     "实训", "shixun_id", 0, "作业", "homework_id", 1,
     "实训作业关系导入成功!"),
    # chapter -> knowledge point
    ('stage_kg.csv',
     "章节", "stage_id", 0, "知识点", "knowledge_id", 1,
     "章节知识点关系导入成功!"),
    # practical course -> chapter
    ('subject_stage.csv',
     "实践课程", "subject_id", 0, "章节", "stage_id", 1,
     "课程章节关系导入成功!"),
]

for (_file, _label1, _name1, _col1,
     _label2, _name2, _col2, _message) in _RELATION_TABLES:
    _import_relations(graph, kg_data + 'relation/' + _file,
                      _label1, _name1, _col1, _label2, _name2, _col2)
    print(_message)