# 导入需要的工具包 from py2neo import Graph, Node, Relationship, NodeMatcher, Subgraph import pandas as pd import numpy as np import os import sys sys.path.append(os.getcwd()) import json from config import neo4j_url,neo4j_username,neo4j_password from config import kg_data from config import study_path_data from pprint import pprint as pp # 连接Neo4j数据库 graph = Graph(neo4j_url, auth=(neo4j_username,neo4j_password) ) # 获取学生行为表 # 获取学生闯关情况表 games = pd.read_csv(study_path_data+'games.csv',sep='\t') # 获取学生作业情况表 student_works = pd.read_csv(study_path_data+'student_works.csv',sep='\t') #获取学生观看教学视频情况表 watch_course_video = pd.read_csv(study_path_data+'watch_course_videos.csv',sep='\t') # 获取学生访问课件表 attachment_logs = pd.read_csv(study_path_data+'attachment_logs.csv',sep='\t') def get_review_path(course_id, student_id, platform): #根据course_id查找知识森林中的节点,判断是否存在 course_query = "match(c:`教学课堂`) where c.course_id='{}' return c.course_id".format(course_id) course_result = graph.run(course_query).data() # 判断course_id是否存在,如果存在,进行如下查询操作,返回查询结果;如不存在,返回空值 if course_result: # 根据course_id查询知识森林实训的相关节点 shixun_query = '''match(c:`教学课堂`) where c.course_id="{}" match(c)-[:`课堂课程`]->(s:`实践课程`) match(s)-[:`课程章节`]->(st:`章节`) match(st)-[:`章节知识点`]->(kn:`知识点`) match(kn)-[:`知识点实训`]->(shixun:`实训`) return st.stage_id as stage_id,st.stage_name as stage_name, st.stage_sort as stage_sort,kn.knowledge_id as knowledge_id,kn.knowledge as knowledge, shixun.shixun_id as shixun_id,shixun.shixun_name as shixun_name,shixun.link as shixun_link, count(shixun.shixun_id) as shixun_num'''.format(course_id) # 获取知识森林查询的实训信息 shixun = graph.run(shixun_query).data() #判断查询结果是否为空,若非空进行如下筛选工作;若空值,赋值相同结构为空值的dataframe格式表 if shixun: # 将查询结果转化为dataframe格式 shixun_data = pd.DataFrame(shixun) # 分析学生关卡闯关情况 # 筛选指定学生student_id stu_games = games[games['user_id']==int(student_id)] # 根据学生闯关的准确率获取通关关卡记录 finish_games = stu_games[stu_games['accuracy']>=0.6] # 根据学生闯关的准确率获取未通关关卡记录 not_finish_games = stu_games[stu_games['accuracy']<0.6] # 获取通关关卡对应的实训集合 stu_finish_game = set(finish_games['shixun_id']) # 获取未通关关卡对应的实训集合 stu_not_finish_game = set(not_finish_games['shixun_id']) # 获取学生完成实训集合 stu_finish_shixun = stu_finish_game-stu_not_finish_game # 分析学生作业完成情况 # 筛选指定学生student_id stu_homestudent_works = student_works[student_works['user_id']==int(student_id)] # 快速筛选未完成的作业记录 stu_homestudent_works = stu_homestudent_works[stu_homestudent_works['final_score']<=0.6] # stu_homestudent_works = stu_homestudent_works[stu_homestudent_works['cost_time']<=100] # 获取未完成作业对应的实训集合 stu_not_finish_homework = set(stu_homestudent_works['shixun_id']) stu_finish_shixun = stu_finish_shixun-stu_not_finish_homework #快速筛选掉学生已完成的实训,保留学生未完成的实训 shixun_data = shixun_data[~shixun_data['shixun_id'].isin(stu_finish_shixun)] else: shixun_data = pd.DataFrame(columns=["stage_id","stage_name","stage_sort","knowledge_id","knowledge", "shixun_id","shixun_name","shixun_link","shixun_num"]) #知识森林查询教学视频相关节点 video_query = '''match(c:`教学课堂`) where c.course_id="{}" match(c)-[:`课堂课程`]->(s:`实践课程`) match(s)-[:`课程章节`]->(st:`章节`) match(st)-[:`章节知识点`]->(kn:`知识点`) match(kn)-[:`知识点视频`]->(video:`教学视频`) return st.stage_id as stage_id, st.stage_name as stage_name,st.stage_sort as stage_sort,kn.knowledge_id as knowledge_id, kn.knowledge as knowledge,video.video_item_id as video_id,video.video_name as video_name, video.link as video_link,count(video.video_item_id) as video_num'''.format(course_id) #获取知识森林查询的教学视频信息 video = graph.run(video_query).data() if video: # 将查询的结果转换为dataframe格式 video_data=pd.DataFrame(video) # 分析学生视频学习情况 stu_videos = watch_course_video[watch_course_video['user_id']==int(student_id)] finish_videos = stu_videos[stu_videos['is_finished']==1] # 获取学生完成教学视频的ID集合 stu_video = set(finish_videos['video_item_id']) # 快速筛选掉学生已完成的教学视频 video_data = video_data[~video_data['video_id'].isin(stu_video)] else: # 如果没有查询到视频信息,则返回空dataframe video_data = pd.DataFrame(columns=["stage_id","stage_name","knowledge_id","knowledge","video_id", "video_name","video_link","video_num"]) # 知识森林查询教学课件相关节点 att_query = '''match(c:`教学课堂`) where c.course_id="{}" match(c)-[:`课堂课程`]->(s:`实践课程`) match(s)-[:`课程章节`]->(st:`章节`) match(st)-[:`章节知识点`]->(kn:`知识点`) match(kn)-[:`知识点课件`]->(att:`课件`) return st.stage_id as stage_id,st.stage_name as stage_name, st.stage_sort as stage_sort,kn.knowledge_id as knowledge_id,kn.knowledge as knowledge, att.attachment_id as attachment_id, att.filename as filename,att.link as attachment_link, count(att.attachment_id) as attachment_num'''.format(course_id) att = graph.run(att_query).data() if att: att_data = pd.DataFrame(att) #分析学生访问课件学习情况 stu_att = attachment_logs[attachment_logs['user_id']==int(student_id)] finish_atts = stu_att[stu_att['visits']>1] att_set = set(finish_atts['attachment_id']) #快速筛选掉学生已完成的教学课件 att_data = att_data[~att_data['attachment_id'].isin(att_set)] else: att_data = pd.DataFrame(columns=["stage_id","stage_name","stage_sort","knowledge_id","knowledge", "attachment_id","filename","attachment_link"]) merged_data = pd.merge(shixun_data,video_data,on=['stage_id','stage_name',"stage_sort",'knowledge_id', 'knowledge'],how='outer') merged_data = pd.merge(merged_data,att_data,on=['stage_id','stage_name',"stage_sort",'knowledge_id', 'knowledge'],how='outer') merged_data['shixun_num'].fillna(0,inplace=True) merged_data['video_num'].fillna(0,inplace=True) merged_data['video_num'].fillna(0,inplace=True) if platform == '1': merged_data['attachment_link'] = merged_data['attachment_link'].str.replace('www.', 'ilearning.') merged_data['shixun_link'] = merged_data['shixun_link'].str.replace('www.', 'ilearning.') merged_data['video_link'] = merged_data['video_link'].str.replace('www.', 'ilearning.') shixun = (merged_data.groupby(['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge', 'shixun_num','video_num']) .apply(lambda x:x[['shixun_id','shixun_name', 'shixun_link']].to_dict('r')) .reset_index(name='shixun')) video = (merged_data.groupby(['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge', 'shixun_num','video_num']) .apply(lambda x:x[['video_id','video_name', 'video_link']].to_dict('r')) .reset_index(name='video')) att = (merged_data.groupby(['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge', 'shixun_num','video_num']) .apply(lambda x:x[['attachment_id','filename', 'attachment_link']].to_dict('r')) .reset_index(name='attachment')) linshi = shixun.merge(video,how='outer',on=['stage_id','stage_name',"stage_sort",'knowledge_id', 'knowledge','shixun_num','video_num']) res=linshi.merge(att,how='outer',on=['stage_id','stage_name',"stage_sort",'knowledge_id', 'knowledge','shixun_num','video_num']) result = (res.groupby(['stage_id','stage_name',"stage_sort"]).apply(lambda x :x[['knowledge_id','knowledge','shixun_num','video_num','shixun','video','attachment']].to_dict('r')) .reset_index(name='knowledge')) result=json.loads(result.to_json(orient='records',force_ascii=False)) return result else: # 如果course_id不存在,返回空值 return {} if __name__ == '__main__': # 读取数据 course_id = "24002" student_id = '2383' result = get_review_path(course_id,student_id) print(result)