You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

203 lines
9.4 KiB

5 months ago
# 导入需要的工具包
from py2neo import Graph, Node, Relationship, NodeMatcher, Subgraph
import pandas as pd
import numpy as np
import os
import sys
sys.path.append(os.getcwd())
import json
from config import neo4j_url,neo4j_username,neo4j_password
from config import kg_data
from config import study_path_data
from pprint import pprint as pp
# 连接Neo4j数据库
graph = Graph(neo4j_url, auth=(neo4j_username,neo4j_password) )
# 获取学生行为表
# 获取学生闯关情况表
games = pd.read_csv(study_path_data+'games.csv',sep='\t')
# 获取学生作业情况表
student_works = pd.read_csv(study_path_data+'student_works.csv',sep='\t')
#获取学生观看教学视频情况表
watch_course_video = pd.read_csv(study_path_data+'watch_course_videos.csv',sep='\t')
# 获取学生访问课件表
attachment_logs = pd.read_csv(study_path_data+'attachment_logs.csv',sep='\t')
def get_pre_exam_path(course_id,student_id, platform):
#根据course_id查找知识森林中的节点判断是否存在
course_query = "match(c:`教学课堂`) where c.course_id='{}' return c.course_id".format(course_id)
course_result = graph.run(course_query).data()
# 判断course_id是否存在如果存在进行如下查询操作返回查询结果如不存在返回空值
if course_result:
# 根据course_id查询知识森林实训的相关节点
shixun_query = '''match(c:`教学课堂`) where c.course_id="{}" match(c)-[:`课堂课程`]->(s:`实践课程`)
match(s)-[:`课程章节`]->(st:`章节`) match(st)-[:`章节知识点`]->(kn:`知识点`)
match(kn)-[:`知识点实训`]->(shixun:`实训`) return st.stage_id as stage_id,st.stage_name as stage_name,
st.stage_sort as stage_sort,kn.knowledge_id as knowledge_id,kn.knowledge as knowledge,
shixun.shixun_id as shixun_id,shixun.shixun_name as shixun_name,shixun.link as shixun_link,
count(shixun.shixun_id) as shixun_num'''.format(course_id)
# 获取知识森林查询的实训信息
shixun = graph.run(shixun_query).data()
#判断查询结果是否为空若非空进行如下筛选工作若空值赋值相同结构为空值的dataframe格式表
if shixun:
# 将查询结果转化为dataframe格式
shixun_data = pd.DataFrame(shixun)
# 分析学生关卡闯关情况
# 筛选指定学生student_id
stu_games = games[games['user_id']==int(student_id)]
# 根据学生闯关的准确率获取通关关卡记录
finish_games = stu_games[stu_games['accuracy']>=0.6]
finish_games = finish_games[finish_games['final_score']>=100]
finish_games = finish_games[finish_games['answer_open']<2]
finish_games = finish_games[finish_games['cost_time']>300]
# finish_games = finish_games[finish_games['evaluate_count']>0]
# 根据学生闯关的准确率获取未通关关卡记录
not_finish_games = stu_games[stu_games['accuracy']<0.6]
# 获取通关关卡对应的实训集合
stu_finish_game = set(finish_games['shixun_id'])
# 获取未通关关卡对应的实训集合
stu_not_finish_game = set(not_finish_games['shixun_id'])
# 获取学生完成实训集合
stu_finish_shixun = stu_finish_game-stu_not_finish_game
# 分析学生作业完成情况
# 筛选指定学生student_id
stu_homestudent_works = student_works[student_works['user_id']==int(student_id)]
# 快速筛选未完成的作业记录
stu_homestudent_works = stu_homestudent_works[stu_homestudent_works['final_score']<=80]
# 获取未完成作业对应的实训集合
stu_not_finish_homework = set(stu_homestudent_works['shixun_id'])
stu_finish_shixun = stu_finish_shixun-stu_not_finish_homework
#快速筛选掉学生已完成的实训,保留学生未完成的实训
shixun_data = shixun_data[~shixun_data['shixun_id'].isin(stu_finish_shixun)]
else:
shixun_data = pd.DataFrame(columns=["stage_id","stage_name","stage_sort","knowledge_id","knowledge","shixun_id","shixun_name","shixun_link","shixun_num"])
#知识森林查询教学视频相关节点
video_query = '''match(c:`教学课堂`) where c.course_id="{}" match(c)-[:`课堂课程`]->(s:`实践课程`)
match(s)-[:`课程章节`]->(st:`章节`) match(st)-[:`章节知识点`]->(kn:`知识点`)
match(kn)-[:`知识点视频`]->(video:`教学视频`) return st.stage_id as stage_id,
st.stage_name as stage_name,st.stage_sort as stage_sort,kn.knowledge_id as knowledge_id,
kn.knowledge as knowledge,video.video_item_id as video_id,video.video_name as video_name,
video.link as video_link,count(video.video_item_id) as video_num'''.format(course_id)
#获取知识森林查询的教学视频信息
video = graph.run(video_query).data()
if video:
# 将查询的结果转换为dataframe格式
video_data=pd.DataFrame(video)
# 分析学生视频学习情况
stu_videos = watch_course_video[watch_course_video['user_id']==int(student_id)]
finish_videos = stu_videos[stu_videos['is_finished']==1]
# 获取学生完成教学视频的ID集合
stu_video = set(finish_videos['video_item_id'])
# 快速筛选掉学生已完成的教学视频
video_data = video_data[~video_data['video_id'].isin(stu_video)]
else:
# 如果没有查询到视频信息则返回空dataframe
video_data = pd.DataFrame(columns=["stage_id","stage_name","knowledge_id","knowledge","video_id","video_name","video_link","video_num"])
# 知识森林查询教学课件相关节点
att_query = '''match(c:`教学课堂`) where c.course_id="{}" match(c)-[:`课堂课程`]->(s:`实践课程`)
match(s)-[:`课程章节`]->(st:`章节`) match(st)-[:`章节知识点`]->(kn:`知识点`)
match(kn)-[:`知识点课件`]->(att:`课件`) return st.stage_id as stage_id,st.stage_name as stage_name,
st.stage_sort as stage_sort,kn.knowledge_id as knowledge_id,kn.knowledge as knowledge,
att.attachment_id as attachment_id, att.filename as filename,att.link as attachment_link,
count(att.attachment_id) as attachment_num'''.format(course_id)
att = graph.run(att_query).data()
if att:
att_data = pd.DataFrame(att)
#分析学生访问课件学习情况
stu_att = attachment_logs[attachment_logs['user_id']==int(student_id)]
finish_atts = stu_att[stu_att['visits']>1]
att_set = set(finish_atts['attachment_id'])
#快速筛选掉学生已完成的教学课件
att_data = att_data[~att_data['attachment_id'].isin(att_set)]
else:
att_data = pd.DataFrame(columns=["stage_id","stage_name","stage_sort","knowledge_id","knowledge",
"attachment_id","filename","attachment_link"])
merged_data = pd.merge(shixun_data,video_data,on=['stage_id','stage_name',"stage_sort",'knowledge_id',
'knowledge'],how='outer')
merged_data = pd.merge(merged_data,att_data,on=['stage_id','stage_name',"stage_sort",'knowledge_id',
'knowledge'],how='outer')
if platform == '1':
merged_data['attachment_link'] = merged_data['attachment_link'].str.replace('www.', 'ilearning.')
merged_data['shixun_link'] = merged_data['shixun_link'].str.replace('www.', 'ilearning.')
merged_data['video_link'] = merged_data['video_link'].str.replace('www.', 'ilearning.')
merged_data['shixun_num'].fillna(0,inplace=True)
merged_data['video_num'].fillna(0,inplace=True)
merged_data['video_num'].fillna(0,inplace=True)
shixun = (merged_data.groupby(['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge',
'shixun_num','video_num'])
.apply(lambda x:x[['shixun_id','shixun_name', 'shixun_link']].to_dict('r'))
.reset_index(name='shixun'))
video = (merged_data.groupby(['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge',
'shixun_num','video_num'])
.apply(lambda x:x[['video_id','video_name', 'video_link']].to_dict('r'))
.reset_index(name='video'))
att = (merged_data.groupby(['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge',
'shixun_num','video_num'])
.apply(lambda x:x[['attachment_id','filename', 'attachment_link']].to_dict('r'))
.reset_index(name='attachment'))
linshi = shixun.merge(video,how='outer',on=['stage_id','stage_name',"stage_sort",'knowledge_id',
'knowledge','shixun_num','video_num'])
res=linshi.merge(att,how='outer',on=['stage_id','stage_name',"stage_sort",'knowledge_id','knowledge',
'shixun_num','video_num'])
result = (res.groupby(['stage_id','stage_name',"stage_sort"])
.apply(lambda x :x[['knowledge_id','knowledge','shixun_num','video_num','shixun',
'video','attachment']].to_dict('r'))
.reset_index(name='knowledge'))
result=json.loads(result.to_json(orient='records',force_ascii=False))
return result
else:
# 如果course_id不存在返回空值
return {}
if __name__ == '__main__':
# 读取数据
course_id = "24002"
student_id = '2383'
result = get_pre_exam_path(course_id,student_id)
print(result)