|
|
const fs = require('fs');
|
|
|
const path = require('path');
|
|
|
const { loadEvaluationDimensions } = require('./evaluationDimensions.cjs');
|
|
|
|
|
|
/**
 * Directory one level above the folder that contains this module.
 * Passed to loadEvaluationDimensions() wherever dimensions are loaded.
 */
const PROJECT_ROOT = path.resolve(__dirname, '..');
|
|
|
|
|
|
/**
 * Keyword patterns for lab steps 2-5, keyed by step name.
 * Each value is a list of case-insensitive regular expressions; the RUBRIC
 * table reuses these lists as the `patterns` of the matching step.
 */
const LAB_KEYWORDS = {
  // Step 2: generating the base script (CSV reading / statistics vocabulary).
  step2_generate: [
    /read_csv|pandas|pd\.read|scores\.csv|csv/i,
    /总分|平均分|各科/i,
  ],

  // Step 3: running and debugging (error vocabulary).
  step3_debug: [
    /报错|修复|FileNotFound|KeyError|Traceback|error/i,
  ],

  // Step 4: visualisation (matplotlib / chart vocabulary).
  step4_viz: [
    /matplotlib|plt\.|柱状|bar\(|score_chart|可视化|png/i,
  ],

  // Step 5: asking for an explanation of the code.
  step5_explain: [
    /解释|什么意思|为什么|pathlib/i,
  ],
};
|
|
|
|
|
|
/**
 * Ordered rubric of lab steps.
 * Every row has an `id`, a display `label`, and `patterns`: a step counts as
 * hit when any one of its patterns matches the combined conversation text
 * (see matchRubric).
 */
const RUBRIC = [
  {
    id: 'prep',
    label: '准备数据 scores.csv',
    patterns: [/scores\.csv|成绩表|csv/i],
  },
  {
    id: 's1',
    label: '步骤一:打开 Cursor / 新建项目',
    patterns: [/myshixun/i, /打开文件夹/i, /score_analysis\.py/i, /新建文件/i],
  },
  {
    id: 's2',
    label: '步骤二:自然语言生成基础代码',
    // .flat() yields a fresh array holding the same regex objects.
    patterns: LAB_KEYWORDS.step2_generate.flat(),
  },
  {
    id: 's3',
    label: '步骤三:运行与调试',
    patterns: LAB_KEYWORDS.step3_debug,
  },
  {
    id: 's4',
    label: '步骤四:matplotlib 柱状图与 score_chart.png',
    patterns: LAB_KEYWORDS.step4_viz,
  },
  {
    id: 's5',
    label: '步骤五:请 AI 解释代码',
    patterns: LAB_KEYWORDS.step5_explain,
  },
  {
    id: 's6',
    label: '步骤六:保存 score_analysis.py',
    patterns: [/score_analysis|保存|submit/i],
  },
];
|
|
|
|
|
|
/**
 * Concatenate every non-blank text found in the given hook events.
 *
 * For each event, in this order: `hook_input.text`,
 * `user_queries.last_user_text`, then each entry of
 * `user_queries.recent_user_texts` (when that is an array). Non-string and
 * whitespace-only values are ignored.
 *
 * @param {Iterable<object>} events - hook events; null entries are tolerated.
 * @returns {string} the kept texts joined with '\n' ('' when none).
 */
function collectTexts(events) {
  const isNonBlank = (value) => typeof value === 'string' && value.trim() !== '';
  const pieces = [];

  for (const entry of events) {
    const queries = entry?.user_queries;
    const recentTexts = Array.isArray(queries?.recent_user_texts) ? queries.recent_user_texts : [];
    const candidates = [entry?.hook_input?.text, queries?.last_user_text].concat(recentTexts);

    for (const candidate of candidates) {
      if (isNonBlank(candidate)) pieces.push(candidate);
    }
  }

  return pieces.join('\n');
}
|
|
|
|
|
|
/**
 * Evaluate every RUBRIC row against the combined conversation text.
 *
 * @param {string} combinedLower - conversation text (callers pass it lower-cased).
 * @returns {Array<{id: string, label: string, done: boolean}>} one entry per
 *   rubric row, in rubric order; `done` is true when any pattern of the row matches.
 */
function matchRubric(combinedLower) {
  const results = [];

  for (const { id, label, patterns } of RUBRIC) {
    let done = false;
    for (const pattern of patterns) {
      if (pattern.test(combinedLower)) {
        done = true;
        break; // the first matching pattern is enough
      }
    }
    results.push({ id, label, done });
  }

  return results;
}
|
|
|
|
|
|
/**
 * Percentage of rubric steps flagged as done.
 *
 * @param {Array<{done: boolean}>} steps - rubric step results.
 * @returns {number} integer in 0..100; 0 for an empty list (avoids the NaN
 *   that 0/0 would otherwise produce).
 */
function scoreLabProgress(steps) {
  if (steps.length === 0) return 0;

  const completed = steps.filter((step) => step.done).length;
  return Math.round((completed / steps.length) * 100);
}
|
|
|
|
|
|
/**
 * Collect the individual texts used to judge whether a session looks like a
 * pasted handout/task sheet rather than a step-by-step lab dialogue.
 *
 * Per event, in order: `hook_input.text`, `user_queries.last_user_text`, then
 * each entry of `user_queries.recent_user_texts`. Only non-blank strings are kept.
 *
 * NOTE(review): buildHookTranscript labels `hook_input.text` as the model
 * reply, so the result presumably also contains model output — confirm that
 * this is intended for the paste heuristics.
 *
 * @param {Iterable<object>} events - hook events; null entries are tolerated.
 * @returns {string[]} kept texts (untrimmed), in encounter order.
 */
function collectUserFacingTexts(events) {
  const keep = (value) => typeof value === 'string' && value.trim().length > 0;
  const collected = [];

  for (const entry of events) {
    const queries = entry?.user_queries;
    const recentTexts = Array.isArray(queries?.recent_user_texts) ? queries.recent_user_texts : [];

    for (const candidate of [entry?.hook_input?.text, queries?.last_user_text, ...recentTexts]) {
      if (keep(candidate)) collected.push(candidate);
    }
  }

  return collected;
}
|
|
|
|
|
|
/**
 * Derive paste-detection signals for one group of hook events.
 *
 * Signals (all computed from collectUserFacingTexts(events)):
 * - maxLen: longest trimmed text length.
 * - shortTurns: number of texts whose trimmed length is within 40..900.
 * - totalChars: length of all texts joined with '\n'.
 * - pasteHeavy: maxLen > 4500, or totalChars > 8000 with fewer than 4 short turns.
 * - courseDump: totalChars > 3500 and the joined text mentions both a
 *   course/report-system keyword and a lab keyword.
 * - heuristicUntrustworthy: pasteHeavy || courseDump.
 * - hasTraceback: the joined text contains error/traceback vocabulary.
 *
 * @param {Iterable<object>} events - hook events of one conversation/window.
 * @returns {{pasteHeavy: boolean, courseDump: boolean, heuristicUntrustworthy: boolean,
 *   maxLen: number, shortTurns: number, hasTraceback: boolean, totalChars: number}}
 */
function detectSessionProfile(events) {
  const texts = collectUserFacingTexts(events);

  const lengths = texts.map((text) => String(text).trim().length);
  const maxLen = lengths.reduce((longest, len) => Math.max(longest, len), 0);
  const shortTurns = lengths.filter((len) => len >= 40 && len <= 900).length;

  // Keyword checks run on a lower-cased copy of everything joined together.
  const joined = texts.join('\n').toLowerCase();
  const totalChars = joined.length;

  const mentionsCourseMaterial =
    /(实训任务|教学大纲|课程建设|助教|report-ui|server\.js|skill\.md|lab-eval|头歌实践)/;
  const mentionsLabKeyword = /(score_analysis|read_csv|matplotlib)/;
  const errorVocabulary =
    /traceback|filenotfound|keyerror|syntaxerror|modulenotfound|error:\s|exception:/i;

  const pasteHeavy = maxLen > 4500 || (totalChars > 8000 && shortTurns < 4);
  const courseDump =
    totalChars > 3500 && mentionsCourseMaterial.test(joined) && mentionsLabKeyword.test(joined);
  const heuristicUntrustworthy = pasteHeavy || courseDump;
  const hasTraceback = errorVocabulary.test(joined);

  return {
    pasteHeavy,
    courseDump,
    heuristicUntrustworthy,
    maxLen,
    shortTurns,
    hasTraceback,
    totalChars,
  };
}
|
|
|
|
|
|
/**
 * Profile each transcript window separately, then aggregate the signals.
 *
 * Aggregation rules: short-turn counts and character totals are summed, the
 * longest single text is the maximum over windows, hasTraceback is true when
 * any window reports it, and the courseDump keyword check runs on the texts of
 * all windows joined together. The thresholds are the same ones
 * detectSessionProfile applies.
 *
 * @param {Array<{events?: object[]}>} windowGroups - groups keyed by transcript path.
 * @returns {{pasteHeavy: boolean, courseDump: boolean, heuristicUntrustworthy: boolean,
 *   maxLen: number, shortTurns: number, hasTraceback: boolean, totalChars: number}}
 *   an all-false / all-zero profile when no groups are given.
 */
function aggregateProfileAcrossTranscriptWindows(windowGroups) {
  const hasGroups = Boolean(windowGroups && windowGroups.length);
  if (!hasGroups) {
    return {
      pasteHeavy: false,
      courseDump: false,
      heuristicUntrustworthy: false,
      maxLen: 0,
      shortTurns: 0,
      hasTraceback: false,
      totalChars: 0,
    };
  }

  const totals = { maxLen: 0, shortTurns: 0, totalChars: 0, hasTraceback: false };
  const windowBlobs = [];

  for (const group of windowGroups) {
    const windowEvents = group.events || [];
    const windowProfile = detectSessionProfile(windowEvents);

    totals.maxLen = Math.max(totals.maxLen, windowProfile.maxLen);
    totals.shortTurns += windowProfile.shortTurns;
    totals.totalChars += windowProfile.totalChars;
    if (windowProfile.hasTraceback) totals.hasTraceback = true;

    windowBlobs.push(collectUserFacingTexts(windowEvents).join('\n'));
  }

  const { maxLen, shortTurns, totalChars, hasTraceback } = totals;
  const joined = windowBlobs.join('\n').toLowerCase();

  const mentionsCourseMaterial =
    /(实训任务|教学大纲|课程建设|助教|report-ui|server\.js|skill\.md|lab-eval|头歌实践)/;
  const mentionsLabKeyword = /(score_analysis|read_csv|matplotlib)/;

  const pasteHeavy = maxLen > 4500 || (totalChars > 8000 && shortTurns < 4);
  const courseDump =
    totalChars > 3500 && mentionsCourseMaterial.test(joined) && mentionsLabKeyword.test(joined);
  const heuristicUntrustworthy = pasteHeavy || courseDump;

  return {
    pasteHeavy,
    courseDump,
    heuristicUntrustworthy,
    maxLen,
    shortTurns,
    hasTraceback,
    totalChars,
  };
}
|
|
|
|
|
|
/**
 * Down-weight keyword hits when the session profile is flagged as untrustworthy.
 *
 * With a trustworthy profile the steps are returned as shallow copies. Otherwise
 * every step is marked not done and annotated with an `eval_note`, except step
 * 's3', which is marked done when the profile reports error/traceback text.
 *
 * @param {Array<{id: string, label: string, done: boolean}>} steps
 * @param {{heuristicUntrustworthy: boolean, hasTraceback: boolean}} profile
 * @returns {Array<object>} new step objects; the input is not mutated.
 */
function demoteStepsForProfile(steps, profile) {
  const trusted = !profile.heuristicUntrustworthy;
  if (trusted) {
    return steps.map((step) => ({ ...step }));
  }

  const demotedNote = '启发式降权:对话多为长讲义/说明类粘贴,关键词命中不代表本人已逐步完成该步骤';
  const keptNote = '检测到报错/异常相关表述,保留为已体现';

  const adjusted = [];
  for (const step of steps) {
    const keepAsDebugEvidence = step.id === 's3' && profile.hasTraceback;
    adjusted.push(
      keepAsDebugEvidence
        ? { ...step, done: true, eval_note: keptNote }
        : { ...step, done: false, eval_note: demotedNote }
    );
  }
  return adjusted;
}
|
|
|
|
|
|
/**
 * Score the five fixed ability signals from the rubric steps and the session
 * profile. buildAbilityDimensionsFromYaml maps these onto the configured
 * dimension list.
 *
 * When the profile is flagged untrustworthy, scores are capped (46 in general,
 * 52 for the debugging score, 44 for the prompt score).
 *
 * @param {Array<{id: string, done: boolean}>} steps - rubric step results.
 * @param {{heuristicUntrustworthy: boolean, hasTraceback: boolean, shortTurns: number}} profile
 * @returns {Array<{id: string, name: string, value: number, comment: string}>}
 *   entries for prompt_quality, tool_use_debug, data_stats, visualization and
 *   code_literacy, in that order.
 */
function buildAbilityDimensions(steps, profile) {
  const isDone = (stepId) => Boolean(steps.find((step) => step.id === stepId)?.done);
  const untrusted = profile.heuristicUntrustworthy;
  const clamp = (score) => (untrusted ? Math.min(score, 46) : score);

  // Shared shape of the data / visualisation / literacy scores.
  const stepScore = (stepId, whenDone, whenMissing) =>
    Math.round(clamp(isDone(stepId) ? whenDone : whenMissing));

  let promptScore;
  if (untrusted) {
    promptScore = clamp(Math.min(44, 22 + profile.shortTurns * 8));
  } else if (isDone('s2') && profile.shortTurns >= 3) {
    promptScore = 82;
  } else if (isDone('s2')) {
    promptScore = 68;
  } else {
    promptScore = 36;
  }

  let debugScore = isDone('s3') ? 72 : 34;
  if (profile.hasTraceback) debugScore = Math.min(100, debugScore + 18);
  if (untrusted) debugScore = Math.min(debugScore, 52);
  debugScore = Math.round(debugScore);

  const dataScore = stepScore('s2', 74, 32);
  const vizScore = stepScore('s4', 78, 30);
  const literacyScore = stepScore('s5', 76, 33);

  let promptComment;
  if (untrusted) {
    promptComment = '单次粘贴过长、分步短问较少,更像整份材料投喂;难以认定已养成「目标—数据—输出」分步描述习惯。';
  } else if (isDone('s2')) {
    promptComment = '日志中能识别与生成/改代码相关的交互;若含多轮短问更佳。';
  } else {
    promptComment = '建议在对话中分步写清输入文件、期望统计与输出形式,少用大段讲义替代个人表述。';
  }

  const debugComment = profile.hasTraceback
    ? '出现过报错/异常类文本,具备可评估的排错语境。'
    : '较少看到 Traceback 或明确报错描述;真实调试链路不足时该项从严给分。';

  let dataComment;
  if (untrusted) {
    dataComment = '关键词可能来自粘贴材料,已与仓库/独立短对话交叉前不计为高水平。';
  } else if (isDone('s2')) {
    dataComment = '对话与步骤表与数据读取/统计相关表述较一致。';
  } else {
    dataComment = '需补充 read_csv、列含义、均值等业务向提问或代码讨论。';
  }

  const vizComment = isDone('s4')
    ? '有图表/png 等相关讨论。'
    : '建议明确柱状图轴含义、保存 score_chart.png 等可评测产出。';

  const literacyComment = isDone('s5')
    ? '有请模型解释或追问「为什么」的迹象。'
    : '可多用「解释这段」「逐步说明变量含义」类提问巩固理解。';

  return [
    {
      id: 'prompt_quality',
      name: '自然语言驱动(Prompt)',
      value: promptScore,
      comment: promptComment,
    },
    {
      id: 'tool_use_debug',
      name: '运行调试与排错',
      value: debugScore,
      comment: debugComment,
    },
    {
      id: 'data_stats',
      name: '数据读取与统计',
      value: dataScore,
      comment: dataComment,
    },
    {
      id: 'visualization',
      name: '可视化(matplotlib)',
      value: vizScore,
      comment: vizComment,
    },
    {
      id: 'code_literacy',
      name: '代码理解与反思',
      value: literacyScore,
      comment: literacyComment,
    },
  ];
}
|
|
|
|
|
|
/**
 * Emit ability bars in the order and with the names of the configured
 * dimension list (config/evaluation-dimensions.yaml, per the original note).
 *
 * A dimension whose id matches one of the five built-in signals reuses that
 * signal's value and comment (with the dimension rubric appended, truncated to
 * 520 characters). Any other id gets a conservative estimate derived from the
 * overall step-completion percentage.
 *
 * @param {Array<{id: string, name: string, rubric?: string}>} dimensions -
 *   configured dimensions; a missing, non-array or empty value falls back to
 *   the built-in five.
 * @param {Array<{id: string, done: boolean}>} steps - rubric step results.
 * @param {{heuristicUntrustworthy: boolean}} profile - session profile.
 * @returns {Array<{id: string, name: string, value: number, comment: string}>}
 */
function buildAbilityDimensionsFromYaml(dimensions, steps, profile) {
  const legacy = buildAbilityDimensions(steps, profile);

  // Guard against null/undefined (the original dereferenced `.length` directly).
  if (!Array.isArray(dimensions) || dimensions.length === 0) return legacy;

  const legById = new Map(legacy.map((x) => [x.id, x]));
  const pct = scoreLabProgress(steps);
  const unt = profile.heuristicUntrustworthy;

  return dimensions.map((d) => {
    // A dimension without a rubric must not leak the text "undefined".
    const rubric = d.rubric == null ? '' : String(d.rubric);

    const hit = legById.get(d.id);
    if (hit) {
      const comment = rubric ? `${hit.comment}(任务维度 rubric:${rubric})` : hit.comment;
      return {
        id: d.id,
        name: d.name,
        value: hit.value,
        comment: comment.slice(0, 520),
      };
    }

    // Unknown id: scale the overall completion, more cautiously when untrusted.
    const v = Math.round(unt ? Math.min(42, pct * 0.35 + 10) : Math.min(88, pct * 0.82 + 8));
    return {
      id: d.id,
      name: d.name,
      value: v,
      comment: `根据实训步骤整体对齐度(${pct}%)对该维度做保守估计。${rubric}`,
    };
  });
}
|
|
|
|
|
|
/**
 * Pick the chronologically last non-empty user message of a session and turn
 * it into a short single-line preview (via normalizeTopicPreviewLine).
 *
 * Events are ordered by `timestamp || captured_at` (string comparison). Walking
 * from newest to oldest, the first event that yields a text wins: its
 * `user_queries.last_user_text`, or else the final entry of
 * `user_queries.recent_user_texts`.
 *
 * @param {object[]} events - hook events of one conversation.
 * @returns {string} the preview, or '' when nothing usable exists.
 */
function deriveSessionTopicPreview(events) {
  if (!Array.isArray(events) || events.length === 0) return '';

  const stampOf = (ev) => String(ev.timestamp || ev.captured_at || '');
  const newestFirst = [...events]
    .sort((left, right) => stampOf(left).localeCompare(stampOf(right)))
    .reverse();

  for (const ev of newestFirst) {
    const queries = ev?.user_queries;
    if (!queries || typeof queries !== 'object') continue;

    let candidate = typeof queries.last_user_text === 'string' ? queries.last_user_text.trim() : '';

    if (!candidate) {
      const recent = queries.recent_user_texts;
      if (Array.isArray(recent) && recent.length) {
        // Only the final entry of the list is considered.
        candidate = String(recent[recent.length - 1] || '').trim();
      }
    }

    if (candidate) return normalizeTopicPreviewLine(candidate);
  }

  return '';
}
|
|
|
|
|
|
/**
 * Collapse a text into one line and truncate it for use as a short title.
 *
 * Every whitespace run (including newlines) becomes a single space and the
 * result is trimmed. Texts longer than 48 UTF-16 code units are cut and get a
 * trailing '…'; the cut never splits a surrogate pair, so the preview cannot
 * end in half of an emoji or other astral character.
 *
 * @param {*} s - text to normalise; falsy values yield ''.
 * @returns {string} single-line preview.
 */
function normalizeTopicPreviewLine(s) {
  const MAX_UNITS = 48;
  const flat = String(s || '')
    .replace(/\s+/g, ' ')
    .trim();

  if (flat.length <= MAX_UNITS) return flat;

  // If the last kept unit is a high surrogate, drop it instead of orphaning it.
  const lastKept = flat.charCodeAt(MAX_UNITS - 1);
  const end = lastKept >= 0xd800 && lastKept <= 0xdbff ? MAX_UNITS - 1 : MAX_UNITS;

  return `${flat.slice(0, end)}…`;
}
|
|
|
|
|
|
/**
 * Render all hook events of one conversation as a readable transcript, in
 * chronological order (`timestamp || captured_at`, string comparison).
 *
 * Per event it emits, when present: the last user message, the de-duplicated
 * recent user messages, the user turn count, the model reply text
 * (`hook_input.text`) and a metadata line (token counts, transcript path).
 *
 * @param {object[]} events - hook events of one conversation.
 * @returns {string} the transcript, or a placeholder sentence when no event
 *   contributes any text.
 */
function buildHookTranscript(events) {
  const stampOf = (ev) => String(ev.timestamp || ev.captured_at || '');
  const ordered = [...events].sort((left, right) => stampOf(left).localeCompare(stampOf(right)));

  let transcript = '';

  for (const ev of ordered) {
    const hook = ev.hook_input || {};
    const queries = ev.user_queries;

    if (queries && typeof queries === 'object') {
      const userParts = [];

      const lastText = queries.last_user_text;
      if (typeof lastText === 'string' && lastText.trim()) {
        userParts.push(`【末条用户消息】\n${lastText.trim()}`);
      }

      const recent = queries.recent_user_texts;
      if (Array.isArray(recent) && recent.length) {
        // A Set keeps first-seen order while dropping repeated messages.
        const distinct = new Set();
        recent.forEach((item) => {
          const trimmed = String(item).trim();
          if (trimmed) distinct.add(trimmed);
        });
        if (distinct.size) {
          userParts.push(`【近期用户消息】\n${[...distinct].join('\n———\n')}`);
        }
      }

      if (queries.user_turn_count != null) {
        userParts.push(`(用户轮次:${queries.user_turn_count})`);
      }

      if (userParts.length) transcript += `${userParts.join('\n\n')}\n\n`;
    }

    if (typeof hook.text === 'string' && hook.text.trim()) {
      transcript += `【模型回复文本】\n${hook.text.trim()}\n\n`;
    }

    const meta = [];
    const hasTokenInfo = hook.input_tokens != null || hook.output_tokens != null;
    if (hasTokenInfo) {
      meta.push(
        `TOKENS:输入 ${hook.input_tokens ?? 0} · 输出 ${hook.output_tokens ?? 0} · 缓存读取 ${hook.cache_read_tokens ?? 0}`
      );
    }
    if (typeof hook.transcript_path === 'string' && hook.transcript_path.trim()) {
      meta.push(`会话记录路径:${hook.transcript_path.trim()}`);
    }
    if (meta.length) transcript += `【元数据】${meta.join(' · ')}\n\n`;
  }

  const trimmed = transcript.trim();
  return trimmed || '(该会话在 chat_logs 中无可用钩子文本)';
}
|
|
|
|
|
|
/**
 * Derive a list of "question" items from the user texts of a conversation.
 *
 * Each non-blank string in `user_queries.recent_user_texts` and
 * `user_queries.last_user_text` becomes one item, de-duplicated per
 * (text, event timestamp). Texts shorter than 4 characters are dropped unless
 * they contain a lab-related keyword. Items mentioning error vocabulary are
 * marked 'open', all others 'resolved'.
 *
 * Keyword checks are case-insensitive (they run on the lower-cased text), so
 * "Traceback", "KeyError", "CSV" or "Python" are recognised. Non-string
 * entries are skipped instead of raising a TypeError.
 *
 * @param {string} convId - conversation id stored on every item.
 * @param {Iterable<object>} events - hook events; null entries are tolerated.
 * @returns {Array<object>} at most 60 items, newest first (string comparison
 *   on the event timestamp).
 */
function inferQuestionsFromEvents(convId, events) {
  const seen = new Set();
  const items = [];

  for (const ev of events) {
    const ts = ev?.timestamp || ev?.captured_at || '';
    const recent = ev?.user_queries?.recent_user_texts;
    const last = ev?.user_queries?.last_user_text;

    const candidates = Array.isArray(recent) ? [...recent] : [];
    if (last) candidates.push(last);

    for (const text of candidates) {
      if (typeof text !== 'string') continue;

      const key = text.trim();
      if (!key) continue;

      const dedupe = `${key}\n@\n${ts}`;
      if (seen.has(dedupe)) continue;
      seen.add(dedupe);

      const lower = key.toLowerCase();
      const looksLab =
        /csv|成绩|python|matplotlib|平均|报错|cursor|代码|图|分析/.test(lower) ||
        /who|how|you/.test(lower);
      if (!looksLab && key.length < 4) continue;

      const isDebug = /报错|修复|error|traceback|失败/.test(lower);

      items.push({
        conversation_id: convId,
        title: key.length > 80 ? `${key.slice(0, 80)}…` : key,
        detail: key,
        status: isDebug ? 'open' : 'resolved',
        tags: [ev?.hook_input?.model || 'default', '钩子'],
        time: ts,
      });
    }
  }

  items.sort((a, b) => String(b.time).localeCompare(String(a.time)));
  return items.slice(0, 60);
}
|
|
|
|
|
|
/**
 * Sum the token counters found in `hook_input` across all events.
 *
 * @param {Iterable<object>} events - hook events; null entries are tolerated.
 * @returns {{input_tokens: number, output_tokens: number, cache_read_tokens: number}}
 *   totals; missing or non-numeric counters count as 0.
 */
function aggregateTokens(events) {
  const totals = { input_tokens: 0, output_tokens: 0, cache_read_tokens: 0 };
  const counters = Object.keys(totals);

  for (const ev of events) {
    const usage = ev?.hook_input || {};
    for (const counter of counters) {
      totals[counter] += Number(usage[counter]) || 0;
    }
  }

  return totals;
}
|
|
|
|
|
|
/**
 * Compute the grouping key that identifies the chat window of an event.
 *
 * Events sharing the same `hook_input.transcript_path` (or, failing that,
 * `user_queries.path`) belong to the same window. When no path is available
 * the key falls back to `__single_path__:<conversation id>`, using
 * `hook_input.conversation_id` or the supplied fallback id.
 *
 * @param {object} ev - hook event; may be null/undefined.
 * @param {string} fallbackConvId - conversation id to use when the event has none.
 * @returns {string} the group key, or '' when neither a path nor an id exists.
 */
function normalizeTranscriptGroupKey(ev, fallbackConvId) {
  const hook = ev?.hook_input || {};
  const queries = ev?.user_queries || {};

  const transcriptPath = String(hook.transcript_path || queries.path || '').trim();
  if (transcriptPath !== '') return transcriptPath;

  const conversationId = String(hook.conversation_id || fallbackConvId || '').trim();
  if (conversationId === '') return '';

  return `__single_path__:${conversationId}`;
}
|
|
|
|
|
|
/**
 * Choose the representative conversation id for a chat window.
 *
 * Preference: the id equal to the transcript file's base name without a
 * trailing `.jsonl`; otherwise the alphabetically first id.
 *
 * The base name is taken after the last '/' or '\' so that Windows-style
 * transcript paths are matched as well.
 *
 * @param {string} transcriptPath - transcript path (or group key) of the window.
 * @param {Iterable<string>} idSet - candidate conversation ids.
 * @returns {string} the chosen id, or '' when there is no non-empty candidate.
 */
function pickPrimaryConversationId(transcriptPath, idSet) {
  const ids = [...idSet].filter(Boolean);
  if (!ids.length) return '';

  const base = String(transcriptPath || '').split(/[\\/]/).pop() || '';
  const bare = base.replace(/\.jsonl$/i, '');
  if (bare && ids.includes(bare)) return bare;

  // `ids` is a local copy, so sorting in place does not affect the caller.
  ids.sort();
  return ids[0];
}
|
|
|
|
|
|
/**
 * Bucket the raw chat log (`{ conversationId: Event[] }`) by chat window.
 *
 * Events with the same key from normalizeTranscriptGroupKey land in the same
 * group. Each group records its key, the last non-empty transcript path seen,
 * the set of conversation ids involved (top-level key plus any
 * `hook_input.conversation_id`), and its events.
 *
 * Ordering: events inside a group are sorted oldest first; groups are sorted
 * by their newest event, newest first (string comparison on
 * `timestamp || captured_at`).
 *
 * @param {object} rawData - parsed chat log.
 * @returns {Array<{key: string, transcript_path: (string|null),
 *   conversation_ids: Set<string>, events: object[]}>} [] for non-object input.
 */
function groupRawEventsByTranscriptPath(rawData) {
  if (!rawData || typeof rawData !== 'object') return [];

  const stampOf = (ev) => String(ev.timestamp || ev.captured_at || '');
  const buckets = new Map();

  for (const [topConvId, events] of Object.entries(rawData)) {
    if (!Array.isArray(events)) continue;

    for (const ev of events) {
      const key = normalizeTranscriptGroupKey(ev, topConvId);
      if (!key) continue;

      let bucket = buckets.get(key);
      if (!bucket) {
        bucket = {
          key,
          transcript_path: null,
          conversation_ids: new Set(),
          events: [],
        };
        buckets.set(key, bucket);
      }

      const hook = ev?.hook_input || {};
      const queries = ev?.user_queries || {};
      const transcriptPath = String(hook.transcript_path || queries.path || '').trim();
      if (transcriptPath) bucket.transcript_path = transcriptPath;

      bucket.conversation_ids.add(String(topConvId));
      if (hook.conversation_id != null) bucket.conversation_ids.add(String(hook.conversation_id));

      bucket.events.push(ev);
    }
  }

  const groups = [...buckets.values()];
  for (const group of groups) {
    group.events.sort((left, right) => stampOf(left).localeCompare(stampOf(right)));
  }

  // After the per-group sort, the last event of a group is its newest one.
  const newestStamp = (group) =>
    group.events.length ? stampOf(group.events[group.events.length - 1]) : '';
  groups.sort((left, right) => newestStamp(right).localeCompare(newestStamp(left)));

  return groups;
}
|
|
|
|
|
|
/**
 * Build the list of chat windows (one per transcript-path group) with their
 * question/answer turns and token totals.
 *
 * Windows keep the order returned by groupRawEventsByTranscriptPath (newest
 * activity first); turns inside a window follow the group's event order. An
 * event becomes a turn only when it carries a last user text or a model text;
 * token counters are summed over all events of the group regardless. Groups
 * without any turn are omitted.
 *
 * @param {object} rawData - parsed chat log (`{ conversationId: Event[] }`).
 * @returns {Array<object>} window descriptors: transcript_path, conversation_id,
 *   conversation_ids, last_activity, event_count, turn_count, tokens, turns.
 */
function buildHookChatWindows(rawData) {
  const windows = [];

  for (const group of groupRawEventsByTranscriptPath(rawData)) {
    const tokens = {
      input_tokens: 0,
      output_tokens: 0,
      cache_read_tokens: 0,
      cache_write_tokens: 0,
    };
    const tokenFields = Object.keys(tokens);
    const turns = [];

    for (const ev of group.events) {
      const hook = ev.hook_input || {};
      const lastUserText = ev.user_queries?.last_user_text;
      const userText = typeof lastUserText === 'string' ? lastUserText.trim() : '';
      const modelText = typeof hook.text === 'string' ? hook.text.trim() : '';

      for (const field of tokenFields) {
        tokens[field] += Number(hook[field]) || 0;
      }

      if (!userText && !modelText) continue;

      turns.push({
        index: turns.length + 1, // 1-based position among emitted turns
        user: userText,
        model: modelText,
        at: ev.timestamp || ev.captured_at || '',
        model_name: hook.model || '',
      });
    }

    if (!turns.length) continue;

    const windowPath = group.transcript_path || group.key;
    const groupEvents = group.events;
    const lastActivity =
      groupEvents.length > 0
        ? String(
            groupEvents[groupEvents.length - 1].timestamp ||
              groupEvents[groupEvents.length - 1].captured_at ||
              ''
          )
        : '';

    windows.push({
      transcript_path: windowPath,
      conversation_id: pickPrimaryConversationId(windowPath, group.conversation_ids),
      conversation_ids: [...group.conversation_ids],
      last_activity: lastActivity,
      event_count: groupEvents.length,
      turn_count: turns.length,
      tokens,
      turns,
    });
  }

  return windows;
}
|
|
|
|
|
|
/**
 * Extract the most recent `hook_input.text` snippets from the events.
 *
 * @param {Iterable<object>} events - hook events.
 * @param {number} [limit=5] - maximum number of snippets returned.
 * @returns {Array<{preview: string, full: string, at: *, model: *}>} snippets
 *   sorted newest first (string comparison on `timestamp || captured_at`);
 *   `preview` is the first 400 characters of the trimmed text.
 */
function buildAiSnippets(events, limit = 5) {
  const snippets = [];

  for (const ev of events) {
    const hook = ev?.hook_input;
    const rawText = hook?.text;
    const usable = typeof rawText === 'string' && rawText.trim() !== '';

    if (usable) {
      const full = rawText.trim();
      snippets.push({
        preview: full.slice(0, 400),
        full,
        at: ev.timestamp || ev.captured_at,
        model: hook?.model,
      });
    }
  }

  snippets.sort((left, right) => String(right.at).localeCompare(String(left.at)));
  return snippets.slice(0, limit);
}
|
|
|
|
|
|
/**
 * Assemble the narrative evaluation shown in the report: the overall text,
 * ability dimensions, issue list, learning tips and meta signals.
 *
 * @param {object} params
 * @param {Array<{id: string, label: string, done: boolean}>} params.steps - rubric step results.
 * @param {number} params.percent - step-completion percentage.
 * @param {?string} params.userEmail - e-mail shown as user hint, if known.
 * @param {number} params.convCount - fallback for both counts below.
 * @param {number} [params.transcriptWindowCount] - number of merged chat windows.
 * @param {number} [params.conversationBucketCount] - number of raw log buckets.
 * @param {number} params.eventCount - total hook events.
 * @param {object} params.tokens - token totals, passed through to `meta`.
 * @param {object} [params.profile] - session profile; defaults to an all-false profile.
 * @param {Array<object>} [params.dimensions] - evaluation dimensions; loaded via
 *   loadEvaluationDimensions(PROJECT_ROOT) when missing or empty.
 * @returns {{overall: string, ability: Array<object>, issues: Array<object>,
 *   learning: string[], meta: object}}
 */
function evaluateNarrative({
  steps,
  percent,
  userEmail,
  convCount,
  transcriptWindowCount,
  conversationBucketCount,
  eventCount,
  tokens,
  profile,
  dimensions,
}) {
  const prof = profile || {
    heuristicUntrustworthy: false,
    pasteHeavy: false,
    courseDump: false,
    hasTraceback: false,
    shortTurns: 0,
  };
  const untrusted = prof.heuristicUntrustworthy;

  // A count is used only when it is a non-negative number; otherwise convCount.
  const countOr = (value, fallback) => (typeof value === 'number' && value >= 0 ? value : fallback);
  const bucketN = countOr(conversationBucketCount, convCount);
  const winN = countOr(transcriptWindowCount, convCount);

  const doneLabels = [];
  const missing = [];
  for (const step of steps) {
    (step.done ? doneLabels : missing).push(step.label);
  }

  const describeTone = () => {
    if (untrusted) {
      return '因对话里整段粘贴较多,系统自动给出的「完成度」偏保守,避免只看关键词就认为全对';
    }
    if (percent >= 85) return '与实训目标较为一致(仍建议对照你已提交的文件是否齐全)';
    if (percent >= 50) return '已覆盖部分环节';
    return '尚处于早期摸索阶段';
  };
  const tone = describeTone();

  // Section 1: what the evaluation is based on (omitted when there are no windows).
  let transcriptCtx = '';
  if (winN > 0 && bucketN !== winN) {
    transcriptCtx = `一、这份总体评价依据什么\n主要依据两样:你在 Cursor 里与 AI 的对话(系统把同一聊天窗口合并后,约 ${winN} 个窗口;原始日志里约 ${bucketN} 组条目),以及(若页面后面有)对你远程作业仓库的自动检查。\n\n`;
  } else if (winN > 0) {
    transcriptCtx = `一、这份总体评价依据什么\n主要依据你在 Cursor 里与 AI 的对话(合并后约 ${winN} 个聊天窗口),以及(若页面后面有)对你作业仓库的自动检查。\n\n`;
  }

  // Section 2: what the conversation shows.
  let learnBlock;
  if (untrusted) {
    learnBlock = `二、从对话里能看出什么\n当前记录里单次粘贴的文字偏长,整体更像整份实训说明、任务书或与报告系统相关的讨论,较少出现你用自己语言描述「数据在哪、有哪些列、终端输出了什么、哪一行报错」等细节。因此,单靠这些聊天内容,不足以代替老师判断你是否一步步独立完成了读表、统计和作图;若你提交的仓库里脚本、数据和图表已经齐全、可运行,请以仓库和实际运行结果为准。\n\n`;
  } else {
    learnBlock = `二、从对话里能看出什么\n从摘录看,请关注你是否用个人化的说法描述数据路径、列名和运行结果,而不只是复述教材用语。\n\n`;
  }

  // Section 3: comparison with the lab steps.
  let stepBlock = '';
  if (untrusted) {
    stepBlock =
      `三、关于页面上「步骤完成」的提示\n系统会根据对话里是否出现 read_csv、matplotlib、scores.csv 等词,自动估算完成度(当前约 ${percent}%)。` +
      `若这些词多来自你粘贴的讲义或示例全文,「显示完成」不等于你已经亲手做完每一步;老师仍以你提交的 score_analysis.py、scores.csv、score_chart.png 等是否真实、可用为准。\n\n`;
  } else if (percent >= 85) {
    const matched = doneLabels.length
      ? ` 已能对上这些环节(仅供参考):${doneLabels.slice(0, 4).join(';')}${doneLabels.length > 4 ? '…' : ''}。`
      : '';
    stepBlock =
      `三、与实训任务的对照(自动估算)\n从关键词与对话形态看,与任务要求的吻合度约 ${percent}%,形态上「${tone}」。` +
      matched +
      `\n\n`;
  } else {
    const toImprove = missing.length ? ` 还可加强:${missing.slice(0, 4).join(';')}。` : '';
    stepBlock =
      `三、与实训任务的对照(自动估算)\n从关键词与对话形态看,约 ${percent}%(${tone})。` +
      toImprove +
      `\n\n`;
  }

  // Sections 4 and 5 share the same four-way case distinction.
  let usageMode;
  if (winN === 0) usageMode = 'empty';
  else if (untrusted) usageMode = 'untrusted';
  else if (eventCount / Math.max(winN, 1) >= 12) usageMode = 'busy';
  else usageMode = 'regular';

  const part4ByMode = {
    empty: '目前几乎没有可用的对话片段,暂无法从记录里判断你与 AI 的协作方式。',
    untrusted:
      '从记录形态看,更像一次性粘贴大段材料,不利于看出你是否分步向 AI 描述目标与报错;若你希望报告如实反映你的能力,请多用多轮、短句、带具体路径与报错信息的提问。',
    busy: '你与 AI 互动较多;若每轮只问一件具体的事、写清约束,更有利于得到可运行的代码。',
    regular: '建议每轮写清数据路径与期望输出,出错时把终端里的报错全文提供给 AI。',
  };
  const part5ByMode = {
    empty:
      '若你确实在 Cursor 里完成过练习,请确认本平台的练习日志已正确提交或由教师端汇总;否则老师只能看到一份空白或极少的记录。',
    untrusted:
      '把任务拆成「读表 → 统计 → 出图 → 保存」多轮完成;每一轮说明文件路径、想要的结果;若报错,复制完整英文报错与当前目录。',
    busy: '继续保持「一次一事」的提问习惯,并在每轮附上相关代码或输出,方便 AI 接续上下文。',
    regular: '先自己运行脚本看一眼输出,再把「现象 + 报错 + 文件路径」写进提问里。',
  };
  const part4body = part4ByMode[usageMode];
  const part5body = part5ByMode[usageMode];

  const overall =
    `${transcriptCtx}${learnBlock}${stepBlock}四、使用 AI 辅助学习的情况\n${part4body}\n\n五、给你的建议\n${part5body}\n`;

  const dimList =
    dimensions && dimensions.length ? dimensions : loadEvaluationDimensions(PROJECT_ROOT);
  const ability = buildAbilityDimensionsFromYaml(dimList, steps, prof);

  const stepIsDone = (stepId) => Boolean(steps.find((s) => s.id === stepId)?.done);

  const issues = [];
  if (untrusted) {
    issues.push({
      title: '对话记录能否代表你的全部实训过程?',
      body: '当前日志里长段粘贴较多,系统为避免「仅凭关键词就认为步骤全完成」,已对纯聊天推断做了保守处理。老师改作业时仍以你提交的脚本、数据与图表为准。若需要更详细的文字分析,可由教师配置完整报告生成(非快速模式)。',
    });
  }
  if (!stepIsDone('s4')) {
    issues.push({
      title: '大模型运用:可视化与交付物表述',
      body: '未稳定识别学员在对话中围绕 matplotlib、保存 png、score_chart 的分步讨论。若已完成,请在对话里明确图表类型与文件名,便于与头歌评测对齐。',
    });
  }
  if (!stepIsDone('s2')) {
    issues.push({
      title: '大模型运用:数据与 Prompt 结构',
      body: '建议在多轮对话中显式出现 scores.csv、read_csv、均值等业务词与个人运行结果,而不是只在长讲义中出现关键词。',
    });
  }

  const learning = [
    '把一次大需求拆成「读表 → 清洗/列名 → 统计 → 出图 → 保存」多轮提问,每轮附当前代码片段,更利于大模型稳定输出。',
    '运行失败时粘贴完整 Traceback,并说明工作目录与文件名,体现真实排错能力,也便于模型定位。',
    '对照头歌评测脚本自检:仓库中是否真实存在 scores.csv、score_analysis.py、score_chart.png,并与对话描述一致。',
  ];

  const evaluationSignals = {
    heuristic_untrustworthy: prof.heuristicUntrustworthy,
    paste_heavy: prof.pasteHeavy,
    course_style_dump: prof.courseDump,
    transcript_window_count: winN,
    conversation_bucket_count: bucketN,
  };

  return {
    overall,
    ability,
    issues,
    learning,
    meta: {
      user_hint: userEmail || '(未在日志中发现邮箱)',
      tokens,
      evaluation_signals: evaluationSignals,
    },
  };
}
|
|
|
|
|
|
/**
 * Read a chat_logs.json file and build the full lab report from it.
 *
 * The file is expected to hold `{ conversationId: Event[] }`. Each conversation
 * gets its own rubric evaluation; in addition, all events are merged and
 * profiled per transcript window to produce the overall summary and narrative.
 *
 * Errors from fs.readFileSync (e.g. a missing file) are not caught here and
 * propagate to the caller.
 *
 * @param {string} filePath - path of the chat log JSON file.
 * @returns {object} `{ ok: true, ...report }` on success, or
 *   `{ ok: false, error, detail? }` when the content is not valid JSON or not
 *   an object.
 */
function analyzeChatLogsFile(filePath) {
  const raw = fs.readFileSync(filePath, 'utf8');

  let data;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    return { ok: false, error: 'chat_logs.json 不是合法 JSON', detail: String(e.message) };
  }

  const isObjectLike = Boolean(data) && typeof data === 'object';
  if (!isObjectLike) {
    return { ok: false, error: 'chat_logs.json 格式应为 { conversationId: Event[] }' };
  }

  const conversations = [];
  let allEvents = [];

  for (const [convId, events] of Object.entries(data)) {
    if (!Array.isArray(events)) continue;

    const convProfile = detectSessionProfile(events);
    const convText = collectTexts(events).toLowerCase();
    const steps = demoteStepsForProfile(matchRubric(convText), convProfile);
    const tokens = aggregateTokens(events);

    const userEmail = events.map((e) => e?.hook_input?.user_email).find(Boolean) || null;

    // Prefer the newest model name that is not the literal 'default'.
    const modelsNewestFirst = events.map((e) => e?.hook_input?.model).reverse();
    const lastModel =
      modelsNewestFirst.find((m) => m && m !== 'default') ||
      modelsNewestFirst.find(Boolean) ||
      null;

    const captureStamps = events
      .map((e) => e.timestamp || e.captured_at)
      .filter(Boolean)
      .sort();

    conversations.push({
      id: convId,
      event_count: events.length,
      user_email: userEmail,
      last_model: lastModel,
      steps,
      lab_progress_percent: scoreLabProgress(steps),
      tokens,
      topic_preview: deriveSessionTopicPreview(events) || null,
      last_capture: captureStamps.pop() || null,
      hook_transcript: buildHookTranscript(events),
      questions: inferQuestionsFromEvents(convId, events),
      ai_snippets: buildAiSnippets(events),
    });

    // Keep a tagged copy of every event for the merged (cross-conversation) view.
    allEvents = allEvents.concat(events.map((e) => ({ ...e, _convId: convId })));
  }

  // Newest conversation first.
  conversations.sort((a, b) => String(b.last_capture || '').localeCompare(String(a.last_capture || '')));

  const dimensions = loadEvaluationDimensions(PROJECT_ROOT);

  const transcriptGroups = groupRawEventsByTranscriptPath(data);
  const mergedText = allEvents
    .map((e) => collectTexts([e]))
    .join('\n')
    .toLowerCase();
  const mergedProfile = aggregateProfileAcrossTranscriptWindows(transcriptGroups);
  const mergedSteps = demoteStepsForProfile(matchRubric(mergedText), mergedProfile);
  const mergedPercent = scoreLabProgress(mergedSteps);
  const mergedTokens = aggregateTokens(allEvents);

  const primaryEmail =
    conversations.map((c) => c.user_email).find(Boolean) ||
    allEvents.map((e) => e?.hook_input?.user_email).find(Boolean) ||
    null;

  // A window counts only if at least one event has a last user text or a model text.
  const eventHasText = (ev) => {
    const lastUserText = ev.user_queries?.last_user_text;
    const hook = ev.hook_input || {};
    const userText = typeof lastUserText === 'string' ? lastUserText.trim() : '';
    const modelText = typeof hook.text === 'string' ? hook.text.trim() : '';
    return Boolean(userText || modelText);
  };
  const transcriptWindowCount = transcriptGroups.filter((group) =>
    (group.events || []).some(eventHasText)
  ).length;

  const evaluation = evaluateNarrative({
    steps: mergedSteps,
    percent: mergedPercent,
    userEmail: primaryEmail,
    convCount: conversations.length,
    transcriptWindowCount,
    conversationBucketCount: conversations.length,
    eventCount: allEvents.length,
    tokens: mergedTokens,
    profile: mergedProfile,
    dimensions,
  });

  return {
    ok: true,
    generated_at: new Date().toISOString(),
    lab_title: '实训:AI 编程辅助工具原理与实操 — 成绩分析脚本',
    course: '大模型与智能编程实践',
    experiment: 'score_analysis.py / scores.csv / score_chart.png',
    summary: {
      conversation_count: conversations.length,
      hook_event_count: allEvents.length,
      lab_progress_percent: mergedPercent,
      rubric_steps: mergedSteps,
    },
    conversations,
    evaluation,
  };
}
|
|
|
|
|
|
/**
 * Build the placeholder report used when no chat log is available.
 *
 * All counters are zero, every rubric step is marked not done, and the
 * narrative is generated by evaluateNarrative from that empty state.
 *
 * @returns {object} a report with the same shape as a successful
 *   analyzeChatLogsFile result, plus an explanatory `note`.
 */
function defaultReport() {
  // Called twice on purpose: summary and narrative each get their own array.
  const blankSteps = () => RUBRIC.map(({ id, label }) => ({ id, label, done: false }));

  const emptyProfile = {
    heuristicUntrustworthy: false,
    pasteHeavy: false,
    courseDump: false,
    hasTraceback: false,
    shortTurns: 0,
  };
  const zeroTokens = { input_tokens: 0, output_tokens: 0, cache_read_tokens: 0 };

  return {
    ok: true,
    generated_at: new Date().toISOString(),
    lab_title: '实训:AI 编程辅助工具原理与实操 — 成绩分析脚本',
    course: '大模型与智能编程实践',
    experiment: 'score_analysis.py / scores.csv / score_chart.png',
    summary: {
      conversation_count: 0,
      hook_event_count: 0,
      lab_progress_percent: 0,
      rubric_steps: blankSteps(),
    },
    conversations: [],
    evaluation: evaluateNarrative({
      steps: blankSteps(),
      percent: 0,
      userEmail: null,
      convCount: 0,
      transcriptWindowCount: 0,
      conversationBucketCount: 0,
      eventCount: 0,
      tokens: zeroTokens,
      profile: emptyProfile,
      dimensions: loadEvaluationDimensions(PROJECT_ROOT),
    }),
    note: '未找到 chat_logs.json 或文件为空,以下为占位结构。',
  };
}
|
|
|
|
|
|
// Public API of this module: the two report builders, the chat-window builder
// and the lab title constant.
const publicApi = {
  analyzeChatLogsFile,
  defaultReport,
  buildHookChatWindows,
  LAB_TITLE: '实训:AI 编程辅助工具原理与实操 — 成绩分析脚本',
};

module.exports = publicApi;
|