/** * 能力评估:结合 evaluation-dimensions、仓库扫描、实训步骤表、对话与提问摘录(含 hook_excerpt_questions)做证据链, * 修订分值(保守对齐)并写入可核验评语。 */ const { mergeLearningWithTaskEvidence } = require('./taskAlignedLearning.cjs'); function stepDone(steps, id) { return Boolean((steps || []).find((s) => s.id === id)?.done); } /** 从会话提问、跨会话 Hook 摘录、按 transcript_path 合并的轮次中统计关键词命中 */ function summarizeQuestionSignals(report) { const texts = []; for (const c of report.conversations || []) { for (const q of c.questions || []) { texts.push(`${q.title || ''} ${q.detail || ''}`); } } for (const q of report.hook_excerpt_questions || []) { texts.push(`${q.title || ''} ${q.detail || ''}`); } for (const w of report.hook_chat_windows || []) { for (const t of w.turns || []) { texts.push(`${t.user || ''}\n${t.model || ''}`); } } const blob = texts.join('\n').toLowerCase(); const countMatches = (re) => { let n = 0; for (const t of texts) { const x = t.toLowerCase(); if (re.test(x)) n += 1; } return n; }; return { nCards: texts.length, csv: countMatches(/csv|read_csv|成绩|scores|pandas\.read/), plot: countMatches(/matplotlib|柱状|pyplot|png|作图|chart|score_chart/), err: countMatches(/traceback|报错|error|异常|exception|失败/), explain: countMatches(/解释|为什么|含义|什么意思|咋回事/), }; } function buildRepoOneLiner(scan) { const parts = []; if (scan.score_analysis_py?.path) parts.push(`脚本 ${scan.score_analysis_py.path}`); if (scan.scores_csv?.path) parts.push(`数据 ${scan.scores_csv.path}`); if (scan.score_chart_png?.exists) parts.push(`图表 ${scan.score_chart_png.path}(${scan.score_chart_png.bytes} 字节)`); const f = scan.flags || {}; if (f.has_read_csv) parts.push('片段含 read_csv'); if (f.has_mean) parts.push('含统计/mean'); if (f.has_matplotlib) parts.push('含 matplotlib'); return parts.length ? parts.join(';') : '未检出典型 score_analysis.py / scores.csv / score_chart.png 或片段无上述 API 迹象'; } function buildAbilitySectionHint(report) { const src = report?.source || ''; const sig = report?.evaluation?.meta?.evaluation_signals || {}; const tw = sig.transcript_window_count; const tb = sig.conversation_bucket_count; const ctxLine = typeof tw === 'number' && tw >= 0 ? ` 学员对话侧已按 Cursor \`transcript_path\` 合并为 ${tw} 个聊天窗口${ typeof tb === 'number' && tb > tw ? `(日志顶层桶 ${tb} 个)` : '' },能力与提问证据与同口径对齐。` : ''; const base = '分值与评语依据 config/evaluation-dimensions.yaml,并与远程仓库扫描(交付物与脚本片段关键词)、「实训步骤 / 评测对齐」表、对话与提问摘录(含按 transcript_path 合并的 hook 轮次与同口径摘要)交叉核验。'; if (src === 'ai_full') return base + ctxLine + ' 整页大模型给出的维度已用仓库与提问证据校准;冲突时优先可信的机器扫描与步骤判定。'; if (src === 'heuristic_fast' || src === 'heuristic_only') return base + ctxLine + ' 当前为启发式路径:分数由规则与日志形态估算,评语中带【核验要点】便于对照失分点。'; if (src === 'heuristic_fallback') return base + ctxLine + ' 大模型整页失败,以下为启发式能力条与证据拼接。'; return base + ctxLine; } /** * 保守校准分值(避免「未检出代码却仍高分」) */ function reconcileAbilityScores(ability, ctx, report) { const scan = ctx?.scan || {}; const flags = scan.flags || {}; const steps = ctx?.rubric_steps || report.summary?.rubric_steps || []; const sig = report?.evaluation?.meta?.evaluation_signals || {}; const src = report?.source || ''; const trustCap = sig.heuristic_untrustworthy && src !== 'ai_full'; for (const row of ability) { if (typeof row.value !== 'number' || Number.isNaN(row.value)) continue; let v = row.value; const hasPy = Boolean(scan.score_analysis_py); const hasCsv = Boolean(scan.scores_csv); const hasPng = Boolean(scan.score_chart_png?.exists); if (row.id === 'data_stats') { if (!hasPy && !hasCsv && !flags.has_read_csv) v = Math.min(v, 46); if (hasPy && flags.has_read_csv && flags.has_mean) v = Math.min(100, v + 5); } if (row.id === 'visualization') { if (!hasPng && !flags.has_matplotlib) v = Math.min(v, 52); if (hasPng && flags.has_matplotlib) v = Math.min(100, v + 5); } if (row.id === 'tool_use_debug') { if (!stepDone(steps, 's3') && v > 58) v = Math.min(v, 56); } if (row.id === 'prompt_quality' && trustCap) v = Math.min(v, 58); if (row.id === 'code_literacy' && !stepDone(steps, 's5') && v > 62) v = Math.min(v, 58); if (trustCap) v = Math.round(v * 0.9); row.value = Math.max(0, Math.min(100, Math.round(v))); } } function enrichAbilityComments(ability, ctx, report) { const scan = ctx?.scan || {}; const flags = scan.flags || {}; const steps = ctx?.rubric_steps || report.summary?.rubric_steps || []; const sig = report?.evaluation?.meta?.evaluation_signals || {}; const qsig = summarizeQuestionSignals(report); const repoLine = buildRepoOneLiner(scan); const hookN = report.summary?.hook_event_count ?? 0; const convN = report.summary?.conversation_count ?? 0; const winN = sig.transcript_window_count ?? convN; for (const row of ability) { const parts = []; switch (row.id) { case 'prompt_quality': parts.push( `提问/摘录卡片约 ${qsig.nCards} 条;按 transcript_path 合并 ${winN} 个聊天窗口、${convN} 个日志顶层桶、${hookN} 次钩子事件` ); if (sig.heuristic_untrustworthy) parts.push('日志偏长文粘贴,分步目标描述证据偏弱'); else if (qsig.nCards >= 3) parts.push('多轮中有可检索的用户提问主题'); else parts.push('可归纳的短问较少,建议按「读表→统计→出图」分轮写清路径与期望输出'); break; case 'tool_use_debug': if (qsig.err) parts.push(`提问中含报错/异常类表述约 ${qsig.err} 条`); if (stepDone(steps, 's3')) parts.push('步骤表含运行与调试相关判定'); if (!qsig.err && !stepDone(steps, 's3')) parts.push('建议在出错时粘贴完整 Traceback 与工作目录'); break; case 'data_stats': if (flags.has_read_csv) parts.push('脚本片段检出 read_csv/pandas'); if (qsig.csv) parts.push(`摘录中含 CSV/读取/成绩相关约 ${qsig.csv} 条`); if (flags.has_mean) parts.push('片段中含 mean/统计'); if (stepDone(steps, 's2')) parts.push('评测步骤「统计代码」已对齐'); if (!flags.has_read_csv && !qsig.csv) parts.push('代码与提问两侧均未强体现数据读取,需补证据'); break; case 'visualization': if (flags.has_matplotlib) parts.push('脚本片段检出 matplotlib'); if (qsig.plot) parts.push(`摘录中含作图/png/柱状图相关约 ${qsig.plot} 条`); if (scan.score_chart_png?.exists) parts.push(`已检出图表文件 ${scan.score_chart_png.path}`); if (stepDone(steps, 's4')) parts.push('步骤表含可视化交付物判定'); break; case 'code_literacy': if (qsig.explain) parts.push(`摘录中含「解释/为什么」类约 ${qsig.explain} 条`); if (stepDone(steps, 's5')) parts.push('步骤表含请 AI 解释代码的迹象'); if (!qsig.explain && !stepDone(steps, 's5')) parts.push('可补充「解释这段统计与作图在做什么」类追问'); break; default: parts.push(repoLine); } const ev = `【核验要点】${repoLine}。【本维证据】${parts.join(';')}。`; const base = String(row.comment || '').trim(); row.comment = base ? `${base}\n\n${ev}` : ev; if (row.comment.length > 920) row.comment = `${row.comment.slice(0, 917)}…`; } } /** * 在 hook 译文地图、hook 提问合并写入 report 之后调用(保证 hook_excerpt_questions 已存在)。 */ function enrichAbilityWithTaskAndRepoEvidence(ctx, report) { if (!ctx || !report?.evaluation) return; const ability = report.evaluation.ability; if (!Array.isArray(ability) || !ability.length) return; reconcileAbilityScores(ability, ctx, report); enrichAbilityComments(ability, ctx, report); report.ui = report.ui || {}; report.ui.ability_section_hint = buildAbilitySectionHint(report); } /** * 合并学习建议 + 能力证据(在 enrichReportWithHookTranscriptMap 之后一次调用) */ function finalizeTaskAlignedEvidence(ctx, report) { if (!ctx || !report?.ok) return; mergeLearningWithTaskEvidence(ctx, report); enrichAbilityWithTaskAndRepoEvidence(ctx, report); } module.exports = { enrichAbilityWithTaskAndRepoEvidence, finalizeTaskAlignedEvidence, buildAbilitySectionHint, summarizeQuestionSignals, };