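/**
 * Ability evaluation: builds an evidence chain from evaluation-dimensions, the repository scan,
 * the training-step table, and conversation/question excerpts (including hook_excerpt_questions),
 * then revises the scores (conservative alignment) and writes verifiable review comments.
 */
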
const { mergeLearningWithTaskEvidence } = require('./taskAlignedLearning.cjs');
/**
 * Report whether the rubric step with the given id is marked as done.
 *
 * @param {Array<object>|null|undefined} steps - Rubric step records (each read for `id` and
 *   `done`). Anything that is not an array is treated as "no steps".
 * @param {string} id - Step identifier, compared with `===`.
 * @returns {boolean} Truthiness of `done` on the FIRST step whose id matches; `false` when no
 *   step matches.
 */
function stepDone(steps, id) {
  if (!Array.isArray(steps)) return false;
  // `step?.id` lets null/undefined holes in the list be skipped instead of throwing.
  const match = steps.find((step) => step?.id === id);
  return Boolean(match?.done);
}
/**
 * Count keyword hits across conversation questions, cross-session hook excerpts, and the
 * chat turns merged by transcript_path.
 *
 * One text "card" is built per entry of:
 *  - `report.conversations[].questions[]`  (title + ' ' + detail)
 *  - `report.hook_excerpt_questions[]`     (title + ' ' + detail)
 *  - `report.hook_chat_windows[].turns[]`  (user + '\n' + model)
 * Each counter is the number of cards (not the number of occurrences) whose lower-cased text
 * matches the corresponding keyword pattern.
 *
 * @param {object|null|undefined} report - Report object; a nullish report yields all zeros.
 * @returns {{nCards: number, csv: number, plot: number, err: number, explain: number}}
 */
function summarizeQuestionSignals(report) {
  const cards = [];

  const addQuestion = (question) => {
    if (question == null) return; // tolerate holes in the list
    cards.push(`${question.title || ''} ${question.detail || ''}`);
  };

  for (const conversation of report?.conversations || []) {
    for (const question of conversation?.questions || []) addQuestion(question);
  }
  for (const question of report?.hook_excerpt_questions || []) addQuestion(question);
  for (const chatWindow of report?.hook_chat_windows || []) {
    for (const turn of chatWindow?.turns || []) {
      if (turn == null) continue;
      cards.push(`${turn.user || ''}\n${turn.model || ''}`);
    }
  }

  // Lower-case every card once rather than once per pattern.
  const lowered = cards.map((card) => card.toLowerCase());
  // The patterns carry no `g`/`y` flag, so `test` is stateless across cards.
  const countMatches = (re) => lowered.filter((card) => re.test(card)).length;

  return {
    nCards: cards.length,
    csv: countMatches(/csv|read_csv|成绩|scores|pandas\.read/),
    plot: countMatches(/matplotlib|柱状|pyplot|png|作图|chart|score_chart/),
    err: countMatches(/traceback|报错|error|异常|exception|失败/),
    explain: countMatches(/解释|为什么|含义|什么意思|咋回事/),
  };
}
/**
 * Summarise the repository scan as a single line of `;`-joined findings.
 *
 * Findings, in order: script path, data (CSV) path, chart file (with byte size when the scan
 * reports one), then the `read_csv` / `mean` / `matplotlib` snippet flags. When nothing is
 * found a fixed "nothing detected" sentence is returned instead.
 *
 * @param {object|null|undefined} scan - Scan result; reads `score_analysis_py.path`,
 *   `scores_csv.path`, `score_chart_png.{exists,path,bytes}` and
 *   `flags.{has_read_csv,has_mean,has_matplotlib}`. A nullish scan is treated as empty.
 * @returns {string} The one-line summary.
 */
function buildRepoOneLiner(scan) {
  const repo = scan || {};
  const flags = repo.flags || {};
  const parts = [];

  if (repo.score_analysis_py?.path) parts.push(`脚本 ${repo.score_analysis_py.path}`);
  if (repo.scores_csv?.path) parts.push(`数据 ${repo.scores_csv.path}`);

  const chart = repo.score_chart_png;
  if (chart?.exists) {
    // Leave out fields the scan did not report rather than printing the text "undefined".
    const label = chart.path == null ? '图表' : `图表 ${chart.path}`;
    const size = chart.bytes == null ? '' : `(${chart.bytes} 字节)`;
    parts.push(`${label}${size}`);
  }

  if (flags.has_read_csv) parts.push('片段含 read_csv');
  if (flags.has_mean) parts.push('含统计/mean');
  if (flags.has_matplotlib) parts.push('含 matplotlib');

  if (parts.length === 0) {
    return '未检出典型 score_analysis.py / scores.csv / score_chart.png 或片段无上述 API 迹象';
  }
  return parts.join(';');
}
/**
 * Build the explanatory hint stored alongside the ability section of a report.
 *
 * The hint is assembled from up to three pieces:
 *  1. a fixed base sentence naming the evidence sources;
 *  2. a context sentence, present only when `transcript_window_count` is a number >= 0, that
 *     states the window count and, when `conversation_bucket_count` is a larger number, the
 *     bucket count as well;
 *  3. a note chosen by `report.source` (`ai_full`, `heuristic_fast` / `heuristic_only`,
 *     `heuristic_fallback`); any other source adds nothing.
 *
 * @param {object|null|undefined} report - Report object; reads `source` and
 *   `evaluation.meta.evaluation_signals`.
 * @returns {string} The assembled hint.
 */
function buildAbilitySectionHint(report) {
  const source = report?.source || '';
  const signals = report?.evaluation?.meta?.evaluation_signals || {};
  const windowCount = signals.transcript_window_count;
  const bucketCount = signals.conversation_bucket_count;

  const base =
    '分值与评语依据 config/evaluation-dimensions.yaml,并与远程仓库扫描(交付物与脚本片段关键词)、「实训步骤 / 评测对齐」表、对话与提问摘录(含按 transcript_path 合并的 hook 轮次与同口径摘要)交叉核验。';

  let contextLine = '';
  if (typeof windowCount === 'number' && windowCount >= 0) {
    const bucketNote =
      typeof bucketCount === 'number' && bucketCount > windowCount ? `(日志顶层桶 ${bucketCount} 个)` : '';
    contextLine = ` 学员对话侧已按 Cursor \`transcript_path\` 合并为 ${windowCount} 个聊天窗口${bucketNote},能力与提问证据与同口径对齐。`;
  }

  const heuristicNote = ' 当前为启发式路径:分数由规则与日志形态估算,评语中带【核验要点】便于对照失分点。';
  // A Map (not a plain object) so that a source such as "constructor" cannot hit the prototype.
  const sourceNotes = new Map([
    ['ai_full', ' 整页大模型给出的维度已用仓库与提问证据校准;冲突时优先可信的机器扫描与步骤判定。'],
    ['heuristic_fast', heuristicNote],
    ['heuristic_only', heuristicNote],
    ['heuristic_fallback', ' 大模型整页失败,以下为启发式能力条与证据拼接。'],
  ]);

  return `${base}${contextLine}${sourceNotes.get(source) ?? ''}`;
}
/**
 * Conservatively re-calibrate ability scores in place, so that a dimension does not keep a
 * high score when no code or deliverable was detected.
 *
 * Per-row rules, keyed by `row.id`:
 *  - `data_stats`:     capped at 46 when there is no script, no CSV and no `has_read_csv` flag;
 *                      +5 (max 100) when script, `has_read_csv` and `has_mean` are all present.
 *  - `visualization`:  capped at 52 when there is neither a chart PNG nor `has_matplotlib`;
 *                      +5 (max 100) when both are present.
 *  - `tool_use_debug`: lowered to 56 when step `s3` is not done and the score is above 58.
 *  - `prompt_quality`: capped at 58 when the trust cap is active.
 *  - `code_literacy`:  lowered to 58 when step `s5` is not done and the score is above 62.
 * The trust cap is active when `evaluation_signals.heuristic_untrustworthy` is truthy and
 * `report.source` is not `'ai_full'`; it additionally scales every numeric score by 0.9.
 * The result is rounded and clamped to 0..100. Rows whose `value` is not a (non-NaN) number
 * are left untouched.
 *
 * @param {Array<object>} ability - Ability rows (`{id, value, ...}`); mutated in place.
 * @param {object|null|undefined} ctx - Reads `scan` and `rubric_steps`.
 * @param {object|null|undefined} report - Reads `summary.rubric_steps` (fallback for steps),
 *   `evaluation.meta.evaluation_signals` and `source`.
 * @returns {void}
 */
function reconcileAbilityScores(ability, ctx, report) {
  const scan = ctx?.scan || {};
  const flags = scan.flags || {};
  // `report?.` here matches the guarded reads of `report` just below.
  const steps = ctx?.rubric_steps || report?.summary?.rubric_steps || [];
  const signals = report?.evaluation?.meta?.evaluation_signals || {};
  const source = report?.source || '';
  const trustCap = Boolean(signals.heuristic_untrustworthy) && source !== 'ai_full';

  // Scan facts do not depend on the row, so compute them once.
  const hasPy = Boolean(scan.score_analysis_py);
  const hasCsv = Boolean(scan.scores_csv);
  const hasPng = Boolean(scan.score_chart_png?.exists);

  for (const row of ability) {
    if (row == null || typeof row.value !== 'number' || Number.isNaN(row.value)) continue;
    let score = row.value;

    switch (row.id) {
      case 'data_stats':
        if (!hasPy && !hasCsv && !flags.has_read_csv) score = Math.min(score, 46);
        if (hasPy && flags.has_read_csv && flags.has_mean) score = Math.min(100, score + 5);
        break;
      case 'visualization':
        if (!hasPng && !flags.has_matplotlib) score = Math.min(score, 52);
        if (hasPng && flags.has_matplotlib) score = Math.min(100, score + 5);
        break;
      case 'tool_use_debug':
        // NOTE(review): 57-58 pass through unchanged while 59+ drops to 56 — confirm intended.
        if (!stepDone(steps, 's3') && score > 58) score = Math.min(score, 56);
        break;
      case 'prompt_quality':
        if (trustCap) score = Math.min(score, 58);
        break;
      case 'code_literacy':
        // NOTE(review): 59-62 pass through unchanged while 63+ drops to 58 — confirm intended.
        if (!stepDone(steps, 's5') && score > 62) score = Math.min(score, 58);
        break;
      default:
        break;
    }

    if (trustCap) score = Math.round(score * 0.9);
    row.value = Math.max(0, Math.min(100, Math.round(score)));
  }
}
/**
 * Append a verifiable evidence paragraph to the comment of every ability row, in place.
 *
 * For each row an evidence string `【核验要点】<repo one-liner>。【本维证据】<parts>。` is built,
 * where `<parts>` depends on `row.id` (prompt_quality, tool_use_debug, data_stats,
 * visualization, code_literacy; any other id repeats the repo one-liner). The string is
 * appended to the trimmed existing comment (separated by a blank line) or becomes the
 * comment when there was none. Comments longer than 920 characters are cut to 917
 * characters plus `…`.
 *
 * Every known dimension yields at least one evidence part; `visualization` falls back to a
 * "no plotting evidence found" note so the evidence sentence is never empty.
 *
 * @param {Array<object>} ability - Ability rows (`{id, comment, ...}`); mutated in place.
 * @param {object|null|undefined} ctx - Reads `scan` and `rubric_steps`.
 * @param {object} report - Reads `summary`, `evaluation.meta.evaluation_signals`, and is passed
 *   to `summarizeQuestionSignals`.
 * @returns {void}
 */
function enrichAbilityComments(ability, ctx, report) {
  const scan = ctx?.scan || {};
  const flags = scan.flags || {};
  const steps = ctx?.rubric_steps || report?.summary?.rubric_steps || [];
  const signals = report?.evaluation?.meta?.evaluation_signals || {};
  const qsig = summarizeQuestionSignals(report);
  const repoLine = buildRepoOneLiner(scan);
  const hookN = report?.summary?.hook_event_count ?? 0;
  const convN = report?.summary?.conversation_count ?? 0;
  // Without a merged-window count, fall back to the raw conversation count.
  const winN = signals.transcript_window_count ?? convN;

  for (const row of ability) {
    if (row == null) continue;
    const parts = [];

    switch (row.id) {
      case 'prompt_quality':
        parts.push(
          `提问/摘录卡片约 ${qsig.nCards} 条;按 transcript_path 合并 ${winN} 个聊天窗口、${convN} 个日志顶层桶、${hookN} 次钩子事件`
        );
        if (signals.heuristic_untrustworthy) parts.push('日志偏长文粘贴,分步目标描述证据偏弱');
        else if (qsig.nCards >= 3) parts.push('多轮中有可检索的用户提问主题');
        else parts.push('可归纳的短问较少,建议按「读表→统计→出图」分轮写清路径与期望输出');
        break;

      case 'tool_use_debug':
        if (qsig.err) parts.push(`提问中含报错/异常类表述约 ${qsig.err} 条`);
        if (stepDone(steps, 's3')) parts.push('步骤表含运行与调试相关判定');
        if (!qsig.err && !stepDone(steps, 's3')) parts.push('建议在出错时粘贴完整 Traceback 与工作目录');
        break;

      case 'data_stats':
        if (flags.has_read_csv) parts.push('脚本片段检出 read_csv/pandas');
        if (qsig.csv) parts.push(`摘录中含 CSV/读取/成绩相关约 ${qsig.csv} 条`);
        if (flags.has_mean) parts.push('片段中含 mean/统计');
        if (stepDone(steps, 's2')) parts.push('评测步骤「统计代码」已对齐');
        if (!flags.has_read_csv && !qsig.csv) parts.push('代码与提问两侧均未强体现数据读取,需补证据');
        break;

      case 'visualization':
        if (flags.has_matplotlib) parts.push('脚本片段检出 matplotlib');
        if (qsig.plot) parts.push(`摘录中含作图/png/柱状图相关约 ${qsig.plot} 条`);
        if (scan.score_chart_png?.exists) parts.push(`已检出图表文件 ${scan.score_chart_png.path}`);
        if (stepDone(steps, 's4')) parts.push('步骤表含可视化交付物判定');
        // Without this fallback the evidence sentence would render as an empty "【本维证据】。".
        if (parts.length === 0) parts.push('脚本、图表文件与提问侧均未检出作图证据,需补证据');
        break;

      case 'code_literacy':
        if (qsig.explain) parts.push(`摘录中含「解释/为什么」类约 ${qsig.explain} 条`);
        if (stepDone(steps, 's5')) parts.push('步骤表含请 AI 解释代码的迹象');
        if (!qsig.explain && !stepDone(steps, 's5')) parts.push('可补充「解释这段统计与作图在做什么」类追问');
        break;

      default:
        parts.push(repoLine);
    }

    const evidence = `【核验要点】${repoLine}。【本维证据】${parts.join(';')}。`;
    const existing = String(row.comment || '').trim();
    row.comment = existing ? `${existing}\n\n${evidence}` : evidence;
    if (row.comment.length > 920) row.comment = `${row.comment.slice(0, 917)}…`;
  }
}
/**
 * Calibrate ability scores, append evidence to their comments, and set the UI hint.
 *
 * Call this after the hook transcript map and the merged hook questions have been written
 * into `report`, so that `hook_excerpt_questions` already exists.
 *
 * Does nothing when `ctx` is falsy, when `report.evaluation` is missing, or when
 * `report.evaluation.ability` is not a non-empty array. Otherwise it mutates the ability rows
 * and sets `report.ui.ability_section_hint` (creating `report.ui` when it is falsy).
 *
 * NOTE: not idempotent — a second call re-applies the score adjustments and appends another
 * evidence paragraph to each comment.
 *
 * @param {object|null|undefined} ctx - Evaluation context forwarded to the helpers.
 * @param {object|null|undefined} report - Report object; mutated in place.
 * @returns {void}
 */
function enrichAbilityWithTaskAndRepoEvidence(ctx, report) {
  if (!ctx || !report?.evaluation) return;

  const { ability } = report.evaluation;
  if (!Array.isArray(ability) || ability.length === 0) return;

  reconcileAbilityScores(ability, ctx, report);
  enrichAbilityComments(ability, ctx, report);

  if (!report.ui) report.ui = {};
  report.ui.ability_section_hint = buildAbilitySectionHint(report);
}
/**
 * Merge the learning suggestions and then the ability evidence into a report.
 *
 * Intended to be called once, after enrichReportWithHookTranscriptMap. Does nothing when
 * `ctx` is falsy or `report.ok` is falsy.
 *
 * @param {object|null|undefined} ctx - Evaluation context forwarded to both steps.
 * @param {object|null|undefined} report - Report object; mutated by the two steps.
 * @returns {void}
 */
function finalizeTaskAlignedEvidence(ctx, report) {
  const ready = Boolean(ctx) && Boolean(report?.ok);
  if (!ready) return;

  mergeLearningWithTaskEvidence(ctx, report);
  enrichAbilityWithTaskAndRepoEvidence(ctx, report);
}
module.exports = {
|
|
|
enrichAbilityWithTaskAndRepoEvidence,
|
|
|
finalizeTaskAlignedEvidence,
|
|
|
buildAbilitySectionHint,
|
|
|
summarizeQuestionSignals,
|
|
|
};