You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

198 lines
9.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

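/**
 * Ability evaluation: builds an evidence chain from evaluation-dimensions, the repository scan,
 * the training-step table, and the conversation/question excerpts (including hook_excerpt_questions),
 * then revises the scores (conservative alignment) and writes comments that can be verified.
 */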
const { mergeLearningWithTaskEvidence } = require('./taskAlignedLearning.cjs');
/**
 * Report whether the rubric step with the given id is marked as done.
 *
 * @param {Array<{id: *, done?: *}>|null|undefined} steps - Step rows; a falsy value is
 *   treated as an empty list.
 * @param {*} id - Step identifier, compared with strict equality.
 * @returns {boolean} Truthiness of `done` on the first step whose id matches; `false`
 *   when no step matches.
 */
function stepDone(steps, id) {
  const rows = steps || [];
  // Only the first row with a matching id is consulted, even if later rows share the id.
  const firstMatch = rows.find((step) => step.id === id);
  return Boolean(firstMatch?.done);
}
/**
 * Count keyword hits across session questions, cross-session hook excerpts, and the chat
 * turns grouped under `hook_chat_windows`.
 *
 * Every question card (`title` + `detail`) and every chat turn (`user` + `model`) becomes
 * one text. Each counter is the number of texts (not the number of occurrences) that match
 * the keyword family, compared case-insensitively.
 *
 * @param {object|null|undefined} report - Report with optional `conversations[].questions`,
 *   `hook_excerpt_questions` and `hook_chat_windows[].turns` arrays. A missing report,
 *   missing arrays and null entries are treated as empty.
 * @returns {{nCards: number, csv: number, plot: number, err: number, explain: number}}
 *   `nCards` is the total number of texts collected; the other fields are per-family hit counts.
 */
function summarizeQuestionSignals(report) {
  const texts = [];
  const addCard = (card) => {
    if (card == null) return;
    texts.push(`${card.title || ''} ${card.detail || ''}`);
  };

  for (const conversation of report?.conversations || []) {
    for (const question of conversation?.questions || []) addCard(question);
  }
  for (const question of report?.hook_excerpt_questions || []) addCard(question);
  for (const chatWindow of report?.hook_chat_windows || []) {
    for (const turn of chatWindow?.turns || []) {
      if (turn == null) continue;
      texts.push(`${turn.user || ''}\n${turn.model || ''}`);
    }
  }

  // Lowercase once up front: the patterns below are written in lowercase, and doing it here
  // avoids repeating the conversion for every pattern.
  const lowered = texts.map((text) => text.toLowerCase());
  const countMatches = (re) => lowered.filter((text) => re.test(text)).length;

  return {
    nCards: texts.length,
    csv: countMatches(/csv|read_csv|成绩|scores|pandas\.read/),
    plot: countMatches(/matplotlib|柱状|pyplot|png|作图|chart|score_chart/),
    err: countMatches(/traceback|报错|error|异常|exception|失败/),
    explain: countMatches(/解释|为什么|含义|什么意思|咋回事/),
  };
}
/**
 * Build a one-line summary of what the repository scan found.
 *
 * Fragments are emitted in a fixed order (script path, data path, chart file, then the
 * `read_csv` / `mean` / `matplotlib` flags) and joined with a fullwidth semicolon so the
 * clauses stay readable. The chart size is shown in parentheses only when `bytes` is present.
 *
 * @param {object|null|undefined} scan - Scan result with optional `score_analysis_py`,
 *   `scores_csv`, `score_chart_png` and `flags` members; a missing scan is treated as empty.
 * @returns {string} The joined summary, or a fixed "nothing detected" sentence when no
 *   fragment applies.
 */
function buildRepoOneLiner(scan) {
  const found = scan || {};
  const parts = [];

  if (found.score_analysis_py?.path) parts.push(`脚本 ${found.score_analysis_py.path}`);
  if (found.scores_csv?.path) parts.push(`数据 ${found.scores_csv.path}`);

  const chart = found.score_chart_png;
  if (chart?.exists) {
    // Keep the size visually separate from the path and omit it when unknown, instead of
    // printing "undefined" or fusing the number onto the file name.
    const size = chart.bytes == null ? '' : `(${chart.bytes} 字节)`;
    parts.push(`图表 ${chart.path}${size}`);
  }

  const flags = found.flags || {};
  if (flags.has_read_csv) parts.push('片段含 read_csv');
  if (flags.has_mean) parts.push('含统计/mean');
  if (flags.has_matplotlib) parts.push('含 matplotlib');

  return parts.length
    ? parts.join(';')
    : '未检出典型 score_analysis.py / scores.csv / score_chart.png 或片段无上述 API 迹象';
}
/**
 * Compose the ability-section hint text for a report.
 *
 * The result is a fixed introduction, optionally followed by a note about how many chat
 * windows the transcripts were merged into, optionally followed by a sentence that depends
 * on `report.source`.
 *
 * @param {object|null|undefined} report - Report; reads `source` and
 *   `evaluation.meta.evaluation_signals.{transcript_window_count, conversation_bucket_count}`.
 * @returns {string} The assembled hint.
 */
function buildAbilitySectionHint(report) {
  const signals = report?.evaluation?.meta?.evaluation_signals || {};
  const windowCount = signals.transcript_window_count;
  const bucketCount = signals.conversation_bucket_count;

  // The window note appears only for a non-negative numeric window count; the bucket detail
  // is added only when the bucket count is a number strictly larger than the window count.
  let contextNote = '';
  if (typeof windowCount === 'number' && windowCount >= 0) {
    let bucketNote = '';
    if (typeof bucketCount === 'number' && bucketCount > windowCount) {
      bucketNote = `(日志顶层桶 ${bucketCount} 个)`;
    }
    contextNote = ` 学员对话侧已按 Cursor \`transcript_path\` 合并为 ${windowCount} 个聊天窗口${bucketNote},能力与提问证据与同口径对齐。`;
  }

  const intro =
    '分值与评语依据 config/evaluation-dimensions.yaml并与远程仓库扫描交付物与脚本片段关键词、「实训步骤 / 评测对齐」表、对话与提问摘录(含按 transcript_path 合并的 hook 轮次与同口径摘要)交叉核验。';

  let sourceNote;
  switch (report?.source || '') {
    case 'ai_full':
      sourceNote = ' 整页大模型给出的维度已用仓库与提问证据校准;冲突时优先可信的机器扫描与步骤判定。';
      break;
    case 'heuristic_fast':
    case 'heuristic_only':
      sourceNote = ' 当前为启发式路径:分数由规则与日志形态估算,评语中带【核验要点】便于对照失分点。';
      break;
    case 'heuristic_fallback':
      sourceNote = ' 大模型整页失败,以下为启发式能力条与证据拼接。';
      break;
    default:
      sourceNote = '';
  }

  return `${intro}${contextNote}${sourceNote}`;
}
/**
 * Conservatively recalibrate ability scores in place, so that a dimension does not keep a
 * high score when the repository scan and rubric steps show no supporting evidence.
 *
 * Rules applied per row (by `row.id`):
 * - `data_stats`: capped at 46 when no script, no CSV and no `read_csv` flag were found;
 *   +5 (max 100) when the script exists and both `has_read_csv` and `has_mean` are set.
 * - `visualization`: capped at 52 when neither a chart file nor the `has_matplotlib` flag was
 *   found; +5 (max 100) when both are present.
 * - `tool_use_debug`: capped at 56 while step `s3` is not done.
 * - `code_literacy`: capped at 58 while step `s5` is not done.
 * - `prompt_quality`: capped at 58 when the trust cap is active.
 * - When the trust cap is active (signals flagged `heuristic_untrustworthy` and the report
 *   source is not `ai_full`), every row is additionally scaled by 0.9.
 * The result is rounded and clamped to 0..100. Rows that are missing, or whose `value` is
 * not a number (or is NaN), are left untouched.
 *
 * @param {Array<{id: string, value: *}>} ability - Ability rows; `value` is rewritten in place.
 * @param {object|null|undefined} ctx - Context; reads `scan` and `rubric_steps`.
 * @param {object|null|undefined} report - Report; reads `summary.rubric_steps` (fallback when
 *   `ctx.rubric_steps` is falsy), `evaluation.meta.evaluation_signals` and `source`.
 * @returns {void}
 */
function reconcileAbilityScores(ability, ctx, report) {
  const scan = ctx?.scan || {};
  const flags = scan.flags || {};
  const steps = ctx?.rubric_steps || report?.summary?.rubric_steps || [];
  const signals = report?.evaluation?.meta?.evaluation_signals || {};
  const trustCap = Boolean(signals.heuristic_untrustworthy) && report?.source !== 'ai_full';

  // Scan facts do not change between rows, so derive them once.
  const hasPy = Boolean(scan.score_analysis_py);
  const hasCsv = Boolean(scan.scores_csv);
  const hasPng = Boolean(scan.score_chart_png?.exists);

  for (const row of ability) {
    if (!row || typeof row.value !== 'number' || Number.isNaN(row.value)) continue;
    let v = row.value;

    if (row.id === 'data_stats') {
      if (!hasPy && !hasCsv && !flags.has_read_csv) v = Math.min(v, 46);
      if (hasPy && flags.has_read_csv && flags.has_mean) v = Math.min(100, v + 5);
    }
    if (row.id === 'visualization') {
      if (!hasPng && !flags.has_matplotlib) v = Math.min(v, 52);
      if (hasPng && flags.has_matplotlib) v = Math.min(100, v + 5);
    }

    // The step-based caps are unconditional so the mapping stays monotonic: a higher raw
    // score must never end up below a lower one (a "only when above a threshold" guard would
    // let scores just under the threshold stay above the cap).
    if (row.id === 'tool_use_debug' && !stepDone(steps, 's3')) v = Math.min(v, 56);
    if (row.id === 'prompt_quality' && trustCap) v = Math.min(v, 58);
    if (row.id === 'code_literacy' && !stepDone(steps, 's5')) v = Math.min(v, 58);

    if (trustCap) v *= 0.9;
    row.value = Math.max(0, Math.min(100, Math.round(v)));
  }
}
/**
 * Append a verifiable evidence note to every ability row's comment, in place.
 *
 * For each row an evidence string is built from the repository one-liner plus
 * dimension-specific clauses (chosen by `row.id`) drawn from the scan flags, the rubric
 * steps and the keyword counts of the question excerpts. The clauses are joined with a
 * fullwidth semicolon. The evidence is appended to the existing comment after a blank line,
 * and the result is limited to 920 characters, ending in "..." when it had to be cut.
 *
 * @param {Array<{id: string, comment?: *}>} ability - Ability rows; `comment` is rewritten.
 * @param {object|null|undefined} ctx - Context; reads `scan` and `rubric_steps`.
 * @param {object|null|undefined} report - Report; reads `summary` counters, the evaluation
 *   signals and (through `summarizeQuestionSignals`) the question excerpts.
 * @returns {void}
 */
function enrichAbilityComments(ability, ctx, report) {
  const MAX_COMMENT_LENGTH = 920;
  const TRUNCATION_MARK = '...';
  const CLAUSE_SEPARATOR = ';';

  const scan = ctx?.scan || {};
  const flags = scan.flags || {};
  const steps = ctx?.rubric_steps || report?.summary?.rubric_steps || [];
  const sig = report?.evaluation?.meta?.evaluation_signals || {};
  const qsig = summarizeQuestionSignals(report);
  const repoLine = buildRepoOneLiner(scan);
  const hookN = report?.summary?.hook_event_count ?? 0;
  const convN = report?.summary?.conversation_count ?? 0;
  // Fall back to the conversation count when no merged-window count was recorded.
  const winN = sig.transcript_window_count ?? convN;

  for (const row of ability) {
    if (!row) continue;
    const parts = [];
    switch (row.id) {
      case 'prompt_quality':
        parts.push(
          `提问/摘录卡片约 ${qsig.nCards} 条;按 transcript_path 合并 ${winN} 个聊天窗口、${convN} 个日志顶层桶、${hookN} 次钩子事件`
        );
        if (sig.heuristic_untrustworthy) parts.push('日志偏长文粘贴,分步目标描述证据偏弱');
        else if (qsig.nCards >= 3) parts.push('多轮中有可检索的用户提问主题');
        else parts.push('可归纳的短问较少,建议按「读表→统计→出图」分轮写清路径与期望输出');
        break;
      case 'tool_use_debug':
        if (qsig.err) parts.push(`提问中含报错/异常类表述约 ${qsig.err}`);
        if (stepDone(steps, 's3')) parts.push('步骤表含运行与调试相关判定');
        if (!qsig.err && !stepDone(steps, 's3')) parts.push('建议在出错时粘贴完整 Traceback 与工作目录');
        break;
      case 'data_stats':
        if (flags.has_read_csv) parts.push('脚本片段检出 read_csv/pandas');
        if (qsig.csv) parts.push(`摘录中含 CSV/读取/成绩相关约 ${qsig.csv}`);
        if (flags.has_mean) parts.push('片段中含 mean/统计');
        if (stepDone(steps, 's2')) parts.push('评测步骤「统计代码」已对齐');
        if (!flags.has_read_csv && !qsig.csv) parts.push('代码与提问两侧均未强体现数据读取,需补证据');
        break;
      case 'visualization':
        if (flags.has_matplotlib) parts.push('脚本片段检出 matplotlib');
        if (qsig.plot) parts.push(`摘录中含作图/png/柱状图相关约 ${qsig.plot}`);
        if (scan.score_chart_png?.exists) parts.push(`已检出图表文件 ${scan.score_chart_png.path}`);
        if (stepDone(steps, 's4')) parts.push('步骤表含可视化交付物判定');
        break;
      case 'code_literacy':
        if (qsig.explain) parts.push(`摘录中含「解释/为什么」类约 ${qsig.explain}`);
        if (stepDone(steps, 's5')) parts.push('步骤表含请 AI 解释代码的迹象');
        if (!qsig.explain && !stepDone(steps, 's5')) parts.push('可补充「解释这段统计与作图在做什么」类追问');
        break;
      default:
        parts.push(repoLine);
    }

    // Separate the clauses so they do not run together into one unreadable string.
    const ev = `【核验要点】${repoLine}。【本维证据】${parts.join(CLAUSE_SEPARATOR)}`;
    const base = String(row.comment || '').trim();
    const full = base ? `${base}\n\n${ev}` : ev;

    // Cut so that text plus marker lands exactly on the limit, making truncation visible.
    row.comment =
      full.length > MAX_COMMENT_LENGTH
        ? `${full.slice(0, MAX_COMMENT_LENGTH - TRUNCATION_MARK.length)}${TRUNCATION_MARK}`
        : full;
  }
}
/**
 * Recalibrate the ability scores, append evidence to their comments, and store the
 * ability-section hint on `report.ui`.
 *
 * Call this after the hook transcript map and the hook questions have been merged into the
 * report, so that `hook_excerpt_questions` already exists.
 *
 * Does nothing when `ctx` is falsy or when `report.evaluation.ability` is not a non-empty array.
 *
 * @param {object} ctx - Evaluation context handed through to the helpers.
 * @param {object} report - Report to update in place.
 * @returns {void}
 */
function enrichAbilityWithTaskAndRepoEvidence(ctx, report) {
  if (!ctx) return;

  const abilityRows = report?.evaluation?.ability;
  const hasRows = Array.isArray(abilityRows) && abilityRows.length > 0;
  if (!hasRows) return;

  reconcileAbilityScores(abilityRows, ctx, report);
  enrichAbilityComments(abilityRows, ctx, report);

  // Keep an existing `ui` object; create one only when it is missing.
  if (!report.ui) {
    report.ui = {};
  }
  report.ui.ability_section_hint = buildAbilitySectionHint(report);
}
/**
 * Merge the learning suggestions and then the ability evidence into the report.
 * Intended to be called once, after enrichReportWithHookTranscriptMap.
 *
 * Does nothing when `ctx` is falsy or `report.ok` is falsy.
 *
 * @param {object} ctx - Evaluation context handed through to both steps.
 * @param {object} report - Report to update in place.
 * @returns {void}
 */
function finalizeTaskAlignedEvidence(ctx, report) {
  const ready = Boolean(ctx) && Boolean(report?.ok);
  if (ready) {
    // Learning suggestions are merged first, ability evidence second.
    mergeLearningWithTaskEvidence(ctx, report);
    enrichAbilityWithTaskAndRepoEvidence(ctx, report);
  }
}
// Public API of this module. The remaining functions defined above (stepDone,
// buildRepoOneLiner, reconcileAbilityScores, enrichAbilityComments) are not exported.
module.exports = {
enrichAbilityWithTaskAndRepoEvidence,
finalizeTaskAlignedEvidence,
buildAbilitySectionHint,
summarizeQuestionSignals,
};