// aiserver/lib/runFullPageAgent.mjs

/**
 * 拉取远程学员仓库 + 本地 chat_logs，调用 Cursor Agent 生成整页报告 JSON。
 * 仓库文件列表与「实训步骤/评测对齐」表由服务端扫描与任务描述生成，并在返回前覆盖模型中的对应字段，避免幻觉。
 */
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { createRequire } from 'module';
import { Agent, CursorAgentError } from '@cursor/sdk';

// ES modules have no built-in __dirname; derive this file's directory from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parent directory of the directory that contains this file.
const ROOT = path.join(__dirname, '..');
// ES modules have no built-in `require`; create one so the sibling modules below can be loaded with it.
const require = createRequire(import.meta.url);
// Chat-log analysis helpers (loaded by absolute path next to this file).
const { analyzeChatLogsFile, defaultReport } = require(path.join(__dirname, 'analyzeChatLogs.js'));
// Loader for the student repository context.
const { loadStudentRepoContext } = require(path.join(__dirname, 'repoContext.cjs'));
// Helpers for the evaluation dimensions: loading, prompt formatting, and aligning the model's ability list.
const {
  loadEvaluationDimensions,
  formatDimensionsForPrompt,
  alignEvaluationAbilityToDimensions,
} = require(path.join(__dirname, 'evaluationDimensions.cjs'));

/**
 * Synchronously read a file and return its contents decoded as UTF-8.
 *
 * @param {string} p - Path of the file to read.
 * @returns {string} The decoded file contents.
 * @throws {Error} Whatever `fs.readFileSync` throws (for example when the file is missing).
 */
function readText(p) {
  const contents = fs.readFileSync(p, { encoding: 'utf8' });
  return contents;
}

/**
 * Parse the JSON value embedded in a model response.
 *
 * Two candidates are tried in order:
 *   1. the contents of the first Markdown code fence (``` or ```json);
 *   2. the text from the first `{` to the last `}`.
 * The second candidate is also tried when the fenced text is not valid JSON, which
 * happens when the first fence is not the JSON payload or when a JSON string value
 * itself contains a ``` sequence.
 *
 * @param {*} text - Raw model output; non-string values are converted with `String`.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When `text` is null/undefined, or when it contains neither a fence
 *   nor a `{…}` span.
 * @throws {SyntaxError} When every candidate fails to parse; the error from the first
 *   candidate is rethrown.
 */
function extractJson(text) {
  if (text == null) throw new Error('模型无输出');
  const s = typeof text === 'string' ? text : String(text);

  const candidates = [];
  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/i);
  if (fence) candidates.push(fence[1].trim());
  const start = s.indexOf('{');
  const end = s.lastIndexOf('}');
  if (start >= 0 && end > start) candidates.push(s.slice(start, end + 1));

  if (candidates.length === 0) throw new Error('无法从模型输出中解析 JSON');

  let firstError;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (err) {
      // Keep the earliest failure: it belongs to the preferred candidate.
      if (firstError === undefined) firstError = err;
    }
  }
  throw firstError;
}

/**
 * Limit a value's string form to `max` characters.
 *
 * @param {*} str - Value to shorten; converted with `String`.
 * @param {number} max - Maximum number of characters kept from the start.
 * @returns {string} The string unchanged when it fits, otherwise its first `max`
 *   characters followed by a marker line stating the original length.
 */
function truncate(str, max) {
  const text = String(str);
  const fits = text.length <= max;
  return fits ? text : `${text.slice(0, max)}\n…(truncated, ${text.length} chars total)…\n`;
}

/**
 * Produce the heuristic chat-log report for a chat log file.
 *
 * @param {string} chatLogPath - Path of the chat log file.
 * @returns {object} The result of `analyzeChatLogsFile` when the file exists; the
 *   `defaultReport()` object with a `note` field added when it does not; or
 *   `{ ok: false, error }` when the analysis throws.
 */
function loadHeuristic(chatLogPath) {
  const logExists = fs.existsSync(chatLogPath);
  if (logExists) {
    try {
      return analyzeChatLogsFile(chatLogPath);
    } catch (err) {
      // Analysis failures are reported in the return value rather than thrown.
      const reason = String(err.message || err);
      return { ok: false, error: reason };
    }
  }

  const fallback = defaultReport();
  fallback.note = '无 chat_logs.json';
  return fallback;
}

/**
 * Overwrite report fields with values computed by the server, in place.
 *
 * Writes `summary.rubric_steps`, `summary.lab_progress_percent`, the conversation and
 * hook-event counts (when the heuristic summary is present), the whole `student_repo`
 * object, `rubric_footer_auto`, and the `ui.stat_*` / `ui.rubric_footer` fields.
 * Other existing keys of `summary` and `ui` are kept.
 *
 * `summary` and `ui` are replaced by fresh objects when the report holds anything other
 * than a plain object there, and a missing `ctx.files` is treated as an empty list, so
 * this step cannot throw on malformed model output or a partial context.
 *
 * @param {object} data - Report object to mutate.
 * @param {object} ctx - Repository context; reads `rubric_steps`, `labPct`, `heuristic`,
 *   `repoPath`, `files`, `scan.flags` and `rubric_footer`.
 * @param {string} gitUrl - Repository URL stored in `student_repo.git_url`.
 * @returns {void}
 */
function applyServerRepoTruth(data, ctx, gitUrl) {
  const isRecord = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
  const formatCount = (n) => n.toLocaleString('zh-CN');

  if (!isRecord(data.summary)) data.summary = {};
  if (!isRecord(data.ui)) data.ui = {};

  data.summary.rubric_steps = ctx.rubric_steps;
  data.summary.lab_progress_percent = ctx.labPct;
  if (ctx.heuristic?.summary) {
    data.summary.conversation_count = ctx.heuristic.summary.conversation_count;
    data.summary.hook_event_count = ctx.heuristic.summary.hook_event_count;
  }

  const files = Array.isArray(ctx.files) ? ctx.files : [];
  data.student_repo = {
    git_url: gitUrl,
    local_path: ctx.repoPath,
    file_count: files.length,
    files,
    scan_flags: ctx.scan?.flags || {},
  };
  data.rubric_footer_auto = ctx.rubric_footer;
  data.ui.stat_lab_progress = ctx.labPct;

  // Token counters; missing or non-numeric values count as 0.
  const tok = ctx.heuristic?.evaluation?.meta?.tokens;
  const tin = Number(tok?.input_tokens) || 0;
  const tout = Number(tok?.output_tokens) || 0;
  const tc = Number(tok?.cache_read_tokens) || 0;
  const tcw = Number(tok?.cache_write_tokens) || 0;
  const tsum = tin + tout + tc + tcw;

  data.ui.stat_hook_main = tsum > 0 ? formatCount(tsum) : '0';
  data.ui.stat_hook_unit = 'TOKENS';
  // The cache-write segment is only shown when it is non-zero.
  data.ui.stat_hook_sub = tok
    ? `输入 ${formatCount(tin)} · 输出 ${formatCount(tout)} · 缓存读取 ${formatCount(tc)}${tcw ? ` · 缓存写入 ${formatCount(tcw)}` : ''}`
    : '（chat_logs 中未汇总到TOKENS）';
  data.ui.rubric_footer = ctx.rubric_footer;
}

/**
 * Prompt fragment that tells the model what to output: a single JSON object with the
 * top-level report shape shown below, the constraints on the `evaluation` section, and
 * a note that `summary.rubric_steps` and the file list are overwritten by the server
 * before the response is returned. It is interpolated into the prompt built in
 * generateFullPageReport, so its text is part of the program's runtime behavior.
 */
const SCHEMA_BLOCK = `
你必须只输出一个 JSON 对象（不要 Markdown 围栏外的说明文字）。顶层结构如下：

{
  "ok": true,
  "source": "ai_full",
  "git_url": "string",
  "generated_at": "ISO-8601",
  "ui": { ... 与原先一致，含 student 文案、评价区展示等 },
  "summary": { "conversation_count", "hook_event_count", "lab_progress_percent", "rubric_steps" },
  "conversations": [ ... ],
  "evaluation": {
    "overall": "string，给学员看的总体评价，换行分段。必须用小标题分 4～5 段（如「一、总体结论」「二、从对话记录里能看出什么」…），少用「Hook」「启发式」「flags」等未解释术语；若出现机制说明须用白话括号解释。内容须覆盖：①总体结论（短句）；②对话里客观呈现的学习/提问方式；③与本实训（读表、统计、作图、解释）的对照，并说明「步骤打勾若为关键词推断须提醒不等于独立完成」；④仓库扫描结论（若有）；⑤可执行的改进建议。另须体现两大视角但可融入段落：学员是否用自己的话描述数据路径、运行结果、排错；使用 AI 是否分步提问、是否带报错与上下文。须写未完成项；若 evaluation_signals.heuristic_untrustworthy 为 true，用学员能懂的话说明「长文粘贴会让自动步骤表虚高」，并与仓库真实文件交叉论证。",
    "ability": [ { "id": "须与 evaluation-dimensions.yaml 中 dimensions.id 一致", "name": "string", "value": 0-100, "comment": "须引用仓库或对话中的具体证据，体现学习与工具使用两方面" } ],
    "issues": [ { "title": "string", "body": "string" } ],
    "learning": [ "string" ],
    "resources": [ { "title", "subtitle", "url" } ],
    "class_rank": { "place", "total", "note" }
  }
}

evaluation 约束：evaluation.ability 的条数、顺序、id、name 必须与「本请求中注入的 YAML 维度列表」完全一致（教师可在 config/evaluation-dimensions.yaml 增删维度）；issues 至少 2 条；learning 至少 3 条。禁止输出「未读仓库即满分」式结论。

注意：summary.rubric_steps 与「作业文件」列表将由服务端在返回前用真实 git 扫描覆盖；你的文字仍须与仓库扫描 JSON、chat 启发式 JSON 自洽。为控制耗时，evaluation 各字段优先写关键事实与可执行句，避免重复堆砌。
`;

/** Return true when `value` is a non-null, non-array object. */
function isJsonObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Fill in missing `hook_transcript` / `topic_preview` fields on the model's
 * conversations from the heuristic conversations that share the same id.
 * Entries that are not objects are skipped on both sides.
 *
 * @param {*} conversations - `data.conversations` from the model; ignored unless an array.
 * @param {*} heuristicConversations - Conversations from the heuristic report; ignored unless an array.
 * @returns {void}
 */
function backfillConversationFields(conversations, heuristicConversations) {
  if (!Array.isArray(conversations) || !Array.isArray(heuristicConversations)) return;

  const known = heuristicConversations.filter(isJsonObject);
  const transcriptById = new Map(known.map((c) => [c.id, c.hook_transcript]));
  const topicById = new Map(known.filter((c) => c.topic_preview).map((c) => [c.id, c.topic_preview]));
  if (transcriptById.size === 0 && topicById.size === 0) return;

  for (const conv of conversations) {
    if (!isJsonObject(conv)) continue;
    const cid = conv.id || conv.conversation_id;
    if (!cid) continue;
    if (!conv.hook_transcript && transcriptById.has(cid)) {
      conv.hook_transcript = transcriptById.get(cid);
    }
    if (!conv.topic_preview && topicById.has(cid)) {
      conv.topic_preview = topicById.get(cid);
    }
  }
}

/**
 * Build the full-page report: assemble a prompt from the task description, evaluation
 * dimensions, repository scan and chat-log heuristic, send it to the Cursor Agent, parse
 * the JSON it returns, and overwrite the server-owned fields before returning.
 *
 * @param {object} options
 * @param {string} options.root - Directory that contains `config/lab-task-description.md`,
 *   `config/evaluation-dimensions.yaml` and, optionally, the teacher skill file; also used
 *   as the agent's working directory.
 * @param {string} options.gitUrl - URL of the student repository.
 * @param {string} options.chatLogPath - Path of the chat log file.
 * @param {object} [options.repoContext] - Pre-loaded repository context; when omitted it is
 *   loaded with `loadStudentRepoContext(gitUrl, root, chatLogPath)`.
 * @returns {Promise<object>} The report object.
 * @throws {Error} When `CURSOR_API_KEY` is not set, when the agent fails to start (the
 *   original error is attached as `cause`) or reports an error status, or when the model
 *   output cannot be parsed into a JSON object.
 */
export async function generateFullPageReport({ root, gitUrl, chatLogPath, repoContext }) {
  const apiKey = process.env.CURSOR_API_KEY;
  if (!apiKey) {
    throw new Error('缺少 CURSOR_API_KEY，无法调用 Cursor 大模型生成整页报告');
  }

  const ctx = repoContext || loadStudentRepoContext(gitUrl, root, chatLogPath);
  const dimObjs = loadEvaluationDimensions(root);
  const taskMd = readText(path.join(root, 'config/lab-task-description.md'));
  const dims = readText(path.join(root, 'config/evaluation-dimensions.yaml'));
  const skillPath = path.join(root, '.cursor/skills/student-lab-ai-evaluation/SKILL.md');
  // The teacher skill file is optional; an absent file contributes an empty section.
  const skill = fs.existsSync(skillPath) ? readText(skillPath) : '';

  const heuristic = ctx.heuristic ?? loadHeuristic(chatLogPath);
  const filePaths = (ctx.files || []).slice(0, 250).map((f) => f.path);

  const dimAnchor =
    dimObjs.length > 0
      ? `
--- 能力评估维度（作业评价 · 必须与 YAML 完全一致）---
evaluation.ability 必须恰好 ${dimObjs.length} 条，按下列顺序逐条输出，id 与 name 与下表一致（不得增删、不得改 id），value 为 0–100，comment 须引用仓库扫描 JSON 或 Hook 中的可核验证据：
${formatDimensionsForPrompt(dimObjs)}
`
      : '';

  const prompt = `你是头歌实训助教。请根据「远程学员仓库」与「本地 Cursor Hook 启发式摘要」生成实验报告页的完整数据。

${SCHEMA_BLOCK}

--- 教师 Skill（写作风格与评价原则）---
${truncate(skill, 12000)}

--- 实训任务描述 ---
${taskMd}

--- 评价维度 YAML（权威来源）---
${dims}
${dimAnchor}
--- 远程仓库 ---
git_url: ${gitUrl}
本地克隆路径（仅供参考）: ${ctx.repoPath}

--- 仓库扫描（机器生成，须引用）---
${truncate(JSON.stringify(ctx.scan, null, 2), 45000)}

--- 仓库文件路径列表（前 250 个，与左侧「作业文件」一致）---
${truncate(JSON.stringify(filePaths, null, 2), 9000)}

--- chat_logs 启发式解析 JSON（仅供参考，可能不完整）---
${truncate(JSON.stringify(heuristic, null, 2), 15000)}

「作业评价」页须同时服务教师与学员：evaluation.overall 以**学员第一眼能读懂**为准（小标题分段、少黑话、结论与不确定写清），但必须能回答「学员学得怎样」「AI 用得怎样」。能力与问题/建议可追溯到仓库路径或对话摘录。若 evaluation_signals.heuristic_untrustworthy 为 true，须在 overall 用白话说明「整段粘贴会让步骤表虚高」，并与仓库是否确有 score_analysis.py / score_chart.png 等交叉说明，不得沿用「全完成」式武断结论。

现在请输出完整 JSON。git_url 字段填「${gitUrl}」。generated_at 使用当前 UTC 时间 ISO 字符串。
`;

  const modelId = process.env.LAB_EVAL_MODEL || 'composer-2';

  let result;
  try {
    result = await Agent.prompt(prompt, {
      apiKey,
      model: { id: modelId },
      local: { cwd: root },
    });
  } catch (err) {
    if (err instanceof CursorAgentError) {
      // Keep the original error reachable for logging/debugging.
      throw new Error(`Cursor Agent 启动失败: ${err.message}`, { cause: err });
    }
    throw err;
  }

  if (result?.status === 'error') {
    throw new Error(`Cursor Agent 运行失败: ${JSON.stringify(result)}`);
  }

  // A missing result falls through to extractJson's "no output" error.
  const data = extractJson(result?.result);
  if (!isJsonObject(data)) {
    // Arrays and primitives are valid JSON but cannot carry the report fields.
    throw new Error('模型输出不是 JSON 对象');
  }

  data.ok = true;
  data.source = 'ai_full';
  data.git_url = gitUrl;
  data.repo_local_path = ctx.repoPath;
  if (!data.generated_at) data.generated_at = new Date().toISOString();
  if (!isJsonObject(data.evaluation)) data.evaluation = {};
  // Accept the plural key as an alias when the model used it instead of `ability`.
  if (Array.isArray(data.evaluation.abilities) && !data.evaluation.ability) {
    data.evaluation.ability = data.evaluation.abilities;
  }
  const heuristicEvalFallback =
    heuristic && heuristic.evaluation && heuristic.ok !== false ? heuristic.evaluation : defaultReport().evaluation;
  if (dimObjs.length) {
    alignEvaluationAbilityToDimensions(data.evaluation, dimObjs, heuristicEvalFallback);
  }
  if (!data.summary && data.rubric) {
    data.summary = data.rubric;
  }
  data.evaluation.meta = {
    ...(isJsonObject(data.evaluation.meta) ? data.evaluation.meta : {}),
    ai_evaluation_source: 'cursor_agent_refresh',
    ai_evaluation_at: data.generated_at,
    repo_scan: ctx.scan?.flags || {},
  };

  backfillConversationFields(data.conversations, heuristic?.conversations);

  applyServerRepoTruth(data, ctx, gitUrl);
  return data;
}