// aiserver/lib/analyzeChatLogs.js

const fs = require('fs');
const path = require('path');
const { loadEvaluationDimensions } = require('./evaluationDimensions.cjs');

// Parent of this module's directory; passed to loadEvaluationDimensions as the project root.
const PROJECT_ROOT = path.join(__dirname, '..');

// Keyword patterns per lab step. A step counts as "hit" when ANY of its patterns
// matches the combined chat text (see matchRubric). All patterns are
// case-insensitive and carry no `g` flag, so `.test()` is stateless.
const LAB_KEYWORDS = {
  // Step 2: generating the base code (CSV reading, totals / averages / per-subject stats).
  step2_generate: [/read_csv|pandas|pd\.read|scores\.csv|csv/i, /总分|平均分|各科/i],
  // Step 3: running and debugging (error reports, common Python exception names).
  step3_debug: [/报错|修复|FileNotFound|KeyError|Traceback|error/i],
  // Step 4: visualization (matplotlib bar chart, score_chart.png).
  step4_viz: [/matplotlib|plt\.|柱状|bar\(|score_chart|可视化|png/i],
  // Step 5: asking the AI to explain the code.
  step5_explain: [/解释|什么意思|为什么|pathlib/i],
};

// Ordered rubric rows for the lab. Each row has a stable `id`, a display `label`
// (shown to users, hence kept in Chinese) and the `patterns` whose match marks
// the row as done in matchRubric.
const RUBRIC = [
  { id: 'prep', label: '准备数据 scores.csv', patterns: [/scores\.csv|成绩表|csv/i] },
  {
    id: 's1',
    label: '步骤一：打开 Cursor / 新建项目',
    patterns: [/myshixun/i, /打开文件夹/i, /score_analysis\.py/i, /新建文件/i],
  },
  // `.flat()` is a no-op here because step2_generate is already a flat array of regexes.
  { id: 's2', label: '步骤二：自然语言生成基础代码', patterns: LAB_KEYWORDS.step2_generate.flat() },
  { id: 's3', label: '步骤三：运行与调试', patterns: LAB_KEYWORDS.step3_debug },
  { id: 's4', label: '步骤四：matplotlib 柱状图与 score_chart.png', patterns: LAB_KEYWORDS.step4_viz },
  { id: 's5', label: '步骤五：请 AI 解释代码', patterns: LAB_KEYWORDS.step5_explain },
  { id: 's6', label: '步骤六：保存 score_analysis.py', patterns: [/score_analysis|保存|submit/i] },
];

/**
 * Concatenate every non-blank text fragment found in the given hook events.
 *
 * For each event, in order, the fragments are: `hook_input.text`,
 * `user_queries.last_user_text`, then each entry of
 * `user_queries.recent_user_texts`. Non-string and whitespace-only values
 * are ignored.
 *
 * @param {Array<object>} events Hook events of one conversation.
 * @returns {string} The fragments joined with newlines ('' when none).
 */
function collectTexts(events) {
  const isNonBlank = (value) => typeof value === 'string' && value.trim() !== '';
  const pieces = events.flatMap((entry) => {
    const queries = entry?.user_queries;
    const recentList = Array.isArray(queries?.recent_user_texts) ? queries.recent_user_texts : [];
    return [entry?.hook_input?.text, queries?.last_user_text, ...recentList].filter(isNonBlank);
  });
  return pieces.join('\n');
}

/**
 * Evaluate every RUBRIC row against the combined chat text.
 *
 * @param {string} combinedLower Combined conversation text (callers pass it lower-cased).
 * @returns {Array<{id: string, label: string, done: boolean}>} One entry per rubric
 *   row, in rubric order; `done` is true when at least one of the row's patterns matches.
 */
function matchRubric(combinedLower) {
  const results = [];
  for (const { id, label, patterns } of RUBRIC) {
    let matched = false;
    for (const pattern of patterns) {
      if (pattern.test(combinedLower)) {
        matched = true;
        break;
      }
    }
    results.push({ id, label, done: matched });
  }
  return results;
}

/**
 * Percentage of rubric steps that are marked done.
 *
 * @param {Array<{done: boolean}>} steps Rubric step results.
 * @returns {number} Integer in [0, 100]; 0 for an empty list (avoids 0/0 = NaN).
 */
function scoreLabProgress(steps) {
  if (!steps.length) return 0;
  const done = steps.filter((s) => s.done).length;
  return Math.round((done / steps.length) * 100);
}

/**
 * Collect the individual text fragments used to decide whether a session looks
 * like one big pasted handout / task sheet rather than a step-by-step lab dialogue.
 *
 * Per event, in order: `hook_input.text`, `user_queries.last_user_text`, then
 * each entry of `user_queries.recent_user_texts`. Only non-blank strings are kept.
 *
 * @param {Array<object>} events Hook events.
 * @returns {string[]} The fragments, untrimmed, in encounter order.
 */
function collectUserFacingTexts(events) {
  const fragments = [];
  const keepIfText = (candidate) => {
    if (typeof candidate === 'string' && candidate.trim()) fragments.push(candidate);
  };
  for (const entry of events) {
    keepIfText(entry?.hook_input?.text);
    const queries = entry?.user_queries;
    keepIfText(queries ? queries.last_user_text : undefined);
    const recentList = queries?.recent_user_texts;
    if (Array.isArray(recentList)) recentList.forEach((item) => keepIfText(item));
  }
  return fragments;
}

/**
 * Derive paste / handout heuristics for one set of hook events.
 *
 * @param {Array<object>} events Hook events.
 * @returns {{pasteHeavy: boolean, courseDump: boolean, heuristicUntrustworthy: boolean,
 *   maxLen: number, shortTurns: number, hasTraceback: boolean, totalChars: number}}
 *   - maxLen: longest trimmed fragment length
 *   - shortTurns: number of fragments whose trimmed length is within 40..900
 *   - totalChars: length of all fragments joined with newlines
 *   - pasteHeavy: one fragment over 4500 chars, or over 8000 chars total with fewer than 4 short turns
 *   - courseDump: over 3500 chars total that mention both course/report-system terms and lab keywords
 *   - heuristicUntrustworthy: pasteHeavy or courseDump
 *   - hasTraceback: the text contains error / exception wording
 */
function detectSessionProfile(events) {
  const fragments = collectUserFacingTexts(events);
  const trimmedLengths = fragments.map((fragment) => String(fragment).trim().length);
  const maxLen = trimmedLengths.reduce((longest, len) => Math.max(longest, len), 0);
  const shortTurns = trimmedLengths.filter((len) => len >= 40 && len <= 900).length;

  const joined = fragments.join('\n').toLowerCase();
  const totalChars = joined.length;

  const mentionsCourseMaterial =
    /(实训任务|教学大纲|课程建设|助教|report-ui|server\.js|skill\.md|lab-eval|头歌实践)/;
  const mentionsLabKeywords = /(score_analysis|read_csv|matplotlib)/;
  const errorWording = /traceback|filenotfound|keyerror|syntaxerror|modulenotfound|error:\s|exception:/i;

  const pasteHeavy = maxLen > 4500 || (totalChars > 8000 && shortTurns < 4);
  const courseDump =
    totalChars > 3500 && mentionsCourseMaterial.test(joined) && mentionsLabKeywords.test(joined);

  return {
    pasteHeavy,
    courseDump,
    heuristicUntrustworthy: pasteHeavy || courseDump,
    maxLen,
    shortTurns,
    hasTraceback: errorWording.test(joined),
    totalChars,
  };
}

/**
 * Profile each transcript window (grouped by transcript path, as in the hook
 * chat windows) separately and then aggregate: short-turn counts and character
 * totals are summed, the longest fragment is the max over windows, and the
 * course-dump check runs on the concatenated text of all windows. This tracks
 * genuine multi-window, step-by-step work more closely than one global profile.
 *
 * @param {Array<{events?: Array<object>}>} windowGroups Window groups.
 * @returns {{pasteHeavy: boolean, courseDump: boolean, heuristicUntrustworthy: boolean,
 *   maxLen: number, shortTurns: number, hasTraceback: boolean, totalChars: number}}
 *   All-false / all-zero profile when there are no groups.
 */
function aggregateProfileAcrossTranscriptWindows(windowGroups) {
  const noGroups = !windowGroups || !windowGroups.length;
  if (noGroups) {
    return {
      pasteHeavy: false,
      courseDump: false,
      heuristicUntrustworthy: false,
      maxLen: 0,
      shortTurns: 0,
      hasTraceback: false,
      totalChars: 0,
    };
  }

  const totals = { maxLen: 0, shortTurns: 0, totalChars: 0, hasTraceback: false };
  const windowTexts = [];
  for (const group of windowGroups) {
    const windowEvents = group.events || [];
    const windowProfile = detectSessionProfile(windowEvents);
    totals.maxLen = Math.max(totals.maxLen, windowProfile.maxLen);
    totals.shortTurns += windowProfile.shortTurns;
    totals.totalChars += windowProfile.totalChars;
    totals.hasTraceback = totals.hasTraceback || Boolean(windowProfile.hasTraceback);
    windowTexts.push(collectUserFacingTexts(windowEvents).join('\n'));
  }

  const allText = windowTexts.join('\n').toLowerCase();
  const pasteHeavy = totals.maxLen > 4500 || (totals.totalChars > 8000 && totals.shortTurns < 4);
  const looksLikeCourseMaterial =
    /(实训任务|教学大纲|课程建设|助教|report-ui|server\.js|skill\.md|lab-eval|头歌实践)/.test(allText);
  const courseDump =
    totals.totalChars > 3500 &&
    looksLikeCourseMaterial &&
    /(score_analysis|read_csv|matplotlib)/.test(allText);

  return {
    pasteHeavy,
    courseDump,
    heuristicUntrustworthy: pasteHeavy || courseDump,
    maxLen: totals.maxLen,
    shortTurns: totals.shortTurns,
    hasTraceback: totals.hasTraceback,
    totalChars: totals.totalChars,
  };
}

/**
 * In long-paste sessions, do not count "a keyword appeared in the handout" as
 * a completed step; only a genuine debugging signal (step s3) is kept.
 *
 * @param {Array<object>} steps Rubric step results.
 * @param {{heuristicUntrustworthy: boolean, hasTraceback: boolean}} profile Session profile.
 * @returns {Array<object>} New step objects (inputs are never mutated). For a
 *   trustworthy profile these are plain shallow copies; otherwise every step is
 *   forced to `done: false` with an `eval_note`, except s3, which is forced to
 *   `done: true` when the profile saw error / traceback wording.
 */
function demoteStepsForProfile(steps, profile) {
  const demotionNote = '启发式降权：对话多为长讲义/说明类粘贴，关键词命中不代表本人已逐步完成该步骤';
  const rewriteStep = (step) => {
    if (!profile.heuristicUntrustworthy) return { ...step };
    const keepAsDebugEvidence = step.id === 's3' && profile.hasTraceback;
    return keepAsDebugEvidence
      ? { ...step, done: true, eval_note: '检测到报错/异常相关表述，保留为已体现' }
      : { ...step, done: false, eval_note: demotionNote };
  };
  return steps.map((step) => rewriteStep(step));
}

/**
 * Score the five fixed ability signals from the step table and the hook-derived
 * session profile. The teacher-defined YAML dimension list is mapped onto these
 * afterwards (see buildAbilityDimensionsFromYaml).
 *
 * @param {Array<{id: string, done: boolean}>} steps Rubric step results.
 * @param {{heuristicUntrustworthy: boolean, shortTurns: number, hasTraceback: boolean}} profile
 *   Session profile.
 * @returns {Array<{id: string, name: string, value: number, comment: string}>}
 *   Entries for prompt_quality, tool_use_debug, data_stats, visualization and
 *   code_literacy, in that order.
 */
function buildAbilityDimensions(steps, profile) {
  const untrusted = profile.heuristicUntrustworthy;
  const hasTrace = profile.hasTraceback;
  // Only the first step with a given id is consulted.
  const isDone = (stepId) => Boolean(steps.find((step) => step.id === stepId)?.done);
  const s2Done = isDone('s2');

  // Step-driven score that is capped at 46 when keyword hits cannot be trusted.
  const cappedStepScore = (stepDone, whenDone, whenMissing) => {
    const base = stepDone ? whenDone : whenMissing;
    return Math.round(untrusted ? Math.min(base, 46) : base);
  };

  let promptScore;
  if (untrusted) {
    promptScore = Math.min(44, 22 + profile.shortTurns * 8);
  } else if (s2Done) {
    promptScore = profile.shortTurns >= 3 ? 82 : 68;
  } else {
    promptScore = 36;
  }

  let debugScore = isDone('s3') ? 72 : 34;
  if (hasTrace) debugScore = Math.min(100, debugScore + 18);
  if (untrusted) debugScore = Math.min(debugScore, 52);
  debugScore = Math.round(debugScore);

  const dataScore = cappedStepScore(s2Done, 74, 32);
  const vizScore = cappedStepScore(isDone('s4'), 78, 30);
  const literacyScore = cappedStepScore(isDone('s5'), 76, 33);

  let promptComment;
  if (untrusted) {
    promptComment =
      '单次粘贴过长、分步短问较少，更像整份材料投喂；难以认定已养成「目标—数据—输出」分步描述习惯。';
  } else if (s2Done) {
    promptComment = '日志中能识别与生成/改代码相关的交互；若含多轮短问更佳。';
  } else {
    promptComment = '建议在对话中分步写清输入文件、期望统计与输出形式，少用大段讲义替代个人表述。';
  }

  let dataComment;
  if (untrusted) {
    dataComment = '关键词可能来自粘贴材料，已与仓库/独立短对话交叉前不计为高水平。';
  } else if (s2Done) {
    dataComment = '对话与步骤表与数据读取/统计相关表述较一致。';
  } else {
    dataComment = '需补充 read_csv、列含义、均值等业务向提问或代码讨论。';
  }

  const debugComment = hasTrace
    ? '出现过报错/异常类文本，具备可评估的排错语境。'
    : '较少看到 Traceback 或明确报错描述；真实调试链路不足时该项从严给分。';
  const vizComment = isDone('s4')
    ? '有图表/png 等相关讨论。'
    : '建议明确柱状图轴含义、保存 score_chart.png 等可评测产出。';
  const literacyComment = isDone('s5')
    ? '有请模型解释或追问「为什么」的迹象。'
    : '可多用「解释这段」「逐步说明变量含义」类提问巩固理解。';

  return [
    { id: 'prompt_quality', name: '自然语言驱动（Prompt）', value: promptScore, comment: promptComment },
    { id: 'tool_use_debug', name: '运行调试与排错', value: debugScore, comment: debugComment },
    { id: 'data_stats', name: '数据读取与统计', value: dataScore, comment: dataComment },
    { id: 'visualization', name: '可视化（matplotlib）', value: vizScore, comment: vizComment },
    { id: 'code_literacy', name: '代码理解与反思', value: literacyScore, comment: literacyComment },
  ];
}

/**
 * Emit ability bars in the order and with the names given by the configured
 * dimension list (config/evaluation-dimensions.yaml). Dimensions whose id
 * matches one of the five built-in signals reuse that score; unknown ids get a
 * conservative estimate derived from the overall step completion percentage.
 *
 * @param {Array<{id: string, name: string, rubric: string}>} dimensions Configured dimensions.
 * @param {Array<object>} steps Rubric step results.
 * @param {object} profile Session profile.
 * @returns {Array<{id: string, name: string, value: number, comment: string}>}
 *   The built-in list when `dimensions` is empty, otherwise one entry per dimension.
 */
function buildAbilityDimensionsFromYaml(dimensions, steps, profile) {
  const builtIn = buildAbilityDimensions(steps, profile);
  if (!dimensions.length) return builtIn;

  const builtInById = new Map();
  for (const entry of builtIn) builtInById.set(entry.id, entry);

  const completion = scoreLabProgress(steps);
  const untrusted = profile.heuristicUntrustworthy;

  // Fallback score for ids that have no built-in signal.
  const estimate = () => {
    const rawValue = untrusted
      ? Math.min(42, completion * 0.35 + 10)
      : Math.min(88, completion * 0.82 + 8);
    return Math.round(rawValue);
  };

  return dimensions.map((dimension) => {
    const known = builtInById.get(dimension.id);
    if (!known) {
      return {
        id: dimension.id,
        name: dimension.name,
        value: estimate(),
        comment: `根据实训步骤整体对齐度（${completion}%）对该维度做保守估计。${dimension.rubric}`,
      };
    }
    const withRubric = `${known.comment}（任务维度 rubric：${dimension.rubric}）`;
    return {
      id: dimension.id,
      name: dimension.name,
      value: known.value,
      comment: withRubric.slice(0, 520),
    };
  });
}

/**
 * Pick the chronologically last non-empty user message of a session, for use
 * as a short one-line title (e.g. the "session name" in a dialog header).
 *
 * Events are ordered by `timestamp` / `captured_at` (compared as strings) and
 * scanned from newest to oldest. For each event `last_user_text` is preferred;
 * otherwise the final entry of `recent_user_texts` is tried.
 *
 * @param {Array<object>} events Hook events of one session.
 * @returns {string} Normalized, truncated preview line, or '' when nothing usable exists.
 */
function deriveSessionTopicPreview(events) {
  if (!Array.isArray(events) || events.length === 0) return '';

  const stampOf = (entry) => String(entry.timestamp || entry.captured_at || '');
  const newestFirst = [...events]
    .sort((left, right) => stampOf(left).localeCompare(stampOf(right)))
    .reverse();

  for (const entry of newestFirst) {
    const queries = entry?.user_queries;
    if (!queries || typeof queries !== 'object') continue;

    if (typeof queries.last_user_text === 'string') {
      const lastText = queries.last_user_text.trim();
      if (lastText) return normalizeTopicPreviewLine(lastText);
    }

    const recentList = queries.recent_user_texts;
    if (Array.isArray(recentList) && recentList.length) {
      const tail = String(recentList[recentList.length - 1] || '').trim();
      if (tail) return normalizeTopicPreviewLine(tail);
    }
  }
  return '';
}

/**
 * Turn arbitrary text into a single-line preview of at most 48 characters.
 *
 * All whitespace runs (including newlines) are collapsed to one space and the
 * result is trimmed. Length is measured and cut in Unicode code points rather
 * than UTF-16 units, so an emoji or other astral character is never split into
 * a lone surrogate.
 *
 * @param {*} s Source text; falsy values are treated as ''.
 * @returns {string} The preview, suffixed with '…' when it was truncated; '' for blank input.
 */
function normalizeTopicPreviewLine(s) {
  const flat = String(s || '')
    .replace(/\s+/g, ' ')
    .trim();
  if (!flat) return '';
  // Array.from iterates by code point, keeping surrogate pairs together.
  const codePoints = Array.from(flat);
  return codePoints.length > 48 ? `${codePoints.slice(0, 48).join('')}…` : flat;
}

/**
 * Build a chronological plain-text transcript of a session from its hook
 * events: user-side excerpts plus model reply text, for the report page's
 * "view details" dialog. No event names or separator headers are emitted.
 *
 * Per event (ordered by `timestamp` / `captured_at`, compared as strings):
 * the user block (last message, de-duplicated recent messages, turn count),
 * then the model reply text, then a metadata line (token counts, transcript path).
 *
 * @param {Array<object>} events Hook events of one session.
 * @returns {string} The transcript, or a fixed placeholder sentence when no text is available.
 */
function buildHookTranscript(events) {
  const stampOf = (entry) => String(entry.timestamp || entry.captured_at || '');
  const timeline = [...events].sort((left, right) => stampOf(left).localeCompare(stampOf(right)));

  let transcript = '';
  for (const entry of timeline) {
    const hookData = entry.hook_input || {};
    const queries = entry.user_queries;

    if (queries && typeof queries === 'object') {
      const sections = [];
      const lastText = queries.last_user_text;
      if (typeof lastText === 'string' && lastText.trim()) {
        sections.push(`【末条用户消息】\n${lastText.trim()}`);
      }
      const recentList = queries.recent_user_texts;
      if (Array.isArray(recentList) && recentList.length) {
        const cleaned = recentList.map((item) => String(item).trim());
        const distinct = [...new Set(cleaned.filter(Boolean))];
        if (distinct.length) sections.push(`【近期用户消息】\n${distinct.join('\n———\n')}`);
      }
      if (queries.user_turn_count != null) {
        sections.push(`（用户轮次：${queries.user_turn_count}）`);
      }
      if (sections.length) transcript += `${sections.join('\n\n')}\n\n`;
    }

    const replyText = hookData.text;
    if (typeof replyText === 'string' && replyText.trim()) {
      transcript += `【模型回复文本】\n${replyText.trim()}\n\n`;
    }

    const metaParts = [];
    const hasTokenInfo = hookData.input_tokens != null || hookData.output_tokens != null;
    if (hasTokenInfo) {
      metaParts.push(
        `TOKENS：输入 ${hookData.input_tokens ?? 0} · 输出 ${hookData.output_tokens ?? 0} · 缓存读取 ${hookData.cache_read_tokens ?? 0}`
      );
    }
    const transcriptPath = hookData.transcript_path;
    if (typeof transcriptPath === 'string' && transcriptPath.trim()) {
      metaParts.push(`会话记录路径：${transcriptPath.trim()}`);
    }
    if (metaParts.length) transcript += `【元数据】${metaParts.join(' · ')}\n\n`;
  }

  const trimmed = transcript.trim();
  return trimmed === '' ? '（该会话在 chat_logs 中无可用钩子文本）' : trimmed;
}

/**
 * Derive a list of "question" cards from the user texts in a conversation's hook events.
 *
 * For every event the entries of `user_queries.recent_user_texts` followed by
 * `user_queries.last_user_text` are considered. Non-string entries are ignored
 * (they used to crash on `.trim()`). Identical text at the same timestamp is
 * emitted once. Texts shorter than 4 characters are dropped unless they contain
 * a lab-related keyword. Keyword checks run on the lower-cased text, so
 * "Traceback" or "FileNotFoundError" are recognized as debug questions.
 *
 * @param {string} convId Conversation id stamped on every item.
 * @param {Array<object>} events Hook events of the conversation.
 * @returns {Array<{conversation_id: string, title: string, detail: string,
 *   status: 'open'|'resolved', tags: string[], time: string}>}
 *   At most 60 items, newest first; debug-looking questions get status 'open'.
 */
function inferQuestionsFromEvents(convId, events) {
  const seen = new Set();
  const items = [];
  for (const ev of events) {
    const ts = ev?.timestamp || ev?.captured_at || '';
    const recent = ev?.user_queries?.recent_user_texts;
    const last = ev?.user_queries?.last_user_text;
    const list = [];
    if (Array.isArray(recent)) list.push(...recent);
    if (last) list.push(last);
    for (const text of list) {
      // Log payloads are untrusted JSON; skip anything that is not a string.
      if (typeof text !== 'string') continue;
      const key = text.trim();
      if (!key) continue;
      const dedupe = `${key}\n@\n${ts}`;
      if (seen.has(dedupe)) continue;
      seen.add(dedupe);
      const lower = key.toLowerCase();
      const looksLab =
        /csv|成绩|python|matplotlib|平均|报错|cursor|代码|图|分析|who|how|you/.test(lower);
      if (!looksLab && key.length < 4) continue;
      const isDebug = /报错|修复|error|traceback|失败/.test(lower);
      items.push({
        conversation_id: convId,
        title: key.length > 80 ? `${key.slice(0, 80)}…` : key,
        detail: key,
        status: isDebug ? 'open' : 'resolved',
        tags: [ev?.hook_input?.model || 'default', '钩子'],
        time: ts,
      });
    }
  }
  items.sort((a, b) => String(b.time).localeCompare(String(a.time)));
  return items.slice(0, 60);
}

/**
 * Sum the token counters reported in `hook_input` across events.
 *
 * @param {Array<object>} events Hook events.
 * @returns {{input_tokens: number, output_tokens: number, cache_read_tokens: number}}
 *   Totals; values that are missing or not numeric count as 0.
 */
function aggregateTokens(events) {
  const asCount = (value) => Number(value) || 0;
  return events.reduce(
    (sums, entry) => {
      const hookData = entry?.hook_input || {};
      return {
        input_tokens: sums.input_tokens + asCount(hookData.input_tokens),
        output_tokens: sums.output_tokens + asCount(hookData.output_tokens),
        cache_read_tokens: sums.cache_read_tokens + asCount(hookData.cache_read_tokens),
      };
    },
    { input_tokens: 0, output_tokens: 0, cache_read_tokens: 0 }
  );
}

/**
 * Compute the grouping key that identifies the Cursor chat window an event
 * belongs to. Events with the same `hook_input.transcript_path` /
 * `user_queries.path` share a window. groupRawEventsByTranscriptPath builds its
 * groups on this key and returns them newest-last-event first, with each
 * group's events in chronological order.
 *
 * @param {object} ev Hook event.
 * @param {string} fallbackConvId Conversation id to use when the event carries none.
 * @returns {string} The trimmed transcript path when present; otherwise
 *   `__single_path__:<conversation id>`; '' when neither is available.
 */
function normalizeTranscriptGroupKey(ev, fallbackConvId) {
  const hookData = ev?.hook_input || {};
  const queries = ev?.user_queries || {};

  const transcriptPath = String(hookData.transcript_path || queries.path || '').trim();
  if (transcriptPath !== '') return transcriptPath;

  const conversationId = String(hookData.conversation_id || fallbackConvId || '').trim();
  if (conversationId === '') return '';
  return `__single_path__:${conversationId}`;
}

/**
 * Choose the representative conversation id for a transcript window.
 *
 * If the transcript file's base name (without a `.jsonl` extension) is one of
 * the ids, that id wins; otherwise the lexicographically smallest id is used.
 * Both `/` and `\` are accepted as path separators so Windows transcript paths
 * resolve to their file name as well.
 *
 * @param {string} transcriptPath Transcript path (or group key) of the window.
 * @param {Iterable<string>} idSet Conversation ids seen in the window.
 * @returns {string} The chosen id, or '' when there are no non-empty ids.
 */
function pickPrimaryConversationId(transcriptPath, idSet) {
  const ids = [...idSet].filter(Boolean);
  if (!ids.length) return '';
  const base = String(transcriptPath || '').split(/[\\/]/).pop() || '';
  const bare = base.replace(/\.jsonl$/i, '');
  if (bare && ids.includes(bare)) return bare;
  ids.sort();
  return ids[0];
}

/**
 * Bucket raw chat_logs events by chat window: events that share a
 * `hook_input.transcript_path` / `user_queries.path` belong to the same Cursor
 * chat window. Each group keeps its events so evaluation can aggregate them.
 *
 * @param {Object<string, Array<object>>} rawData Parsed chat_logs content
 *   (`{ conversationId: Event[] }`); entries whose value is not an array are skipped.
 * @returns {Array<{key: string, transcript_path: (string|null),
 *   conversation_ids: Set<string>, events: Array<object>}>}
 *   Groups ordered by their last event time, newest first; events inside a
 *   group are in ascending time order. [] for non-object input.
 */
function groupRawEventsByTranscriptPath(rawData) {
  if (!rawData || typeof rawData !== 'object') return [];

  const stampOf = (entry) => String(entry.timestamp || entry.captured_at || '');
  const lastStampOf = (group) => {
    const list = group.events;
    return list.length ? stampOf(list[list.length - 1]) : '';
  };

  const buckets = new Map();
  Object.entries(rawData).forEach(([topConvId, events]) => {
    if (!Array.isArray(events)) return;
    for (const entry of events) {
      const bucketKey = normalizeTranscriptGroupKey(entry, topConvId);
      if (!bucketKey) continue;

      let bucket = buckets.get(bucketKey);
      if (bucket === undefined) {
        bucket = {
          key: bucketKey,
          transcript_path: null,
          conversation_ids: new Set(),
          events: [],
        };
        buckets.set(bucketKey, bucket);
      }

      const hookData = entry?.hook_input || {};
      const queries = entry?.user_queries || {};
      const seenPath = String(hookData.transcript_path || queries.path || '').trim();
      if (seenPath) bucket.transcript_path = seenPath;

      bucket.conversation_ids.add(String(topConvId));
      if (hookData.conversation_id != null) {
        bucket.conversation_ids.add(String(hookData.conversation_id));
      }
      bucket.events.push(entry);
    }
  });

  const ordered = [...buckets.values()];
  // Chronological order inside each window.
  for (const bucket of ordered) {
    bucket.events.sort((left, right) => stampOf(left).localeCompare(stampOf(right)));
  }
  // Most recently active window first.
  ordered.sort((left, right) => lastStampOf(right).localeCompare(lastStampOf(left)));
  return ordered;
}

/**
 * Build the per-window view of the raw chat logs: one entry per chat window
 * (as grouped by groupRawEventsByTranscriptPath) with its question/answer turns
 * and token totals.
 *
 * A turn is recorded for every event that has a non-blank
 * `user_queries.last_user_text` or `hook_input.text`; token counters are summed
 * over all events of the window, including those without a turn. Windows with
 * no turns are omitted.
 *
 * @param {Object<string, Array<object>>} rawData Parsed chat_logs content.
 * @returns {Array<object>} Windows in the order returned by the grouping
 *   helper, each with `transcript_path`, `conversation_id`, `conversation_ids`,
 *   `last_activity`, `event_count`, `turn_count`, `tokens` and `turns`
 *   (1-based `index`, `user`, `model`, `at`, `model_name`).
 */
function buildHookChatWindows(rawData) {
  const asCount = (value) => Number(value) || 0;
  const result = [];

  for (const group of groupRawEventsByTranscriptPath(rawData)) {
    const tokens = {
      input_tokens: 0,
      output_tokens: 0,
      cache_read_tokens: 0,
      cache_write_tokens: 0,
    };
    const turns = [];

    for (const entry of group.events) {
      const queries = entry.user_queries;
      const hookData = entry.hook_input || {};
      const userText = typeof queries?.last_user_text === 'string' ? queries.last_user_text.trim() : '';
      const modelText = typeof hookData.text === 'string' ? hookData.text.trim() : '';

      tokens.input_tokens += asCount(hookData.input_tokens);
      tokens.output_tokens += asCount(hookData.output_tokens);
      tokens.cache_read_tokens += asCount(hookData.cache_read_tokens);
      tokens.cache_write_tokens += asCount(hookData.cache_write_tokens);

      if (userText || modelText) {
        turns.push({
          index: turns.length + 1,
          user: userText,
          model: modelText,
          at: entry.timestamp || entry.captured_at || '',
          model_name: hookData.model || '',
        });
      }
    }

    if (turns.length === 0) continue;

    const windowPath = group.transcript_path || group.key;
    const conversationIds = [...group.conversation_ids];
    const primaryId = pickPrimaryConversationId(windowPath, group.conversation_ids);
    const finalEvent = group.events.length > 0 ? group.events[group.events.length - 1] : null;
    const lastActivity = finalEvent ? String(finalEvent.timestamp || finalEvent.captured_at || '') : '';

    result.push({
      transcript_path: windowPath,
      conversation_id: primaryId,
      conversation_ids: conversationIds,
      last_activity: lastActivity,
      event_count: group.events.length,
      turn_count: turns.length,
      tokens,
      turns,
    });
  }
  return result;
}

/**
 * Extract the most recent `hook_input.text` snippets from the events.
 *
 * @param {Array<object>} events Hook events.
 * @param {number} [limit=5] Maximum number of snippets to return.
 * @returns {Array<{preview: string, full: string, at: *, model: *}>}
 *   Newest first (by `timestamp` / `captured_at`, compared as strings);
 *   `preview` is the first 400 characters of the trimmed text, `full` the
 *   whole trimmed text.
 */
function buildAiSnippets(events, limit = 5) {
  const hasText = (entry) => {
    const body = entry?.hook_input?.text;
    return typeof body === 'string' && body.trim() !== '';
  };
  const toSnippet = (entry) => {
    const body = entry.hook_input.text.trim();
    return {
      preview: body.slice(0, 400),
      full: body,
      at: entry.timestamp || entry.captured_at,
      model: entry?.hook_input?.model,
    };
  };

  const snippets = [];
  for (const entry of events) {
    if (hasText(entry)) snippets.push(toSnippet(entry));
  }
  snippets.sort((left, right) => String(right.at).localeCompare(String(left.at)));
  return snippets.slice(0, limit);
}

/**
 * Assemble the evaluation section of the report: a five-part narrative
 * (`overall`), ability dimension scores, follow-up issues and learning advice.
 *
 * @param {object} args
 * @param {Array<{id: string, label: string, done: boolean}>} args.steps Rubric step results.
 * @param {number} args.percent Step completion percentage shown in the narrative.
 * @param {?string} args.userEmail User email; a placeholder is reported when falsy.
 * @param {number} args.convCount Fallback for both counts below when they are not numbers >= 0.
 * @param {number} [args.transcriptWindowCount] Number of merged chat windows.
 * @param {number} [args.conversationBucketCount] Number of raw conversation buckets.
 * @param {number} args.eventCount Total hook events (used for the events-per-window ratio).
 * @param {object} args.tokens Token totals, passed through into `meta`.
 * @param {object} [args.profile] Session profile; an all-false profile is assumed when missing.
 * @param {Array<object>} [args.dimensions] Evaluation dimensions; loaded via
 *   loadEvaluationDimensions(PROJECT_ROOT) when missing or empty.
 * @returns {{overall: string, ability: Array<object>, issues: Array<object>,
 *   learning: string[], meta: object}}
 */
function evaluateNarrative({
  steps,
  percent,
  userEmail,
  convCount,
  transcriptWindowCount,
  conversationBucketCount,
  eventCount,
  tokens,
  profile,
  dimensions,
}) {
  const prof = profile || {
    heuristicUntrustworthy: false,
    pasteHeavy: false,
    courseDump: false,
    hasTraceback: false,
    shortTurns: 0,
  };

  // Prefer the explicit counts; fall back to convCount when they are absent or invalid.
  const bucketN =
    typeof conversationBucketCount === 'number' && conversationBucketCount >= 0
      ? conversationBucketCount
      : convCount;
  const winN =
    typeof transcriptWindowCount === 'number' && transcriptWindowCount >= 0 ? transcriptWindowCount : convCount;

  const doneLabels = steps.filter((s) => s.done).map((s) => s.label);
  const missing = steps.filter((s) => !s.done).map((s) => s.label);
  // The initial value is always overwritten by one of the branches below.
  let tone = '进行中';
  if (prof.heuristicUntrustworthy) {
    tone = '因对话里整段粘贴较多，系统自动给出的「完成度」偏保守，避免只看关键词就认为全对';
  } else if (percent >= 85) tone = '与实训目标较为一致（仍建议对照你已提交的文件是否齐全）';
  else if (percent >= 50) tone = '已覆盖部分环节';
  else tone = '尚处于早期摸索阶段';

  // Part 1: what the evaluation is based on. Omitted entirely when there are no windows.
  const transcriptCtx =
    winN > 0 && bucketN !== winN
      ? `一、这份总体评价依据什么\n主要依据两样：你在 Cursor 里与 AI 的对话（系统把同一聊天窗口合并后，约 ${winN} 个窗口；原始日志里约 ${bucketN} 组条目），以及（若页面后面有）对你远程作业仓库的自动检查。\n\n`
      : winN > 0
        ? `一、这份总体评价依据什么\n主要依据你在 Cursor 里与 AI 的对话（合并后约 ${winN} 个聊天窗口），以及（若页面后面有）对你作业仓库的自动检查。\n\n`
        : '';

  // Part 2: what can be read from the dialogue.
  const learnBlock = prof.heuristicUntrustworthy
    ? `二、从对话里能看出什么\n当前记录里单次粘贴的文字偏长，整体更像整份实训说明、任务书或与报告系统相关的讨论，较少出现你用自己语言描述「数据在哪、有哪些列、终端输出了什么、哪一行报错」等细节。因此，单靠这些聊天内容，不足以代替老师判断你是否一步步独立完成了读表、统计和作图；若你提交的仓库里脚本、数据和图表已经齐全、可运行，请以仓库和实际运行结果为准。\n\n`
    : `二、从对话里能看出什么\n从摘录看，请关注你是否用个人化的说法描述数据路径、列名和运行结果，而不只是复述教材用语。\n\n`;

  // Part 3: comparison with the lab steps; wording depends on trustworthiness and percent.
  let stepBlock = '';
  if (prof.heuristicUntrustworthy) {
    stepBlock =
      `三、关于页面上「步骤完成」的提示\n系统会根据对话里是否出现 read_csv、matplotlib、scores.csv 等词，自动估算完成度（当前约 ${percent}%）。` +
      `若这些词多来自你粘贴的讲义或示例全文，「显示完成」不等于你已经亲手做完每一步；老师仍以你提交的 score_analysis.py、scores.csv、score_chart.png 等是否真实、可用为准。\n\n`;
  } else if (percent >= 85) {
    stepBlock =
      `三、与实训任务的对照（自动估算）\n从关键词与对话形态看，与任务要求的吻合度约 ${percent}%，形态上「${tone}」。` +
      (doneLabels.length
        ? ` 已能对上这些环节（仅供参考）：${doneLabels.slice(0, 4).join('；')}${doneLabels.length > 4 ? '…' : ''}。`
        : '') +
      `\n\n`;
  } else {
    stepBlock =
      `三、与实训任务的对照（自动估算）\n从关键词与对话形态看，约 ${percent}%（${tone}）。` +
      (missing.length ? ` 还可加强：${missing.slice(0, 4).join('；')}。` : '') +
      `\n\n`;
  }

  // Part 4: how AI assistance was used. 12+ events per window counts as "frequent interaction".
  const part4body =
    winN === 0
      ? '目前几乎没有可用的对话片段，暂无法从记录里判断你与 AI 的协作方式。'
      : prof.heuristicUntrustworthy
        ? '从记录形态看，更像一次性粘贴大段材料，不利于看出你是否分步向 AI 描述目标与报错；若你希望报告如实反映你的能力，请多用多轮、短句、带具体路径与报错信息的提问。'
        : eventCount / Math.max(winN, 1) >= 12
          ? '你与 AI 互动较多；若每轮只问一件具体的事、写清约束，更有利于得到可运行的代码。'
          : '建议每轮写清数据路径与期望输出，出错时把终端里的报错全文提供给 AI。';

  // Part 5: advice, selected by the same conditions as part 4.
  const part5body =
    winN === 0
      ? '若你确实在 Cursor 里完成过练习，请确认本平台的练习日志已正确提交或由教师端汇总；否则老师只能看到一份空白或极少的记录。'
      : prof.heuristicUntrustworthy
        ? '把任务拆成「读表 → 统计 → 出图 → 保存」多轮完成；每一轮说明文件路径、想要的结果；若报错，复制完整英文报错与当前目录。'
        : eventCount / Math.max(winN, 1) >= 12
          ? '继续保持「一次一事」的提问习惯，并在每轮附上相关代码或输出，方便 AI 接续上下文。'
          : '先自己运行脚本看一眼输出，再把「现象 + 报错 + 文件路径」写进提问里。';

  const overall =
    `${transcriptCtx}${learnBlock}${stepBlock}四、使用 AI 辅助学习的情况\n${part4body}\n\n五、给你的建议\n${part5body}\n`;

  // Use the caller's dimensions when given and non-empty; otherwise load the configured ones.
  const dimList =
    dimensions && dimensions.length ? dimensions : loadEvaluationDimensions(PROJECT_ROOT);
  const ability = buildAbilityDimensionsFromYaml(dimList, steps, prof);

  // Issues: one for untrustworthy sessions, plus one each when s4 / s2 are not done.
  const issues = [];
  if (prof.heuristicUntrustworthy) {
    issues.push({
      title: '对话记录能否代表你的全部实训过程？',
      body: '当前日志里长段粘贴较多，系统为避免「仅凭关键词就认为步骤全完成」，已对纯聊天推断做了保守处理。老师改作业时仍以你提交的脚本、数据与图表为准。若需要更详细的文字分析，可由教师配置完整报告生成（非快速模式）。',
    });
  }
  if (!steps.find((s) => s.id === 's4')?.done) {
    issues.push({
      title: '大模型运用：可视化与交付物表述',
      body: '未稳定识别学员在对话中围绕 matplotlib、保存 png、score_chart 的分步讨论。若已完成，请在对话里明确图表类型与文件名，便于与头歌评测对齐。',
    });
  }
  if (!steps.find((s) => s.id === 's2')?.done) {
    issues.push({
      title: '大模型运用：数据与 Prompt 结构',
      body: '建议在多轮对话中显式出现 scores.csv、read_csv、均值等业务词与个人运行结果，而不是只在长讲义中出现关键词。',
    });
  }

  // Fixed learning suggestions, independent of the inputs.
  const learning = [
    '把一次大需求拆成「读表 → 清洗/列名 → 统计 → 出图 → 保存」多轮提问，每轮附当前代码片段，更利于大模型稳定输出。',
    '运行失败时粘贴完整 Traceback，并说明工作目录与文件名，体现真实排错能力，也便于模型定位。',
    '对照头歌评测脚本自检：仓库中是否真实存在 scores.csv、score_analysis.py、score_chart.png，并与对话描述一致。',
  ];

  return {
    overall,
    ability,
    issues,
    learning,
    meta: {
      user_hint: userEmail || '（未在日志中发现邮箱）',
      tokens,
      evaluation_signals: {
        heuristic_untrustworthy: prof.heuristicUntrustworthy,
        paste_heavy: prof.pasteHeavy,
        course_style_dump: prof.courseDump,
        transcript_window_count: winN,
        conversation_bucket_count: bucketN,
      },
    },
  };
}

/**
 * Read a chat_logs.json file and build the full lab report from it.
 *
 * The file must contain a JSON object of the form `{ conversationId: Event[] }`.
 * A top-level array is rejected: it also has `typeof 'object'`, and would
 * otherwise be treated as a conversation map keyed by array index. Entries
 * whose value is not an array are skipped.
 *
 * Per conversation, the rubric is matched against the combined text and
 * demoted according to that conversation's paste profile. The overall
 * evaluation instead uses a profile aggregated across transcript windows.
 *
 * @param {string} filePath Path of the chat_logs.json file.
 * @returns {object} `{ ok: false, error, detail? }` for invalid content, otherwise
 *   the report (`ok: true`, `generated_at`, titles, `summary`, `conversations`,
 *   `evaluation`).
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function analyzeChatLogsFile(filePath) {
  const raw = fs.readFileSync(filePath, 'utf8');
  let data;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    return { ok: false, error: 'chat_logs.json 不是合法 JSON', detail: String(e.message) };
  }

  if (!data || typeof data !== 'object' || Array.isArray(data)) {
    return { ok: false, error: 'chat_logs.json 格式应为 { conversationId: Event[] }' };
  }

  const conversations = [];
  const allEvents = [];

  for (const [convId, events] of Object.entries(data)) {
    if (!Array.isArray(events)) continue;
    const combined = collectTexts(events).toLowerCase();
    const convProfile = detectSessionProfile(events);
    const steps = demoteStepsForProfile(matchRubric(combined), convProfile);
    const percent = scoreLabProgress(steps);
    const tokens = aggregateTokens(events);
    const userEmail = events.map((e) => e?.hook_input?.user_email).find(Boolean) || null;
    // Prefer the most recent model name other than 'default'; fall back to any model name.
    const lastModel =
      [...events].reverse().map((e) => e?.hook_input?.model).find((m) => m && m !== 'default') ||
      [...events].reverse().map((e) => e?.hook_input?.model).find(Boolean) ||
      null;

    conversations.push({
      id: convId,
      event_count: events.length,
      user_email: userEmail,
      last_model: lastModel,
      steps,
      lab_progress_percent: percent,
      tokens,
      topic_preview: deriveSessionTopicPreview(events) || null,
      last_capture: events.map((e) => e.timestamp || e.captured_at).filter(Boolean).sort().pop() || null,
      hook_transcript: buildHookTranscript(events),
      questions: inferQuestionsFromEvents(convId, events),
      ai_snippets: buildAiSnippets(events),
    });
    // Append in place rather than reallocating the whole list for every conversation.
    for (const e of events) allEvents.push({ ...e, _convId: convId });
  }

  // Most recently captured conversation first.
  conversations.sort((a, b) => String(b.last_capture || '').localeCompare(String(a.last_capture || '')));

  const dimensions = loadEvaluationDimensions(PROJECT_ROOT);

  const transcriptGroups = groupRawEventsByTranscriptPath(data);
  const mergedText = allEvents.map((e) => collectTexts([e])).join('\n').toLowerCase();
  const mergedProfile = aggregateProfileAcrossTranscriptWindows(transcriptGroups);
  const mergedSteps = demoteStepsForProfile(matchRubric(mergedText), mergedProfile);
  const mergedPercent = scoreLabProgress(mergedSteps);
  const mergedTokens = aggregateTokens(allEvents);
  const primaryEmail =
    conversations.map((c) => c.user_email).find(Boolean) ||
    allEvents.map((e) => e?.hook_input?.user_email).find(Boolean) ||
    null;

  // Count only windows with at least one event carrying user or model text.
  const transcriptWindowCount = transcriptGroups.filter((g) => {
    for (const ev of g.events || []) {
      const uq = ev.user_queries;
      const hook = ev.hook_input || {};
      const u = typeof uq?.last_user_text === 'string' ? uq.last_user_text.trim() : '';
      const m = typeof hook.text === 'string' ? hook.text.trim() : '';
      if (u || m) return true;
    }
    return false;
  }).length;

  const evaluation = evaluateNarrative({
    steps: mergedSteps,
    percent: mergedPercent,
    userEmail: primaryEmail,
    convCount: conversations.length,
    transcriptWindowCount,
    conversationBucketCount: conversations.length,
    eventCount: allEvents.length,
    tokens: mergedTokens,
    profile: mergedProfile,
    dimensions,
  });

  return {
    ok: true,
    generated_at: new Date().toISOString(),
    lab_title: '实训：AI 编程辅助工具原理与实操 — 成绩分析脚本',
    course: '大模型与智能编程实践',
    experiment: 'score_analysis.py / scores.csv / score_chart.png',
    summary: {
      conversation_count: conversations.length,
      hook_event_count: allEvents.length,
      lab_progress_percent: mergedPercent,
      rubric_steps: mergedSteps,
    },
    conversations,
    evaluation,
  };
}

/**
 * Build the placeholder report used when no chat_logs.json is found or the
 * file is empty: zero counts, every rubric step not done, and an evaluation
 * generated from that empty state.
 *
 * @returns {object} A report with the same shape as a successful analysis,
 *   plus a `note` explaining that it is a placeholder.
 */
function defaultReport() {
  // Fresh step objects on every call so the two consumers never share an array.
  const blankSteps = () => RUBRIC.map(({ id, label }) => ({ id, label, done: false }));

  const generatedAt = new Date().toISOString();
  const summary = {
    conversation_count: 0,
    hook_event_count: 0,
    lab_progress_percent: 0,
    rubric_steps: blankSteps(),
  };
  const evaluation = evaluateNarrative({
    steps: blankSteps(),
    percent: 0,
    userEmail: null,
    convCount: 0,
    transcriptWindowCount: 0,
    conversationBucketCount: 0,
    eventCount: 0,
    tokens: { input_tokens: 0, output_tokens: 0, cache_read_tokens: 0 },
    profile: {
      heuristicUntrustworthy: false,
      pasteHeavy: false,
      courseDump: false,
      hasTraceback: false,
      shortTurns: 0,
    },
    dimensions: loadEvaluationDimensions(PROJECT_ROOT),
  });

  return {
    ok: true,
    generated_at: generatedAt,
    lab_title: '实训：AI 编程辅助工具原理与实操 — 成绩分析脚本',
    course: '大模型与智能编程实践',
    experiment: 'score_analysis.py / scores.csv / score_chart.png',
    summary,
    conversations: [],
    evaluation,
    note: '未找到 chat_logs.json 或文件为空，以下为占位结构。',
  };
}

// Public API of this module. LAB_TITLE repeats the `lab_title` string literal
// used by analyzeChatLogsFile and defaultReport.
module.exports = {
  analyzeChatLogsFile,
  defaultReport,
  buildHookChatWindows,
  LAB_TITLE: '实训：AI 编程辅助工具原理与实操 — 成绩分析脚本',
};