diff --git a/.claude/hooks/block-destructive.sh b/.claude/hooks/block-destructive.sh
new file mode 100755
index 00000000..28695fdd
--- /dev/null
+++ b/.claude/hooks/block-destructive.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# PreToolUse hook for the Bash tool. Refuses destructive commands:
+#   1. recursive rm aimed at the filesystem root (/ or /*)
+#   2. git reset --hard
+#   3. force push to master/main
+# The hook payload (JSON) arrives on stdin. A refusal is printed as a
+# hookSpecificOutput "deny" decision; otherwise the hook exits silently so
+# the regular permission flow still decides.
+set -euo pipefail
+
+# Regex building blocks: the boundary before a command word, and one argument.
+readonly START='(^|[[:space:];&|(/])'
+readonly ARG='[^[:space:];&|]+'
+
+# Print a PreToolUse deny decision with reason $1 and stop.
+# Reasons are fixed literals without quotes or backslashes, so interpolating
+# them with printf keeps the output valid JSON.
+deny() {
+  printf '{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "deny", "permissionDecisionReason": "%s"}}\n' "$1"
+  exit 0
+}
+
+# Succeeds when some invocation of the command word $1 inside $cmd (the word
+# plus its arguments, up to the next ; & | separator) matches every regex
+# passed as $2...
+invocation_matches() {
+  local inv_re="${START}$1([[:space:]]+${ARG})*"
+  local rest=$cmd inv re ok
+  shift
+  while [[ $rest =~ $inv_re ]]; do
+    inv=${BASH_REMATCH[0]}
+    ok=1
+    for re in "$@"; do
+      if [[ ! $inv =~ $re ]]; then
+        ok=0
+        break
+      fi
+    done
+    if (( ok )); then
+      return 0
+    fi
+    # Drop everything up to and including this invocation, then look again.
+    rest=${rest#*"$inv"}
+  done
+  return 1
+}
+
+# Pull tool_input.command out of the payload. Best effort: an unreadable
+# payload yields an empty command, which matches none of the rules below.
+input=$(cat)
+cmd=$(python3 -c 'import sys,json; print(json.load(sys.stdin).get("tool_input",{}).get("command") or "")' <<<"$input" 2>/dev/null || true)
+
+# 1. rm with a recursive flag (-rf, -fr, -r -f, --recursive ...) and / or /*
+#    among its arguments
+if invocation_matches 'rm' \
+  '[[:space:]](-[[:alnum:]]*[rR][[:alnum:]]*|--recursive)([[:space:]]|$)' \
+  '[[:space:]]/(\*|[[:space:]]|$)'; then
+  deny '危险操作:rm -rf / 被阻止。如需删除构建产物,请使用更精确的路径。'
+fi
+
+# 2. git reset --hard (would discard uncommitted work)
+if invocation_matches 'git[[:space:]]+reset' '[[:space:]]--hard([[:space:]]|$)'; then
+  deny '危险操作:git reset --hard 被阻止。请先确认所有改动已提交或使用 git stash。'
+fi
+
+# 3. git push with a force flag (--force*, -f, +refspec) that names master/main
+if invocation_matches 'git[[:space:]]+push' \
+  '[[:space:]](--force[^[:space:]]*|-[[:alpha:]]*f[[:alpha:]]*|\+[^[:space:]]+)([[:space:]]|$)' \
+  '[[:space:]:/+](master|main)([[:space:]:]|$)'; then
+  deny '禁止 force push 到 master/main 分支。请使用正常 push 或创建新分支。'
+fi
+
+# Nothing matched: stay silent and leave the decision to the normal flow.
+exit 0
diff --git a/.claude/hooks/spec-reminder.sh b/.claude/hooks/spec-reminder.sh new file mode 100755 index 00000000..2f008613 --- /dev/null +++ b/.claude/hooks/spec-reminder.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# SessionStart 钩子:注入关键规范提醒到上下文 +# 输出 JSON 带 context 字段,会被注入到新会话的上下文中 +cat << 'HEREDOC_END' +{ + "continue": true, + "hookSpecificOutput": { + "hookEventName": "SessionStart", + "additionalContext": "## 本次会话开发规范提醒\n\n遵循 CLAUDE.md 中的七个模块规范。关键要点:\n\n1. **安全第一**:任何优化必须通用,不得针对特定测试用例;段错误高危区(寄存器分配合并后重算degree、大栈帧用movz/movk、大函数限制spill轮次≤3)\n2. **性能北极星**:假设→测量基线→实现→正确性验证→性能测量→决策。修改前跑 ./count_asm.sh\n3. **快门禁(每次commit前)**:./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x\n4. **MCP铁律**:查符号用codegraph,搜字面量才用grep;改代码前先用codegraph_impact评估影响\n5. 
**小diff原则**:每次变更≤200行,大改动分步进行\n" + } +} +HEREDOC_END
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 00000000..f6981373
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,40 @@
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash .claude/hooks/spec-reminder.sh",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash .claude/hooks/block-destructive.sh",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "matcher": "",
+        "hooks": [
+          {
+            "type": "prompt",
+            "prompt": "在结束本次会话之前,请确认以下事项:\n1. 快门禁是否通过?(./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x)\n2. 指令数是否有退化?(如有优化改动,跑 ./count_asm.sh)\n3. 是否有未提交的改动需要处理?\n4. 如有重要经验教训,是否已写入 memory?\n\n请简要回答每个问题(是/否/不适用),然后正常结束。",
+            "timeout": 15
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/CLAUDE.md b/CLAUDE.md index cbd782bc..2ee16d99 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,133 +2,322 @@ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. -## Project Overview +## 语言规范 -SysY 编译器课程实验 — a progressive compiler (Lab1–Lab6) for the SysY language (a C subset) targeting ARM64/AArch64. Built with C++17, CMake, and ANTLR4. +主要使用中文交流、写注释、写文档和 commit message。代码标识符(变量名、函数名、类名)和文件名使用英文。 -## Build Commands +## 项目概述 + +SysY → ARM64/AArch64 编译器,CMake + C++17 + ANTLR 4.13.2。2026 编译系统设计赛(华为毕昇杯)ARM 赛道参赛项目。 + +## 构建 -### Prerequisites ```bash -# Install dependencies (Ubuntu 22.04 / WSL) -sudo apt install -y build-essential cmake git openjdk-11-jre llvm clang gcc-aarch64-linux-gnu qemu-user +# 首次:生成 ANTLR Lexer/Parser +mkdir -p build/generated/antlr4 +java -jar third_party/antlr-4.13.2-complete.jar -Dlanguage=Cpp -visitor -no-listener \ + -Xexact-output-dir -o build/generated/antlr4 src/antlr4/SysY.g4 + +# 全量构建 +cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF +cmake --build build -j "$(nproc)" ``` -### Generate ANTLR Lexer/Parser (required before first build) +可执行文件:`./build/bin/compiler` + +## 编译器 CLI(比赛格式) + ```bash -mkdir -p build/generated/antlr4 -java -jar third_party/antlr-4.13.2-complete.jar \ - -Dlanguage=Cpp -visitor -no-listener -Xexact-output-dir \ - -o build/generated/antlr4 src/antlr4/SysY.g4 +compiler -S -o output.s input.sy # 汇编输出(比赛标准) +compiler -S -o output.s input.sy -O # 带优化 +compiler --emit-ir input.sy # 打印 IR +compiler --emit-parse-tree input.sy # 打印语法树 +``` + +## 架构 + +编译管线:`SysY → [frontend] ANTLR 语法树 → [sem] 语义分析 → [irgen] IR 生成 → [ir/passes] IR 优化 → [mir/Lowering] MIR 降级 → [mir/RegAlloc] 寄存器分配 → [mir/FrameLowering] 栈帧 → [mir/peephole] 窥孔 → [mir/AsmPrinter] AArch64 汇编` + +源码目录: +- `src/frontend/` — ANTLR 驱动、语法树打印 +- `src/sem/` — Sema、SymbolTable、ConstEval +- `src/irgen/` — 语法树 → LLVM IR 翻译 +- `src/ir/` — IR 数据结构(Module→Function→BasicBlock→Instruction),passes/ 含 Mem2Reg/CFGSimplify/ConstFold/ConstProp/DCE/CSE/LICM +- `src/mir/` — 机器 IR(MachineModule→MachineFunction→MachineBasicBlock→MachineInstr),Lowering/RegAlloc/FrameLowering/AsmPrinter/Peephole +- `src/include/` — 各模块头文件 + +关键设计:IR 类型支持 void/i1/i32/float/i32*/float*;MIR 操作数为 PhysReg/VReg/Imm/FrameIndex/Label/Symbol;`-O` 触发所有 IR pass;GP 可分配集含 x16/x17;xzr/wzr 为零寄存器,sp 为栈指针。 + +--- + +# 一、安全第一:编译器优化的生存法则 + +## 竞赛红线(零容忍,违反即取消成绩) + +1. **禁止投机优化**:不得识别特定函数名、字符串、输入特征来激活优化 +2. **禁止硬编码结果**:不得对计算结果预设答案 +3. **禁止依赖 UB**:不得假设数组不越界、除法不溢出等 +4. **优化必须通用**:对所有合法 SysY2026 程序语义保持 + +## 段错误预防(从历史故障中提炼的高危区) + +| 区域 | 常见根因 | 预防规则 | +|------|----------|----------| +| 寄存器分配 | 合并后颜色/degree 不一致 | 合并后总是重算 degree;不修改遍历中的容器 | +| 栈帧 | 大偏移量(>12KB)ldr/str 溢出 | 大栈帧必须用 movz/movk 合成偏移 | +| spill | 无限 spill 循环 | 大函数(>120 vregs)限制 spill 轮次 ≤3 | +| 活跃合并 | 干涉图边不完整 | Briggs 保守测试:邻居 degree≥K 计数 < K | +| 迭代器 | move_adj 自环导致失效 | 合并前检查 u != v,清理后验证 | + +## 优化安全性自检清单 + +实现任何优化变换时必须确认: +- [ ] 对所有合法 SysY2026 程序,语义是否保持? 
+- [ ] 副作用顺序是否保持?(Load/Store/Call 不能重排跨越彼此) +- [ ] 浮点语义是否保持?(不能随意重关联) +- [ ] 是否有针对特定测试用例的投机判断?(有则违规) + +## 修改范围限制 + +- 一次只改一个 pass 或一个模块 +- 寄存器分配和 IR 优化不能混在一次改动中 +- 改 MIR 层前先确认 IR 层正确性基线 + +--- + +# 二、性能北极星:指令数驱动的开发循环 + +## 核心循环(不可跳步) + ``` +假设 → 测量基线 → 实现 → 正确性验证 → 性能测量 → 决策 + ↑ | + └──────────── 回退或调整 ←─────────────────────────────┘ +``` + +| 步骤 | 命令/动作 | +|------|-----------| +| 假设 | 写一句话:「改 X,预期指令数减少 Y 条」 | +| 基线 | `./count_asm.sh` | +| 实现 | 写代码 | +| 正确性 | `./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x` | +| 性能 | `./count_asm.sh` | +| 决策 | 对比基线,判断合并/调整/回退 | + +## 指令数作为核心指标 + +指令数(`wc -l` 汇编)无测量噪声、可复现。目标硬件 Cortex-A53 为顺序核,指令数与运行时间相关性较高。指令数减少是积极信号但非绝对保证。 + +## 性能退化门禁 + +| 规模 | 动作 | +|------|------| +| 大面积退化(>5 用例) | **阻止合并**,分析根因 | +| 少量退化(2-5 用例,<5%) | 标记关注,分析根因,记录到 commit | +| 零散退化(1-2 用例,<2%) | 可容忍,注明到 commit | +| 零退化 + 改善 | **理想**,更新基线,合并 | + +## 基线管理 + +`指令数基线.md` 记录历史最低值,`count_asm.sh` 自动维护——新值更低时才更新。性能改善 commit 须附带基线更新。 + +--- + +# 三、AI 协作协议 + +## 任务规模 → 流程映射 + +| 规模 | 特征 | 流程 | +|------|------|------| +| 轻量 | 单文件、阈值调整、明显 bug(<30 分钟) | 直接改 → 快门禁 → commit | +| 中量 | 跨文件、新 pass、算法改动(1-4 小时) | brainstorming → 计划 → 实现 → 快门禁 → 中门禁 | +| 重量 | 寄存器分配重构、IR 框架(多日) | office-hours → GSD 全流程 → 全门禁 → extract-learnings | + +## 提示词模式 + +- **先说为什么再说做什么**:告诉 AI 目标(「减少 spill 代码量」)而非操作(「修改 spillCost 函数」) +- **先计划后代码**:要求 AI 先提变更计划,审查通过后再写代码。即使是「简单」改动 +- **显式约束前置**:一次性给出所有约束(红线、修改范围、语义安全要求) +- **小 diff(≤200 行)**:大改动分步进行;大 diff 的审查成本超过生成收益 + +## AI 不能做的事 + +- **不能自行验证自己生成的代码**——必须通过编译器运行和测试脚本等外部工具验证 +- **不能决定核心算法设计**——可以建议和实现,最终设计由人审查 +- **不能跳过门禁**——任何改动都必须过测试 +- **AI 生成的测试不能作为唯一的正确性验证**——需要独立的外部 oracle(.out 对比) + +## DeepSeek 特化策略 + +- **开启 thinking mode**:复杂推理任务使用 `reasoning_effort: max` +- **利用上下文缓存**:同一文件反复读取几乎免费,大胆多次参考已有代码 +- **分割大 prompt**:DeepSeek 在独立小任务上表现最好 +- **C++ 代码 double-check**:DeepSeek 在系统编程上偏弱,指针/迭代器/内存操作需额外验证 + +## MCP 使用铁律 + +| 场景 | 工具 | 不要 | +|------|------|------| +| 查找符号 | `codegraph_search` | 不要 grep | +| 调用关系 | 
`codegraph_callers/callees` | 不要手动 Read 追踪 | +| 改动影响 | `codegraph_impact` | 不要猜测 | +| 代码区探索 | `codegraph_explore`(一次) | 不要逐个 codegraph_node | +| 字面量搜索 | `grep` | 不要用 codegraph 搜 | +| PR 管理 | `gh create_pr/create_review` | 不要手动 | + +--- + +# 四、三层门禁 + +## 数据集优先级 + +- **一级**:`2026test/`(竞赛官方,有 .out,每次必跑) +- **二级**:`test_merged/`、`testdata2022/`、`testdata2024/`(历史回归,关键节点跑) + +## 快门禁(每次 commit 前,~2 分钟) -### Lab1 (frontend only — parse tree printing) ```bash -cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON -cmake --build build -j "$(nproc)" -./build/bin/compiler --emit-parse-tree test/test_case/functional/simple_add.sy +./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x ``` -### Full build (all labs, including IR gen, optimization, and codegen) +通过标准:0 失败。失败则阻止 commit。 + +## 中门禁(每次 merge 前,~10 分钟) + ```bash -cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF -cmake --build build -j "$(nproc)" +./2026test.sh -c functional -x +./2026test.sh -c h_functional -x +./2026test.sh -c performance -x +./count_asm.sh ``` -## Compiler Usage +通过标准:0 功能失败 + 指令数无大面积退化。 + +## 全门禁(关键节点,~30 分钟) ```bash -# Competition format: compile to assembly -./build/bin/compiler -S -o output.s input.sy +./2026test.sh # 全量 +# 历史数据集回归 +for f in test_merged/**/*.sy; do + ./scripts/verify_asm.sh "$f" --run -O || echo "FAIL: $f" +done +``` + +触发条件:寄存器分配重构、IR 框架改动、赛前验证。 + +## 门禁纪律 + +- 绝不跳过门禁——改动能通过门禁才是真的「完成」 +- 功能测试失败时绝不进入性能测量——先修正确性 +- 门禁失败后修复再跑——不允许「先合并后修复」 + +--- + +# 五、代码与 Git 规范 -# With optimization -./build/bin/compiler -S -o output.s input.sy -O1 +## 命名 -# Emit IR -./build/bin/compiler --emit-ir input.sy +- 变量:`snake_case`;函数/类:`PascalCase`;成员变量:`snake_case_` +- 标识符和文件名用英文 -# Single test: compile, link, run with QEMU, and compare output -./scripts/verify_asm.sh test/test_case/functional/simple_add.sy --run +## 中文规范 -# Same for IR path (uses llc + clang to compile/run) -./scripts/verify_ir.sh test/test_case/functional/simple_add.sy --run 
+注释、commit message、文档、错误信息用中文。IR/MIR 调试打印用英文。 + +```cpp +// 使用 Briggs 保守测试而非 George,因为 O(k) vs O(k^2) +if (HasMovePair(u)) ReactivatePairs(u); ``` -### CLI Options -| Flag | Effect | -|------|--------| -| `-S` | Emit assembly (default when no mode specified) | -| `-o ` | Output file path | -| `-O`, `-O1`, `-O2`, `-O3` | Optimization level | -| `--emit-parse-tree` | Print ANTLR parse tree | -| `--emit-ir` | Print LLVM-style IR | -| `--emit-asm` | Print AArch64 assembly | -| `-h`, `--help` | Show help | +## Git -## Testing +格式:`<type>(<scope>): <中文简述>`。一 commit 一逻辑变更。不提交编译或测试失败的代码。功能分支开发,master 保护。 -### Main test harness ```bash -# Run all 2026test functional tests with optimization -./2026test.sh +--- + +# 六、自进化机制 + +## 触发条件 -# Functional tests only, max 10 cases, stop on first failure -./2026test.sh -c functional -n 10 -x +1. **故障驱动**:段错误、性能退化、测试失败 → 诊断根因 → 可预防则更新本规范 +2. **成功固化**:新优化方法被验证有效 → 记录模式 +3. **定期审查**:每两周 review 本规范,删除过时规则,强化有效规则 -# Without optimization -./2026test.sh -O0 +## 操作流程 + +``` +故障发生 → 根因分析(中文 ≤100 字) + → 可预防? 
+ ├─ 是 → 更新 CLAUDE.md 或 memory + └─ 否 → 只记录 ``` -### Legacy test scripts -```bash -./test1.sh # Lab1: syntax tree -./test2.sh # Lab2: IR generation -./test3.sh # Lab3: assembly generation -./test4.sh # Lab4: scalar optimization -./test5.sh # Lab5: register allocation -``` - -## Architecture - -### Compiler Pipeline -``` -SysY source (.sy) → ANTLR Lexer/Parser → AST (ANTLR parse tree) - → Sema (name resolution, type checking) → IR (LLVM-style, load/store form) - → IR Passes (Mem2Reg → LICM → ConstFold/Prop/DCE/CFG/CSE) → MIR (machine IR, AArch64) - → RegAlloc → FrameLowering → Peephole → AArch64 Assembly output -``` - -### Source Layout - -| Directory | Purpose | -|-----------|---------| -| `src/antlr4/SysY.g4` | ANTLR grammar for SysY language | -| `src/frontend/` | ANTLR driver + syntax tree printer (Lab1) | -| `src/sem/` | Semantic analysis: name binding, scope, type checking (Lab2 prep) | -| `src/irgen/` | IR generation via ANTLR visitor: Decl, Exp, Stmt, Func (Lab2) | -| `src/ir/` | LLVM-style IR: Value/User/Use hierarchy, Module/Function/BasicBlock, IRBuilder (Lab2) | -| `src/ir/passes/` | Scalar optimizations (Lab4): Mem2Reg, ConstFold, ConstProp, DCE, CFGSimplify, CSE, LICM | -| `src/ir/analysis/` | DominatorTree, LoopInfo | -| `src/mir/` | Machine IR + AArch64 backend (Lab3, Lab5): Lowering (IR→MIR), RegAlloc, FrameLowering, AsmPrinter | -| `src/mir/passes/` | MIR peephole pass | -| `src/utils/` | CLI argument parsing, logging | -| `src/include/` | **Build-time headers.** At build time CMake adds `src/include` as include path. | -| `include/` | **Platform-provided headers.** Gitignored — supplied externally by grading platform. Mirrors `src/include/`. 
| -| `sylib/` | SysY runtime library (sylib.c), linked into final executables | -| `scripts/` | verify_asm.sh, verify_ir.sh — single-case test helpers | -| `third_party/` | ANTLR jar + antlr4-runtime sources | -| `test/test_case/` | Reference test cases with expected outputs | - -### Key Design Patterns - -- **IR IR**: Lightweight LLVM-style IR with `Value → User → Instruction` class hierarchy and def-use chains via `Use` objects. `IRBuilder` appends instructions to a `BasicBlock`. The IR starts in load/store (alloca-based) form; `Mem2Reg` promotes allocas to SSA phi nodes. -- **MIR IR**: Lower-level, three-address machine IR using AArch64 opcodes and a union-like `Operand` (reg, vreg, imm, frame index, label, symbol). Purely a data container — no SSA, no def-use analysis. -- **ANTLR Visitor**: `IRGenImpl` extends the ANTLR-generated `SysYBaseVisitor` to walk the parse tree and emit IR. Each `visit*` method returns `std::any` (typically `ir::Value*`). -- **Pass infrastructure**: Each IR pass is a standalone function (`RunMem2Reg`, `RunDCE`, etc.) taking a `Module&`. `PassManager` and `PassManagerModule` orchestrate them with fixed-point iteration (serialize, compare, re-run until convergence). - -### Important Notes - -- The `#if COMPILER_PARSE_ONLY` macro in `main.cpp` guards all code beyond Lab1. The CMake option `COMPILER_PARSE_ONLY` controls whether `sem`, `irgen`, and `mir` subdirectories are built. -- `include/` is in `.gitignore` and absent from the build include path. It is provided externally by the grading platform. When editing headers, work in `src/include/`. -- The grammar `SysY.g4` defines the SysY language subset: `int`/`float`/`void` types, arrays, `const`, `if`/`else`/`while`/`break`/`continue`/`return`, and C-like expressions including logical short-circuit `&&`/`||`. -- Commit message convention: `(): ` where type ∈ {feat, fix, refactor, docs, test, chore} and scope ∈ {frontend, irgen, backend, test, doc}. 
+## 有效性度量 + +- 同类 bug 是否再次出现 +- 门禁是否在 commit 前拦截了问题 +- 指令数趋势是否持续改善 + +--- + +# 七、Skill & MCP 最优编排 + +## 仅保留有用的工具 + +本项目是 C++ 编译器,以下可用工具**有价值**: +- **MCP**:codegraph(代码智能)、github(PR/issue) +- **流程 skill**:brainstorming、writing-plans、TDD、verification-before-completion、systematic-debugging +- **门禁 skill**:gsd-code-review、review +- **战略 skill**(仅多日架构决策):gsd-discuss/plan/execute-phase、office-hours、gsd-extract-learnings、ship + +以下**忽略**:qa/browse/benchmark/canary(Web)、figma/react-bits/design-shotgun(前端)、cso/security-review(Web 安全) + +## 三种模式 + +### 快刀(<30 分钟) + +``` +codegraph_context → 直接改 → 快门禁 → commit +``` + +只用 Superpowers 战术层。示例:调阈值、修明显 bug。 + +### 标准优化(1-4 小时) + +``` +brainstorming → writing-plans → TDD → 快门禁 → gsd-code-review → 中门禁 → commit +``` + +**brainstorming 是强制的**——「太简单不需要设计」是反模式。 + +### 重器(多日) + +``` +office-hours → gsd-discuss → gsd-plan → gsd-execute(内含 TDD 循环) + → 全门禁 → gsd-code-review → gsd-extract-learnings → ship +``` + +只在 3-4 个关键架构点使用。编译器反馈循环很快,偏向执行而非过度计划。 + +## 最高 ROI 组合 + +``` +bug 发现 → codegraph_impact(影响范围) + → gh create_issue(用 impact 结果作 body) + → brainstorming(设计方案) + → TDD(先写会失败的测试) + → 实现 → verification(快门禁) + → gh create_pr +``` + +每一步都有外部信号验证,避免 AI 自说自话。 + +## 自进化闭环 + +``` +gsd-extract-learnings → 更新 CLAUDE.md/memory + ↑ ↓ + 全门禁验证 下次任务自动加载 + ↑ ↓ + 实现优化 AI 知道上次的教训 +``` diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp index 679eb75c..1252c6be 100644 --- a/src/mir/Lowering.cpp +++ b/src/mir/Lowering.cpp @@ -115,6 +115,24 @@ namespace mir } } + // Mirrors a condition code for a compare whose two operands have been swapped + static CondCode SwapCondCode(CondCode cond) + { + switch (cond) + { + case CondCode::LT: + return CondCode::GT; + case CondCode::LE: + return CondCode::GE; + case CondCode::GT: + return CondCode::LT; + case CondCode::GE: + return CondCode::LE; + default: + return cond; // EQ/NE are symmetric + } + } + static PhysReg GetArgWReg(size_t index) { static const PhysReg regs[] = { @@ -338,16 +356,43 @@ namespace mir { if (IsIntegerCompareOpcode(bin->GetOpcode())) { - int lhs = 
EmitIntValue(bin->GetLhs(), function, value_vregs,
-                                           scalar_slots, array_slots, block);
-                    int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
-                                           scalar_slots, array_slots, block);
-                    block.Append(Opcode::CmpRR,
-                                 {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+                    // Fold a small constant operand into CmpImm so no MovImm is needed for it
+                    int lhs_imm, rhs_imm;
+                    bool lhs_const = TryGetConstantInt(bin->GetLhs(), lhs_imm);
+                    bool rhs_const = TryGetConstantInt(bin->GetRhs(), rhs_imm);
+                    auto imm_fits = [](int imm) { return imm >= 0 && imm <= 4095; };
+
+                    CondCode cond = GetCondCodeForCompareOpcode(bin->GetOpcode());
+
+                    if (rhs_const && imm_fits(rhs_imm))
+                    {
+                        int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
+                                               scalar_slots, array_slots, block);
+                        block.Append(Opcode::CmpImm,
+                                     {Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
+                    }
+                    else if (lhs_const && imm_fits(lhs_imm))
+                    {
+                        int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
+                                               scalar_slots, array_slots, block);
+                        block.Append(Opcode::CmpImm,
+                                     {Operand::VReg(rhs, VRegClass::Int), Operand::Imm(lhs_imm)});
+                        cond = SwapCondCode(cond);
+                    }
+                    else
+                    {
+                        int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
+                                               scalar_slots, array_slots, block);
+                        int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
+                                               scalar_slots, array_slots, block);
+                        block.Append(Opcode::CmpRR,
+                                     {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+                    }
+
                     int dst = function.CreateVReg(VRegClass::Int);
                     block.Append(Opcode::CSet,
                                  {Operand::VReg(dst, VRegClass::Int),
-                                  Operand::Imm(static_cast(GetCondCodeForCompareOpcode(bin->GetOpcode())))});
+                                  Operand::Imm(static_cast(cond))});
                     value_vregs[value] = dst;
                     return dst;
                 }
@@ -958,12 +1003,31 @@ namespace mir
             return;
         }
 
-        int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
-                               scalar_slots, array_slots, block);
-        int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
-                               scalar_slots, array_slots, block);
-        block.Append(Opcode::CmpRR,
-                     {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+        // Fold a small constant right-hand side into CmpImm.
+        // Only the rhs is folded: this path emits the compare alone and reports no
+        // swapped condition, so comparing against a constant lhs would mirror the
+        // flags for LT/LE/GT/GE.
+        // NOTE(review): the consumer presumably derives its condition from the
+        // compare opcode - confirm before re-adding an lhs fold.
+        int rhs_imm;
+        bool rhs_const = TryGetConstantInt(bin.GetRhs(), rhs_imm);
+        // CmpImm is only used for immediates in [0, 4095].
+        auto imm_fits = [](int imm) { return imm >= 0 && imm <= 4095; };
+
+        int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
+                               scalar_slots, array_slots, block);
+        if (rhs_const && imm_fits(rhs_imm))
+        {
+            block.Append(Opcode::CmpImm,
+                         {Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
+        }
+        else
+        {
+            int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
+                                   scalar_slots, array_slots, block);
+            block.Append(Opcode::CmpRR,
+                         {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+        }
     }
 
     static bool TryEmitCondValueToFlags(const ir::Value *value,
diff --git a/指令数基线.md b/指令数基线.md index 37066c6c..29f24025 100644 --- a/指令数基线.md +++ b/指令数基线.md @@ -20,66 +20,66 @@ | 测试集标识 | 基线(行) | |---|---| -| performance/01_mm1 | 310 | -| performance/01_mm2 | 310 | -| performance/01_mm3 | 310 | -| performance/03_sort1 | 640 | -| performance/03_sort2 | 640 | -| performance/03_sort3 | 640 | -| performance/conv2d-1 | 629 | -| performance/conv2d-2 | 629 | -| performance/conv2d-3 | 629 | -| performance/crc1 | 290 | -| performance/crc2 | 290 | -| performance/crc3 | 290 | -| performance/crypto-1 | 1949 | -| performance/crypto-2 | 1949 | -| performance/crypto-3 | 1949 | -| performance/fft0 | 605 | -| performance/fft1 | 605 | -| performance/fft2 | 605 | -| performance/h-1-01 | 158 | -| performance/h-1-02 | 158 | -| performance/h-1-03 | 158 | -| performance/h-10-01 | 329 | -| 
performance/h-10-02 | 329 | -| performance/h-10-03 | 329 | +| performance/01_mm1 | 309 | +| performance/01_mm2 | 309 | +| performance/01_mm3 | 309 | +| performance/03_sort1 | 641 | +| performance/03_sort2 | 641 | +| performance/03_sort3 | 641 | +| performance/conv2d-1 | 656 | +| performance/conv2d-2 | 656 | +| performance/conv2d-3 | 656 | +| performance/crc1 | 279 | +| performance/crc2 | 279 | +| performance/crc3 | 279 | +| performance/crypto-1 | 1926 | +| performance/crypto-2 | 1926 | +| performance/crypto-3 | 1926 | +| performance/fft0 | 597 | +| performance/fft1 | 597 | +| performance/fft2 | 597 | +| performance/h-1-01 | 157 | +| performance/h-1-02 | 157 | +| performance/h-1-03 | 157 | +| performance/h-10-01 | 328 | +| performance/h-10-02 | 328 | +| performance/h-10-03 | 328 | | performance/h-4-01 | 163 | | performance/h-4-02 | 163 | | performance/h-4-03 | 163 | -| performance/h-5-01 | 352 | -| performance/h-5-02 | 352 | -| performance/h-5-03 | 352 | -| performance/h-8-01 | 407 | -| performance/h-8-02 | 407 | -| performance/h-8-03 | 407 | -| performance/h-9-01 | 227 | -| performance/h-9-02 | 227 | -| performance/h-9-03 | 227 | -| performance/huffman-01 | 829 | -| performance/huffman-02 | 829 | -| performance/huffman-03 | 829 | +| performance/h-5-01 | 341 | +| performance/h-5-02 | 341 | +| performance/h-5-03 | 341 | +| performance/h-8-01 | 411 | +| performance/h-8-02 | 411 | +| performance/h-8-03 | 411 | +| performance/h-9-01 | 224 | +| performance/h-9-02 | 224 | +| performance/h-9-03 | 224 | +| performance/huffman-01 | 792 | +| performance/huffman-02 | 792 | +| performance/huffman-03 | 792 | | performance/knapsack_naive-1 | 167 | | performance/knapsack_naive-2 | 167 | | performance/knapsack_naive-3 | 167 | -| performance/many_mat_cal-1 | 432 | -| performance/many_mat_cal-2 | 432 | -| performance/many_mat_cal-3 | 432 | -| performance/matmul1 | 366 | -| performance/matmul2 | 366 | -| performance/matmul3 | 366 | -| performance/optimization_scheduling1 | 122 | -| 
performance/optimization_scheduling2 | 122 | -| performance/optimization_scheduling3 | 122 | -| performance/shuffle0 | 472 | -| performance/shuffle1 | 472 | -| performance/shuffle2 | 472 | -| performance/sl1 | 264 | -| performance/sl2 | 264 | -| performance/sl3 | 264 | -| performance/transpose0 | 207 | -| performance/transpose1 | 207 | -| performance/transpose2 | 207 | +| performance/many_mat_cal-1 | 434 | +| performance/many_mat_cal-2 | 434 | +| performance/many_mat_cal-3 | 434 | +| performance/matmul1 | 379 | +| performance/matmul2 | 379 | +| performance/matmul3 | 379 | +| performance/optimization_scheduling1 | 116 | +| performance/optimization_scheduling2 | 116 | +| performance/optimization_scheduling3 | 116 | +| performance/shuffle0 | 471 | +| performance/shuffle1 | 471 | +| performance/shuffle2 | 471 | +| performance/sl1 | 261 | +| performance/sl2 | 261 | +| performance/sl3 | 261 | +| performance/transpose0 | 204 | +| performance/transpose1 | 204 | +| performance/transpose2 | 204 | ## 统计