# code-analysis/Report-Generation/cppcheck_test_generator/main.py

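"""
Command-line entry point.

Parses a cppcheck report (XML or text), filters the reported issues,
generates C++ reproduction test cases for them and optionally verifies
the generated tests.
"""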
import argparse
import sys
from pathlib import Path
from typing import List, Set

from .models import CppcheckIssue
from .parsers import parse_cppcheck_xml, parse_cppcheck_text
from .analysis import (
    analyze_project_structure,
    filter_and_clean_issues,
    write_cleaned_report,
    get_enhanced_issue_analysis
)
from .generation import (
    generate_test_for_issue,
    smart_select_issues,
    write_issue_output
)
from .verification import (
    auto_verify_tests,
    generate_verification_report,
    generate_json_report
)


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the argument parser describing every command-line option."""
    parser = argparse.ArgumentParser(description="根据 cppcheck XML 与源码生成可运行的 C++ 复现用例")
    # The positional is optional so that ``--cleaned-report`` can be used on
    # its own; ``main`` rejects invocations that supply neither source.
    parser.add_argument(
        "report",
        nargs="?",
        default=None,
        help="cppcheck 报告路径：支持 XML（--xml）或文本日志（自动识别或 --text）",
    )
    parser.add_argument("--out", default="cppcheck_tests", help="输出目录，默认 cppcheck_tests")
    parser.add_argument("--model", default="deepseek-chat", help="模型名称，默认 deepseek-chat")
    parser.add_argument("--emit-runner", action="store_true", help="为每个用例生成一键编译运行的 PowerShell 脚本")
    parser.add_argument("--text", action="store_true", help="强制按文本日志格式解析")
    parser.add_argument("--xml", action="store_true", help="强制按 XML 格式解析")
    parser.add_argument("--max", type=int, default=10, help="最多处理前 N 条问题（默认 10，设为 0 表示不限）")
    parser.add_argument(
        "--severities",
        default="warning,error",
        help="过滤等级，逗号分隔（如 warning,error,information,note；默认 warning,error）",
    )
    parser.add_argument(
        "--include-ids",
        default="",
        help="仅包含这些 ruleId（逗号分隔，留空表示不限）",
    )
    parser.add_argument(
        "--exclude-ids",
        default="missingInclude,missingIncludeSystem,toomanyconfigs,normalCheckLevelMaxBranches,checkLevelNormal,unknown",
        help="排除这些 ruleId（逗号分隔，默认排除若干低价值项）",
    )
    parser.add_argument(
        "--smart-select",
        action="store_true",
        help="使用AI智能选择最有代表性的测试用例（推荐用于大量问题）",
    )
    parser.add_argument(
        "--smart-max",
        type=int,
        default=10,
        help="智能选择模式下的最大测试用例数量（默认10）",
    )
    parser.add_argument(
        "--auto-verify",
        action="store_true",
        help="生成测试用例后自动运行验证并生成结果报告",
    )
    parser.add_argument(
        "--verify-timeout",
        type=int,
        default=30,
        help="验证超时时间（秒，默认30）",
    )
    parser.add_argument(
        "--verify-tests",
        action="store_true",
        help="生成测试用例时立即验证每个测试用例的有效性",
    )
    parser.add_argument(
        "--use-templates",
        action="store_true",
        help="使用预定义的测试用例模板，确保能有效触发cppcheck检测",
    )
    parser.add_argument(
        "--project-root",
        help="原始项目根目录路径（用于包含头文件和依赖）",
    )
    parser.add_argument(
        "--include-dirs",
        help="额外的头文件包含目录（逗号分隔）",
    )
    parser.add_argument(
        "--integration-test",
        action="store_true",
        help="生成集成测试用例（需要原始项目）",
    )
    parser.add_argument(
        "--enhanced-analysis",
        action="store_true",
        help="启用增强分析模式，基于代码上下文和项目结构进行智能筛选",
    )
    parser.add_argument(
        "--clean-report",
        action="store_true",
        help="生成清理后的cppcheck报告文件，过滤掉不可靠的问题",
    )
    parser.add_argument(
        "--cleaned-report",
        help="使用已清理的报告文件（跳过问题过滤步骤）",
    )
    return parser


def _split_csv(value, *, lowercase: bool = False) -> Set[str]:
    """Split a comma-separated option value into a set of non-empty tokens."""
    tokens = (tok.strip() for tok in (value or "").split(","))
    return {tok.lower() if lowercase else tok for tok in tokens if tok}


def _apply_basic_filters(
    issues: "List[CppcheckIssue]",
    sev_set: Set[str],
    include_ids: Set[str],
    exclude_ids: Set[str],
) -> "List[CppcheckIssue]":
    """Filter issues by severity and rule id, then drop duplicates.

    An empty filter set disables that filter. Issues with an empty severity
    are never rejected by the severity filter. Duplicates are detected by
    the key ``(id, first location file, first location line)``.
    """
    kept: "List[CppcheckIssue]" = []
    seen: Set[tuple] = set()
    for iss in issues:
        if sev_set and iss.severity and iss.severity.lower() not in sev_set:
            continue
        if include_ids and iss.id not in include_ids:
            continue
        if exclude_ids and iss.id in exclude_ids:
            continue
        if iss.locations:
            first = iss.locations[0]
            key = (iss.id, str(first.file_path), first.line)
        else:
            key = (iss.id, "", None)
        if key in seen:
            continue
        seen.add(key)
        kept.append(iss)
    return kept


def _resolve_include_dirs(raw) -> List[str]:
    """Return the existing include directories from a comma-separated list.

    Each entry is expanded and resolved to an absolute path; entries that do
    not exist are reported with a warning and skipped.
    """
    valid: List[str] = []
    for entry in (raw or "").split(","):
        entry = entry.strip()
        if not entry:
            continue
        include_path = Path(entry).expanduser().resolve()
        if include_path.exists():
            valid.append(str(include_path))
        else:
            print(f"警告: 头文件目录不存在: {include_path}")
    return valid


def _print_verification_summary(verification_results) -> None:
    """Print the ``summary`` counters and the confirmed entries of ``results``."""
    summary = verification_results["summary"]
    print("\n验证汇总:")
    print(f"  总测试用例: {summary['total']}")
    print(f"  编译成功: {summary['compiled']}")
    print(f"  执行成功: {summary['executed']}")
    print(f"  漏洞确认: {summary['vulnerabilities_confirmed']}")
    print(f"  验证超时: {summary['timeouts']}")
    print(f"  验证错误: {summary['errors']}")

    confirmed_vulns = [r for r in verification_results["results"] if r["vulnerability_confirmed"]]
    if confirmed_vulns:
        print(f"\n确认的漏洞 ({len(confirmed_vulns)} 个):")
        for result in confirmed_vulns:
            print(f"  ✓ {result['file']}: {result['vulnerability_type']}")
    else:
        print("\n未确认任何漏洞")


def main(argv: list[str]) -> int:
    """Run the generator: parse a report, filter issues, generate tests.

    Steps, in order:
      1. Locate the report (``--cleaned-report`` takes precedence over the
         positional ``report``) and parse it as XML or text.
      2. Apply the basic severity / rule-id filters and de-duplicate.
      3. Optionally analyse the project (``--project-root``) and validate
         ``--include-dirs``.
      4. Optionally clean the issue list (``--clean-report`` or
         ``--enhanced-analysis``).
      5. Select the issues to process (smart selection or first ``--max``).
      6. Generate and write one test case per selected issue.
      7. Optionally verify the generated tests (``--auto-verify``) and write
         the Markdown and JSON reports.

    NOTE(review): ``--use-templates`` is accepted by the parser but its value
    is not consumed anywhere in this function.

    Args:
        argv: Command-line arguments, without the program name.

    Returns:
        ``0`` when processing completes (including when no issue survives
        the basic filters).

    Raises:
        SystemExit: If the arguments are invalid, if neither a report nor
            ``--cleaned-report`` is given, or if the report file is missing.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    # Locate the report file.
    if args.cleaned_report:
        report_path = Path(args.cleaned_report).expanduser().resolve()
        if not report_path.exists():
            raise SystemExit(f"找不到已清理的报告文件: {report_path}")
        print(f"使用已清理的报告文件: {report_path}")
    elif args.report is not None:
        report_path = Path(args.report).expanduser().resolve()
        if not report_path.exists():
            raise SystemExit(f"找不到报告文件: {report_path}")
    else:
        # Mirrors argparse's own "missing argument" failure (exit status 2).
        parser.error("the following arguments are required: report (or --cleaned-report)")

    # Resolve the output directory once so the cleaned report and the
    # generated tests always end up in the same place.
    output_dir = Path(args.out).expanduser().resolve()

    # Parse the report: --xml wins, otherwise a .xml suffix selects the XML
    # parser unless --text forces the text parser.
    issues: List[CppcheckIssue] = []
    if args.xml or (report_path.suffix.lower() in {".xml"} and not args.text):
        issues = parse_cppcheck_xml(report_path)
    else:
        issues = parse_cppcheck_text(report_path)

    print(f"原始报告包含 {len(issues)} 个问题")

    # Basic filtering: severity, included/excluded rule ids, de-duplication.
    filtered = _apply_basic_filters(
        issues,
        _split_csv(args.severities, lowercase=True),
        _split_csv(args.include_ids),
        _split_csv(args.exclude_ids),
    )

    print(f"基本过滤后剩余 {len(filtered)} 个问题")

    if not filtered:
        print("未在报告中发现问题项。")
        return 0

    # Project context.
    project_root = None
    project_info = None

    if args.project_root:
        project_root = Path(args.project_root).expanduser().resolve()
        if not project_root.exists():
            print(f"警告: 项目根目录不存在: {project_root}")
            project_root = None
        else:
            print("正在分析项目结构...")
            project_info = analyze_project_structure(project_root)
            print(f"项目分析完成: 发现 {len(project_info['source_files'])} 个源文件, {len(project_info['header_files'])} 个头文件")

    include_dirs = _resolve_include_dirs(args.include_dirs)

    # Issue cleaning.
    if args.clean_report and not args.cleaned_report:
        print("\n" + "="*50)
        print("开始问题过滤和清理...")
        print("="*50)

        cleaned_issues = filter_and_clean_issues(filtered, project_info)

        # Make sure the destination exists before the report is written.
        output_dir.mkdir(parents=True, exist_ok=True)
        cleaned_report_path = output_dir / "cleaned_cppcheck_report.txt"
        write_cleaned_report(cleaned_issues, cleaned_report_path)

        print("\n清理完成！")
        print(f"原始问题数量: {len(issues)}")
        print(f"基本过滤后: {len(filtered)}")
        print(f"智能清理后: {len(cleaned_issues)}")
        print(f"清理后的报告已保存: {cleaned_report_path}")

        # Continue with the cleaned issue list.
        filtered = cleaned_issues
    elif args.enhanced_analysis:
        print("\n" + "="*50)
        print("开始增强分析...")
        print("="*50)

        filtered = filter_and_clean_issues(filtered, project_info)

    # Issue selection.
    if args.smart_select or args.enhanced_analysis:
        if args.enhanced_analysis:
            print(f"启用增强分析模式，从 {len(filtered)} 个问题中选择最多 {args.smart_max} 个最有代表性的测试用例...")
        else:
            print(f"启用AI智能选择模式，从 {len(filtered)} 个问题中选择最多 {args.smart_max} 个最有代表性的测试用例...")
        issues = smart_select_issues(filtered, args.smart_max, args.model)
    elif args.max and args.max > 0:
        # Traditional mode: simply cap the number of issues.
        issues = filtered[: args.max]
    else:
        issues = filtered

    # Generate one test case per selected issue.
    for idx, issue in enumerate(issues, start=1):
        print(f"生成测试用例 {idx}/{len(issues)}: {issue.id}")

        code_context, relevance_analysis = get_enhanced_issue_analysis(issue, project_info)

        print(f"  相关性分数: {relevance_analysis['relevance_score']}, 置信度: {relevance_analysis['confidence']}%")
        if code_context.function_name:
            print(f"  所在函数: {code_context.function_name}")
        if code_context.class_name:
            print(f"  所在类: {code_context.class_name}")

        content = generate_test_for_issue(
            issue,
            model=args.model,
            project_root=project_root,
            include_dirs=include_dirs,
            integration_test=args.integration_test,
            code_context=code_context,
            relevance_analysis=relevance_analysis
        )
        out_path = write_issue_output(output_dir, idx, issue, content, emit_runner=args.emit_runner, verify=args.verify_tests)
        print(f"  已生成: {out_path}")

    print(f"完成，共生成 {len(issues)} 条用例说明。")

    # Automatic verification.
    if args.auto_verify:
        print("\n" + "="*50)
        print("开始自动验证测试用例...")
        print("="*50)

        verification_results = auto_verify_tests(output_dir, args.verify_timeout, project_root, include_dirs)

        print("\n生成验证报告...")
        md_report = generate_verification_report(output_dir, verification_results)
        json_report = generate_json_report(output_dir, verification_results)

        print(f"Markdown报告: {md_report}")
        print(f"JSON报告: {json_report}")

        _print_verification_summary(verification_results)

    return 0


if __name__ == "__main__":
    # Script entry point: forward the command-line arguments (without the
    # program name) to main() and use its return value as the exit status.
    # This module uses package-relative imports, so it has to be executed as
    # part of its package (``python -m <package>.main``).
    sys.exit(main(sys.argv[1:]))