cbmc/codedetect/.github/workflows/quality-gates.yml


name: Quality Gates

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

env:
  PYTHON_VERSION: '3.11'
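# Quality gates enforced by this workflow:
#   - Unit/regression test coverage >= 80%
#   - Integration test coverage >= 70%
#   - No performance regressions beyond the per-metric thresholds (10-25%)
#   - No high-severity bandit findings, no critical/high dependency vulnerabilities
#   - flake8, black, isort, and cyclomatic-complexity checks
#   - Docstring coverage >= 60%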
jobs:
  test-coverage:
    name: Test Coverage
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          pip install coverage pytest-cov

      - name: Run tests with coverage
        run: |
          chmod +x scripts/run_tests.sh
          ./scripts/run_tests.sh --verbose --coverage unit integration regression

      - name: Generate coverage report
        run: |
          coverage xml
          coverage html

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella

      - name: Check coverage thresholds
        run: |
          TOTAL_COVERAGE=$(coverage report --show-missing | grep TOTAL | awk '{print $4}' | sed 's/%//')
          echo "Total coverage: ${TOTAL_COVERAGE}%"
          if [ "$TOTAL_COVERAGE" -lt 80 ]; then
            echo "❌ Coverage below 80% threshold"
            exit 1
          fi
          echo "✅ Coverage meets 80% threshold"

      - name: Upload coverage report
        uses: actions/upload-artifact@v3
        with:
          name: coverage-report
          path: |
            coverage.xml
            htmlcov/
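  # Benchmarks are compared against benchmark_baseline.json fetched from the main
  # branch. The comparison script assumes baseline and current results share the
  # shape {"results": [{"name": ..., "category": ..., "value": ...}]} and fails the
  # job when a timing metric exceeds its allowed increase (parsing 20%,
  # verification 15%, mutation 10%; memory 25%).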
  performance-benchmarks:
    name: Performance Benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          pip install matplotlib psutil

      - name: Run performance benchmarks
        run: |
          chmod +x tools/run_benchmarks.py
          python tools/run_benchmarks.py --output-dir benchmark_results --iterations 5

      - name: Compare with baseline
        run: |
          # Download baseline results (if available)
          curl -s https://raw.githubusercontent.com/codedetect/codedetect/main/benchmark_baseline.json -o baseline.json || true
          if [ -f "baseline.json" ] && [ -f "benchmark_results/complete_benchmark_suite.json" ]; then
            echo "📊 Comparing with baseline performance..."
            python3 << 'EOF'
          import json
          import sys

          # Load baseline and current results
          with open('baseline.json', 'r') as f:
              baseline = json.load(f)
          with open('benchmark_results/complete_benchmark_suite.json', 'r') as f:
              current = json.load(f)

          # Define performance thresholds (allowed current/baseline ratios)
          THRESHOLDS = {
              'parsing_time': 1.2,        # 20% increase allowed
              'verification_time': 1.15,  # 15% increase allowed
              'mutation_time': 1.1,       # 10% increase allowed
              'memory_usage': 1.25        # 25% increase allowed
          }

          # Compare results
          regressions = []
          for result in current['results']:
              if result['category'] in ['parsing', 'verification', 'mutation']:
                  metric_name = f"{result['category']}_time"
                  if metric_name in THRESHOLDS:
                      # Find corresponding baseline result
                      baseline_result = None
                      for br in baseline['results']:
                          if br['name'] == result['name']:
                              baseline_result = br
                              break
                      if baseline_result:
                          baseline_value = baseline_result['value']
                          current_value = result['value']
                          threshold = THRESHOLDS[metric_name]
                          if current_value > baseline_value * threshold:
                              regressions.append({
                                  'metric': metric_name,
                                  'baseline': baseline_value,
                                  'current': current_value,
                                  'threshold': threshold,
                                  'degradation': (current_value - baseline_value) / baseline_value * 100
                              })

          if regressions:
              print("❌ Performance regressions detected:")
              for regression in regressions:
                  print(f"  {regression['metric']}: {regression['degradation']:.1f}% degradation")
                  print(f"    Baseline: {regression['baseline']:.4f}")
                  print(f"    Current: {regression['current']:.4f}")
                  print(f"    Threshold: {(regression['threshold'] - 1) * 100:.0f}% increase allowed")
              sys.exit(1)
          else:
              print("✅ No performance regressions detected")
          EOF
          else
            echo "⚠️ No baseline available for comparison"
          fi

      - name: Upload benchmark results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: benchmark_results/
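  # Bandit findings gate the build only at HIGH severity; medium/low counts above
  # their thresholds produce warnings. Dependency vulnerabilities reported by
  # safety fail the job only when rated CRITICAL or HIGH.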
  security-scan:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install bandit safety

      - name: Run bandit security scan
        run: |
          # bandit exits non-zero when it reports findings; continue so the
          # threshold check below decides whether to fail the job
          bandit -r src/ -f json -o bandit-report.json || true
          python3 << 'EOF'
          import json
          import sys

          with open('bandit-report.json', 'r') as f:
              results = json.load(f)

          # Define severity thresholds
          HIGH_SEVERITY_THRESHOLD = 0
          MEDIUM_SEVERITY_THRESHOLD = 2
          LOW_SEVERITY_THRESHOLD = 5

          high_severity = [r for r in results['results'] if r['issue_severity'] == 'HIGH']
          medium_severity = [r for r in results['results'] if r['issue_severity'] == 'MEDIUM']
          low_severity = [r for r in results['results'] if r['issue_severity'] == 'LOW']

          print("🔍 Security scan results:")
          print(f"  High severity: {len(high_severity)}")
          print(f"  Medium severity: {len(medium_severity)}")
          print(f"  Low severity: {len(low_severity)}")

          if len(high_severity) > HIGH_SEVERITY_THRESHOLD:
              print(f"❌ High severity issues exceed threshold ({HIGH_SEVERITY_THRESHOLD})")
              for issue in high_severity:
                  print(f"  - {issue['test_name']}: {issue['issue_text']}")
              sys.exit(1)

          if len(medium_severity) > MEDIUM_SEVERITY_THRESHOLD:
              print(f"⚠️ Medium severity issues exceed threshold ({MEDIUM_SEVERITY_THRESHOLD})")
              # Don't exit, just warn

          if len(low_severity) > LOW_SEVERITY_THRESHOLD:
              print(f"⚠️ Low severity issues exceed threshold ({LOW_SEVERITY_THRESHOLD})")
              # Don't exit, just warn

          print("✅ Security scan passed")
          EOF
      - name: Check dependencies for known vulnerabilities
        run: |
          safety check --json --output safety-report.json || true
          python3 << 'EOF'
          import json
          import sys

          try:
              with open('safety-report.json', 'r') as f:
                  results = json.load(f)
          except FileNotFoundError:
              print("⚠️ No safety report generated")
              sys.exit(0)

          if results:
              print(f"🔍 Found {len(results)} known vulnerabilities in dependencies:")
              for vuln in results:
                  print(f"  - {vuln['id']}: {vuln['advisory']}")
                  print(f"    Package: {vuln['package']}")
                  print(f"    Version: {vuln['installed_version']}")
                  print(f"    Fixed in: {vuln['fixed_version']}")
              # Allow only low severity vulnerabilities
              critical_high_vulns = [v for v in results if v['severity'] in ['CRITICAL', 'HIGH']]
              if critical_high_vulns:
                  print(f"❌ Critical/High severity vulnerabilities found: {len(critical_high_vulns)}")
                  sys.exit(1)
              else:
                  print("✅ No critical/high severity vulnerabilities found")
          else:
              print("✅ No known vulnerabilities found")
          EOF

      - name: Upload security reports
        uses: actions/upload-artifact@v3
        with:
          name: security-reports
          path: |
            bandit-report.json
            safety-report.json
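  # Style gates: flake8 (120-column limit), black formatting, isort import order,
  # a soft warning when more than 10 TODO/FIXME comments are found, and a radon
  # cyclomatic-complexity check that tolerates at most 5 functions above complexity 10.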
  code-quality:
    name: Code Quality
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Run flake8
        run: |
          flake8 src/ tests/ --max-line-length=120 --exclude=__pycache__ --statistics

      - name: Check code formatting
        run: |
          black --check --diff src/ tests/ || {
            echo "❌ Code formatting issues found"
            echo "Run 'black src/ tests/' to fix formatting"
            exit 1
          }

      - name: Check import sorting
        run: |
          isort --check-only --diff src/ tests/ || {
            echo "❌ Import sorting issues found"
            echo "Run 'isort src/ tests/' to fix imports"
            exit 1
          }

      - name: Check for TODO comments
        run: |
          # Count TODO comments (excluding test files)
          TODO_COUNT=$(grep -r "TODO\|FIXME" src/ --include="*.py" | grep -v test | wc -l)
          echo "Found $TODO_COUNT TODO/FIXME comments"
          if [ "$TODO_COUNT" -gt 10 ]; then
            echo "⚠️ High number of TODO comments: $TODO_COUNT"
            # Don't fail, just warn
          fi

      - name: Check code complexity
        run: |
          pip install radon
          radon cc src/ -a -nb --show-complexity
          python3 << 'EOF'
          import subprocess
          import sys

          # Run radon with --show-complexity so each reported block ends with its
          # score, e.g. "F 10:0 my_func - C (12)"
          result = subprocess.run(['radon', 'cc', 'src/', '-a', '-nb', '--show-complexity'],
                                  capture_output=True, text=True)

          # Collect blocks whose cyclomatic complexity exceeds the threshold
          high_complexity = []
          for line in result.stdout.strip().split('\n'):
              line = line.strip()
              if not line or line.startswith('Average') or not line.endswith(')'):
                  continue
              parts = line.split()
              if len(parts) < 3:
                  continue
              try:
                  complexity = int(parts[-1].strip('()'))
              except ValueError:
                  continue
              if complexity > 10:  # Cyclomatic complexity threshold
                  high_complexity.append((parts[2], complexity))

          if high_complexity:
              print("⚠️ High complexity functions found:")
              for func_name, complexity in high_complexity:
                  print(f"  {func_name}: {complexity}")
              # Allow up to 5 high complexity functions
              if len(high_complexity) > 5:
                  print("❌ Too many high complexity functions")
                  sys.exit(1)

          print("✅ Code complexity check passed")
          EOF
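  # Integration coverage is read from the line-rate attribute of coverage.xml and
  # must stay at or above 70%.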
  integration-test-coverage:
    name: Integration Test Coverage
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          pip install coverage

      - name: Run integration tests with coverage
        run: |
          chmod +x scripts/run_tests.sh
          ./scripts/run_tests.sh --verbose --coverage integration

      - name: Check integration test coverage
        run: |
          python3 << 'EOF'
          import re
          import sys

          # Parse coverage report
          with open('coverage.xml', 'r') as f:
              coverage_data = f.read()

          # Extract line coverage (simplified parsing of the root line-rate attribute)
          line_rate_match = re.search(r'line-rate="([^"]+)"', coverage_data)
          if line_rate_match:
              line_rate = float(line_rate_match.group(1))
              coverage_percentage = line_rate * 100
              print(f"Integration test coverage: {coverage_percentage:.1f}%")
              if coverage_percentage < 70:
                  print("❌ Integration test coverage below 70% threshold")
                  sys.exit(1)
              print("✅ Integration test coverage meets threshold")
          else:
              print("⚠️ Could not parse coverage report")
          EOF
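  # Docstring coverage is measured by walking every function definition under src/
  # with the ast module; at least 60% of functions must carry a docstring.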
  documentation-coverage:
    name: Documentation Coverage
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt
          pip install pydoc-markdown

      - name: Generate API documentation
        run: |
          chmod +x scripts/generate_api_docs.py
          python scripts/generate_api_docs.py --output-dir docs/api

      - name: Check documentation coverage
        run: |
          python3 << 'EOF'
          import ast
          import sys
          from pathlib import Path

          def check_documentation_coverage():
              src_dir = Path('src')
              documented_functions = 0
              total_functions = 0

              for py_file in src_dir.rglob('*.py'):
                  if '__pycache__' in str(py_file):
                      continue
                  try:
                      with open(py_file, 'r', encoding='utf-8') as f:
                          content = f.read()
                      tree = ast.parse(content)
                      for node in ast.walk(tree):
                          if isinstance(node, ast.FunctionDef):
                              total_functions += 1
                              docstring = ast.get_docstring(node)
                              if docstring and docstring.strip():
                                  documented_functions += 1
                  except Exception as e:
                      print(f"Error parsing {py_file}: {e}")
                      continue

              if total_functions == 0:
                  print("⚠️ No functions found to check documentation")
                  return

              coverage = (documented_functions / total_functions) * 100
              print(f"Documentation coverage: {coverage:.1f}% ({documented_functions}/{total_functions})")
              if coverage < 60:
                  print("❌ Documentation coverage below 60% threshold")
                  sys.exit(1)
              print("✅ Documentation coverage meets threshold")

          check_documentation_coverage()
          EOF
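  # Aggregates the result of every gate into the workflow run summary
  # ($GITHUB_STEP_SUMMARY) and reports how many gates failed.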
  quality-summary:
    name: Quality Summary
    runs-on: ubuntu-latest
    needs: [test-coverage, performance-benchmarks, security-scan, code-quality, integration-test-coverage, documentation-coverage]
    if: always()
    steps:
      - name: Generate quality summary
        run: |
          echo "# 📊 Quality Gates Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Test Coverage
          echo "## ✅ Test Coverage" >> $GITHUB_STEP_SUMMARY
          echo "- Unit tests: $(if [ "${{ needs.test-coverage.result }}" = "success" ]; then echo "Passed (>80%)"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "- Integration tests: $(if [ "${{ needs.integration-test-coverage.result }}" = "success" ]; then echo "Passed (>70%)"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Performance
          echo "## ✅ Performance Benchmarks" >> $GITHUB_STEP_SUMMARY
          echo "- Performance regression: $(if [ "${{ needs.performance-benchmarks.result }}" = "success" ]; then echo "No regressions"; else echo "Regressions detected"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Security
          echo "## ✅ Security Scan" >> $GITHUB_STEP_SUMMARY
          echo "- Security vulnerabilities: $(if [ "${{ needs.security-scan.result }}" = "success" ]; then echo "No critical issues"; else echo "Issues found"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Code Quality
          echo "## ✅ Code Quality" >> $GITHUB_STEP_SUMMARY
          echo "- Code formatting: $(if [ "${{ needs.code-quality.result }}" = "success" ]; then echo "Passed"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "- Documentation coverage: $(if [ "${{ needs.documentation-coverage.result }}" = "success" ]; then echo "Passed (>60%)"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          # Overall status. ${{ }} expressions are expanded before the shell runs,
          # so each job result is listed explicitly rather than interpolating a
          # shell variable into the expression.
          failed_jobs=0
          for result in "${{ needs.test-coverage.result }}" \
                        "${{ needs.performance-benchmarks.result }}" \
                        "${{ needs.security-scan.result }}" \
                        "${{ needs.code-quality.result }}" \
                        "${{ needs.integration-test-coverage.result }}" \
                        "${{ needs.documentation-coverage.result }}"; do
            if [ "$result" != "success" ]; then
              failed_jobs=$((failed_jobs + 1))
            fi
          done

          if [ $failed_jobs -eq 0 ]; then
            echo "## 🎉 All Quality Gates Passed!" >> $GITHUB_STEP_SUMMARY
          else
            echo "## ❌ $failed_jobs Quality Gate(s) Failed" >> $GITHUB_STEP_SUMMARY
          fi

          echo "" >> $GITHUB_STEP_SUMMARY
          echo "*Generated on $(date)*" >> $GITHUB_STEP_SUMMARY