cbmc/codedetect/.github/workflows/quality-gates.yml


name: Quality Gates

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

env:
  PYTHON_VERSION: '3.11'
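# Quality gates enforced by this workflow:
#   - Unit/regression test coverage >= 80%
#   - Integration test coverage >= 70%
#   - No performance regressions beyond the per-metric thresholds (10-25%)
#   - No high-severity bandit findings, no critical/high dependency vulnerabilities
#   - flake8, black, isort, and cyclomatic-complexity checks
#   - Docstring coverage >= 60%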
jobs:
  test-coverage:
    name: Test Coverage
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          pip install coverage pytest-cov

      - name: Run tests with coverage
        run: |
          chmod +x scripts/run_tests.sh
          ./scripts/run_tests.sh --verbose --coverage unit integration regression

      - name: Generate coverage report
        run: |
          coverage xml
          coverage html

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella

      - name: Check coverage thresholds
        run: |
          TOTAL_COVERAGE=$(coverage report --show-missing | grep TOTAL | awk '{print $4}' | sed 's/%//')
          echo "Total coverage: ${TOTAL_COVERAGE}%"
          if [ "$TOTAL_COVERAGE" -lt 80 ]; then
            echo "❌ Coverage below 80% threshold"
            exit 1
          fi
          echo "✅ Coverage meets 80% threshold"

      - name: Upload coverage report
        uses: actions/upload-artifact@v3
        with:
          name: coverage-report
          path: |
            coverage.xml
            htmlcov/
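  # Benchmarks are compared against benchmark_baseline.json fetched from the main
  # branch. The comparison script assumes baseline and current results share the
  # shape {"results": [{"name": ..., "category": ..., "value": ...}]} and fails the
  # job when a timing metric exceeds its allowed increase (parsing 20%,
  # verification 15%, mutation 10%; memory 25%).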
  performance-benchmarks:
    name: Performance Benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          pip install matplotlib psutil

      - name: Run performance benchmarks
        run: |
          chmod +x tools/run_benchmarks.py
          python tools/run_benchmarks.py --output-dir benchmark_results --iterations 5

      - name: Compare with baseline
        run: |
          # Download baseline results (if available)
          curl -s https://raw.githubusercontent.com/codedetect/codedetect/main/benchmark_baseline.json -o baseline.json || true
          if [ -f "baseline.json" ] && [ -f "benchmark_results/complete_benchmark_suite.json" ]; then
            echo "📊 Comparing with baseline performance..."
            python3 << 'EOF'
          import json
          import sys

          # Load baseline and current results
          with open('baseline.json', 'r') as f:
              baseline = json.load(f)
          with open('benchmark_results/complete_benchmark_suite.json', 'r') as f:
              current = json.load(f)

          # Define performance thresholds (allowed current/baseline ratios)
          THRESHOLDS = {
              'parsing_time': 1.2,        # 20% increase allowed
              'verification_time': 1.15,  # 15% increase allowed
              'mutation_time': 1.1,       # 10% increase allowed
              'memory_usage': 1.25        # 25% increase allowed
          }

          # Compare results
          regressions = []
          for result in current['results']:
              if result['category'] in ['parsing', 'verification', 'mutation']:
                  metric_name = f"{result['category']}_time"
                  if metric_name in THRESHOLDS:
                      # Find corresponding baseline result
                      baseline_result = None
                      for br in baseline['results']:
                          if br['name'] == result['name']:
                              baseline_result = br
                              break
                      if baseline_result:
                          baseline_value = baseline_result['value']
                          current_value = result['value']
                          threshold = THRESHOLDS[metric_name]
                          if current_value > baseline_value * threshold:
                              regressions.append({
                                  'metric': metric_name,
                                  'baseline': baseline_value,
                                  'current': current_value,
                                  'threshold': threshold,
                                  'degradation': (current_value - baseline_value) / baseline_value * 100
                              })

          if regressions:
              print("❌ Performance regressions detected:")
              for regression in regressions:
                  print(f"  {regression['metric']}: {regression['degradation']:.1f}% degradation")
                  print(f"    Baseline: {regression['baseline']:.4f}")
                  print(f"    Current: {regression['current']:.4f}")
                  print(f"    Threshold: {(regression['threshold'] - 1) * 100:.0f}% increase allowed")
              sys.exit(1)
          else:
              print("✅ No performance regressions detected")
          EOF
          else
            echo "⚠️ No baseline available for comparison"
          fi

      - name: Upload benchmark results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: benchmark_results/
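  # Bandit findings gate the build only at HIGH severity; medium/low counts above
  # their thresholds produce warnings. Dependency vulnerabilities reported by
  # safety fail the job only when rated CRITICAL or HIGH.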
  security-scan:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install bandit safety

      - name: Run bandit security scan
        run: |
          # bandit exits non-zero when it reports findings; continue so the
          # threshold check below decides whether to fail the job
          bandit -r src/ -f json -o bandit-report.json || true
          python3 << 'EOF'
          import json
          import sys

          with open('bandit-report.json', 'r') as f:
              results = json.load(f)

          # Define severity thresholds
          HIGH_SEVERITY_THRESHOLD = 0
          MEDIUM_SEVERITY_THRESHOLD = 2
          LOW_SEVERITY_THRESHOLD = 5

          high_severity = [r for r in results['results'] if r['issue_severity'] == 'HIGH']
          medium_severity = [r for r in results['results'] if r['issue_severity'] == 'MEDIUM']
          low_severity = [r for r in results['results'] if r['issue_severity'] == 'LOW']

          print("🔍 Security scan results:")
          print(f"  High severity: {len(high_severity)}")
          print(f"  Medium severity: {len(medium_severity)}")
          print(f"  Low severity: {len(low_severity)}")

          if len(high_severity) > HIGH_SEVERITY_THRESHOLD:
              print(f"❌ High severity issues exceed threshold ({HIGH_SEVERITY_THRESHOLD})")
              for issue in high_severity:
                  print(f"  - {issue['test_name']}: {issue['issue_text']}")
              sys.exit(1)

          if len(medium_severity) > MEDIUM_SEVERITY_THRESHOLD:
              print(f"⚠️ Medium severity issues exceed threshold ({MEDIUM_SEVERITY_THRESHOLD})")
              # Don't exit, just warn

          if len(low_severity) > LOW_SEVERITY_THRESHOLD:
              print(f"⚠️ Low severity issues exceed threshold ({LOW_SEVERITY_THRESHOLD})")
              # Don't exit, just warn

          print("✅ Security scan passed")
          EOF
      - name: Check dependencies for known vulnerabilities
        run: |
          safety check --json --output safety-report.json || true
          python3 << 'EOF'
          import json
          import sys

          try:
              with open('safety-report.json', 'r') as f:
                  results = json.load(f)
          except FileNotFoundError:
              print("⚠️ No safety report generated")
              sys.exit(0)

          if results:
              print(f"🔍 Found {len(results)} known vulnerabilities in dependencies:")
              for vuln in results:
                  print(f"  - {vuln['id']}: {vuln['advisory']}")
                  print(f"    Package: {vuln['package']}")
                  print(f"    Version: {vuln['installed_version']}")
                  print(f"    Fixed in: {vuln['fixed_version']}")
              # Allow only low severity vulnerabilities
              critical_high_vulns = [v for v in results if v['severity'] in ['CRITICAL', 'HIGH']]
              if critical_high_vulns:
                  print(f"❌ Critical/High severity vulnerabilities found: {len(critical_high_vulns)}")
                  sys.exit(1)
              else:
                  print("✅ No critical/high severity vulnerabilities found")
          else:
              print("✅ No known vulnerabilities found")
          EOF

      - name: Upload security reports
        uses: actions/upload-artifact@v3
        with:
          name: security-reports
          path: |
            bandit-report.json
            safety-report.json
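  # Style gates: flake8 (120-column limit), black formatting, isort import order,
  # a soft warning when more than 10 TODO/FIXME comments are found, and a radon
  # cyclomatic-complexity check that tolerates at most 5 functions above complexity 10.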
  code-quality:
    name: Code Quality
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Run flake8
        run: |
          flake8 src/ tests/ --max-line-length=120 --exclude=__pycache__ --statistics

      - name: Check code formatting
        run: |
          black --check --diff src/ tests/ || {
            echo "❌ Code formatting issues found"
            echo "Run 'black src/ tests/' to fix formatting"
            exit 1
          }

      - name: Check import sorting
        run: |
          isort --check-only --diff src/ tests/ || {
            echo "❌ Import sorting issues found"
            echo "Run 'isort src/ tests/' to fix imports"
            exit 1
          }

      - name: Check for TODO comments
        run: |
          # Count TODO comments (excluding test files)
          TODO_COUNT=$(grep -r "TODO\|FIXME" src/ --include="*.py" | grep -v test | wc -l)
          echo "Found $TODO_COUNT TODO/FIXME comments"
          if [ "$TODO_COUNT" -gt 10 ]; then
            echo "⚠️ High number of TODO comments: $TODO_COUNT"
            # Don't fail, just warn
          fi

      - name: Check code complexity
        run: |
          pip install radon
          radon cc src/ -a -nb --show-complexity
          python3 << 'EOF'
          import subprocess
          import sys

          # Run radon with --show-complexity so each reported block ends with its
          # score, e.g. "F 10:0 my_func - C (12)"
          result = subprocess.run(['radon', 'cc', 'src/', '-a', '-nb', '--show-complexity'],
                                  capture_output=True, text=True)

          # Collect blocks whose cyclomatic complexity exceeds the threshold
          high_complexity = []
          for line in result.stdout.strip().split('\n'):
              line = line.strip()
              if not line or line.startswith('Average') or not line.endswith(')'):
                  continue
              parts = line.split()
              if len(parts) < 3:
                  continue
              try:
                  complexity = int(parts[-1].strip('()'))
              except ValueError:
                  continue
              if complexity > 10:  # Cyclomatic complexity threshold
                  high_complexity.append((parts[2], complexity))

          if high_complexity:
              print("⚠️ High complexity functions found:")
              for func_name, complexity in high_complexity:
                  print(f"  {func_name}: {complexity}")
              # Allow up to 5 high complexity functions
              if len(high_complexity) > 5:
                  print("❌ Too many high complexity functions")
                  sys.exit(1)

          print("✅ Code complexity check passed")
          EOF
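  # Integration coverage is read from the line-rate attribute of coverage.xml and
  # must stay at or above 70%.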
  integration-test-coverage:
    name: Integration Test Coverage
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          pip install coverage

      - name: Run integration tests with coverage
        run: |
          chmod +x scripts/run_tests.sh
          ./scripts/run_tests.sh --verbose --coverage integration

      - name: Check integration test coverage
        run: |
          python3 << 'EOF'
          import re
          import sys

          # Parse coverage report
          with open('coverage.xml', 'r') as f:
              coverage_data = f.read()

          # Extract line coverage (simplified parsing of the root line-rate attribute)
          line_rate_match = re.search(r'line-rate="([^"]+)"', coverage_data)
          if line_rate_match:
              line_rate = float(line_rate_match.group(1))
              coverage_percentage = line_rate * 100
              print(f"Integration test coverage: {coverage_percentage:.1f}%")
              if coverage_percentage < 70:
                  print("❌ Integration test coverage below 70% threshold")
                  sys.exit(1)
              print("✅ Integration test coverage meets threshold")
          else:
              print("⚠️ Could not parse coverage report")
          EOF
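  # Docstring coverage is measured by walking every function definition under src/
  # with the ast module; at least 60% of functions must carry a docstring.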
  documentation-coverage:
    name: Documentation Coverage
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt
          pip install pydoc-markdown

      - name: Generate API documentation
        run: |
          chmod +x scripts/generate_api_docs.py
          python scripts/generate_api_docs.py --output-dir docs/api

      - name: Check documentation coverage
        run: |
          python3 << 'EOF'
          import ast
          import sys
          from pathlib import Path

          def check_documentation_coverage():
              src_dir = Path('src')
              documented_functions = 0
              total_functions = 0

              for py_file in src_dir.rglob('*.py'):
                  if '__pycache__' in str(py_file):
                      continue
                  try:
                      with open(py_file, 'r', encoding='utf-8') as f:
                          content = f.read()
                      tree = ast.parse(content)
                      for node in ast.walk(tree):
                          if isinstance(node, ast.FunctionDef):
                              total_functions += 1
                              docstring = ast.get_docstring(node)
                              if docstring and docstring.strip():
                                  documented_functions += 1
                  except Exception as e:
                      print(f"Error parsing {py_file}: {e}")
                      continue

              if total_functions == 0:
                  print("⚠️ No functions found to check documentation")
                  return

              coverage = (documented_functions / total_functions) * 100
              print(f"Documentation coverage: {coverage:.1f}% ({documented_functions}/{total_functions})")
              if coverage < 60:
                  print("❌ Documentation coverage below 60% threshold")
                  sys.exit(1)
              print("✅ Documentation coverage meets threshold")

          check_documentation_coverage()
          EOF
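  # Aggregates the result of every gate into the workflow run summary
  # ($GITHUB_STEP_SUMMARY) and reports how many gates failed.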
  quality-summary:
    name: Quality Summary
    runs-on: ubuntu-latest
    needs: [test-coverage, performance-benchmarks, security-scan, code-quality, integration-test-coverage, documentation-coverage]
    if: always()
    steps:
      - name: Generate quality summary
        run: |
          echo "# 📊 Quality Gates Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Test Coverage
          echo "## ✅ Test Coverage" >> $GITHUB_STEP_SUMMARY
          echo "- Unit tests: $(if [ "${{ needs.test-coverage.result }}" = "success" ]; then echo "Passed (>80%)"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "- Integration tests: $(if [ "${{ needs.integration-test-coverage.result }}" = "success" ]; then echo "Passed (>70%)"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Performance
          echo "## ✅ Performance Benchmarks" >> $GITHUB_STEP_SUMMARY
          echo "- Performance regression: $(if [ "${{ needs.performance-benchmarks.result }}" = "success" ]; then echo "No regressions"; else echo "Regressions detected"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Security
          echo "## ✅ Security Scan" >> $GITHUB_STEP_SUMMARY
          echo "- Security vulnerabilities: $(if [ "${{ needs.security-scan.result }}" = "success" ]; then echo "No critical issues"; else echo "Issues found"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Code Quality
          echo "## ✅ Code Quality" >> $GITHUB_STEP_SUMMARY
          echo "- Code formatting: $(if [ "${{ needs.code-quality.result }}" = "success" ]; then echo "Passed"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "- Documentation coverage: $(if [ "${{ needs.documentation-coverage.result }}" = "success" ]; then echo "Passed (>60%)"; else echo "Failed"; fi)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          # Overall status. ${{ }} expressions are expanded before the shell runs,
          # so each job result is listed explicitly rather than interpolating a
          # shell variable into the expression.
          failed_jobs=0
          for result in "${{ needs.test-coverage.result }}" \
                        "${{ needs.performance-benchmarks.result }}" \
                        "${{ needs.security-scan.result }}" \
                        "${{ needs.code-quality.result }}" \
                        "${{ needs.integration-test-coverage.result }}" \
                        "${{ needs.documentation-coverage.result }}"; do
            if [ "$result" != "success" ]; then
              failed_jobs=$((failed_jobs + 1))
            fi
          done

          if [ $failed_jobs -eq 0 ]; then
            echo "## 🎉 All Quality Gates Passed!" >> $GITHUB_STEP_SUMMARY
          else
            echo "## ❌ $failed_jobs Quality Gate(s) Failed" >> $GITHUB_STEP_SUMMARY
          fi

          echo "" >> $GITHUB_STEP_SUMMARY
          echo "*Generated on $(date)*" >> $GITHUB_STEP_SUMMARY