You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
cbmc/codedetect/tests/fixtures/llm_responses.py

337 lines
11 KiB

#!/usr/bin/env python3
# LLM响应测试固件
from typing import List, Dict, Any
from dataclasses import dataclass
@dataclass
class LLMResponseFixture:
"""LLM响应测试固件"""
def __init__(self):
self.success_fixtures = self._load_success_fixtures()
self.failure_fixtures = self._load_failure_fixtures()
self.timeout_fixtures = self._load_timeout_fixtures()
self.error_fixtures = self._load_error_fixtures()
def _load_success_fixtures(self) -> List[Dict[str, Any]]:
"""加载成功案例固件"""
return [
{
"name": "simple_specification_generation",
"input": {
"function_name": "add",
"function_signature": "int add(int a, int b)",
"function_body": "return a + b;",
"context": {
"complexity": "simple",
"purpose": "arithmetic operation"
}
},
"output": {
"status": "success",
"specification": """
// Simple addition function specification
#include <assert.h>
void add_spec() {
int a, b;
__CPROVER_assume(a >= INT_MIN && a <= INT_MAX);
__CPROVER_assume(b >= INT_MIN && b <= INT_MAX);
int result = add(a, b);
// Basic properties of addition
assert(result >= a && result >= b);
assert(result <= INT_MAX);
assert(result >= INT_MIN);
// Commutative property
assert(add(a, b) == add(b, a));
}
""",
"confidence": 0.95,
"metadata": {
"specification_type": "functional",
"verification_properties": ["commutativity", "bounds"],
"estimated_verification_time": 2.0
}
}
},
{
"name": "array_specification_generation",
"input": {
"function_name": "array_sum",
"function_signature": "int array_sum(int* arr, int size)",
"function_body": """
if (arr == NULL || size <= 0) return 0;
int sum = 0;
for (int i = 0; i < size; i++) {
sum += arr[i];
}
return sum;
""",
"context": {
"complexity": "medium",
"purpose": "array processing"
}
},
"output": {
"status": "success",
"specification": """
// Array sum function specification
#include <assert.h>
void array_sum_spec() {
int size;
__CPROVER_assume(size >= 0 && size <= 100);
int* arr = malloc(size * sizeof(int));
__CPROVER_assume(arr != NULL || size == 0);
// Initialize array elements
for (int i = 0; i < size; i++) {
__CPROVER_assume(arr[i] >= INT_MIN && arr[i] <= INT_MAX);
}
int result = array_sum(arr, size);
// Properties of array sum
if (size == 0) {
assert(result == 0);
} else if (size == 1) {
assert(result == arr[0]);
}
// Bounds checking
assert(result >= size * INT_MIN);
assert(result <= size * INT_MAX);
free(arr);
}
""",
"confidence": 0.92,
"metadata": {
"specification_type": "functional",
"verification_properties": ["null_safety", "bounds", "memory_safety"],
"estimated_verification_time": 3.5
}
}
},
{
"name": "freertos_task_specification",
"input": {
"function_name": "vTaskFunction",
"function_signature": "void vTaskFunction(void* pvParameters)",
"function_body": """
TaskParameters_t* params = (TaskParameters_t*)pvParameters;
configASSERT(params != NULL);
configASSERT(params->task_id > 0);
while (1) {
vTaskDelay(pdMS_TO_TICKS(1000));
}
""",
"context": {
"complexity": "complex",
"purpose": "FreeRTOS task",
"framework": "FreeRTOS"
}
},
"output": {
"status": "success",
"specification": """
// FreeRTOS task specification
#include "FreeRTOS.h"
#include "task.h"
#include "assert.h"
void vTaskFunction_spec() {
TaskParameters_t* params = (TaskParameters_t*)__CPROVER_alloc_object(sizeof(TaskParameters_t));
__CPROVER_assume(params != NULL);
__CPROVER_assume(params->task_id > 0 && params->task_id <= MAX_TASK_ID);
// Call the task function
vTaskFunction(params);
// Verify task parameter validation
assert(params != NULL);
assert(params->task_id > 0);
// Verify task behavior (simplified for verification)
// In real FreeRTOS, we would verify scheduler behavior
assert(1); // Task should not crash
}
""",
"confidence": 0.88,
"metadata": {
"specification_type": "system",
"verification_properties": ["parameter_validation", "task_safety"],
"framework_requirements": ["FreeRTOS"],
"estimated_verification_time": 5.0
}
}
}
]
def _load_failure_fixtures(self) -> List[Dict[str, Any]]:
"""加载失败案例固件"""
return [
{
"name": "syntax_error_function",
"input": {
"function_name": "broken_function",
"function_signature": "int broken_function(",
"function_body": "return x; // Undefined variable",
"context": {
"complexity": "simple",
"purpose": "broken example"
}
},
"output": {
"status": "failure",
"error": "syntax_error",
"message": "Function signature is incomplete",
"details": {
"error_type": "parsing_error",
"line": 1,
"expected": "Complete function signature"
}
}
},
{
"name": "unsafe_function_rejection",
"input": {
"function_name": "unsafe_memory",
"function_signature": "char* unsafe_function()",
"function_body": "char buffer[10]; return buffer;", // Returning stack address
"context": {
"complexity": "medium",
"purpose": "memory manipulation"
}
},
"output": {
"status": "failure",
"error": "unsafe_pattern",
"message": "Function returns address of local variable",
"details": {
"error_type": "memory_safety_violation",
"pattern": "stack_address_return",
"severity": "high",
"suggestion": "Use dynamic allocation or modify function design"
}
}
}
]
def _load_timeout_fixtures(self) -> List[Dict[str, Any]]:
"""加载超时案例固件"""
return [
{
"name": "complex_function_timeout",
"input": {
"function_name": "complex_algorithm",
"function_signature": "int complex_algorithm(int* data, int size)",
"function_body": """
// Complex nested loops and recursion
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
for (int k = 0; k < size; k++) {
data[i] = data[j] + data[k];
}
}
}
return data[0];
""",
"context": {
"complexity": "very_high",
"purpose": "complex computation"
}
},
"output": {
"status": "timeout",
"error": "generation_timeout",
"message": "Specification generation timed out after 30 seconds",
"details": {
"timeout_duration": 30,
"reason": "high_complexity",
"suggestion": "Consider breaking down function or simplifying logic"
}
}
}
]
def _load_error_fixtures(self) -> List[Dict[str, Any]]:
"""加载错误案例固件"""
return [
{
"name": "api_error",
"input": {
"function_name": "test_function",
"function_signature": "void test_function()",
"function_body": "int x = 0;",
"context": {}
},
"output": {
"status": "error",
"error": "api_error",
"message": "LLM API request failed",
"details": {
"error_type": "connection_error",
"status_code": 500,
"retry_attempts": 3
}
}
},
{
"name": "rate_limit_error",
"input": {
"function_name": "another_test",
"function_signature": "int another_test()",
"function_body": "return 0;",
"context": {}
},
"output": {
"status": "error",
"error": "rate_limit",
"message": "LLM API rate limit exceeded",
"details": {
"error_type": "rate_limit_exceeded",
"retry_after": 60,
"current_usage": 100,
"limit": 50
}
}
}
]
def get_fixture(self, name: str) -> Dict[str, Any]:
"""获取指定名称的固件"""
all_fixtures = (
self.success_fixtures +
self.failure_fixtures +
self.timeout_fixtures +
self.error_fixtures
)
for fixture in all_fixtures:
if fixture["name"] == name:
return fixture
raise ValueError(f"Fixture not found: {name}")
def get_fixtures_by_type(self, fixture_type: str) -> List[Dict[str, Any]]:
"""根据类型获取固件"""
if fixture_type == "success":
return self.success_fixtures
elif fixture_type == "failure":
return self.failure_fixtures
elif fixture_type == "timeout":
return self.timeout_fixtures
elif fixture_type == "error":
return self.error_fixtures
else:
raise ValueError(f"Unknown fixture type: {fixture_type}")
# 全局固件实例
llm_fixtures = LLMResponseFixture()