You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
337 lines
11 KiB
337 lines
11 KiB
#!/usr/bin/env python3
|
|
# LLM响应测试固件
|
|
|
|
from typing import List, Dict, Any
|
|
from dataclasses import dataclass
|
|
|
|
@dataclass
|
|
class LLMResponseFixture:
|
|
"""LLM响应测试固件"""
|
|
|
|
def __init__(self):
|
|
self.success_fixtures = self._load_success_fixtures()
|
|
self.failure_fixtures = self._load_failure_fixtures()
|
|
self.timeout_fixtures = self._load_timeout_fixtures()
|
|
self.error_fixtures = self._load_error_fixtures()
|
|
|
|
def _load_success_fixtures(self) -> List[Dict[str, Any]]:
|
|
"""加载成功案例固件"""
|
|
return [
|
|
{
|
|
"name": "simple_specification_generation",
|
|
"input": {
|
|
"function_name": "add",
|
|
"function_signature": "int add(int a, int b)",
|
|
"function_body": "return a + b;",
|
|
"context": {
|
|
"complexity": "simple",
|
|
"purpose": "arithmetic operation"
|
|
}
|
|
},
|
|
"output": {
|
|
"status": "success",
|
|
"specification": """
|
|
// Simple addition function specification
|
|
#include <assert.h>
|
|
|
|
void add_spec() {
|
|
int a, b;
|
|
__CPROVER_assume(a >= INT_MIN && a <= INT_MAX);
|
|
__CPROVER_assume(b >= INT_MIN && b <= INT_MAX);
|
|
|
|
int result = add(a, b);
|
|
|
|
// Basic properties of addition
|
|
assert(result >= a && result >= b);
|
|
assert(result <= INT_MAX);
|
|
assert(result >= INT_MIN);
|
|
|
|
// Commutative property
|
|
assert(add(a, b) == add(b, a));
|
|
}
|
|
""",
|
|
"confidence": 0.95,
|
|
"metadata": {
|
|
"specification_type": "functional",
|
|
"verification_properties": ["commutativity", "bounds"],
|
|
"estimated_verification_time": 2.0
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"name": "array_specification_generation",
|
|
"input": {
|
|
"function_name": "array_sum",
|
|
"function_signature": "int array_sum(int* arr, int size)",
|
|
"function_body": """
|
|
if (arr == NULL || size <= 0) return 0;
|
|
int sum = 0;
|
|
for (int i = 0; i < size; i++) {
|
|
sum += arr[i];
|
|
}
|
|
return sum;
|
|
""",
|
|
"context": {
|
|
"complexity": "medium",
|
|
"purpose": "array processing"
|
|
}
|
|
},
|
|
"output": {
|
|
"status": "success",
|
|
"specification": """
|
|
// Array sum function specification
|
|
#include <assert.h>
|
|
|
|
void array_sum_spec() {
|
|
int size;
|
|
__CPROVER_assume(size >= 0 && size <= 100);
|
|
|
|
int* arr = malloc(size * sizeof(int));
|
|
__CPROVER_assume(arr != NULL || size == 0);
|
|
|
|
// Initialize array elements
|
|
for (int i = 0; i < size; i++) {
|
|
__CPROVER_assume(arr[i] >= INT_MIN && arr[i] <= INT_MAX);
|
|
}
|
|
|
|
int result = array_sum(arr, size);
|
|
|
|
// Properties of array sum
|
|
if (size == 0) {
|
|
assert(result == 0);
|
|
} else if (size == 1) {
|
|
assert(result == arr[0]);
|
|
}
|
|
|
|
// Bounds checking
|
|
assert(result >= size * INT_MIN);
|
|
assert(result <= size * INT_MAX);
|
|
|
|
free(arr);
|
|
}
|
|
""",
|
|
"confidence": 0.92,
|
|
"metadata": {
|
|
"specification_type": "functional",
|
|
"verification_properties": ["null_safety", "bounds", "memory_safety"],
|
|
"estimated_verification_time": 3.5
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"name": "freertos_task_specification",
|
|
"input": {
|
|
"function_name": "vTaskFunction",
|
|
"function_signature": "void vTaskFunction(void* pvParameters)",
|
|
"function_body": """
|
|
TaskParameters_t* params = (TaskParameters_t*)pvParameters;
|
|
configASSERT(params != NULL);
|
|
configASSERT(params->task_id > 0);
|
|
|
|
while (1) {
|
|
vTaskDelay(pdMS_TO_TICKS(1000));
|
|
}
|
|
""",
|
|
"context": {
|
|
"complexity": "complex",
|
|
"purpose": "FreeRTOS task",
|
|
"framework": "FreeRTOS"
|
|
}
|
|
},
|
|
"output": {
|
|
"status": "success",
|
|
"specification": """
|
|
// FreeRTOS task specification
|
|
#include "FreeRTOS.h"
|
|
#include "task.h"
|
|
#include "assert.h"
|
|
|
|
void vTaskFunction_spec() {
|
|
TaskParameters_t* params = (TaskParameters_t*)__CPROVER_alloc_object(sizeof(TaskParameters_t));
|
|
__CPROVER_assume(params != NULL);
|
|
__CPROVER_assume(params->task_id > 0 && params->task_id <= MAX_TASK_ID);
|
|
|
|
// Call the task function
|
|
vTaskFunction(params);
|
|
|
|
// Verify task parameter validation
|
|
assert(params != NULL);
|
|
assert(params->task_id > 0);
|
|
|
|
// Verify task behavior (simplified for verification)
|
|
// In real FreeRTOS, we would verify scheduler behavior
|
|
assert(1); // Task should not crash
|
|
}
|
|
""",
|
|
"confidence": 0.88,
|
|
"metadata": {
|
|
"specification_type": "system",
|
|
"verification_properties": ["parameter_validation", "task_safety"],
|
|
"framework_requirements": ["FreeRTOS"],
|
|
"estimated_verification_time": 5.0
|
|
}
|
|
}
|
|
}
|
|
]
|
|
|
|
def _load_failure_fixtures(self) -> List[Dict[str, Any]]:
|
|
"""加载失败案例固件"""
|
|
return [
|
|
{
|
|
"name": "syntax_error_function",
|
|
"input": {
|
|
"function_name": "broken_function",
|
|
"function_signature": "int broken_function(",
|
|
"function_body": "return x; // Undefined variable",
|
|
"context": {
|
|
"complexity": "simple",
|
|
"purpose": "broken example"
|
|
}
|
|
},
|
|
"output": {
|
|
"status": "failure",
|
|
"error": "syntax_error",
|
|
"message": "Function signature is incomplete",
|
|
"details": {
|
|
"error_type": "parsing_error",
|
|
"line": 1,
|
|
"expected": "Complete function signature"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"name": "unsafe_function_rejection",
|
|
"input": {
|
|
"function_name": "unsafe_memory",
|
|
"function_signature": "char* unsafe_function()",
|
|
"function_body": "char buffer[10]; return buffer;", // Returning stack address
|
|
"context": {
|
|
"complexity": "medium",
|
|
"purpose": "memory manipulation"
|
|
}
|
|
},
|
|
"output": {
|
|
"status": "failure",
|
|
"error": "unsafe_pattern",
|
|
"message": "Function returns address of local variable",
|
|
"details": {
|
|
"error_type": "memory_safety_violation",
|
|
"pattern": "stack_address_return",
|
|
"severity": "high",
|
|
"suggestion": "Use dynamic allocation or modify function design"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
|
|
def _load_timeout_fixtures(self) -> List[Dict[str, Any]]:
|
|
"""加载超时案例固件"""
|
|
return [
|
|
{
|
|
"name": "complex_function_timeout",
|
|
"input": {
|
|
"function_name": "complex_algorithm",
|
|
"function_signature": "int complex_algorithm(int* data, int size)",
|
|
"function_body": """
|
|
// Complex nested loops and recursion
|
|
for (int i = 0; i < size; i++) {
|
|
for (int j = 0; j < size; j++) {
|
|
for (int k = 0; k < size; k++) {
|
|
data[i] = data[j] + data[k];
|
|
}
|
|
}
|
|
}
|
|
return data[0];
|
|
""",
|
|
"context": {
|
|
"complexity": "very_high",
|
|
"purpose": "complex computation"
|
|
}
|
|
},
|
|
"output": {
|
|
"status": "timeout",
|
|
"error": "generation_timeout",
|
|
"message": "Specification generation timed out after 30 seconds",
|
|
"details": {
|
|
"timeout_duration": 30,
|
|
"reason": "high_complexity",
|
|
"suggestion": "Consider breaking down function or simplifying logic"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
|
|
def _load_error_fixtures(self) -> List[Dict[str, Any]]:
|
|
"""加载错误案例固件"""
|
|
return [
|
|
{
|
|
"name": "api_error",
|
|
"input": {
|
|
"function_name": "test_function",
|
|
"function_signature": "void test_function()",
|
|
"function_body": "int x = 0;",
|
|
"context": {}
|
|
},
|
|
"output": {
|
|
"status": "error",
|
|
"error": "api_error",
|
|
"message": "LLM API request failed",
|
|
"details": {
|
|
"error_type": "connection_error",
|
|
"status_code": 500,
|
|
"retry_attempts": 3
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"name": "rate_limit_error",
|
|
"input": {
|
|
"function_name": "another_test",
|
|
"function_signature": "int another_test()",
|
|
"function_body": "return 0;",
|
|
"context": {}
|
|
},
|
|
"output": {
|
|
"status": "error",
|
|
"error": "rate_limit",
|
|
"message": "LLM API rate limit exceeded",
|
|
"details": {
|
|
"error_type": "rate_limit_exceeded",
|
|
"retry_after": 60,
|
|
"current_usage": 100,
|
|
"limit": 50
|
|
}
|
|
}
|
|
}
|
|
]
|
|
|
|
def get_fixture(self, name: str) -> Dict[str, Any]:
|
|
"""获取指定名称的固件"""
|
|
all_fixtures = (
|
|
self.success_fixtures +
|
|
self.failure_fixtures +
|
|
self.timeout_fixtures +
|
|
self.error_fixtures
|
|
)
|
|
|
|
for fixture in all_fixtures:
|
|
if fixture["name"] == name:
|
|
return fixture
|
|
|
|
raise ValueError(f"Fixture not found: {name}")
|
|
|
|
def get_fixtures_by_type(self, fixture_type: str) -> List[Dict[str, Any]]:
|
|
"""根据类型获取固件"""
|
|
if fixture_type == "success":
|
|
return self.success_fixtures
|
|
elif fixture_type == "failure":
|
|
return self.failure_fixtures
|
|
elif fixture_type == "timeout":
|
|
return self.timeout_fixtures
|
|
elif fixture_type == "error":
|
|
return self.error_fixtures
|
|
else:
|
|
raise ValueError(f"Unknown fixture type: {fixture_type}")
|
|
|
|
# 全局固件实例
|
|
llm_fixtures = LLMResponseFixture() |