|
|
"""
|
|
|
代码分析和上下文分析模块
|
|
|
"""
|
|
|
import re
|
|
|
from pathlib import Path
|
|
|
from typing import List, Optional, Tuple, Set
|
|
|
|
|
|
from .models import CppcheckIssue, CodeContext
|
|
|
|
|
|
|
|
|
def analyze_code_context(file_path: Path, target_line: Optional[int] = None, project_root: Optional[Path] = None) -> CodeContext:
    """Build a heuristic ``CodeContext`` for a C/C++ source file.

    The file is scanned line by line with regular expressions; no real
    parsing is performed. Every ``#include`` line in the file is recorded.
    When ``target_line`` is a valid 1-based line number the function also
    records:

    * the nearest function-like line, ``class``/``struct`` line and
      ``namespace`` line found at or above the target line,
    * variable-like declarations from 20 lines before to 4 lines after the
      target line,
    * lines containing a control-flow keyword in the 20 lines before the
      target line.

    Args:
        file_path: Path reported for the issue. The returned context is always
            constructed with this path, even when another file is read.
        target_line: 1-based line number to analyse. ``None``, ``0`` or an
            out-of-range value limits the analysis to the include list.
        project_root: Optional directory searched recursively when
            ``file_path`` does not exist.

    Returns:
        The populated ``CodeContext``. If the file cannot be read, a context
        constructed from ``file_path`` alone is returned.
    """
    actual_file_path = file_path

    # The reported path is missing: look for a same-named file in the project.
    if not file_path.exists() and project_root:
        # Only the first hit is used, so stop walking the tree once it is found.
        same_name = next(project_root.glob(f"**/{file_path.name}"), None)
        if same_name is not None:
            actual_file_path = same_name
            print(f"找到匹配的文件: {actual_file_path}")
        else:
            # NOTE(review): this falls back to an arbitrary .cpp file, so the
            # resulting context may describe an unrelated file. Kept because
            # the original does this deliberately ("sample file").
            any_cpp = next(project_root.glob("**/*.cpp"), None)
            if any_cpp is not None:
                actual_file_path = any_cpp
                print(f"使用示例文件: {actual_file_path}")

    try:
        content = actual_file_path.read_text(encoding="utf-8", errors="replace")
    except (OSError, ValueError) as e:
        # Best effort: an unreadable file yields an otherwise empty context.
        print(f"无法读取文件 {actual_file_path}: {e}")
        return CodeContext(file_path=file_path)

    lines = content.splitlines()
    context = CodeContext(file_path=file_path)

    # Collect included headers; quotes and angle brackets are stripped.
    for raw in lines:
        text = raw.strip()
        if text.startswith('#include'):
            context.includes.append(text[8:].strip().strip('"<>'))

    # Without a usable target line only the include list is available.
    if not target_line or not 1 <= target_line <= len(lines):
        return context

    target_line_idx = target_line - 1
    # Target line first, then upwards to the top of the file.
    lines_above = [raw.strip() for raw in reversed(lines[:target_line_idx + 1])]

    # Nearest line that looks like a function definition.
    function_def = re.compile(r'^\w+.*\s+\w+\s*\([^)]*\)\s*\{?\s*$')
    for text in lines_above:
        if function_def.match(text):
            name = re.search(r'(\w+)\s*\([^)]*\)', text)
            if name:
                context.function_name = name.group(1)
                break

    # Nearest class/struct line.
    for text in lines_above:
        if re.match(r'^\s*(class|struct)\s+\w+', text):
            name = re.search(r'(class|struct)\s+(\w+)', text)
            if name:
                context.class_name = name.group(2)
                break

    # Nearest named namespace line; anonymous namespaces are skipped.
    for text in lines_above:
        if text.startswith('namespace '):
            name = re.search(r'namespace\s+(\w+)', text)
            if name:
                context.namespace = name.group(1)
                break

    # Variable-like declarations near the target line.
    start_analysis = max(0, target_line_idx - 20)
    end_analysis = min(len(lines), target_line_idx + 5)
    declaration = re.compile(r'^\w+.*\s+\w+\s*[=;]')
    call_like = re.compile(r'^\w+.*\s+\w+\s*\([^)]*\)')
    for raw in lines[start_analysis:end_analysis]:
        text = raw.strip()
        if declaration.match(text) and not call_like.match(text):
            name = re.search(r'(\w+)\s*[=;]', text)
            if name:
                context.variable_context.append(name.group(1))

    # Control-flow statements before the target line. Keywords must match as
    # whole words; a plain substring test would also flag identifiers such as
    # "modify" (if), "format" (for) or "entry" (try).
    control_keyword = re.compile(r'\b(?:if|for|while|switch|try|catch)\b')
    for raw in lines[start_analysis:target_line_idx]:
        text = raw.strip()
        if control_keyword.search(text):
            context.control_flow_context.append(text)

    return context
|
|
|
|
|
|
|
|
|
def analyze_issue_relevance(issue: CppcheckIssue, code_context: CodeContext) -> dict:
    """Score how likely a reported issue is to be a real problem.

    The score starts from a severity weight, is adjusted by one rule selected
    from the issue id (matched case-insensitively by substring), and gets
    small bonuses when the context has a function name, a class name or any
    control-flow lines.

    Args:
        issue: Issue whose ``id`` and ``severity`` are inspected.
        code_context: Context whose ``variable_context``, ``function_name``,
            ``class_name`` and ``control_flow_context`` are inspected.

    Returns:
        A dict with ``relevance_score`` (int), ``is_likely_real`` (score of 5
        or more), ``analysis_details`` (list of explanation strings) and
        ``confidence`` (score times 10, clamped to 0..100).
    """
    issue_id = issue.id.lower()

    # Base weight from the severity level; unknown levels contribute nothing.
    base_weight = {"error": 10, "warning": 7, "information": 3, "note": 1}.get(issue.severity.lower(), 0)
    score = base_weight
    notes = [f"严重级别权重: {base_weight}"]

    # One entry per issue family: (id fragments, evidence test over the
    # variable names, (delta, note) when evidence exists, (delta, note) otherwise).
    # Only the first family whose fragment occurs in the id is applied.
    id_rules = (
        (
            ("uninitvar",),
            lambda names: bool(names),
            (5, "检测到变量上下文,未初始化变量问题可能真实存在"),
            (-2, "未检测到变量上下文,可能是误报"),
        ),
        (
            ("nullpointer",),
            lambda names: any("ptr" in name.lower() or "*" in name for name in names),
            (6, "检测到指针变量,空指针问题可能真实存在"),
            (-1, "未检测到明显的指针操作"),
        ),
        (
            ("memleak",),
            lambda names: any("new" in name.lower() or "malloc" in name.lower() for name in names),
            (7, "检测到内存分配操作,内存泄漏问题可能真实存在"),
            (-2, "未检测到内存分配操作"),
        ),
        (
            ("arrayindex", "buffer"),
            lambda names: any("[" in name or "array" in name.lower() for name in names),
            (6, "检测到数组操作,数组越界问题可能真实存在"),
            (-1, "未检测到明显的数组操作"),
        ),
    )
    for fragments, has_evidence, on_hit, on_miss in id_rules:
        if any(fragment in issue_id for fragment in fragments):
            delta, note = on_hit if has_evidence(code_context.variable_context) else on_miss
            score += delta
            notes.append(note)
            break

    # Bonuses for structural context around the issue.
    if code_context.function_name:
        score += 2
        notes.append(f"问题位于函数 {code_context.function_name} 中")

    if code_context.class_name:
        score += 1
        notes.append(f"问题位于类 {code_context.class_name} 中")

    if code_context.control_flow_context:
        score += 1
        notes.append(f"问题位于复杂控制流中,包含 {len(code_context.control_flow_context)} 个控制结构")

    return {
        "relevance_score": score,
        "is_likely_real": score >= 5,
        "analysis_details": notes,
        "confidence": min(100, max(0, score * 10)),
    }
|
|
|
|
|
|
|
|
|
def analyze_project_structure(project_root: Path) -> dict:
    """Scan a project directory and summarise its C/C++ layout.

    Args:
        project_root: Directory to scan recursively.

    Returns:
        A dict with these keys:

        * ``root``: ``project_root`` as given.
        * ``source_files``: paths matching ``*.cpp``, ``*.c``, ``*.cc``, ``*.cxx``.
        * ``header_files``: paths matching ``*.h``, ``*.hpp``, ``*.hxx``.
        * ``include_dirs``: sorted parent directories of the header files.
        * ``dependencies``: sorted include targets found in the first 10
          source files.
        * ``build_files``: CMake, Makefile, ``*.mk``, ``*.pro`` and
          ``*.vcxproj`` files.
        * ``test_files``: ``.cpp`` files named ``test_*`` / ``*_test`` or
          located under a ``tests`` directory.

        Every value except ``root`` is a list. All of them are empty when
        ``project_root`` does not exist.
    """
    project_info = {
        "root": project_root,
        "source_files": [],
        "header_files": [],
        "include_dirs": [],
        # A list on every return path, so callers always see the same type.
        "dependencies": [],
        "build_files": [],
        "test_files": [],
    }

    if not project_root.exists():
        return project_info

    def collect(patterns):
        # Several patterns can match the same file (e.g. tests/test_a.cpp);
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(
            path for pattern in patterns for path in project_root.glob(pattern)
        ))

    project_info["source_files"] = collect(["**/*.cpp", "**/*.c", "**/*.cc", "**/*.cxx"])
    project_info["header_files"] = collect(["**/*.h", "**/*.hpp", "**/*.hxx"])
    project_info["build_files"] = collect(
        ["**/CMakeLists.txt", "**/Makefile", "**/*.mk", "**/*.pro", "**/*.vcxproj"]
    )
    project_info["test_files"] = collect(["**/test_*.cpp", "**/*_test.cpp", "**/tests/**/*.cpp"])

    # Every directory holding a header counts as an include directory.
    # Sorted so the result does not depend on set iteration order.
    project_info["include_dirs"] = sorted({header.parent for header in project_info["header_files"]})

    # Simple dependency analysis: include targets of the first 10 source files.
    dependencies = set()
    for source_file in project_info["source_files"][:10]:
        try:
            content = source_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable files are skipped; the scan is best effort.
            continue
        for raw in content.splitlines():
            text = raw.strip()
            if text.startswith('#include'):
                dependencies.add(text[8:].strip().strip('"<>'))

    project_info["dependencies"] = sorted(dependencies)

    return project_info
|
|
|
|
|
|
|
|
|
def get_enhanced_issue_analysis(issue: CppcheckIssue, project_info: Optional[dict] = None) -> Tuple[CodeContext, dict]:
    """Return the code context and relevance analysis for one issue.

    The first location of the issue is analysed with ``analyze_code_context``
    and scored with ``analyze_issue_relevance``. When ``project_info`` is
    given, the score gets +2 if the file is listed in its ``source_files``
    and +1 if the file includes a header found in one of its
    ``include_dirs``; ``confidence`` and ``is_likely_real`` are then
    recomputed from the adjusted score.

    Args:
        issue: Issue to analyse; only its first location is used.
        project_info: Optional dict; the keys ``root``, ``source_files`` and
            ``include_dirs`` are read.

    Returns:
        ``(code_context, relevance_analysis)``. For an issue without
        locations, a context for the path ``"unknown"`` and a zero-score
        analysis are returned.
    """
    primary = issue.locations[0] if issue.locations else None
    if not primary:
        return CodeContext(file_path=Path("unknown")), {"relevance_score": 0, "is_likely_real": False, "analysis_details": [], "confidence": 0}

    # Analyse the code around the reported location.
    project_root = project_info.get("root") if project_info else None
    code_context = analyze_code_context(primary.file_path, primary.line, project_root)

    # Score the issue against that context.
    relevance_analysis = analyze_issue_relevance(issue, code_context)

    if not project_info:
        return code_context, relevance_analysis

    # Bonus when the file is a known project source file.
    if primary.file_path in project_info.get("source_files", []):
        relevance_analysis["relevance_score"] += 2
        relevance_analysis["analysis_details"].append("文件是项目源文件")

    # Names of headers living in the project's include directories. All
    # header extensions are considered, not just ".h".
    project_includes = {
        header_file.name
        for include_dir in project_info.get("include_dirs", [])
        for pattern in ("*.h", "*.hpp", "*.hxx")
        for header_file in include_dir.glob(pattern)
    }

    # Bonus (at most once) when the file includes a project header.
    for include_file in code_context.includes:
        # Compare by file name so that '#include "sub/foo.h"' matches a
        # project header named foo.h.
        include_name = include_file.replace("\\", "/").rsplit("/", 1)[-1]
        if include_name in project_includes:
            relevance_analysis["relevance_score"] += 1
            relevance_analysis["analysis_details"].append(f"使用了项目头文件: {include_file}")
            break

    # The score may have changed, so refresh the derived fields.
    relevance_analysis["confidence"] = min(100, max(0, relevance_analysis["relevance_score"] * 10))
    relevance_analysis["is_likely_real"] = relevance_analysis["relevance_score"] >= 5

    return code_context, relevance_analysis
|
|
|
|
|
|
|
|
|
def extract_issue_context_from_source(issue: CppcheckIssue, project_root: Optional[Path] = None) -> dict:
    """Extract the source code surrounding an issue's first location.

    Args:
        issue: Issue whose first location (``file_path`` and ``line``) is used.
        project_root: Optional project directory. Relative paths are resolved
            against it; if the file is still missing, a same-named file is
            searched recursively, and failing that the first ``.cpp`` file
            found under the root is used.

    Returns:
        A dict with the keys ``file_path``, ``line_number``,
        ``function_name``, ``code_snippet``, ``surrounding_code`` and
        ``real_issue_context``. Entries that could not be determined stay
        ``None``.
    """
    print(f"开始提取问题上下文: {issue.id}")

    context = dict.fromkeys((
        'file_path',
        'line_number',
        'function_name',
        'code_snippet',
        'surrounding_code',
        'real_issue_context',
    ))

    if not issue.locations:
        print("没有位置信息")
        return context

    primary_location = issue.locations[0]
    reported_path = primary_location.file_path
    context['file_path'] = reported_path
    context['line_number'] = primary_location.line

    # Work out which file on disk to read.
    if not project_root:
        source_file = reported_path
    else:
        source_file = reported_path if reported_path.is_absolute() else project_root / reported_path

        if not source_file.exists():
            # Try a same-named file anywhere under the project root.
            filename = reported_path.name
            print(f"查找文件: {filename}")
            same_name_files = list(project_root.glob(f"**/{filename}"))
            if same_name_files:
                source_file = same_name_files[0]
                print(f"找到匹配的文件: {source_file}")
            else:
                # Last resort: use the first .cpp file found as a sample.
                cpp_files = list(project_root.glob("**/*.cpp"))
                if cpp_files:
                    source_file = cpp_files[0]
                    print(f"使用示例文件: {source_file}")
                else:
                    print("未找到任何 .cpp 文件")

    if not source_file or not source_file.exists():
        return context

    try:
        print(f"正在读取源文件: {source_file}")

        # Read the code around the reported line.
        from .parsers import read_code_snippet
        code_snippet = read_code_snippet(source_file, primary_location.line, context=20)
        context['code_snippet'] = code_snippet
        context['surrounding_code'] = code_snippet
        print(f"成功读取代码片段,长度: {len(code_snippet)} 字符")

        # The first snippet line that looks like a function definition
        # supplies the function name.
        function_def = re.compile(r'^\w+.*\s+\w+\s*\([^)]*\)\s*\{?\s*$')
        for text in (raw.strip() for raw in code_snippet.split('\n')):
            if function_def.match(text):
                name = re.search(r'(\w+)\s*\([^)]*\)', text)
                if name:
                    context['function_name'] = name.group(1)
                    break

        # Assemble the annotated snippet: a C++ comment header, then the code.
        context['real_issue_context'] = (
            "\n"
            "// 基于原项目中的真实问题代码\n"
            f"// 文件: {primary_location.file_path}\n"
            f"// 行号: {primary_location.line}\n"
            f"// 问题: {issue.message}\n"
            "// 原始代码片段:\n"
            f"{code_snippet}\n"
        )
    except Exception as e:
        # Best effort: report the failure and return what was gathered so far.
        print(f"警告: 无法读取源文件 {source_file}: {e}")

    return context
|
|
|
|
|
|
|
|
|
def filter_and_clean_issues(issues: List[CppcheckIssue], project_info: Optional[dict] = None) -> List[CppcheckIssue]:
    """Drop issues that are unlikely to be real problems.

    Each issue is analysed with ``get_enhanced_issue_analysis`` and then
    judged by the first rule that applies:

    1. Ids on the known-noise list (missing includes, configuration and
       check-level notices, ``unknown``) are always dropped.
    2. ``error`` and ``warning`` issues are kept.
    3. Issues with a detected enclosing function or class are kept.
    4. ``information`` and ``note`` issues need a relevance score of 7 or more.
    5. Any other severity needs a relevance score of 5 or more.

    Args:
        issues: Issues to filter.
        project_info: Optional project description, passed through to
            ``get_enhanced_issue_analysis``.

    Returns:
        The kept issues, in their original order.
    """
    print("正在过滤和清理问题...")

    # Ids treated as obvious noise (compared in lower case).
    excluded_ids = frozenset({
        "missinginclude", "missingincludesystem", "toomanyconfigs",
        "normalchecklevelmaxbranches", "checklevelnormal", "unknown",
    })

    cleaned_issues = []
    filtered_count = 0

    for issue in issues:
        code_context, relevance_analysis = get_enhanced_issue_analysis(issue, project_info)
        score = relevance_analysis["relevance_score"]
        severity = issue.severity.lower()

        if issue.id.lower() in excluded_ids:
            # The exclusion must win. If it were checked before the severity
            # and context rules, they would re-admit the issue and the list
            # would have almost no effect.
            should_keep = False
        elif severity in ("error", "warning"):
            should_keep = True
        elif code_context.function_name or code_context.class_name:
            should_keep = True
        elif severity in ("information", "note"):
            # Low severities need a higher score than the general threshold.
            should_keep = score >= 7
        else:
            should_keep = score >= 5

        if should_keep:
            cleaned_issues.append(issue)
        else:
            filtered_count += 1
            print(f"  过滤问题: {issue.id} - {issue.message[:50]}... (相关性分数: {relevance_analysis['relevance_score']})")

    print(f"问题过滤完成: 保留 {len(cleaned_issues)} 个问题,过滤掉 {filtered_count} 个不可靠问题")
    return cleaned_issues
|
|
|
|
|
|
|
|
|
def write_cleaned_report(issues: List[CppcheckIssue], output_path: Path) -> None:
    """Write the given issues to ``output_path`` as a text report.

    One line is written per issue location, in the form
    ``file:line:0: severity: message [id]``. The file is created or
    overwritten and encoded as UTF-8.

    Args:
        issues: Issues to write; an issue without locations produces no lines.
        output_path: Destination file.
    """
    print(f"正在生成清理后的报告: {output_path}")

    with open(output_path, 'w', encoding='utf-8') as report:
        # The column is not tracked, so it is always written as 0.
        report.writelines(
            f"{location.file_path}:{location.line}:0: {issue.severity}: {issue.message} [{issue.id}]\n"
            for issue in issues
            for location in issue.locations
        )

    print(f"清理后的报告已保存: {output_path}")
|
|
|
|
|
|
|
|
|
def prioritize_issues(issues: List[CppcheckIssue]) -> List[CppcheckIssue]:
    """Return the issues sorted by priority, most important first.

    The sort key is, in order:

    1. severity: ``error`` < ``warning`` < ``information`` < ``note`` < others,
    2. rule id: well-known important rules first (case-insensitive match),
    3. a stable checksum of the first location's file path, which spreads
       files within one priority class.

    The sort is stable, so issues with equal keys keep their input order.

    Args:
        issues: Issues to sort; the input list is not modified.

    Returns:
        A new sorted list.
    """
    # Imported here so the block needs no change to the module's imports.
    import zlib

    severity_priority = {"error": 0, "warning": 1, "information": 2, "note": 3}

    # Stored in lower case because ids are compared case-insensitively.
    important_rules = frozenset(rule.lower() for rule in (
        "nullPointer", "uninitvar", "arrayIndexOutOfBounds", "memleak",
        "resourceLeak", "useAfterFree", "doubleFree", "bufferAccessOutOfBounds",
        "unusedVariable", "unusedFunction", "deadcode", "unreachableCode",
    ))

    def get_priority(issue: CppcheckIssue) -> tuple:
        severity_score = severity_priority.get(issue.severity.lower(), 4)
        rule_score = 0 if issue.id.lower() in important_rules else 1

        file_name = str(issue.locations[0].file_path) if issue.locations else ""
        # crc32 rather than hash(): str hashes are salted per process, which
        # would make the resulting order differ from run to run.
        file_score = zlib.crc32(file_name.encode("utf-8", errors="replace")) % 1000

        return (severity_score, rule_score, file_score)

    return sorted(issues, key=get_priority)
|
|
|
|
|
|
|
|
|
def analyze_issues_with_context(issues: List[CppcheckIssue]) -> List[Tuple[CppcheckIssue, dict]]:
    """Analyse the code context and relevance of every issue.

    Args:
        issues: Issues to analyse. Issues without a usable first location are
            skipped.

    Returns:
        A list of ``(issue, info)`` pairs in input order. ``info`` holds
        ``code_context``, ``relevance_analysis`` and ``original_index`` (the
        issue's position in ``issues``).
    """
    print("正在分析问题上下文相关性...")

    results = []
    for position, issue in enumerate(issues):
        print(f"分析问题 {position+1}/{len(issues)}: {issue.id}")

        # Nothing to analyse without a location.
        location = issue.locations[0] if issue.locations else None
        if not location:
            continue

        # Context first, then the relevance score derived from it.
        code_context = analyze_code_context(location.file_path, location.line)
        info = {
            "code_context": code_context,
            "relevance_analysis": analyze_issue_relevance(issue, code_context),
            "original_index": position,
        }
        results.append((issue, info))

    return results
|