Compare commits
No commits in common. 'main' and 'develop' have entirely different histories.
|
Before Width: | Height: | Size: 46 KiB |
@ -1,51 +1,29 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title 核心业务流程时序图 (Updated)
|
title 日志诊断与自动修复流程
|
||||||
|
|
||||||
actor User
|
actor User
|
||||||
actor Admin
|
|
||||||
participant Frontend as FE
|
participant Frontend as FE
|
||||||
participant "FastAPI Auth" as Auth
|
participant FastAPI as API
|
||||||
participant "FastAPI Diagnosis" as Diag
|
participant Flume
|
||||||
participant "Agents (DA/PA/RA)" as Agents
|
database MySQL as DB
|
||||||
database PostgreSQL as DB
|
queue Redis
|
||||||
participant LLM
|
participant LLM
|
||||||
participant "Hadoop Cluster" as Cluster
|
|
||||||
|
|
||||||
== 用户注册与审批 ==
|
Flume -> API : 推送结构化日志
|
||||||
User -> FE : 提交注册信息
|
API -> DB : 写入 fault_record
|
||||||
FE -> Auth : POST /api/auth/register
|
FE -> API : 查询 /api/logs/query
|
||||||
Auth -> DB : 写入用户 (status=pending)
|
API -> FE : 返回日志列表
|
||||||
Admin -> FE : 查看审批队列
|
|
||||||
FE -> Auth : GET /api/auth/pending_users
|
|
||||||
Auth -> DB : 查询
|
|
||||||
Admin -> FE : 批准注册
|
|
||||||
FE -> Auth : POST /api/auth/approve/{uid}
|
|
||||||
Auth -> DB : 更新用户 (status=active)
|
|
||||||
User -> FE : 登录 (正确/错误凭据)
|
|
||||||
FE -> Auth : POST /api/auth/login
|
|
||||||
Auth -> DB : 校验
|
|
||||||
Auth -> FE : 返回 JWT / 错误提示
|
|
||||||
|
|
||||||
== 故障诊断与自动修复 ==
|
API -> LLM : call_llm_diagnose(logs)
|
||||||
Cluster -> Diag : 推送日志 (Flume/SSH)
|
LLM --> API : 返回 FixCommand(JSON)
|
||||||
Diag -> DB : 记录 Fault (status=detected)
|
API -> DB : 写入 exec_log
|
||||||
User -> FE : 点击 "AI 诊断"
|
API -> Redis : 缓存/发布修复任务
|
||||||
FE -> Diag : POST /api/diagnosis/trigger
|
API -> FE : WebSocket 推送诊断结果
|
||||||
Diag -> Agents : 调用 DiagnosisAgent
|
|
||||||
Agents -> LLM : 分析日志上下文
|
|
||||||
LLM -> Agents : 返回根因与建议 (FixCommand)
|
|
||||||
Agents -> Diag : 诊断结果 (Risk Level)
|
|
||||||
Diag -> DB : 更新 Fault (status=analyzing)
|
|
||||||
Diag -> FE : WebSocket 推送报告
|
|
||||||
|
|
||||||
User -> FE : 确认执行修复 (High risk needs approval)
|
FE -> API : /api/repair/execute
|
||||||
FE -> Diag : POST /api/repair/execute
|
API -> "修复脚本" : 执行Shell/Hadoop命令
|
||||||
Diag -> Agents : 调用 PolicyAgent (评估风险)
|
"修复脚本" -> API : stdout/stderr
|
||||||
Agents -> Agents : 调用 RepairAgent
|
API -> DB : 更新 exec_log
|
||||||
Agents -> Cluster : SSH 执行修复脚本
|
API -> FE : 返回执行结果
|
||||||
Cluster -> Agents : 返回 stdout/stderr
|
|
||||||
Agents -> Diag : 修复完成
|
|
||||||
Diag -> DB : 记录 ExecLog & 更新 Fault (status=resolved/failed)
|
|
||||||
Diag -> FE : 推送最终结果
|
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
Before Width: | Height: | Size: 36 KiB |
@ -1,52 +1,36 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title 故障检测系统总体架构 (Aligned with Backend)
|
title 故障检测系统总体架构
|
||||||
|
|
||||||
node "Hadoop Cluster" {
|
node "Hadoop Cluster" {
|
||||||
[NameNode]
|
[NameNode]
|
||||||
[ResourceManager]
|
[DataNode] as DN1
|
||||||
[DataNode / NodeManager] as Node
|
[DataNode] as DN2
|
||||||
}
|
}
|
||||||
|
|
||||||
cloud "Log & Metrics Collection" {
|
cloud "Flume Agents" as Flume
|
||||||
[Flume Agent] as Flume
|
Flume --> DN1 : 采集HDFS/YARN日志
|
||||||
[SSH Probe Service] as Probe
|
Flume --> DN2 : 采集HDFS/YARN日志
|
||||||
Flume --> Node : 采集 Hadoop 日志
|
|
||||||
Probe --> Node : 采集系统指标 (CPU/Mem/Disk)
|
component "FastAPI Service" as API
|
||||||
}
|
database "PostgreSQL" as DB
|
||||||
|
queue "Redis" as Cache
|
||||||
package "Backend Service (FastAPI)" {
|
API --> DB : 写入/查询故障记录
|
||||||
component "Auth Router" as Auth
|
API --> Cache : 状态缓存/队列
|
||||||
component "Cluster/Node Router" as ClusterSvc
|
API --> "LLM Diagnose" : 调用大模型\n返回FixCommand
|
||||||
component "Fault/Log Router" as FaultSvc
|
|
||||||
component "AI/Chat Router" as ChatSvc
|
component "Agent Orchestrator" as Orchestrator
|
||||||
|
component "Diagnosis Agent" as DA
|
||||||
component "Orchestrator" as Orchestrator
|
component "Repair Agent" as RA
|
||||||
component "DiagnosisAgent" as DA
|
component "Policy Agent" as PA
|
||||||
component "PolicyAgent" as PA
|
API --> Orchestrator : 触发诊断/修复流程
|
||||||
component "RepairAgent" as RA
|
Orchestrator --> DA : 传递结构化日志
|
||||||
|
Orchestrator --> PA : 风险评估与审批策略
|
||||||
Auth --> [PostgreSQL] : users
|
Orchestrator --> RA : 下发修复命令
|
||||||
ClusterSvc --> [PostgreSQL] : clusters, nodes
|
DA --> "LLM Diagnose" : 调用LLM分析
|
||||||
FaultSvc --> [PostgreSQL] : fault_records, hadoop_exec_logs
|
RA --> Cluster : SSH/命令执行
|
||||||
ChatSvc --> [PostgreSQL] : chat_sessions, chat_messages
|
|
||||||
|
component "Frontend Web (Vue/React + ECharts)" as FE
|
||||||
Orchestrator --> DA
|
FE --> API : /api/cluster/status\n/api/logs/query\n/api/diagnosis/result\n/api/repair/execute
|
||||||
Orchestrator --> PA
|
API --> FE : WebSocket推送状态/诊断结果
|
||||||
Orchestrator --> RA
|
|
||||||
|
|
||||||
DA --> [LLM Service] : LangChain / OpenAI
|
|
||||||
RA --> [Hadoop Cluster] : SSH (Paramiko)
|
|
||||||
}
|
|
||||||
|
|
||||||
package "Frontend (Vue 3)" {
|
|
||||||
component "Dashboard" as Dash
|
|
||||||
component "Diagnosis Chat" as DiagUI
|
|
||||||
component "Admin UI" as Admin
|
|
||||||
}
|
|
||||||
|
|
||||||
Dash --> ClusterSvc
|
|
||||||
DiagUI --> ChatSvc
|
|
||||||
Admin --> Auth
|
|
||||||
|
|
||||||
Orchestrator ..> [Redis] : 任务状态与实时推送
|
|
||||||
@enduml
|
@enduml
|
||||||
|
Before Width: | Height: | Size: 10 KiB |
@ -1,36 +1,45 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title 故障生命周期状态机与修复流程 (Updated)
|
title 日志诊断与自动修复 - 活动图
|
||||||
|
|
||||||
skinparam defaultFontName Microsoft YaHei
|
skinparam defaultFontName Microsoft YaHei
|
||||||
|
|
||||||
(*) --> "Detected (故障发现)" : 系统采集到异常日志/指标
|
start
|
||||||
|
:Flume采集日志;
|
||||||
partition "AI 诊断阶段" {
|
:FastAPI接收并解析日志;
|
||||||
"Detected (故障发现)" --> "Analyzing (正在分析)" : 触发 AI 诊断 (DiagnosisAgent)
|
:保存 FaultRecord 到 MySQL;
|
||||||
"Analyzing (正在分析)" --> "Diagnosed (已生成建议)" : LLM 分析完成并生成 FixCommand
|
|
||||||
|
partition "用户/系统触发" {
|
||||||
|
if (是否需要诊断?) then (是)
|
||||||
|
:聚合相关日志;
|
||||||
|
:构造 Prompt;
|
||||||
|
:调用 LLM 诊断;
|
||||||
|
:生成 FixCommand(JSON);
|
||||||
|
:安全校验(禁止高危命令);
|
||||||
|
else (否)
|
||||||
|
:等待新日志/用户请求;
|
||||||
|
stop
|
||||||
|
endif
|
||||||
}
|
}
|
||||||
|
|
||||||
partition "策略评估与修复阶段" {
|
if (风险等级 == high?) then (是)
|
||||||
"Diagnosed (已生成建议)" --> "Risk Assessment (PolicyAgent)"
|
:前端弹窗请求人工确认;
|
||||||
|
if (用户确认执行?) then (是)
|
||||||
if "风险等级" then
|
:继续执行修复;
|
||||||
-->[High] "Pending Approval (待审批)"
|
else (否)
|
||||||
-->[Approved] "Repairing (修复中)"
|
:记录并通知未执行;
|
||||||
else
|
stop
|
||||||
-->[Low/Medium] "Repairing (修复中)"
|
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
"Repairing (修复中)" --> "Executing (RepairAgent)"
|
:修复前预检查(配置/路径/权限);
|
||||||
"Executing (RepairAgent)" --> "Post-Check (修复后校验)"
|
if (预检查通过?) then (是)
|
||||||
}
|
:执行修复脚本;
|
||||||
|
:采集stdout/stderr;
|
||||||
"Post-Check (修复后校验)" --> if "是否修复成功?" then
|
:保存 ExecLog 到 MySQL;
|
||||||
-->[Yes] "Resolved (已解决)"
|
:更新状态到 Redis 并推送 WebSocket;
|
||||||
--> (*)
|
else (否)
|
||||||
else
|
:记录失败原因;
|
||||||
-->[No] "Failed (修复失败)"
|
|
||||||
--> "Manual Intervention (需人工介入)"
|
|
||||||
--> (*)
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
:返回结果给前端;
|
||||||
|
stop
|
||||||
@enduml
|
@enduml
|
||||||
|
Before Width: | Height: | Size: 22 KiB |
@ -1,42 +1,38 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title 故障检测系统用例图 (Updated)
|
title 故障检测系统 - 用例图
|
||||||
|
|
||||||
skinparam defaultFontName Microsoft YaHei
|
skinparam defaultFontName Microsoft YaHei
|
||||||
|
|
||||||
actor "访客" as Guest
|
actor 运维工程师 as Ops
|
||||||
actor "管理员" as Admin
|
actor 前端用户 as User
|
||||||
actor "运维人员/操作员" as Operator
|
actor 测试工程师 as QA
|
||||||
actor "观察员" as Observer
|
|
||||||
|
rectangle "故障检测系统" {
|
||||||
rectangle "故障检测与诊断系统" {
|
usecase "查看集群状态" as UC_Status
|
||||||
(注册与登录) as UC_Auth
|
usecase "查询日志" as UC_QueryLogs
|
||||||
(注册审批) as UC_Approve
|
usecase "发起故障诊断" as UC_Diagnose
|
||||||
(集群管理) as UC_Cluster
|
usecase "执行自动修复" as UC_Repair
|
||||||
(实时监控) as UC_Monitor
|
usecase "查看执行日志" as UC_ExecLogs
|
||||||
(日志检索) as UC_Logs
|
usecase "配置Flume收集" as UC_ConfigFlume
|
||||||
(AI 故障诊断) as UC_Diag
|
usecase "配置告警阈值" as UC_ConfigAlert
|
||||||
(自动/手动修复) as UC_Repair
|
usecase "导出故障与诊断报告" as UC_Export
|
||||||
(操作审计) as UC_Audit
|
usecase "生成FixCommand" as UC_FixCmd
|
||||||
(用户权限管理) as UC_UserMgmt
|
usecase "命令安全校验" as UC_SafeCheck
|
||||||
|
|
||||||
Guest --> UC_Auth
|
User --> UC_Status
|
||||||
|
User --> UC_QueryLogs
|
||||||
Admin --> UC_Approve
|
User --> UC_Diagnose
|
||||||
Admin --> UC_UserMgmt
|
User --> UC_Repair
|
||||||
Admin --> UC_Audit
|
User --> UC_ExecLogs
|
||||||
Admin --> UC_Cluster
|
|
||||||
|
Ops --> UC_ConfigFlume
|
||||||
Operator --> UC_Monitor
|
Ops --> UC_ConfigAlert
|
||||||
Operator --> UC_Logs
|
Ops --> UC_Repair
|
||||||
Operator --> UC_Diag
|
Ops --> UC_Status
|
||||||
Operator --> UC_Repair
|
|
||||||
Operator --> UC_Cluster
|
QA --> UC_QueryLogs
|
||||||
|
QA --> UC_Export
|
||||||
Observer --> UC_Monitor
|
|
||||||
Observer --> UC_Logs
|
UC_Diagnose --> UC_FixCmd : <<include>>
|
||||||
|
UC_Repair --> UC_SafeCheck : <<include>>
|
||||||
UC_Diag ..> (LLM 根因分析) : <<include>>
|
|
||||||
UC_Repair ..> (风险评估审批) : <<include>>
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
Before Width: | Height: | Size: 54 KiB |
@ -1,112 +1,130 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title 故障检测与自动修复 - 领域模型类图 (Updated from Code)
|
title 故障检测与自动修复 - 类图
|
||||||
|
skinparam backgroundColor #FFFFFF
|
||||||
|
skinparam defaultFontName Microsoft YaHei
|
||||||
skinparam classAttributeIconSize 0
|
skinparam classAttributeIconSize 0
|
||||||
|
|
||||||
package "Models (SQLAlchemy)" {
|
class FlumeAgent {
|
||||||
class User {
|
+config : Map
|
||||||
+id : int <<PK>>
|
+start()
|
||||||
+username : string
|
+stop()
|
||||||
+email : string
|
}
|
||||||
+password_hash : string
|
|
||||||
+full_name : string
|
class LogEvent {
|
||||||
+is_active : bool
|
+timestamp : datetime
|
||||||
+last_login : TIMESTAMP
|
+host : string
|
||||||
+created_at : TIMESTAMP
|
+source : string
|
||||||
}
|
+level : string
|
||||||
|
+message : string
|
||||||
class Cluster {
|
+raw : text
|
||||||
+id : int <<PK>>
|
}
|
||||||
+uuid : string <<Unique>>
|
|
||||||
+name : string
|
class FastAPIService {
|
||||||
+type : string
|
+ingestLog(e: LogEvent)
|
||||||
+node_count : int
|
+getClusterStatus()
|
||||||
+health_status : string
|
+queryLogs(filter)
|
||||||
+cpu_avg : float
|
+diagnose(logs)
|
||||||
+memory_avg : float
|
+executeRepair(cmd: FixCommand)
|
||||||
+namenode_ip : INET
|
}
|
||||||
+rm_ip : INET
|
|
||||||
+config_info : JSONB
|
class DiagnosisService {
|
||||||
+to_dict() : dict
|
+callLLM(logs) : FixCommand
|
||||||
}
|
+validateCommand(cmd: FixCommand) : bool
|
||||||
|
}
|
||||||
class Node {
|
|
||||||
+id : int <<PK>>
|
class LLMClient {
|
||||||
+uuid : string <<Unique>>
|
+apiKey : string
|
||||||
+cluster_id : int <<FK>>
|
+endpoint : string
|
||||||
+hostname : string
|
+invoke(prompt) : string
|
||||||
+ip_address : INET
|
}
|
||||||
+ssh_user : string
|
|
||||||
+ssh_password : string
|
class FixCommand {
|
||||||
+status : string
|
+fault_type : string
|
||||||
+cpu_usage : float
|
+reason : string
|
||||||
+memory_usage : float
|
+fix_script : string
|
||||||
+disk_usage : float
|
+risk_level : RiskLevel
|
||||||
+last_heartbeat : TIMESTAMP
|
}
|
||||||
}
|
|
||||||
|
enum RiskLevel {
|
||||||
class FaultRecord {
|
low
|
||||||
+id : int <<PK>>
|
medium
|
||||||
+fault_id : string <<Unique>>
|
high
|
||||||
+cluster_id : int <<FK>>
|
}
|
||||||
+fault_type : string
|
|
||||||
+fault_level : string
|
class RepairExecutor {
|
||||||
+title : string
|
+run(script) : ExecResult
|
||||||
+description : string
|
+precheck() : bool
|
||||||
+affected_nodes : JSONB
|
}
|
||||||
+affected_clusters : JSONB
|
|
||||||
+root_cause : string
|
class ExecResult {
|
||||||
+repair_suggestion : string
|
+stdout : text
|
||||||
+status : string
|
+stderr : text
|
||||||
+reporter : string
|
+exitCode : int
|
||||||
+to_dict() : dict
|
}
|
||||||
}
|
|
||||||
|
class FaultRecord {
|
||||||
class HadoopExecLog {
|
+id : int
|
||||||
+id : int <<PK>>
|
+fault_type : string
|
||||||
+from_user_id : int <<FK>>
|
+reason : string
|
||||||
+cluster_name : string
|
+timestamp : datetime
|
||||||
+description : text
|
+node : string
|
||||||
+start_time : TIMESTAMP
|
}
|
||||||
+end_time : TIMESTAMP
|
|
||||||
+to_dict() : dict
|
class ExecLog {
|
||||||
}
|
+id : int
|
||||||
|
+record_id : int
|
||||||
class ChatSession {
|
+stdout : text
|
||||||
+id : string <<PK>> (UUID)
|
+stderr : text
|
||||||
+user_id : int <<FK>>
|
+timestamp : datetime
|
||||||
+title : string
|
}
|
||||||
+created_at : DateTime
|
|
||||||
+messages : List<ChatMessage>
|
class MySQLClient {
|
||||||
}
|
+saveFault(record: FaultRecord)
|
||||||
|
+saveExecLog(log: ExecLog)
|
||||||
class ChatMessage {
|
+queryLogs(filter)
|
||||||
+id : int <<PK>>
|
}
|
||||||
+session_id : string <<FK>>
|
|
||||||
+role : string (system/user/assistant/tool)
|
class RedisCache {
|
||||||
+content : text
|
+set(key, value)
|
||||||
+created_at : DateTime
|
+publish(channel, msg)
|
||||||
}
|
+get(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
package "Agents (Logic)" {
|
class ClusterStatus {
|
||||||
class DiagnosisAgent {
|
+nodesUp : int
|
||||||
+analyze(logs) : FixCommand
|
+nodesDown : int
|
||||||
}
|
+hdfsUsage : float
|
||||||
|
+yarnActiveApps : int
|
||||||
class PolicyAgent {
|
}
|
||||||
+evaluate(cmd) : RiskLevel
|
|
||||||
}
|
class FrontendWeb {
|
||||||
|
+viewStatus()
|
||||||
class RepairAgent {
|
+queryLogs()
|
||||||
+execute(cmd) : ExecResult
|
+requestDiagnosis()
|
||||||
}
|
+executeRepair()
|
||||||
}
|
}
|
||||||
|
|
||||||
User "1" -- "0..*" HadoopExecLog : executes
|
FlumeAgent --> FastAPIService : push(LogEvent)
|
||||||
User "1" -- "0..*" ChatSession : owns
|
FastAPIService --> DiagnosisService : diagnose(logs)
|
||||||
Cluster "1" -- "0..*" Node : contains
|
DiagnosisService --> LLMClient : call_llm_diagnose
|
||||||
Cluster "1" -- "0..*" FaultRecord : has
|
DiagnosisService --> FixCommand : returns
|
||||||
ChatSession "1" -- "0..*" ChatMessage : has_many
|
FastAPIService --> RepairExecutor : execute(FixCommand)
|
||||||
|
RepairExecutor --> ExecResult : returns
|
||||||
|
FastAPIService --> MySQLClient : save FaultRecord/ExecLog
|
||||||
|
FastAPIService --> RedisCache : cache/publish status
|
||||||
|
FrontendWeb --> FastAPIService : REST/WebSocket
|
||||||
|
FastAPIService --> ClusterStatus : compose
|
||||||
|
MySQLClient --> FaultRecord
|
||||||
|
MySQLClient --> ExecLog
|
||||||
|
FixCommand --> RiskLevel
|
||||||
|
|
||||||
|
note right of FixCommand
|
||||||
|
JSON 示例:
|
||||||
|
{
|
||||||
|
fault_type: "DataNode故障",
|
||||||
|
reason: "磁盘占满",
|
||||||
|
fix_script: "ssh dn 'clean_temp.sh'",
|
||||||
|
risk_level: "medium"
|
||||||
|
}
|
||||||
|
end note
|
||||||
@enduml
|
@enduml
|
||||||
|
Before Width: | Height: | Size: 24 KiB |
@ -1,35 +1,25 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title 故障检测系统部署拓扑 (Updated)
|
title 部署拓扑
|
||||||
|
|
||||||
node "Hadoop Cluster Node" {
|
node "On-Prem / Cloud" {
|
||||||
component "Hadoop Components" as HC
|
node "Hadoop Cluster" {
|
||||||
component "Flume Agent" as Flume
|
[NameNode]
|
||||||
HC - [SSH]
|
[DataNodes...]
|
||||||
}
|
|
||||||
|
|
||||||
node "Management Server" {
|
|
||||||
package "Backend (Docker Container)" {
|
|
||||||
[FastAPI Service] as API
|
|
||||||
[Celery Workers] as Workers
|
|
||||||
}
|
}
|
||||||
|
|
||||||
package "Frontend (Docker Container)" {
|
node "Logging Layer" {
|
||||||
[Nginx / Vue App] as Web
|
[Flume Agents]
|
||||||
}
|
}
|
||||||
|
|
||||||
database "PostgreSQL" as DB
|
node "Application Layer" {
|
||||||
queue "Redis" as Redis
|
[FastAPI]
|
||||||
}
|
[LLM Connector]
|
||||||
|
[Nginx for Frontend]
|
||||||
|
}
|
||||||
|
|
||||||
cloud "AI Platform" {
|
node "Storage/Caching" {
|
||||||
[OpenAI API / LangChain] as LLM
|
[MySQL]
|
||||||
|
[Redis]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Web --> API : HTTP/WebSocket
|
|
||||||
API --> DB : Persistence
|
|
||||||
API --> Redis : Task Queue
|
|
||||||
API --> LLM : AI Analysis
|
|
||||||
API --> HC : SSH Execution
|
|
||||||
Flume --> API : Log Streaming
|
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
Binary file not shown.
@ -1,65 +0,0 @@
|
|||||||
# 基于 Hadoop 的故障检测与智能诊断项目 - 测试报告
|
|
||||||
|
|
||||||
## 修订记录
|
|
||||||
|
|
||||||
| 版本号 | 修订日期 | 修订内容 | 修订人 |
|
|
||||||
| :--- | :--- | :--- | :--- |
|
|
||||||
| v1.0.0 | 2026-01-10 | 初始测试报告框架创建 | AI Assistant |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. 测试概述
|
|
||||||
|
|
||||||
### 1.1 测试目的
|
|
||||||
验证系统在 Hadoop 集群管理、指标采集、日志检索及 AI 诊断功能上的正确性、稳定性和响应速度,确保满足《需求规格说明书》中的定义。
|
|
||||||
|
|
||||||
### 1.2 测试范围
|
|
||||||
- **功能测试**: 集群注册、SSH 校验、日志同步、AI SSE 流式对话。
|
|
||||||
- **性能测试**: 大规模日志检索响应时间、多并发指标采集压力。
|
|
||||||
|
|
||||||
## 2. 测试环境
|
|
||||||
|
|
||||||
| 类别 | 配置要求 |
|
|
||||||
| :--- | :--- |
|
|
||||||
| **硬件** | 8 vCPU, 16GB RAM (测试服务器) |
|
|
||||||
| **软件** | Docker, PostgreSQL 14, Python 3.10 |
|
|
||||||
| **集群环境** | Hadoop 3.1.3 (1 NameNode, 5 DataNodes) |
|
|
||||||
|
|
||||||
## 3. 测试用例
|
|
||||||
|
|
||||||
| 用例编号 | 功能模块 | 测试点 | 预期结果 | 状态 |
|
|
||||||
| :--- | :--- | :--- | :--- | :--- |
|
|
||||||
| TC-01 | 集群管理 | 输入合法的 SSH 信息注册集群 | 注册成功并自动发现所有节点 | [待测试] |
|
|
||||||
| TC-02 | 日志采集 | 模拟节点产生 ERROR 日志 | 数据库 5 秒内出现对应增量日志 | [待测试] |
|
|
||||||
| TC-03 | AI 诊断 | 询问集群负载情况 | AI 正确调用指标工具并给出分析建议 | [待测试] |
|
|
||||||
|
|
||||||
## 4. 测试结果
|
|
||||||
|
|
||||||
### 4.1 通过/失败统计
|
|
||||||
- **总用例数**: 0
|
|
||||||
- **通过数**: 0
|
|
||||||
- **失败数**: 0
|
|
||||||
- **跳过数**: 0
|
|
||||||
- **通过率**: 0%
|
|
||||||
|
|
||||||
## 5. 缺陷分析
|
|
||||||
|
|
||||||
### 5.1 严重等级分布
|
|
||||||
- **致命 (Blocker)**: 0
|
|
||||||
- **严重 (Critical)**: 0
|
|
||||||
- **一般 (Major)**: 0
|
|
||||||
- **次要 (Minor)**: 0
|
|
||||||
|
|
||||||
## 6. 测试结论
|
|
||||||
|
|
||||||
### 6.1 质量评估
|
|
||||||
[在此填写本次测试阶段的总体质量评价,例如:系统核心流程已打通,但 AI 诊断在极端日志量下存在响应延迟。]
|
|
||||||
|
|
||||||
## 7. 附录
|
|
||||||
|
|
||||||
### 7.1 测试日志
|
|
||||||
- 后端服务日志: `backend/logs/test_run.log`
|
|
||||||
- 采集器性能统计: `docs/metrics_report.csv`
|
|
||||||
|
|
||||||
### 7.2 截图
|
|
||||||
[占位符:插入关键功能运行截图]
|
|
||||||
Binary file not shown.
Loading…
Reference in new issue