From f3d572edafb50cfe09bf66e72259c6b5e3c53f0c Mon Sep 17 00:00:00 2001 From: Darvin Date: Sat, 25 Apr 2026 20:49:06 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E6=8F=90=E4=BA=A4=EF=BC=9A?= =?UTF-8?q?=E6=9A=97=E5=93=A8=E7=B3=BB=E7=BB=9F=E5=AE=8C=E6=95=B4=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 10 + CLAUDE.md | 37 ++++ dark_sentinel_ws/docs/架构说明.md | 68 ++++++ dark_sentinel_ws/gateway_standalone.py | 204 ++++++++++++++++++ dark_sentinel_ws/launch_all.py | 50 +++++ dark_sentinel_ws/requirements.txt | 6 + dark_sentinel_ws/setup_env.sh | 13 ++ .../dark_brain/dark_brain/fusion_engine.py | 86 ++++++++ .../dark_brain/dark_brain/state_machine.py | 89 ++++++++ .../dark_gateway/dark_gateway/api_server.py | 198 +++++++++++++++++ .../src/dark_hal/dark_hal/go2_motion_ctrl.py | 58 +++++ .../dark_hal/dark_hal/go2_sensor_bridge.py | 108 ++++++++++ .../src/dark_interfaces/msg/EnemyProfile.msg | 12 ++ .../src/dark_interfaces/srv/EmergencyStop.srv | 4 + .../dark_perception/audio_doa_node.py | 38 ++++ .../dark_perception/spatial_mapping_node.py | 115 ++++++++++ .../dark_perception/thermal_analyzer_node.py | 58 +++++ .../dark_perception/yolo_detector_node.py | 81 +++++++ docs/前端说明+接口说明.md | 81 +++++++ docs/后端框架建议.md | 113 ++++++++++ docs/开发日志.md | 199 +++++++++++++++++ docs/规格说明书.md | 163 ++++++++++++++ explore/get_video.py | 40 ++++ explore/main.py | 48 +++++ explore/stream_server.py | 102 +++++++++ explore/test_d435i.py | 43 ++++ explore/test_d435i_v4l2.py | 33 +++ explore/test_vl_analysis.py | 119 ++++++++++ explore/test_yolo_depth.py | 57 +++++ explore/test_yolo_remote.py | 117 ++++++++++ explore/test_yolo_trt.py | 109 ++++++++++ explore/教程.md | 14 ++ 32 files changed, 2473 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 dark_sentinel_ws/docs/架构说明.md create mode 100644 dark_sentinel_ws/gateway_standalone.py create mode 100644 dark_sentinel_ws/launch_all.py create mode 100644 dark_sentinel_ws/requirements.txt create mode 100644 dark_sentinel_ws/setup_env.sh create mode 100644 dark_sentinel_ws/src/dark_brain/dark_brain/fusion_engine.py create mode 100644 dark_sentinel_ws/src/dark_brain/dark_brain/state_machine.py create mode 100644 dark_sentinel_ws/src/dark_gateway/dark_gateway/api_server.py create mode 100644 dark_sentinel_ws/src/dark_hal/dark_hal/go2_motion_ctrl.py create mode 100644 dark_sentinel_ws/src/dark_hal/dark_hal/go2_sensor_bridge.py create mode 100644 dark_sentinel_ws/src/dark_interfaces/msg/EnemyProfile.msg create mode 100644 dark_sentinel_ws/src/dark_interfaces/srv/EmergencyStop.srv create mode 100644 dark_sentinel_ws/src/dark_perception/dark_perception/audio_doa_node.py create mode 100644 dark_sentinel_ws/src/dark_perception/dark_perception/spatial_mapping_node.py create mode 100644 dark_sentinel_ws/src/dark_perception/dark_perception/thermal_analyzer_node.py create mode 100644 dark_sentinel_ws/src/dark_perception/dark_perception/yolo_detector_node.py create mode 100644 docs/前端说明+接口说明.md create mode 100644 docs/后端框架建议.md create mode 100644 docs/开发日志.md create mode 100644 docs/规格说明书.md create mode 100644 explore/get_video.py create mode 100644 explore/main.py create mode 100644 explore/stream_server.py create mode 100644 explore/test_d435i.py create mode 100644 explore/test_d435i_v4l2.py create mode 100644 explore/test_vl_analysis.py create mode 100644 explore/test_yolo_depth.py create mode 100644 explore/test_yolo_remote.py create mode 100644 explore/test_yolo_trt.py create mode 100644 explore/教程.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..51b7429 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +sdk/Miniconda3-latest-Linux-x86_64.sh +sdk/unitree_sdk2/ +sdk/unitree_sdk2py/ +sdk/*.pt +sdk/*.onnx +__pycache__/ +*.pyc +*.pyo +.DS_Store +explore/*.png diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..713574e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,37 @@ +# 暗哨系统 — 开发约束 + +## 硬件环境 +- 机器狗型号:Unitree Go2 +- 有线网卡:`enx9c69d36ee634`,本机 IP:`192.168.123.222` +- 机器人 IP:`192.168.123.161` +- 拓展坞 IP:`192.168.123.18`(SSH 登录用户 `unitree`,密码 `123`;浏览器访问用户名 `unitree`,密码 `123`) +- 深度相机:Intel RealSense D435i(接在拓展坞,USB ID `8086:0b3a`) + - 彩色流:`/dev/video4`(1920x1080) + - 深度流:`/dev/video2`(1280x720,V4L2 只能给 uint8,精度有限) +- SDK:`/home/darvin/dog/sdk/unitree_sdk2py`(Python SDK) +- 虚拟环境:`conda activate dark_sentinel` + +## 目录约定 +- `docs/`:文档(规格说明书、框架建议、开发日志) +- `sdk/`:开发工具(unitree_sdk2, unitree_sdk2py, yolov8n.pt, yolov8n.onnx, Miniconda) +- `dark_sentinel_ws/`:后端主体代码 +- `explore/`:调试脚本(硬件相关先在这里调通再合并) + +## 开发流程 +- 涉及硬件的功能:先在 `explore/` 里调试通,再合并到 `dark_sentinel_ws/` + +## 开发约束 +- 核心语言:Python 或 C++ +- 视频流(Go2 前置相机):`ChannelFactoryInitialize` + `VideoClient.GetImageSample()` +- YOLO 检测:`model.predict(frame, classes=[0], conf=0.5)`,只检测人 +- 每次工作完成后更新 `docs/开发日志.md` + +## 拓展坞关键信息 +- 系统:Ubuntu 20.04,aarch64(Jetson 平台,tegra) +- Python:3.8.10 +- **拓展坞没有外网**,pip/apt 均无法联网 +- SSH 免密已配置:`ssh -i ~/.ssh/id_rsa_unitree unitree@192.168.123.18` +- sudo 密码:`123` +- 已安装关键包:numpy 1.17.4, opencv 4.2, scipy 1.3.3, TensorRT 8.5.2.2, ros-noetic-librealsense2 +- **pyrealsense2 未安装**,正在从源码编译中(见开发日志) + diff --git a/dark_sentinel_ws/docs/架构说明.md b/dark_sentinel_ws/docs/架构说明.md new file mode 100644 index 0000000..58168f4 --- /dev/null +++ b/dark_sentinel_ws/docs/架构说明.md @@ -0,0 +1,68 @@ +# 暗哨系统后端架构说明 + +## 整体架构 + +``` +dark_sentinel_ws/ +├── src/ +│ ├── dark_interfaces/ # 自定义消息/服务定义 +│ ├── dark_hal/ # 硬件抽象层 +│ ├── dark_perception/ # AI感知层 +│ ├── dark_brain/ # 决策层 +│ └── dark_gateway/ # Web网关层 +├── launch_all.py # 一键启动 +├── setup_env.sh # 环境配置 +└── requirements.txt +``` + +## 各层说明 + +### dark_hal — 硬件抽象层 +| 文件 | 职责 | +|------|------| +| `go2_sensor_bridge.py` | 通过 unitree_sdk2py 获取前置相机图像,发布 `/camera/color/image_raw`;订阅机器狗状态发布 `/go2/state` | +| `go2_motion_ctrl.py` | 订阅 `/cmd_vel` 转为 SDK 运动指令;提供 `/emergency_stop` 急停服务 | + +### dark_perception — AI感知层 +| 文件 | 职责 | +|------|------| +| `yolo_detector_node.py` | 订阅相机图像,YOLOv8 只检测人(classes=[0]),发布检测结果到 `/perception/detections` | +| `spatial_mapping_node.py` | 融合 2D 检测框与深度图,估算目标 3D 坐标和身高,发布到 `/perception/spatial_detections` | +| `thermal_analyzer_node.py` | 订阅红外图像,伪彩色转化,检测热源异常,发布告警到 `/perception/thermal_alert` | +| `audio_doa_node.py` | 声源定位,发布方位角到 `/perception/audio_doa`(待接入 DFRobot 阵列) | + +### dark_brain — 决策层 +| 文件 | 职责 | +|------|------| +| `fusion_engine.py` | 订阅视觉/热成像/声源结果,多模态自适应权重融合,生成 `EnemyProfile` 发布到 `/brain/enemy_profiles` | +| `state_machine.py` | 监听心跳,断网 >3s 触发离线模式;离线时将 EnemyProfile 写入本地 SQLite | + +### dark_gateway — Web网关层 +| 文件 | 职责 | +|------|------| +| `api_server.py` | FastAPI 服务;WebSocket `/ws/alerts` 推送告警/状态/声源数据;REST `/api/move` `/api/estop` 接收控制指令 | + +## 话题总览 + +``` +/camera/color/image_raw ← go2_sensor_bridge +/go2/state ← go2_sensor_bridge +/cmd_vel → go2_motion_ctrl +/perception/detections ← yolo_detector +/perception/image_annotated ← yolo_detector +/perception/spatial_detections← spatial_mapping +/perception/thermal_alert ← thermal_analyzer +/perception/audio_doa ← audio_doa +/brain/enemy_profiles ← fusion_engine +/brain/system_status ← state_machine +/gateway/heartbeat ← api_server +/gateway/estop ← api_server +``` + +## 启动方式 + +```bash +source setup_env.sh # 默认网卡 enx9c69d36ee634 +pip install -r requirements.txt +python3 launch_all.py +``` diff --git a/dark_sentinel_ws/gateway_standalone.py b/dark_sentinel_ws/gateway_standalone.py new file mode 100644 index 0000000..0a5adc1 --- /dev/null +++ b/dark_sentinel_ws/gateway_standalone.py @@ -0,0 +1,204 @@ +""" +暗哨系统 — 无 ROS2 独立网关 +运行: conda activate dark_sentinel && python3 gateway_standalone.py +接口: + GET /api/video_feed MJPEG 视频流(带 YOLO 框) + WS /ws/telemetry 10Hz JSON 推送 + POST /api/control/emergency_stop 急停(待接入 SDK) + POST /api/move 速度控制(待接入 SDK) +""" +import io, time, base64, threading, urllib.request, json +import cv2, numpy as np +from ultralytics import YOLO +from openai import OpenAI +from fastapi import FastAPI, WebSocket, WebSocketDisconnect +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse +import uvicorn + +DOCK_COLOR_URL = 'http://192.168.123.18:8080/color' +DOCK_DEPTH_URL = 'http://192.168.123.18:8080/depth_raw' +MODEL_PATH = '/home/darvin/dog/sdk/yolov8n.pt' +CONF = 0.5 +FY = 388.86 +BBOX_SCALE = 0.614 +VL_INTERVAL = 5.0 + +vl_client = OpenAI( + api_key='sk-1fdd0430c63e4f2d99db031b16e28f39', + base_url='https://dashscope.aliyuncs.com/compatible-mode/v1', +) + +# ── 共享状态 ───────────────────────────────────────────── +_lock = threading.Lock() +_latest_frame = None # 标注后的 BGR 帧 +_latest_depth = None # uint16 深度,mm +_telemetry = { # 最新遥测数据 + 'target_detected': False, + 'targets': [], + 'system_status': 'ONLINE', +} +_vl_text = '' +_vl_last = 0.0 +_estop = False + + +# ── VL 分析 ────────────────────────────────────────────── +def _analyze_person(crop): + global _vl_text + _, buf = cv2.imencode('.jpg', crop, [cv2.IMWRITE_JPEG_QUALITY, 85]) + b64 = base64.b64encode(buf).decode() + try: + resp = vl_client.chat.completions.create( + model='qwen-vl-plus', + messages=[{'role': 'user', 'content': [ + {'type': 'image_url', 'image_url': {'url': f'data:image/jpeg;base64,{b64}'}}, + {'type': 'text', 'text': '分析图中人物:1)是否持有武器或枪支 2)姿态 3)行为意图。简洁回答。'} + ]}] + ) + text = resp.choices[0].message.content + except Exception as e: + text = f'VL错误:{e}' + with _lock: + _vl_text = text + print(f'[VL] {text}') + + +# ── 深度拉取线程 ────────────────────────────────────────── +def _depth_loop(): + global _latest_depth + while True: + try: + resp = urllib.request.urlopen(DOCK_DEPTH_URL, timeout=2) + depth = np.load(io.BytesIO(resp.read())) + with _lock: + _latest_depth = depth + except Exception: + pass + + +# ── 感知主线程 ──────────────────────────────────────────── +def _perception_loop(): + global _latest_frame, _vl_last + model = YOLO(MODEL_PATH) + cap = cv2.VideoCapture(DOCK_COLOR_URL) + print('[感知] 已启动') + + while True: + ret, frame = cap.read() + if not ret: + continue + + with _lock: + depth = _latest_depth.copy() if _latest_depth is not None else None + vl = _vl_text + + result = model.predict(frame, classes=[0], conf=CONF, verbose=False)[0] + annotated = result.plot() + targets = [] + now = time.time() + + for i, box in enumerate(result.boxes): + x1, y1, x2, y2 = map(int, box.xyxy[0].tolist()) + dist_m, height_m = -1.0, -1.0 + if depth is not None: + roi = depth[max(0,y1):y2, max(0,x1):x2] + valid = roi[roi > 0] + if len(valid) > 0: + dist_m = float(np.percentile(valid, 15)) / 1000.0 + height_m = (y2 - y1) * dist_m / FY * BBOX_SCALE + + armed = any(w in vl for w in ('枪', '武器', '刀', 'gun', 'weapon')) + posture = 'STANDING' if height_m > 1.4 else ('CROUCHING' if height_m > 0.8 else 'PRONE') + targets.append({ + 'id': i + 1, + 'confidence': round(float(box.conf), 3), + 'posture': posture, + 'weapon_detected': armed, + 'vl_analysis': vl, + 'distance_z_meters': round(dist_m, 2), + 'estimated_height_cm': round(height_m * 100), + 'relative_coords': {'x': 0.0, 'y': 0.0, 'z': round(dist_m, 2)}, + }) + + if i == 0 and dist_m > 0 and now - _vl_last > VL_INTERVAL: + crop = frame[max(0,y1):y2, max(0,x1):x2].copy() + threading.Thread(target=_analyze_person, args=(crop,), daemon=True).start() + _vl_last = now + + if targets and targets[0]['distance_z_meters'] > 0: + t0 = targets[0] + cv2.putText(annotated, f"{t0['distance_z_meters']:.2f}m {t0['estimated_height_cm']}cm", + (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) + + with _lock: + _latest_frame = annotated + _telemetry['target_detected'] = len(targets) > 0 + _telemetry['targets'] = targets + + +# ── FastAPI ────────────────────────────────────────────── +app = FastAPI() +app.add_middleware(CORSMiddleware, allow_origins=['*'], allow_methods=['*'], allow_headers=['*']) + + +def _mjpeg_gen(): + while True: + with _lock: + frame = _latest_frame.copy() if _latest_frame is not None else None + if frame is None: + time.sleep(0.05) + continue + _, buf = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 80]) + yield b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + buf.tobytes() + b'\r\n' + time.sleep(0.033) + + +@app.get('/api/video_feed') +def video_feed(): + return StreamingResponse(_mjpeg_gen(), media_type='multipart/x-mixed-replace; boundary=frame') + + +@app.post('/api/move') +async def move(vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0): + # TODO: 接入 unitree_sdk2py SportClient.Move(vx, vy, vyaw) + return {'ok': True} + + +@app.post('/api/control/emergency_stop') +async def emergency_stop(): + global _estop + with _lock: + _estop = True + # TODO: 接入 unitree_sdk2py SportClient.Damp() + return {'ok': True} + + +@app.websocket('/ws/telemetry') +async def ws_telemetry(ws: WebSocket): + import asyncio + await ws.accept() + try: + while True: + with _lock: + payload = { + 'timestamp': int(time.time()), + 'visual_depth_fusion': { + 'target_detected': _telemetry['target_detected'], + 'targets': _telemetry['targets'], + }, + 'system_status': {'mode': 'AUTONOMOUS', 'estop': _estop}, + } + await ws.send_text(json.dumps(payload)) + await asyncio.sleep(0.1) + except WebSocketDisconnect: + pass + + +# ── 启动 ───────────────────────────────────────────────── +if __name__ == '__main__': + threading.Thread(target=_depth_loop, daemon=True).start() + threading.Thread(target=_perception_loop, daemon=True).start() + print('暗哨网关启动: http://0.0.0.0:8000') + uvicorn.run(app, host='0.0.0.0', port=8000) + diff --git a/dark_sentinel_ws/launch_all.py b/dark_sentinel_ws/launch_all.py new file mode 100644 index 0000000..62eaa8f --- /dev/null +++ b/dark_sentinel_ws/launch_all.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +""" +暗哨系统一键启动脚本 +用法: python3 launch_all.py <网卡名> 例如: python3 launch_all.py eth0 +""" +import sys +import subprocess +import os + +WORKSPACE = os.path.dirname(os.path.abspath(__file__)) +SRC = os.path.join(WORKSPACE, 'src') +IFACE = sys.argv[1] if len(sys.argv) > 1 else 'eth0' + +procs = [] + +def start(name, cmd): + env = os.environ.copy() + env['RMW_IMPLEMENTATION'] = 'rmw_cyclonedds_cpp' + p = subprocess.Popen(cmd, env=env) + procs.append((name, p)) + print(f'[+] Started {name} (pid={p.pid})') + +if __name__ == '__main__': + start('go2_sensor_bridge', + ['python3', f'{SRC}/dark_hal/dark_hal/go2_sensor_bridge.py', IFACE]) + start('go2_motion_ctrl', + ['python3', f'{SRC}/dark_hal/dark_hal/go2_motion_ctrl.py', IFACE]) + start('yolo_detector', + ['python3', f'{SRC}/dark_perception/dark_perception/yolo_detector_node.py']) + start('thermal_analyzer', + ['python3', f'{SRC}/dark_perception/dark_perception/thermal_analyzer_node.py']) + start('audio_doa', + ['python3', f'{SRC}/dark_perception/dark_perception/audio_doa_node.py']) + start('spatial_mapping', + ['python3', f'{SRC}/dark_perception/dark_perception/spatial_mapping_node.py']) + start('fusion_engine', + ['python3', f'{SRC}/dark_brain/dark_brain/fusion_engine.py']) + start('state_machine', + ['python3', f'{SRC}/dark_brain/dark_brain/state_machine.py']) + start('api_server', + ['python3', f'{SRC}/dark_gateway/dark_gateway/api_server.py']) + + print('\n暗哨系统已启动,按 Ctrl+C 停止所有节点\n') + try: + for name, p in procs: + p.wait() + except KeyboardInterrupt: + print('\n正在停止所有节点...') + for name, p in procs: + p.terminate() diff --git a/dark_sentinel_ws/requirements.txt b/dark_sentinel_ws/requirements.txt new file mode 100644 index 0000000..09e3da7 --- /dev/null +++ b/dark_sentinel_ws/requirements.txt @@ -0,0 +1,6 @@ +fastapi>=0.110.0 +uvicorn>=0.29.0 +websockets>=12.0 +opencv-python>=4.9.0 +numpy>=1.24.0 +ultralytics>=8.0.0 diff --git a/dark_sentinel_ws/setup_env.sh b/dark_sentinel_ws/setup_env.sh new file mode 100644 index 0000000..13a47ad --- /dev/null +++ b/dark_sentinel_ws/setup_env.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# 暗哨系统环境配置脚本 +# 用法: conda activate dark_sentinel && source setup_env.sh [网卡名] + +IFACE=${1:-enx9c69d36ee634} + +export RMW_IMPLEMENTATION=rmw_cyclonedds_cpp +export CYCLONEDDS_URI="" + +# 将 unitree_sdk2py 加入 Python 路径 +export PYTHONPATH=/home/darvin/dog/unitree_sdk2py:$PYTHONPATH + +echo "环境已配置: 网卡=${IFACE}, RMW=cyclonedds" diff --git a/dark_sentinel_ws/src/dark_brain/dark_brain/fusion_engine.py b/dark_sentinel_ws/src/dark_brain/dark_brain/fusion_engine.py new file mode 100644 index 0000000..c3c364d --- /dev/null +++ b/dark_sentinel_ws/src/dark_brain/dark_brain/fusion_engine.py @@ -0,0 +1,86 @@ +""" +多模态威胁融合节点 (F-07) +订阅视觉、热成像、声源定位结果,融合生成 EnemyProfile +""" +import json +import time +import rclpy +from rclpy.node import Node +from std_msgs.msg import String + + +class ThreatFusionNode(Node): + def __init__(self): + super().__init__('threat_fusion') + self.latest_spatial = [] + self.latest_thermal = [] + self.latest_audio = {} + + self.create_subscription(String, '/perception/spatial_detections', self._on_spatial, 10) + self.create_subscription(String, '/perception/thermal_alert', self._on_thermal, 10) + self.create_subscription(String, '/perception/audio_doa', self._on_audio, 10) + self.pub = self.create_publisher(String, '/brain/enemy_profiles', 10) + self.create_timer(0.2, self._fuse) + + def _on_spatial(self, msg): + self.latest_spatial = json.loads(msg.data) + + def _on_thermal(self, msg): + data = json.loads(msg.data) + self.latest_thermal = data.get('alerts', []) + + def _on_audio(self, msg): + self.latest_audio = json.loads(msg.data) + + def _fuse(self): + profiles = [] + for det in self.latest_spatial: + # 自适应权重:视觉基础权重 0.5,热成像补充 0.3,声源 0.2 + conf = det.get('conf', 0.0) * 0.5 + if self.latest_thermal: + conf += 0.3 + audio_conf = self.latest_audio.get('confidence', 0.0) + conf += audio_conf * 0.2 + conf = min(conf, 1.0) + + posture = self._estimate_posture(det) + vl = det.get('vl_analysis', '') + armed = det.get('class') in ('knife', 'gun', 'weapon') or \ + any(w in vl for w in ('枪', '武器', '刀', 'gun', 'weapon', 'knife')) + profile = { + 'timestamp': time.time(), + 'x': det.get('x', 0.0), + 'y': det.get('y', 0.0), + 'z': det.get('z', 0.0), + 'height': det.get('height_est', 0.0), + 'posture': posture, + 'armed': armed, + 'vl_analysis': vl, + 'action': 'static', + 'confidence': round(conf, 3), + 'source': 'fused', + } + profiles.append(profile) + + if profiles: + out = String() + out.data = json.dumps(profiles) + self.pub.publish(out) + + def _estimate_posture(self, det): + h = det.get('height_est', 0.0) + if h > 1.4: + return 'standing' + elif h > 0.8: + return 'crouching' + return 'prone' + + +def main(): + rclpy.init() + rclpy.spin(ThreatFusionNode()) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_brain/dark_brain/state_machine.py b/dark_sentinel_ws/src/dark_brain/dark_brain/state_machine.py new file mode 100644 index 0000000..9920c15 --- /dev/null +++ b/dark_sentinel_ws/src/dark_brain/dark_brain/state_machine.py @@ -0,0 +1,89 @@ +""" +任务状态机节点 +- 监听心跳,断网超过3秒触发本地保护模式 +- 监听急停指令 +- 断网时将 EnemyProfile 写入 SQLite +""" +import json +import time +import sqlite3 +import rclpy +from rclpy.node import Node +from std_msgs.msg import String +from geometry_msgs.msg import Twist + +DB_PATH = '/home/darvin/dog/dark_sentinel_ws/local_records.db' + + +class StateMachineNode(Node): + def __init__(self): + super().__init__('mission_state') + self.last_heartbeat = time.time() + self.offline_mode = False + + self._init_db() + + self.create_subscription(String, '/gateway/heartbeat', self._on_heartbeat, 10) + self.create_subscription(String, '/gateway/estop', self._on_estop, 10) + self.create_subscription(String, '/brain/enemy_profiles', self._on_profiles, 10) + + self.cmd_pub = self.create_publisher(Twist, '/cmd_vel', 10) + self.status_pub = self.create_publisher(String, '/brain/system_status', 10) + + self.create_timer(1.0, self._check_heartbeat) + + def _init_db(self): + conn = sqlite3.connect(DB_PATH) + conn.execute('''CREATE TABLE IF NOT EXISTS enemy_profiles ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp REAL, x REAL, y REAL, z REAL, + height REAL, posture TEXT, armed INTEGER, + confidence REAL, raw_json TEXT + )''') + conn.commit() + conn.close() + + def _on_heartbeat(self, msg): + self.last_heartbeat = time.time() + if self.offline_mode: + self.offline_mode = False + self.get_logger().info('Network restored, back to online mode') + + def _on_estop(self, msg): + stop = Twist() # 全零速度 = 停止 + self.cmd_pub.publish(stop) + self.get_logger().warn('Emergency stop triggered!') + + def _on_profiles(self, msg): + if not self.offline_mode: + return + profiles = json.loads(msg.data) + conn = sqlite3.connect(DB_PATH) + for p in profiles: + conn.execute( + 'INSERT INTO enemy_profiles (timestamp,x,y,z,height,posture,armed,confidence,raw_json) VALUES (?,?,?,?,?,?,?,?,?)', + (p['timestamp'], p['x'], p['y'], p['z'], p['height'], + p['posture'], int(p['armed']), p['confidence'], json.dumps(p)) + ) + conn.commit() + conn.close() + + def _check_heartbeat(self): + elapsed = time.time() - self.last_heartbeat + if elapsed > 3.0 and not self.offline_mode: + self.offline_mode = True + self.get_logger().warn(f'Network lost ({elapsed:.1f}s), entering offline mode') + status = {'offline_mode': self.offline_mode, 'heartbeat_age': round(elapsed, 1)} + out = String() + out.data = json.dumps(status) + self.status_pub.publish(out) + + +def main(): + rclpy.init() + rclpy.spin(StateMachineNode()) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_gateway/dark_gateway/api_server.py b/dark_sentinel_ws/src/dark_gateway/dark_gateway/api_server.py new file mode 100644 index 0000000..add4952 --- /dev/null +++ b/dark_sentinel_ws/src/dark_gateway/dark_gateway/api_server.py @@ -0,0 +1,198 @@ +""" +FastAPI + ROS2 网关 +接口: + GET /api/video_feed MJPEG 视频流(带 YOLO 框) + WS /ws/telemetry JSON 数据推送(10Hz) + POST /api/control/emergency_stop 急停 + POST /api/move 速度控制 +""" +import json +import asyncio +import threading +import queue +import time +from contextlib import asynccontextmanager + +import cv2 +import numpy as np +import rclpy +from rclpy.node import Node +from std_msgs.msg import String +from sensor_msgs.msg import Image +from geometry_msgs.msg import Twist +from cv_bridge import CvBridge + +from fastapi import FastAPI, WebSocket, WebSocketDisconnect +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse +import uvicorn + +_profile_queue: queue.Queue = queue.Queue(maxsize=50) +_status_queue: queue.Queue = queue.Queue(maxsize=20) +_audio_queue: queue.Queue = queue.Queue(maxsize=20) +_latest_frame = None # 最新标注帧(numpy BGR) +_frame_lock = threading.Lock() + + +class GatewayRosNode(Node): + def __init__(self): + super().__init__('dark_gateway') + self.bridge = CvBridge() + self.create_subscription(String, '/brain/enemy_profiles', self._on_profiles, 10) + self.create_subscription(String, '/brain/system_status', self._on_status, 10) + self.create_subscription(String, '/perception/audio_doa', self._on_audio, 10) + self.create_subscription(Image, '/perception/image_annotated', self._on_frame, 10) + self.cmd_pub = self.create_publisher(Twist, '/cmd_vel', 10) + self.heartbeat_pub = self.create_publisher(String, '/gateway/heartbeat', 10) + self.estop_pub = self.create_publisher(String, '/gateway/estop', 10) + + def _on_profiles(self, msg): + if not _profile_queue.full(): + _profile_queue.put_nowait(msg.data) + + def _on_status(self, msg): + if not _status_queue.full(): + _status_queue.put_nowait(msg.data) + + def _on_audio(self, msg): + if not _audio_queue.full(): + _audio_queue.put_nowait(msg.data) + + def _on_frame(self, msg): + global _latest_frame + frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8') + with _frame_lock: + _latest_frame = frame + + def publish_cmd_vel(self, vx, vy, vyaw): + t = Twist() + t.linear.x, t.linear.y, t.angular.z = vx, vy, vyaw + self.cmd_pub.publish(t) + + def publish_heartbeat(self): + msg = String(); msg.data = 'ping' + self.heartbeat_pub.publish(msg) + + def publish_estop(self): + msg = String(); msg.data = 'stop' + self.estop_pub.publish(msg) + + +_ros_node: GatewayRosNode = None + + +def _ros_spin(): + rclpy.init() + global _ros_node + _ros_node = GatewayRosNode() + rclpy.spin(_ros_node) + rclpy.shutdown() + + +@asynccontextmanager +async def lifespan(app: FastAPI): + t = threading.Thread(target=_ros_spin, daemon=True) + t.start() + yield + + +app = FastAPI(lifespan=lifespan) +app.add_middleware(CORSMiddleware, allow_origins=['*'], allow_methods=['*'], allow_headers=['*']) + + +# ── 视频流 ────────────────────────────────────────────── +def _mjpeg_generator(): + while True: + with _frame_lock: + frame = _latest_frame.copy() if _latest_frame is not None else None + if frame is None: + time.sleep(0.05) + continue + ret, buf = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 80]) + if not ret: + continue + yield (b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + buf.tobytes() + b'\r\n') + time.sleep(0.033) # ~30fps + + +@app.get('/api/video_feed') +def video_feed(): + return StreamingResponse(_mjpeg_generator(), + media_type='multipart/x-mixed-replace; boundary=frame') + + +# ── 控制接口 ──────────────────────────────────────────── +@app.post('/api/move') +async def move(vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0): + if _ros_node: + _ros_node.publish_cmd_vel(vx, vy, vyaw) + return {'ok': True} + + +@app.post('/api/control/emergency_stop') +async def emergency_stop(): + if _ros_node: + _ros_node.publish_estop() + return {'ok': True} + + +# ── WebSocket 数据推送 ────────────────────────────────── +@app.websocket('/ws/telemetry') +async def ws_telemetry(ws: WebSocket): + await ws.accept() + try: + while True: + # 处理前端心跳 + try: + data = await asyncio.wait_for(ws.receive_text(), timeout=0.05) + msg = json.loads(data) + if msg.get('cmd') == 'heartbeat' and _ros_node: + _ros_node.publish_heartbeat() + except asyncio.TimeoutError: + pass + + # 组装前端说明里的 JSON 格式 + payload = {'timestamp': int(time.time()), 'visual_depth_fusion': None, + 'audio_radar': None, 'system_status': None} + + if not _profile_queue.empty(): + profiles = json.loads(_profile_queue.get_nowait()) + targets = [] + for i, p in enumerate(profiles): + targets.append({ + 'id': i + 1, + 'type': 'PERSON', + 'confidence': p.get('confidence', 0), + 'posture': p.get('posture', 'unknown').upper(), + 'weapon_detected': p.get('armed', False), + 'vl_analysis': p.get('vl_analysis', ''), + 'distance_z_meters': p.get('z', 0), + 'estimated_height_cm': round(p.get('height', 0) * 100), + 'relative_coords': {'x': p.get('x', 0), 'y': p.get('y', 0), 'z': p.get('z', 0)}, + }) + payload['visual_depth_fusion'] = {'target_detected': len(targets) > 0, 'targets': targets} + + if not _audio_queue.empty(): + audio = json.loads(_audio_queue.get_nowait()) + payload['audio_radar'] = { + 'sound_detected': audio.get('confidence', 0) > 0.3, + 'direction_angle': audio.get('angle'), + 'intensity_db': audio.get('sound_level', 0), + 'threat_level': 'HIGH' if audio.get('confidence', 0) > 0.7 else 'LOW', + } + + if not _status_queue.empty(): + status = json.loads(_status_queue.get_nowait()) + payload['system_status'] = { + 'mode': 'OFFLINE' if status.get('offline_mode') else 'AUTONOMOUS', + 'network': 'LOST' if status.get('offline_mode') else 'GOOD', + } + + await ws.send_text(json.dumps(payload)) + await asyncio.sleep(0.1) # 10Hz + except WebSocketDisconnect: + pass + + +if __name__ == '__main__': + uvicorn.run(app, host='0.0.0.0', port=8000) diff --git a/dark_sentinel_ws/src/dark_hal/dark_hal/go2_motion_ctrl.py b/dark_sentinel_ws/src/dark_hal/dark_hal/go2_motion_ctrl.py new file mode 100644 index 0000000..ad0a8f7 --- /dev/null +++ b/dark_sentinel_ws/src/dark_hal/dark_hal/go2_motion_ctrl.py @@ -0,0 +1,58 @@ +""" +Go2 运动控制节点 +- 订阅 /cmd_vel 话题,转为 SDK 运动指令 +- 提供急停服务 /emergency_stop +""" +import sys +import rclpy +from rclpy.node import Node +from geometry_msgs.msg import Twist +from std_srvs.srv import SetBool + +from unitree_sdk2py.core.channel import ChannelFactoryInitialize +from unitree_sdk2py.go2.sport.sport_client import SportClient + + +class Go2MotionCtrl(Node): + def __init__(self): + super().__init__('go2_motion_ctrl') + self.sport = SportClient() + self.sport.SetTimeout(5.0) + self.sport.Init() + + self.create_subscription(Twist, '/cmd_vel', self._on_cmd_vel, 10) + self.create_service(SetBool, '/emergency_stop', self._on_estop) + self.get_logger().info('Go2MotionCtrl started') + + def _on_cmd_vel(self, msg: Twist): + vx = msg.linear.x + vy = msg.linear.y + vyaw = msg.angular.z + self.sport.Move(vx, vy, vyaw) + + def _on_estop(self, req, resp): + if req.data: + self.sport.Damp() + resp.success = True + resp.message = 'Emergency stop executed' + else: + self.sport.RecoveryStand() + resp.success = True + resp.message = 'Recovery stand executed' + return resp + + +def main(): + if len(sys.argv) > 1: + ChannelFactoryInitialize(0, sys.argv[1]) + else: + ChannelFactoryInitialize(0) + + rclpy.init() + node = Go2MotionCtrl() + rclpy.spin(node) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_hal/dark_hal/go2_sensor_bridge.py b/dark_sentinel_ws/src/dark_hal/dark_hal/go2_sensor_bridge.py new file mode 100644 index 0000000..6d72097 --- /dev/null +++ b/dark_sentinel_ws/src/dark_hal/dark_hal/go2_sensor_bridge.py @@ -0,0 +1,108 @@ +""" +Go2 传感器桥接节点 +- 获取前置相机图像,发布为 /camera/color/image_raw +- 获取机器狗状态(位置、姿态、电量),发布为 /go2/state +""" +import sys +import threading +import numpy as np +import cv2 + +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image +from std_msgs.msg import String +from cv_bridge import CvBridge + +import io +import urllib.request + +from unitree_sdk2py.core.channel import ChannelFactoryInitialize, ChannelSubscriber +from unitree_sdk2py.go2.video.video_client import VideoClient +from unitree_sdk2py.idl.default import unitree_go_msg_dds__SportModeState_ +from unitree_sdk2py.idl.unitree_go.msg.dds_ import SportModeState_ + +DOCK_COLOR_URL = 'http://192.168.123.18:8080/color' +DOCK_DEPTH_URL = 'http://192.168.123.18:8080/depth_raw' + + +class Go2SensorBridge(Node): + def __init__(self): + super().__init__('go2_sensor_bridge') + + self.bridge = CvBridge() + self.img_pub = self.create_publisher(Image, '/camera/color/image_raw', 10) + self.depth_pub = self.create_publisher(Image, '/camera/depth/image_raw', 10) + self.state_pub = self.create_publisher(String, '/go2/state', 10) + + # 拓展坞彩色流(MJPEG) + self.color_cap = cv2.VideoCapture(DOCK_COLOR_URL) + + # 后台线程拉取深度原始帧 + self._latest_depth = None + self._depth_lock = threading.Lock() + threading.Thread(target=self._depth_fetch_loop, daemon=True).start() + + # 订阅机器狗运动状态 + self.state_sub = ChannelSubscriber("rt/sportmodestate", SportModeState_) + self.state_sub.Init(self._on_sport_state, 10) + + self.create_timer(0.033, self._capture_frame) + self.get_logger().info('Go2SensorBridge started (RealSense via dock)') + + def _depth_fetch_loop(self): + while True: + try: + resp = urllib.request.urlopen(DOCK_DEPTH_URL, timeout=2) + depth = np.load(io.BytesIO(resp.read())) + with self._depth_lock: + self._latest_depth = depth + except Exception: + pass + + def _capture_frame(self): + ret, frame = self.color_cap.read() + if not ret: + return + stamp = self.get_clock().now().to_msg() + + msg = self.bridge.cv2_to_imgmsg(frame, encoding='bgr8') + msg.header.stamp = stamp + msg.header.frame_id = 'realsense_color' + self.img_pub.publish(msg) + + with self._depth_lock: + depth = self._latest_depth.copy() if self._latest_depth is not None else None + if depth is not None: + dmsg = self.bridge.cv2_to_imgmsg(depth, encoding='16UC1') + dmsg.header.stamp = stamp + dmsg.header.frame_id = 'realsense_depth' + self.depth_pub.publish(dmsg) + + def _on_sport_state(self, msg: SportModeState_): + import json + state = { + 'position': [msg.position[0], msg.position[1], msg.position[2]], + 'velocity': [msg.velocity[0], msg.velocity[1], msg.velocity[2]], + 'yaw_speed': msg.yaw_speed, + 'battery_voltage': msg.battery_voltage, + } + out = String() + out.data = json.dumps(state) + self.state_pub.publish(out) + + +def main(): + if len(sys.argv) > 1: + ChannelFactoryInitialize(0, sys.argv[1]) + else: + ChannelFactoryInitialize(0) + + rclpy.init() + node = Go2SensorBridge() + rclpy.spin(node) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_interfaces/msg/EnemyProfile.msg b/dark_sentinel_ws/src/dark_interfaces/msg/EnemyProfile.msg new file mode 100644 index 0000000..bedf37d --- /dev/null +++ b/dark_sentinel_ws/src/dark_interfaces/msg/EnemyProfile.msg @@ -0,0 +1,12 @@ +# 敌方综合特征消息 +std_msgs/Header header +float32 x +float32 y +float32 z +float32 height +string posture # standing/crouching/prone +bool armed +string weapon_type +string action # static/moving/running +float32 confidence +string source # visual/thermal/audio/fused diff --git a/dark_sentinel_ws/src/dark_interfaces/srv/EmergencyStop.srv b/dark_sentinel_ws/src/dark_interfaces/srv/EmergencyStop.srv new file mode 100644 index 0000000..235df1b --- /dev/null +++ b/dark_sentinel_ws/src/dark_interfaces/srv/EmergencyStop.srv @@ -0,0 +1,4 @@ +bool stop +--- +bool success +string message diff --git a/dark_sentinel_ws/src/dark_perception/dark_perception/audio_doa_node.py b/dark_sentinel_ws/src/dark_perception/dark_perception/audio_doa_node.py new file mode 100644 index 0000000..8b1ca11 --- /dev/null +++ b/dark_sentinel_ws/src/dark_perception/dark_perception/audio_doa_node.py @@ -0,0 +1,38 @@ +""" +声源定位节点 (F-03) +从麦克风阵列获取音频,计算声源方位角,发布到 /perception/audio_doa +""" +import json +import numpy as np +import rclpy +from rclpy.node import Node +from std_msgs.msg import String + + +class AudioDoaNode(Node): + def __init__(self): + super().__init__('audio_doa') + self.pub = self.create_publisher(String, '/perception/audio_doa', 10) + self.declare_parameter('sample_rate', 16000) + self.declare_parameter('num_mics', 7) + # 定时模拟发布(实际接入 DFRobot 阵列时替换此处) + self.create_timer(0.1, self._process) + self.get_logger().info('AudioDoaNode started (stub mode)') + + def _process(self): + # TODO: 接入 DFRobot 6+1 MEMS 阵列实际音频流 + # 此处为占位实现,返回空结果 + result = {'angle': None, 'confidence': 0.0, 'sound_level': 0.0} + msg = String() + msg.data = json.dumps(result) + self.pub.publish(msg) + + +def main(): + rclpy.init() + rclpy.spin(AudioDoaNode()) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_perception/dark_perception/spatial_mapping_node.py b/dark_sentinel_ws/src/dark_perception/dark_perception/spatial_mapping_node.py new file mode 100644 index 0000000..beb61cc --- /dev/null +++ b/dark_sentinel_ws/src/dark_perception/dark_perception/spatial_mapping_node.py @@ -0,0 +1,115 @@ +""" +空间映射节点 (F-07) +融合 YOLO 2D 检测框与 RealSense 深度数据,估算目标 3D 坐标和身高 +同时触发云端 VL 分析(qwen-vl-plus),结果附加到 spatial_detections +""" +import json +import time +import base64 +import threading +import numpy as np +import cv2 +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image +from std_msgs.msg import String +from cv_bridge import CvBridge +from openai import OpenAI + +VL_CLIENT = OpenAI( + api_key='sk-1fdd0430c63e4f2d99db031b16e28f39', + base_url='https://dashscope.aliyuncs.com/compatible-mode/v1', +) +VL_INTERVAL = 5.0 +FX = FY = 388.86 +CX, CY = 321.21, 238.69 +BBOX_SCALE = 0.614 + + +class SpatialMappingNode(Node): + def __init__(self): + super().__init__('spatial_mapping') + self.bridge = CvBridge() + self.latest_depth = None + self.latest_color = None + self._vl_text = '' + self._vl_lock = threading.Lock() + self._vl_last = 0.0 + + self.create_subscription(Image, '/camera/depth/image_raw', self._on_depth, 10) + self.create_subscription(Image, '/camera/color/image_raw', self._on_color, 10) + self.create_subscription(String, '/perception/detections', self._on_detections, 10) + self.pub = self.create_publisher(String, '/perception/spatial_detections', 10) + + def _on_depth(self, msg: Image): + self.latest_depth = self.bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough') + + def _on_color(self, msg: Image): + self.latest_color = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8') + + def _on_detections(self, msg: String): + detections = json.loads(msg.data) + if not detections or self.latest_depth is None: + return + + depth = self.latest_depth + results = [] + for i, det in enumerate(detections): + x1, y1, x2, y2 = det['bbox'] + rx1, ry1 = max(0, int(x1)), max(0, int(y1)) + rx2, ry2 = min(depth.shape[1]-1, int(x2)), min(depth.shape[0]-1, int(y2)) + roi = depth[ry1:ry2, rx1:rx2] + valid = roi[roi > 0] + if len(valid) == 0: + continue + z = float(np.percentile(valid, 15)) / 1000.0 + x3d = ((x1+x2)/2 - CX) * z / FX + y3d = ((y1+y2)/2 - CY) * z / FY + height_est = (y2 - y1) * z / FY * BBOX_SCALE + det.update({'x': round(x3d,3), 'y': round(y3d,3), 'z': round(z,3), + 'height_est': round(height_est,3)}) + + # VL 分析结果附加到第一个目标 + with self._vl_lock: + det['vl_analysis'] = self._vl_text + + # 每 VL_INTERVAL 秒对第一个目标触发一次分析 + if i == 0 and self.latest_color is not None and time.time() - self._vl_last > VL_INTERVAL: + crop = self.latest_color[ry1:ry2, rx1:rx2].copy() + threading.Thread(target=self._analyze, args=(crop,), daemon=True).start() + self._vl_last = time.time() + + results.append(det) + + if results: + out = String() + out.data = json.dumps(results) + self.pub.publish(out) + + def _analyze(self, crop): + _, buf = cv2.imencode('.jpg', crop, [cv2.IMWRITE_JPEG_QUALITY, 85]) + b64 = base64.b64encode(buf).decode() + try: + resp = VL_CLIENT.chat.completions.create( + model='qwen-vl-plus', + messages=[{'role':'user','content':[ + {'type':'image_url','image_url':{'url':f'data:image/jpeg;base64,{b64}'}}, + {'type':'text','text':'分析图中人物:1)是否持有武器或枪支 2)姿态 3)行为意图。简洁回答。'} + ]}] + ) + text = resp.choices[0].message.content + except Exception as e: + text = f'VL错误:{e}' + with self._vl_lock: + self._vl_text = text + self.get_logger().info(f'[VL] {text}') + + +def main(): + rclpy.init() + rclpy.spin(SpatialMappingNode()) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_perception/dark_perception/thermal_analyzer_node.py b/dark_sentinel_ws/src/dark_perception/dark_perception/thermal_analyzer_node.py new file mode 100644 index 0000000..4d9ac39 --- /dev/null +++ b/dark_sentinel_ws/src/dark_perception/dark_perception/thermal_analyzer_node.py @@ -0,0 +1,58 @@ +""" +热成像分析节点 (F-01) +订阅红外相机话题,检测热源异常,发布告警 +""" +import json +import cv2 +import numpy as np +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image +from std_msgs.msg import String +from cv_bridge import CvBridge + + +class ThermalAnalyzerNode(Node): + def __init__(self): + super().__init__('thermal_analyzer') + self.bridge = CvBridge() + self.declare_parameter('temp_threshold', 37.0) + self.threshold = self.get_parameter('temp_threshold').value + + self.create_subscription(Image, '/thermal/image_raw', self._on_thermal, 10) + self.alert_pub = self.create_publisher(String, '/perception/thermal_alert', 10) + self.vis_pub = self.create_publisher(Image, '/thermal/image_colormap', 10) + self.get_logger().info('ThermalAnalyzerNode started') + + def _on_thermal(self, msg: Image): + frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='mono8') + # 伪彩色转化 + colormap = cv2.applyColorMap(frame, cv2.COLORMAP_INFERNO) + vis_msg = self.bridge.cv2_to_imgmsg(colormap, encoding='bgr8') + vis_msg.header = msg.header + self.vis_pub.publish(vis_msg) + + # 检测高温区域(像素值 > threshold_pixel) + threshold_pixel = int(self.threshold / 100.0 * 255) + hot_mask = frame > threshold_pixel + if hot_mask.any(): + contours, _ = cv2.findContours( + hot_mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + alerts = [] + for c in contours: + x, y, w, h = cv2.boundingRect(c) + alerts.append({'bbox': [x, y, x+w, y+h], 'area': int(cv2.contourArea(c))}) + if alerts: + out = String() + out.data = json.dumps({'alerts': alerts}) + self.alert_pub.publish(out) + + +def main(): + rclpy.init() + rclpy.spin(ThermalAnalyzerNode()) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/dark_sentinel_ws/src/dark_perception/dark_perception/yolo_detector_node.py b/dark_sentinel_ws/src/dark_perception/dark_perception/yolo_detector_node.py new file mode 100644 index 0000000..474478f --- /dev/null +++ b/dark_sentinel_ws/src/dark_perception/dark_perception/yolo_detector_node.py @@ -0,0 +1,81 @@ +""" +YOLO 视觉检测节点 (F-02, F-07) +订阅 /camera/color/image_raw,发布带边框的图像和检测结果 +""" +import json +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image +from std_msgs.msg import String +from cv_bridge import CvBridge +import cv2 +import numpy as np + +try: + from ultralytics import YOLO + YOLO_AVAILABLE = True +except ImportError: + YOLO_AVAILABLE = False + + +class YoloDetectorNode(Node): + def __init__(self): + super().__init__('yolo_detector') + self.bridge = CvBridge() + + self.declare_parameter('model_path', '/home/darvin/dog/yolov8n.pt') + self.declare_parameter('conf_threshold', 0.5) + + model_path = self.get_parameter('model_path').value + self.conf = self.get_parameter('conf_threshold').value + + if YOLO_AVAILABLE: + self.model = YOLO(model_path) + self.get_logger().info(f'YOLO model loaded: {model_path}') + else: + self.model = None + self.get_logger().warn('ultralytics not installed, detection disabled') + + self.create_subscription(Image, '/camera/color/image_raw', self._on_image, 10) + self.det_pub = self.create_publisher(String, '/perception/detections', 10) + self.img_pub = self.create_publisher(Image, '/perception/image_annotated', 10) + + def _on_image(self, msg: Image): + frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8') + detections = [] + + if self.model is not None: + # classes=[0] 只检测人,与 explore/main.py 一致 + result = self.model.predict(frame, classes=[0], conf=self.conf, verbose=False)[0] + for box in result.boxes: + x1, y1, x2, y2 = box.xyxy[0].tolist() + detections.append({ + 'class': 'person', + 'conf': float(box.conf), + 'bbox': [x1, y1, x2, y2], + 'cx': (x1 + x2) / 2, + 'cy': (y1 + y2) / 2, + }) + # 用 ultralytics 自带标注(含人数统计) + frame = result.plot() + count = len(result.boxes) + cv2.putText(frame, f"Persons: {count}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2) + + out = String() + out.data = json.dumps(detections) + self.det_pub.publish(out) + + ann_msg = self.bridge.cv2_to_imgmsg(frame, encoding='bgr8') + ann_msg.header = msg.header + self.img_pub.publish(ann_msg) + + +def main(): + rclpy.init() + rclpy.spin(YoloDetectorNode()) + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/docs/前端说明+接口说明.md b/docs/前端说明+接口说明.md new file mode 100644 index 0000000..076905e --- /dev/null +++ b/docs/前端说明+接口说明.md @@ -0,0 +1,81 @@ +# 前端说明 + 接口说明 + +## 后端能提供的内容 + +| 类型 | 接口 | 说明 | +|------|------|------| +| 视频流 | `GET /api/video_feed` | MJPEG 流,已叠加 YOLO 检测框和人数 | +| 实时数据 | `WS /ws/telemetry` | 10Hz JSON 推送,含深度识别结果 + 声源方位 | +| 急停 | `POST /api/control/emergency_stop` | 触发机器狗阻尼停止 | +| 速度控制 | `POST /api/move?vx=&vy=&vyaw=` | 控制移动方向 | + +--- + +## WebSocket 数据格式(`/ws/telemetry`) + +```json +{ + "timestamp": 1713928192, + "system_status": { + "mode": "AUTONOMOUS", + "network": "GOOD" + }, + "visual_depth_fusion": { + "target_detected": true, + "targets": [ + { + "id": 1, + "type": "PERSON", + "confidence": 0.92, + "posture": "STANDING", + "weapon_detected": false, + "distance_z_meters": 4.5, + "estimated_height_cm": 178, + "relative_coords": {"x": 1.2, "y": 0.0, "z": 4.5} + } + ] + }, + "audio_radar": { + "sound_detected": true, + "direction_angle": 45.0, + "intensity_db": 65, + "threat_level": "HIGH" + } +} +``` + +字段说明: +- `visual_depth_fusion`:来自 YOLO + 深度图融合,`null` 表示当前无目标 +- `audio_radar`:来自麦克风阵列声源定位,`direction_angle` 为相对机器狗正前方的偏航角(度),`null` 表示无声音检测 +- `system_status`:`mode` 为 `OFFLINE` 时表示断网保护模式已触发 + +--- + +## 视频流接入 + +```html + +``` + +视频帧已由后端用 OpenCV 叠加: +- YOLO 检测框 +- 人数统计文字 `Persons: N` + +--- + +## 明天演示的两个核心功能 + +### 1. 深度相机识别 +- 视频流中实时显示 YOLO 检测框 +- WebSocket `visual_depth_fusion.targets` 提供距离、身高、姿态 + +### 2. 声源定位 +- WebSocket `audio_radar.direction_angle` 提供声源方位角 +- 前端用极坐标雷达图(ECharts)在对应角度显示红点 + +--- + +## 注意事项 +- 后端 IP 为机器狗所在局域网地址,端口 `8000` +- 前端每秒发送一次心跳:`{"cmd": "heartbeat"}`,超过 3 秒未收到心跳后端进入离线模式 +- `audio_radar` 目前声源定位模块待接入 DFRobot 阵列,演示时可用模拟数据 diff --git a/docs/后端框架建议.md b/docs/后端框架建议.md new file mode 100644 index 0000000..0da799c --- /dev/null +++ b/docs/后端框架建议.md @@ -0,0 +1,113 @@ +--- + +### 一、 核心技术栈重构 +* **操作系统**:Ubuntu 22.04 LTS +* **核心框架**:ROS 2 Humble (用于传感器数据流转、TF坐标转换) + Unitree SDK 2 (底层控制与内置传感器获取) +* **中间件通信**:CycloneDDS (宇树 SDK 2 强依赖此 DDS 确保高频通信不丢包) +* **AI 推理引擎**:TensorRT + ONNX (YOLOv8/v10 端侧加速) +* **后端网关**:FastAPI + ROS 2 `rclpy` 桥接 + WebRTC + +--- + +### 二、 更新后的系统架构设计 (ROS 2 Node 体系) + +我们将后端系统设计为一个标准的 ROS 2 工作空间(Workspace),划分为 4 个主要 Package: + +#### 1. 硬件抽象与桥接层 (`dark_hal_pkg`) +*既然用了 Go2 官方 SDK,此层的核心是“数据引出与指令下发”。* +* **`go2_bridge_node` (宇树核心桥接节点)**: + * 通过 `unitree_sdk2` 订阅狗的原始状态(`/sportmodestate` 里的位置、电量、姿态)。 + * 将狗的前置相机画面发布为标准 ROS 2 图像话题:`/camera/color/image_raw`。 + * 将内置 4D LiDAR 或深度感知模块输出的点云/深度数据发布为:`/camera/depth/pointcloud` 或 `/camera/depth/image_raw`。 + * 提供服务 (Service):接收来自上层的运动控制指令(速度、偏航、一键急停)。 +* **`audio_capture_node`**:接入 DFRobot 阵列,发布格式化音频流话题。 +* **`thermal_camera_node`**:驱动红外热成像模组,发布伪彩图。 + +#### 2. AI 感知与解析层 (`dark_perception_pkg`) +*独立节点并行计算,防止某一个算法卡顿拖慢全系统。* +* **`yolo_detector_node` (视觉主节点)**: + * 订阅前置相机 `/camera/color/image_raw`。 + * 加载 TensorRT 加速的 YOLO 模型,进行行人、武器、姿态检测。 + * 发布自定义消息 `BBox3DArray`(暂无 Z 轴深度)。 +* **`depth_estimator_node` (多模态空间映射,实现 F-07)**: + * **核心逻辑**:使用 ROS 2 的 `message_filters` 同步 YOLO 边框与 Go2 自带的雷达点云/深度图。 + * 提取 2D 边框内的深度点云,计算中位数距离,结合相机内参矩阵与 TF 树,计算出目标相对于机器狗的 `(x, y, z)` 三维坐标与预估身高。 +* **`thermal_analyzer_node` & `audio_doa_node`**:独立处理红外高亮检测与麦克风波束成形测向。 + +#### 3. 决策控制与状态机 (`dark_brain_pkg`) +*本层负责信息汇总和状态管理。* +* **`threat_fusion_node` (敌方特征综合节点)**: + * 订阅上述所有的感知话题,执行**多模态自适应权重融合**逻辑。 + * 生成完整的 `EnemyProfile`(包含 3D 坐标、身高、姿态、武器、置信度等)。 +* **`mission_state_node` (任务状态机)**: + * 监听网络心跳,如果网络断开,触发**断网保护逻辑**(停止向云端发视频,自主避障并本地持久化记录 `EnemyProfile` 到 SQLite 数据库)。 + * 监听最高优先级的急停 topic,直接向 `go2_bridge_node` 下发阻尼指令。 + +#### 4. Web 网关与通信层 (`dark_gateway_pkg`) +*由于操作终端是移动端/前端 UI,必须有一个协议转换层。* +* **`fastapi_ros2_bridge`**: + * 这是一个结合了 `rclpy` 和 FastAPI 的异步程序。 + * **HTTP REST**:用于配置参数下发(如更改深度相机的识别阈值、下发战术路径点)。 + * **WebSockets**:用于向操作员前端高频推送 JSON 格式的 `EnemyProfile` 告警、雷达图数据,并接收前端虚拟摇杆的控制指令(转化为 ROS `Twist` 消息下发)。 +* **`webrtc_streamer_node`**: + * 订阅带有 YOLO 锚框的图像话题与红外伪彩话题,将其编码并以 WebRTC 协议推流,确保局域网/无人机中继下 **视频延迟 < 0.5s**。 + +--- + +### 三、 推荐的 ROS 2 工作空间目录结构 + +基于 `colcon` 构建的目录如下: + +```text +dark_sentinel_ws/ +├── src/ +│ ├── dark_interfaces/ # 存放自定义的消息(msg)和服务(srv) +│ │ ├── msg/EnemyProfile.msg +│ │ └── srv/EmergencyStop.srv +│ ├── dark_hal/ # 硬件抽象层包 +│ │ ├── package.xml +│ │ └── dark_hal/ +│ │ ├── go2_sensor_bridge.py # 调用 SDKpy 获取图像/点云发布ROS话题 +│ │ └── go2_motion_ctrl.py # 订阅ROS控制话题转为 SDK 运动指令 +│ ├── dark_perception/ # 感知层包 +│ │ ├── yolo_trt_node.py # RGB 视觉检测 +│ │ ├── spatial_mapping.py # 将2D框映射到Go2深度获取XYZ +│ │ └── audio_doa_node.py # 声源定位 +│ ├── dark_brain/ # 决策层包 +│ │ ├── fusion_engine.py # 权重融合计算 +│ │ └── state_machine.py # 状态机与断网处理 +│ └── dark_gateway/ # 网关推流包 +│ ├── api_server.py # FastAPI 结合 ROS2 Spin +│ └── local_database.py # SQLite 本地存储 +├── unitree_sdk2_python/ # 您已经装好的 SDKpy 目录 (可作为依赖) +└── unitree_ros2/ # 宇树官方提供的底层ROS2包(可直接对接网卡) +``` + +--- + +### 四、 开发落地的关键“避坑”指南 + +结合 Go2 开发的实际痛点,给您的团队以下具体建议: + +#### 1. 网络与 CycloneDDS 的配置 (极其致命!) +宇树 Go2 的内部通信强烈依赖以太网(192.168.123.x网段)和 CycloneDDS。你在宿主机 Ubuntu 22.04 运行 ROS 2 节点时,**必须配置正确的 DDS 环境变量**,否则你的 ROS 2 节点将无法接收到 Go2 的深度图和视频流。 +*建议:在 `.bashrc` 中配置:* +```bash +export RMW_IMPLEMENTATION=rmw_cyclonedds_cpp +export CYCLONEDDS_URI='...' +``` +*(注:网络接口名 `eth0` 需替换为您连接狗的网卡名)* + +#### 2. 自带深度相机/LiDAR 的空间标定 +去掉奥比中光后,RGB 图像来自狗头前置相机,而深度点云来自 Go2 下巴位置的 4D LiDAR (或内置深度模块)。这两者的物理中心不在同一点。 +*建议:* 必须在 ROS 2 中建立一条完整的 **TF 树 (Transform Tree)**。用 `tf2_ros` 广播从 `base_link` -> `front_camera_link` 以及 `base_link` -> `lidar_link` 的相对坐标。在 `spatial_mapping.py` 中,计算 3D 坐标前,务必先用 `tf2` 将目标点云转换到统一坐标系下,否则身高和坐标估算会有严重偏差。 + +#### 3. FastAPI 与 ROS 2 的结合方式 +ROS 2 的 `rclpy.spin()` 是阻塞的,FastAPI 的 `uvicorn.run()` 也是阻塞的。 +*架构技巧:* 强烈建议使用异步机制。在 FastAPI 的生命周期 `@app.on_event("startup")` 中,启动一个后台线程跑 ROS 2 的 Spin;或者使用 `asyncio` 与 `rclpy` 结合的方案(如 `ros2-web-bridge` 的思路),通过队列(`queue.Queue`)在 ROS 2 节点回调和 FastAPI WebSocket 路由之间传递告警数据。 + +#### 4. “断网自主运行”如何测试? +在 `dark_brain/state_machine.py` 中,设定一个变量 `last_heartbeat_time`。操作员终端每隔 1 秒通过 WebSocket 发送 `{ "cmd": "heartbeat" }`。一旦后端发现当前时间减去心跳时间 > 3 秒,立即触发回调: +1. 暂停调用 `go2_motion_ctrl` 的常规前进指令。 +2. 启动断网巡航算法或原地驻留。 +3. 劫持 `yolo_detector_node` 的输出,不再推给 WebSocket,而是触发 SQLite 写入操作(保存时间戳、坐标、威胁等级)。 \ No newline at end of file diff --git a/docs/开发日志.md b/docs/开发日志.md new file mode 100644 index 0000000..1fd9ae0 --- /dev/null +++ b/docs/开发日志.md @@ -0,0 +1,199 @@ +# 暗哨系统开发日志 + +## 2026-04-25 + +### 完成 +- 搭建后端框架 `dark_sentinel_ws/`,4层架构:HAL层、感知层、决策层、网关层 +- 整合 explore 实验代码:视频流获取方式、YOLO只检测人(classes=[0])、实际网卡名 +- 整理项目目录:`docs/`、`sdk/`、`dark_sentinel_ws/`、`explore/` +- 更新前端接口说明(MJPEG流、WebSocket /ws/telemetry、急停接口) +- 确认 D435i 在拓展坞上可用(video4=彩色,video2=深度) +- 实现拓展坞推流服务(`explore/stream_server.py`),本机可接收 MJPEG 流 +- 本机 YOLO + 深度融合调试脚本(`explore/test_yolo_remote.py`)已验证能检测到人 + +### 当前架构(深度融合方案) +``` +拓展坞 stream_server.py + → /color (MJPEG, video4) + → /depth (MJPEG伪彩色, video2) + ↓ +本机 test_yolo_remote.py + → YOLO 检测人(classes=[0]) + → 深度值估算距离 + ↓ +前端 WebSocket /ws/telemetry +``` + +### 已踩的坑(重要!下次避免) + +#### 拓展坞环境 +- **拓展坞完全没有外网**,pip/apt 均无法联网,所有包需从本机下载传过去 +- SSH 免密已配置:`ssh -i ~/.ssh/id_rsa_unitree unitree@192.168.123.18` +- sudo 密码:`123` +- Python 3.8,aarch64(Jetson tegra),PyPI 上没有 aarch64 的 scipy/onnxruntime wheel +- TensorRT 8.5.2.2 已装,但 pycuda 未装,TRT 推理会报 CUDA illegal memory access +- V4L2 读取深度流只能得到 uint8(0-255),不是真实 16bit 深度值(mm) + +#### pyrealsense2 编译进度 +- librealsense 2.50.0 源码已在拓展坞 `/tmp/librealsense-2.50.0/` +- pybind11 2.6.2 已手动放到: + - `/tmp/librealsense-2.50.0/build/third-party/pybind11/` + - `/tmp/librealsense-2.50.0/wrappers/python/include/pybind11/` +- cmake 已成功配置(`-- pybind11 v2.6.2`,`-- Configuring done`) +- **编译未完成**(狗没电中断),下次继续: + ```bash + ssh -i ~/.ssh/id_rsa_unitree unitree@192.168.123.18 + cd /tmp/librealsense-2.50.0/build + nohup make -j4 pyrealsense2 > /tmp/build_rs.log 2>&1 & + # 监控进度: + tail -f /tmp/build_rs.log + # 完成后安装: + sudo cp wrappers/python/pyrealsense2*.so /usr/lib/python3/dist-packages/ + python3 -c "import pyrealsense2; print(pyrealsense2.__version__)" + ``` + +#### 深度精度问题 +- V4L2 深度流是 uint8,精度差(距离误差大) +- 要高精度必须用 pyrealsense2 获取 uint16 原始深度(单位 mm) +- pyrealsense2 装好后,用 `explore/test_d435i.py` 测试 + +### 当前文件结构 +``` +dog/ +├── CLAUDE.md # 开发约束(每次对话自动加载) +├── docs/ +│ ├── 规格说明书.md +│ ├── 后端框架建议.md +│ ├── 前端说明+接口说明.md +│ └── 开发日志.md +├── sdk/ +│ ├── unitree_sdk2/ +│ ├── unitree_sdk2py/ +│ ├── yolov8n.pt +│ ├── yolov8n.onnx # 已导出,可用于 TRT 转换 +│ └── Miniconda3-latest-Linux-x86_64.sh +├── explore/ +│ ├── main.py # 早期 YOLO+Go2 相机测试(已整合) +│ ├── get_video.py # 早期视频流测试 +│ ├── test_d435i.py # D435i pyrealsense2 测试(需装好后用) +│ ├── test_d435i_v4l2.py # D435i V4L2 测试(uint8,精度差) +│ ├── test_yolo_trt.py # TRT 推理测试(需 pycuda,暂不可用) +│ ├── test_yolo_remote.py # 本机 YOLO + 拓展坞深度流(当前可用) +│ └── stream_server.py # 拓展坞推流服务(在拓展坞上运行) +└── dark_sentinel_ws/ + ├── docs/架构说明.md + ├── setup_env.sh + ├── launch_all.py + ├── requirements.txt + └── src/ + ├── dark_interfaces/ + ├── dark_hal/ + ├── dark_perception/ + ├── dark_brain/ + └── dark_gateway/ + +``` + +### 待完成 +- [x] **pyrealsense2 编译安装**(已完成 2026-04-25) +- [x] 用 pyrealsense2 重写深度获取,获取 uint16 真实深度(mm) +- [ ] 修正身高估算公式(需相机内参标定) +- [ ] 声源定位接入 DFRobot 阵列 +- [ ] 前端 UI 开发 + +--- + +## 2026-04-25(续) + +### 完成 +- pyrealsense2 2.50.0 编译安装成功(aarch64,Python 3.8) + - /tmp 被清理,重新传源码包编译 + - pybind11 2.6.2 手动放置到 third-party 和 wrappers/python/include + - 链接步骤 make 未实际执行,手动运行 link.txt 命令完成链接 + - 安装到 /usr/lib/python3/dist-packages/ +- D435i 设备检测成功(`rs.context().query_devices()` 返回 1 台设备) +- stream_server.py 升级到 v3: + - 用 pyrealsense2 替换 V4L2,获取真实 uint16 深度(精度 1mm) + - 新增 `/depth_raw` 接口,返回 numpy npy 格式原始深度帧 + - 验证:中心点 4.60m,范围 1.05m~6.21m,dtype=uint16 +- test_yolo_remote.py 升级到 v2: + - 用 `/depth_raw` 替换伪彩色深度反推 + - 后台线程持续拉取深度帧,主线程 YOLO 推理不阻塞 + - 距离精度从 ±1m 级别提升到 mm 级别 + +### 已踩的坑(新增) +- pyrealsense2 编译:make 的 Linking 日志行出现不代表链接成功,需检查 .so 文件大小 +- /tmp 重启后会被清理,编译产物不要放 /tmp,下次考虑放 /home/unitree/ + +### 待完成 +- [x] 修正身高估算公式(fx=fy=388.86,用针孔模型 height=pixels*dist/fy) +- [x] stream_server.py 部署到 /home/unitree/,systemd 开机自启(stream_server.service) +- [x] 端到端测试:距离准确,身高标定系数 BBOX_SCALE=0.614(1.7m/2.77m) +- [x] 云端视觉分析(qwen-vl-plus):检测到人时每5秒分析枪支/姿态/意图,已验证可用 +- [x] 感知结果接入后端:gateway_standalone.py(无 ROS2,FastAPI) +- [ ] 声源定位接入 DFRobot 阵列 +- [ ] 前端 UI 开发 + +--- + +## 2026-04-25(续三) + +### 完成 +- 无 ROS2 独立网关 `dark_sentinel_ws/gateway_standalone.py`: + - 后台线程:拉彩色流(MJPEG)+ 深度帧(/depth_raw npy)+ YOLO 推理 + VL 分析 + - FastAPI 对外接口(端口 8000): + - `GET /api/video_feed` — MJPEG 视频流(带 YOLO 框) + - `WS /ws/telemetry` — 10Hz JSON 推送 + - `POST /api/move` — 速度控制(待接入 SDK) + - `POST /api/control/emergency_stop` — 急停(待接入 SDK) + - 启动命令:`/home/darvin/miniconda3/envs/dark_sentinel/bin/python3 gateway_standalone.py` + +### telemetry JSON 字段说明 +```json +{ + "timestamp": 1777119601, + "visual_depth_fusion": { + "target_detected": true, + "targets": [{ + "id": 1, + "confidence": 0.74, + "posture": "CROUCHING", // STANDING(>1.4m) / CROUCHING(>0.8m) / PRONE + "weapon_detected": false, // VL文本含枪/武器/刀/gun/weapon关键词 + "vl_analysis": "未见武器,蹲下姿态...", + "distance_z_meters": 1.56, // 深度第15百分位/1000 + "estimated_height_cm": 62, // bbox高×dist/FY×0.614 + "relative_coords": {"x": 0.0, "y": 0.0, "z": 1.56} + }] + }, + "system_status": {"mode": "AUTONOMOUS", "estop": false} +} +``` + +### 待完成 +- [ ] 声源定位接入 DFRobot 阵列 +- [ ] 前端 UI 开发 +- [ ] /api/move 和 /api/control/emergency_stop 接入 unitree_sdk2py + +--- + +## 2026-04-25(续二) + +### 完成 +- stream_server.service 开机自启修复:librealsense2.so.2.50 软链接到 /opt/ros/noetic/lib/aarch64-linux-gnu/ +- 云端视觉分析集成(explore/test_vl_analysis.py): + - 使用阿里云 qwen-vl-plus 模型 + - YOLO 检测到人时,每5秒裁剪人像以 base64 发送给 VL API + - 分析内容:武器/枪支、姿态、行为意图 + - 结果显示在画面底部,同时打印到终端 + - 已验证:能识别手持物体、站立姿态、行为意图 + +### 关键参数 +- D435i 内参:fx=fy=388.86,cx=321.21,cy=238.69(640x480) +- 深度取第15百分位(过滤背景) +- 身高标定系数:BBOX_SCALE=0.614(基于1.7m真人标定) +- VL API:dashscope.aliyuncs.com,模型 qwen-vl-plus,间隔5秒 + +### 待完成 +- [ ] 声源定位接入 DFRobot 阵列 +- [ ] 前端 UI 开发 +- [ ] 将感知结果接入 dark_sentinel_ws 后端框架 diff --git a/docs/规格说明书.md b/docs/规格说明书.md new file mode 100644 index 0000000..fc41161 --- /dev/null +++ b/docs/规格说明书.md @@ -0,0 +1,163 @@ +### 1. 软件项目概述 +本软件项目正式名称为“暗哨”无人机器狗深度巡检软件系统,在本文档及后续研发过程中简称为“暗哨系统”。该系统的用户单位涵盖了电力巡检部门、边防守备部队、特定战术侦察单位以及大型工业园区的安保管理方。开发工作由软件工程专业项目研发组承担。 + +在功能定位上,本系统依托于无人机器狗硬件平台,通过集成深度视觉、红外热成像与麦克风阵列,实现对隐蔽目标的智能识别与声源方位的精准探测。其主要用途在于替代人工进入高危或视距受限的复杂环境,如废弃建筑、巷战区域或全黑厂房,执行高精度的环境侦察、动态预警与安全巡检任务。 + +--- + +### 2. 定义 +为了确保文档理解的准确性,此处对涉及的关键术语进行统一界定。 +* **RGB-D(Red-Green-Blue-Depth)**:指在彩色图像基础上融合深度信息的成像技术,用于描述物体的空间几何特征。 +* **多模态自适应权重融合**:一种信息处理技术,根据环境变化动态调整视觉、红外、深度数据的置信度权重,以提升目标识别准确率。 +* **ROS(Robot Operating System)**:一种用于机器人软件开发的灵活框架,本系统选用 Noetic 版本进行传感器数据管理。 +* **SLAM(Simultaneous Localization and Mapping)**:同步定位与建图,指机器狗在运动过程中构建环境地图并实时估算自身位姿的技术。 +* **伪彩色转化**:将非可见光数据(如深度灰度值或红外辐射强度)映射到彩色空间,提升人眼对环境特征的分辨能力。 +* **声纹识别**:通过算法提取并匹配特定物理特征(如脚步声、摩擦声)的声音指纹技术。 +* **敌方综合特征向量**:指从多模态传感器数据中联合提取的一组描述可疑目标属性的结构化信息,包含但不限于:空间三维位置(x, y, z)、身高估计、姿态类别(站立/蹲伏/卧倒)、武器持有状态(是否持械及武器类型)以及动作意图(静止/移动/奔跑)。 + +--- + +### 3. 软件产品与其环境之间的关系 +“暗哨”深度巡检软件系统是一个集成硬件感知、边缘计算与移动交互的协同系统。该软件位于整个巡检体系的核心层,向上承接操作员的控制指令与视觉反馈需求,向下通过机器人操作系统驱动并管理机器狗端的各类传感器。系统通过无线数传链路或无人机中继节点,实现机器狗端(数据采集与初加工)、移动终端(实时监控与报警显示)以及云端(深度学习模型更新与历史轨迹存储)之间的高频数据交换。 + +在外部硬件环境方面,软件需与奥比中光深度相机、红外成像模组、麦克风阵列等物理设备建立稳定的驱动连接,确保多模态原始数据的同步采集。在外部用户层面,软件为现场操作员、技术运维人员以及战术指挥员提供差异化的交互界面。 + +**表 1 “暗哨”软件系统与外部环境的主要交互对象及其关系** + +| 外部环境分类 | 交互对象名称 | 交互关系描述 | +| :--- | :--- | :--- | +| 硬件系统 | 无人机器狗/传感器组 | 软件采集其视觉、声学、位姿数据,并下发运动与探测指令。 | +| 辅助设备 | 战术无人机 | 作为通信中继,确保软件数据在复杂遮挡环境下的远距离传输。 | +| 外部软件 | ROS | 软件运行的基础框架,负责传感器消息的发布、订阅与节点管理。 | +| 外部用户 | 操作员(移动端) | 通过触控界面接收预警信息、查看监控画面并下达巡检任务。 | +| 计算平台 | 边缘端 | 软件核心算法运行的物理载体,执行实时的目标检测与数据融合。 | + +--- + +### 4. 限制与约束 +在功能与性能的实现过程中,本软件受到多种客观条件的严格约束。 + +首先是硬件计算资源的限制,由于机器狗搭载的边缘计算模块在算力与内存带宽上远不及高性能服务器,软件在运行 YOLOv8/v10 等深度学习算法时,必须进行大量的模型量化与推理优化工作,以确保在实时处理多路视觉信息的同时,不会因过度消耗系统资源而导致程序崩溃。同时,开发周期被限制在八周之内,这要求软件架构必须采用高度模块化的设计,以便于各功能模块并行开发与快速集成。 + +环境因素同样构成了显著的设计约束。软件必须能够在全黑、烟雾或强干扰等极端环境下保持高可靠性,这对视觉融合算法的健壮性提出了挑战。在通信性能方面,由于巡检任务对实时性要求极高,系统必须将端到端的数据传输延迟控制在 0.5 秒以内,任何明显的画面滞后都可能导致任务失败。 + +此外,软件的设计还需符合安全性约束,包含一键急停与脱网自主运行机制,确保在无线连接中断的情况下,机器狗仍能依靠本地逻辑完成基础的避障与目标锁定任务。 + +--- + +### 5. 假设与前提条件 +本软件的顺利开发与部署基于多项假设前提。首先假设无人机器狗硬件平台的底层驱动 SDK 是开放且稳定的,能够支持通过 ROS 接口进行高频次的数据读取与控制指令下发。其次,假设在系统运行过程中,作为通信中继的无人机能够提供覆盖巡检区域的无线信号保障,确保图像传输的延迟维持在 0.5 秒以内。 + +在数据与算法层面,假设现有开源数据集(如 NYU Depth 等)以及团队录制的实地声纹数据足以支撑初期模型的训练与优化。此外,前提条件还包括操作员在执行巡检任务前,已对机器狗的各类传感器进行了标准化标定。如果硬件平台发生结构性变更或底层控制协议大幅修改,本软件的相关功能模块(如 SLAM定位、声源指引等)可能需要进行针对性的重构或重新适配。 + +--- + +### 6. 软件功能概述 +“暗哨”深度巡检软件系统的核心功能旨在通过多传感器协同,提升在复杂及高危环境下的环境感知与任务执行效率。根据系统的业务逻辑,功能主要分为感知识别、任务控制与态势呈现三大模块。 + +**表 2 “暗哨”软件系统各功能的标识、描述及应用场景** + +| 功能标识 | 功能名称 | 功能描述 | 优先级 | 使用场景 | +| :--- | :--- | :--- | :--- | :--- | +| F-01 | 热成像监控 | 实时采集并分析红外热源数据,自动检测环境异常温升并触发告警。 | P1 | 废弃建筑搜索、全黑环境下的人员识别。 | +| F-02 | 深度视觉识别 | 利用 RGB-D 图像提取目标几何特征,实现对伪装目标的智能识别。 | P1 | 针对穿着迷彩服或隔热服的人员识别场景。 | +| F-03 | 声源定位探测 | 通过麦克风阵列捕捉微弱声响,计算声源方位并标注异常声纹信息。 | P1 | 探测视觉死角处的脚步声或摩擦声。 | +| F-04 | 战术地图生成 | 协同无人机拍摄多张图像,经算法拼接后生成实时全局战术地图。 | P2 | 巡检区域态势预览与整体环境建模。 | +| F-05 | 巡检任务管理 | 规划机器狗与无人机的联动路径,控制设备协同启动或停止。 | P2 | 自动化巡检作业的部署与管控。 | +| F-06 | 信号中转 | 无人机作为中继节点,转发机器狗数据及操作员控制指令。 | P3 | 巷战或密闭空间内的无线通信。 | +| F-07 | 敌方综合特征提取 | 融合深度点云、红外轮廓与视觉特征,提取目标空间位置、估计身高、识别姿态、检测武器持有状态及动作意图,并以结构化数据形式推送至操作终端。 | P1 | 战术决策支持,为火力分配或突入行动提供精确情报。 | + +--- + +### 7. 软件需求用例模型 + +#### 7.1 用例图(Mermaid 描述) +```mermaid +usecaseDiagram + actor "操作员" as Operator + actor "无人机" as Drone + actor "机器狗" as RobotDog + + package "“暗哨”软件系统" { + usecase "启动巡检任务" as UC1 + usecase "接受异常告警" as UC2 + usecase "查看巡检画面" as UC3 + usecase "查看实时地图与轨迹" as UC4 + usecase "查看敌方综合特征" as UC5 + usecase "信号中转" as UC6 + usecase "红外热成像识别" as UC7 + usecase "深度相机识别" as UC8 + usecase "麦克风阵列识别" as UC9 + usecase "拍摄战术地图" as UC10 + } + + Operator --> UC1 + Operator --> UC2 + Operator --> UC3 + Operator --> UC4 + Operator --> UC5 + + UC6 <.. UC7 : <> + UC6 <.. UC8 : <> + UC6 <.. UC9 : <> + + Drone --> UC6 + Drone --> UC10 + + RobotDog --> UC7 + RobotDog --> UC8 + RobotDog --> UC9 + + UC3 ..> UC7 : <> + UC3 ..> UC8 : <> + UC3 ..> UC9 : <> + UC4 ..> UC10 : <> +``` + +#### 7.2 模块简要描述 +1. **基础管理与交互类用例模块**:包含“启动巡检任务”与“接受异常告警”。负责设备自检、路径下发、联动指令触发,以及在识别到异常(红外温升、伪装人员、异常声纹)时通过弹窗、震动、音效强制提醒操作员。 +2. **多模态感知识别类用例模块**:包含“红外热成像识别”、“深度相机识别”、“麦克风阵列识别”、“查看巡检画面”及“查看敌方综合特征”。实现环境多维度特征提取,支持自动化分析,仅在目标置信度达标或人工请求时推送画面。其中“查看敌方综合特征”是聚合型用例,调用底层引擎输出目标坐标、身高、姿态及武器检测结果。 +3. **态势感知与通信中继类用例模块**:包含“查看实时地图与轨迹”、“拍摄战术地图”及“信号中转”。侧重空间态势呈现与链路维护。利用无人机高空视角补充全局信息,并通过无人机中继规避信号衰减与盲区。 + +--- + +### 8. 软件需求分析模型 +本小组通过交互图、分析类图及状态图,对“暗哨”无人机器狗深度巡检软件系统内部各组件的协作逻辑进行详细解构。 + +**(1)核心用例交互分析** +在红外热成像监控用例中,操作员通过“MonitoringUI”边界类启动指令,由“TaskController”控制器初始化硬件实体并驱动“DetectionEngine”进行持续计算。当检测引擎判定数据异常时,会跨越层级向界面推送告警信息。深度识别逻辑侧重于“ObjectDetector”对几何特征的提取,音频识别则依赖“AudioProcessor”执行降噪与声源定位算法。在“查看敌方综合特征”用例中,操作员点击目标标记,由“TaskController”转发至“ThreatAnalyzer”,调用“DepthProcessor”获取点云估算身高姿态,调用“ObjectDetector”识别武器,最终封装为“EnemyProfile”实体对象返回 UI。 + +**(2)软件分析类图** +系统的整体结构由边界类、控制类与实体类共同构成: +* **边界类**:主要负责与操作员及外部硬件进行交互,包括各类监控界面(MonitoringUI, RecognitionUI, AudioUI)与通信接口(信号中转)。 +* **控制类**:作为系统的“大脑”,承担任务调度、算法推理及逻辑决策职能(TaskController, DetectionEngine, ThreatAnalyzer, ObjectDetector, AudioProcessor)。 +* **实体类**:代表系统中需要持久化存储的数据或底层硬件状态(ThermalSensor, DepthCamera, MicrophoneArray, EnemyProfile, 告警数据库, 轨迹记录)。 + +--- + +### 9. 性能要求 +* **识别精度要求**:深度相机采集的灰度数据经伪彩色转化后,人形目标识别率不低于 80%;RGB、红外、深度三路传感器经自适应权重融合后,目标综合识别准确率不低于 90%,能有效识别穿隔热服、迷彩服的伪装目标。 +* **声纹识别能力要求**:机器狗行进状态下,经自噪声消除算法处理后,能清晰识别 3 米范围内的人体脚步声、摩擦声等典型声纹;机器狗静止侦听状态下,人体声纹有效识别距离不低于 5 米。 +* **通信传输性能要求**:机器狗与无人机、无人机与操作员终端之间的无线数据传输延迟不超过 0.5 秒;视频和识别结果传输无明显卡顿。 +* **系统稳定性要求**:软件系统支持连续无故障作业时间不低于 2 小时;在断网状态下,机器狗可独立保持不低于 1 小时的识别、预警、数据本地存储能力。 +* **定位精度要求**:机器狗自身定位精度不超过 1 米;战术地图上的目标标点、行进轨迹标注与实际位置的偏差不超过 1.5 米,定位漂移超过 3 米时,系统可实时触发异常提示。 + +--- + +### 10. 设计约束 +* **开发工具与语言约束**:核心开发语言限定为 Python、C++;深度学习框架选用 PyTorch,目标检测基于 YOLOv8/v10 开源框架开发;ROS 机器人系统选用 Noetic 版本。 +* **运行环境约束**:机器狗运行环境限定为 Linux 系统(兼容 Ubuntu18.04 及以上),需适配树莓派 4B 或 Jetson 入门级边缘计算模块;操作员终端需兼容 Android 10.0 及以上移动操作系统;云端可采用普通云服务器。 +* **硬件适配约束**:软件需适配深度相机奥比中光 Astra Pro,麦克风阵列 DFRobot 6+1 MEMS 麦克风阵列等,红外热成像模组需适配 ROS 系统;软件不得依赖定制化硬件,需兼容市面主流的机器狗与无人机。 +* **安全性与可靠性约束**:软件需内置故障安全机制,包含一键急停、定位异常提示、传感器故障提醒等功能;机器狗需具备本地操作能力,断网状态下可通过硬件指令实现核心功能控制。 +* **开发与迭代约束**:所有传感器数据需进行离线录制与反复测试;软件采用模块化设计,各功能模块耦合性低。 + +--- + +### 11. 软件原型 +本软件系统的交互界面原型设计遵循战术优先与信息直观化的原则,界面布局分为左侧数据监测栏与右侧核心监控窗口。 + +* **界面右侧**:占据屏幕约四分之三区域,为视觉处理核心显示区。支持在“视觉监控”、“红外处理”、“深度处理”及“综合监控”四种模式间无缝切换。在“红外处理”模式下,利用热成像伪彩色技术将温度差异转化为高对比度图像,并对异常目标边缘进行高亮强化。 +* **界面左侧**:专注于多维数据定量分析与非视觉感知。 + * **左上部分**:声源方位雷达图,利用波束成形技术将异常波动以红色动点标注在极坐标雷达中。 + * **左下部分**:综合仪表盘,实时显示环境噪声等级、目标距离、识别置信度。 + * **仪表盘底部**:通过自适应权重算法自动生成“综合威胁”评估指标(如“HIGH”状态),协助快速决策。 \ No newline at end of file diff --git a/explore/get_video.py b/explore/get_video.py new file mode 100644 index 0000000..be4234a --- /dev/null +++ b/explore/get_video.py @@ -0,0 +1,40 @@ +import cv2 +import numpy as np +import time +import sys +from unitree_sdk2py.core.channel import ChannelFactory +from unitree_sdk2py.go2.video.video_client import VideoClient + +def main(): + iface = "enx9c69d36ee634" + + try: + ChannelFactory().Init(0, iface) + except Exception as e: + print(f"无法初始化 ChannelFactory: {e}") + return + + client = VideoClient() + client.Init() + + print(f"正在通过 {iface} 连接 Go2 视频流... 按 'q' 退出") + + while True: + code, data = client.GetImageSample() + + if code == 0 and data: + arr = np.frombuffer(bytes(data), dtype=np.uint8) + frame = cv2.imdecode(arr, cv2.IMREAD_COLOR) + if frame is not None: + cv2.imshow("Go2 Video", frame) + else: + print(f"等待画面... 状态码: {code}") + time.sleep(0.5) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + cv2.destroyAllWindows() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/explore/main.py b/explore/main.py new file mode 100644 index 0000000..bc34347 --- /dev/null +++ b/explore/main.py @@ -0,0 +1,48 @@ +import cv2 +import numpy as np +import time +from ultralytics import YOLO +from unitree_sdk2py.core.channel import ChannelFactoryInitialize +from unitree_sdk2py.go2.video.video_client import VideoClient + +def main(): + iface = "enx9c69d36ee634" + + ChannelFactoryInitialize(0, iface) + + print("正在加载 YOLO 模型...") + model = YOLO("yolov8n.pt") + + client = VideoClient() + client.SetTimeout(3.0) + client.Init() + + print(f"正在通过 {iface} 连接 Go2 视频流... 按 'q' 退出") + + code, data = client.GetImageSample() + while code == 0: + code, data = client.GetImageSample() + + arr = np.frombuffer(bytes(data), dtype=np.uint8) + frame = cv2.imdecode(arr, cv2.IMREAD_COLOR) + if frame is None: + continue + + results = model.predict(frame, classes=[0], conf=0.5, verbose=False) + annotated = results[0].plot() + + count = len(results[0].boxes) + cv2.putText(annotated, f"Persons: {count}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2) + + cv2.imshow("Go2 Person Detection", annotated) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + if code != 0: + print(f"视频流错误,错误码: {code}") + + cv2.destroyAllWindows() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/explore/stream_server.py b/explore/stream_server.py new file mode 100644 index 0000000..913d98c --- /dev/null +++ b/explore/stream_server.py @@ -0,0 +1,102 @@ +""" +拓展坞推流脚本 v3(pyrealsense2,真实 uint16 深度) +在拓展坞上运行: python3 stream_server.py +/color — 彩色 MJPEG +/depth — 深度伪彩色 MJPEG +/depth_raw — 深度原始数据(numpy uint16,npy 格式,单帧) +""" +import threading +import socket +import numpy as np +import cv2 +import pyrealsense2 as rs + +pipeline = rs.pipeline() +config = rs.config() +config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) +config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) +pipeline.start(config) +align = rs.align(rs.stream.color) +print('RealSense 已启动') + +lock = threading.Lock() +latest_color = None +latest_depth = None # uint16, mm + + +def capture_loop(): + global latest_color, latest_depth + while True: + frames = pipeline.wait_for_frames() + aligned = align.process(frames) + color = np.asanyarray(aligned.get_color_frame().get_data()) + depth = np.asanyarray(aligned.get_depth_frame().get_data()) + with lock: + latest_color = color.copy() + latest_depth = depth.copy() + + +threading.Thread(target=capture_loop, daemon=True).start() + + +def encode_jpeg(frame, colormap=None): + if colormap is not None: + frame = cv2.applyColorMap(cv2.convertScaleAbs(frame, alpha=0.03), colormap) + _, buf = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 75]) + return buf.tobytes() + + +def handle_client(conn, path): + try: + if path == b'/depth_raw': + # 单帧原始深度,npy 格式 + with lock: + depth = latest_depth.copy() if latest_depth is not None else None + if depth is None: + conn.sendall(b'HTTP/1.1 503 Service Unavailable\r\n\r\n') + return + buf = np.save.__module__ # 用 BytesIO + import io + bio = io.BytesIO() + np.save(bio, depth) + data = bio.getvalue() + header = ( + f'HTTP/1.1 200 OK\r\nContent-Type: application/octet-stream\r\n' + f'Content-Length: {len(data)}\r\n\r\n' + ).encode() + conn.sendall(header + data) + return + + header = ( + 'HTTP/1.1 200 OK\r\n' + 'Content-Type: multipart/x-mixed-replace; boundary=frame\r\n\r\n' + ).encode() + conn.sendall(header) + while True: + with lock: + color = latest_color.copy() if latest_color is not None else None + depth = latest_depth.copy() if latest_depth is not None else None + if color is None: + continue + if path == b'/color': + data = encode_jpeg(color) + else: + data = encode_jpeg(depth, cv2.COLORMAP_JET) + conn.sendall(b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + data + b'\r\n') + except Exception: + pass + finally: + conn.close() + + +server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +server.bind(('0.0.0.0', 8080)) +server.listen(5) +print('推流服务启动: http://192.168.123.18:8080/color /depth /depth_raw') + +while True: + conn, _ = server.accept() + req = conn.recv(1024) + path = req.split(b' ')[1] if b' ' in req else b'/color' + threading.Thread(target=handle_client, args=(conn, path), daemon=True).start() diff --git a/explore/test_d435i.py b/explore/test_d435i.py new file mode 100644 index 0000000..fd42b46 --- /dev/null +++ b/explore/test_d435i.py @@ -0,0 +1,43 @@ +""" +D435i 深度相机调试脚本 +在拓展坞上运行: python3 test_d435i.py +依赖: pip install pyrealsense2 opencv-python numpy +""" +import numpy as np +import cv2 +import pyrealsense2 as rs + +pipeline = rs.pipeline() +config = rs.config() +config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) +config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) + +pipeline.start(config) +align = rs.align(rs.stream.color) +print("D435i 已连接,按 q 退出") + +try: + while True: + frames = pipeline.wait_for_frames() + aligned = align.process(frames) + color = np.asanyarray(aligned.get_color_frame().get_data()) + depth = np.asanyarray(aligned.get_depth_frame().get_data()) + + # 中心点距离 + h, w = depth.shape + cx, cy = w // 2, h // 2 + dist = depth[cy, cx] / 1000.0 + cv2.putText(color, f"Center: {dist:.2f}m", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2) + + # 深度伪彩色 + depth_color = cv2.applyColorMap( + cv2.convertScaleAbs(depth, alpha=0.03), cv2.COLORMAP_JET) + + cv2.imshow("D435i Color", color) + cv2.imshow("D435i Depth", depth_color) + if cv2.waitKey(1) & 0xFF == ord('q'): + break +finally: + pipeline.stop() + cv2.destroyAllWindows() diff --git a/explore/test_d435i_v4l2.py b/explore/test_d435i_v4l2.py new file mode 100644 index 0000000..fb40e49 --- /dev/null +++ b/explore/test_d435i_v4l2.py @@ -0,0 +1,33 @@ +""" +D435i 调试脚本(无需 pyrealsense2,用 OpenCV V4L2 直接读取) +在拓展坞上运行: python3 test_d435i_v4l2.py +video2: 深度流 (720x1280) +video4: 彩色流 (1080x1920) +""" +import cv2 +import numpy as np + +color_cap = cv2.VideoCapture(4) +depth_cap = cv2.VideoCapture(2) + +print("D435i 已连接,按 q 退出") +while True: + ret_c, color = color_cap.read() + ret_d, depth = depth_cap.read() + + if ret_c: + color_small = cv2.resize(color, (640, 360)) + cv2.imshow("Color", color_small) + + if ret_d: + depth_vis = cv2.applyColorMap( + cv2.convertScaleAbs(depth, alpha=0.03), cv2.COLORMAP_JET) + depth_small = cv2.resize(depth_vis, (640, 360)) + cv2.imshow("Depth", depth_small) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + +color_cap.release() +depth_cap.release() +cv2.destroyAllWindows() diff --git a/explore/test_vl_analysis.py b/explore/test_vl_analysis.py new file mode 100644 index 0000000..72843f4 --- /dev/null +++ b/explore/test_vl_analysis.py @@ -0,0 +1,119 @@ +""" +YOLO + 深度 + 云端视觉分析(qwen-vl-plus) +检测到人时每5秒发一次图片给阿里云 VL 模型,分析枪支/姿态/意图 +运行: conda activate dark_sentinel && python3 test_vl_analysis.py +""" +import cv2 +import numpy as np +import urllib.request +import io +import threading +import time +import base64 +from openai import OpenAI +from ultralytics import YOLO + +COLOR_URL = 'http://192.168.123.18:8080/color' +DEPTH_RAW_URL = 'http://192.168.123.18:8080/depth_raw' +MODEL_PATH = '/home/darvin/dog/sdk/yolov8n.pt' +CONF = 0.5 +FY = 388.86 +BBOX_SCALE = 0.614 +VL_INTERVAL = 5 # 秒 + +vl_client = OpenAI( + api_key='sk-1fdd0430c63e4f2d99db031b16e28f39', + base_url='https://dashscope.aliyuncs.com/compatible-mode/v1', +) + +model = YOLO(MODEL_PATH) +color_cap = cv2.VideoCapture(COLOR_URL) + +latest_depth = None +depth_lock = threading.Lock() +vl_text = '' +vl_lock = threading.Lock() +vl_last_time = 0 + + +def depth_fetch_loop(): + global latest_depth + while True: + try: + resp = urllib.request.urlopen(DEPTH_RAW_URL, timeout=2) + with depth_lock: + latest_depth = np.load(io.BytesIO(resp.read())) + except Exception: + pass + + +def analyze_person(crop_bgr): + global vl_text + _, buf = cv2.imencode('.jpg', crop_bgr, [cv2.IMWRITE_JPEG_QUALITY, 85]) + b64 = base64.b64encode(buf).decode() + try: + resp = vl_client.chat.completions.create( + model='qwen-vl-plus', + messages=[{'role': 'user', 'content': [ + {'type': 'image_url', 'image_url': {'url': f'data:image/jpeg;base64,{b64}'}}, + {'type': 'text', 'text': '分析图中人物:1)是否持有武器或枪支 2)姿态(站/蹲/跑/举手等)3)行为意图(一句话)。简洁回答。'} + ]}] + ) + text = resp.choices[0].message.content + except Exception as e: + text = f'API错误:{e}' + with vl_lock: + vl_text = text + print(f'[VL] {text}') + + +threading.Thread(target=depth_fetch_loop, daemon=True).start() + +print('YOLO+深度+云端视觉分析,按 q 退出') +while True: + ret, color = color_cap.read() + if not ret: + continue + + with depth_lock: + depth = latest_depth.copy() if latest_depth is not None else None + + result = model.predict(color, classes=[0], conf=CONF, verbose=False)[0] + annotated = result.plot() + + now = time.time() + for i, box in enumerate(result.boxes): + x1, y1, x2, y2 = map(int, box.xyxy[0].tolist()) + + dist_m, height_m = -1, -1 + if depth is not None: + roi = depth[max(0, y1):y2, max(0, x1):x2] + valid = roi[roi > 0] + if len(valid) > 0: + dist_m = float(np.percentile(valid, 15)) / 1000.0 + height_m = (y2 - y1) * dist_m / FY * BBOX_SCALE + + if dist_m > 0: + cv2.putText(annotated, f'{dist_m:.2f}m {height_m:.2f}m', + (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) + print(f'Person dist={dist_m:.2f}m height={height_m:.2f}m conf={float(box.conf):.2f}') + + # 每 VL_INTERVAL 秒对第一个人触发一次分析 + if i == 0 and now - vl_last_time > VL_INTERVAL: + crop = color[max(0, y1):y2, max(0, x1):x2] + threading.Thread(target=analyze_person, args=(crop.copy(),), daemon=True).start() + vl_last_time = now + + # 显示最新 VL 结果 + with vl_lock: + txt = vl_text + if txt: + cv2.putText(annotated, txt[:60], (10, annotated.shape[0] - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 200, 255), 1) + + cv2.imshow('YOLO+Depth+VL', annotated) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + +color_cap.release() +cv2.destroyAllWindows() diff --git a/explore/test_yolo_depth.py b/explore/test_yolo_depth.py new file mode 100644 index 0000000..e40457c --- /dev/null +++ b/explore/test_yolo_depth.py @@ -0,0 +1,57 @@ +""" +YOLO + D435i 深度融合调试脚本(在拓展坞上运行) +- 彩色流: /dev/video4 +- 深度流: /dev/video2 +- 输出: 检测框 + 估算距离(打印到终端) +运行: python3 test_yolo_depth.py +""" +import sys +sys.path.insert(0, '/home/unitree') # 根据实际 yolov8n.pt 位置调整 + +import cv2 +import numpy as np +from ultralytics import YOLO + +MODEL_PATH = '/home/unitree/yolov8n.pt' +CONF = 0.5 +# 深度 uint8: 255=最近(~0.3m), 18=最远(~10m),线性估算 +DEPTH_MIN_M = 0.3 +DEPTH_MAX_M = 10.0 + +model = YOLO(MODEL_PATH) +color_cap = cv2.VideoCapture(4) +depth_cap = cv2.VideoCapture(2) +depth_cap.set(cv2.CAP_PROP_CONVERT_RGB, 0) + +print("YOLO+深度融合测试,Ctrl+C 退出") +try: + while True: + ret_c, color = color_cap.read() + ret_d, depth = depth_cap.read() + if not ret_c or not ret_d: + continue + + # 缩放到统一尺寸 + color_s = cv2.resize(color, (640, 480)) + depth_s = cv2.resize(depth, (640, 480)) + + results = model.predict(color_s, classes=[0], conf=CONF, verbose=False)[0] + for box in results.boxes: + x1, y1, x2, y2 = map(int, box.xyxy[0].tolist()) + # 取边框内深度中位数 + roi = depth_s[y1:y2, x1:x2].astype(float) + valid = roi[roi > 0] + if len(valid) == 0: + continue + d_val = float(np.median(valid)) + # uint8 反推距离(线性映射) + dist_m = DEPTH_MAX_M - (d_val / 255.0) * (DEPTH_MAX_M - DEPTH_MIN_M) + # 粗略身高估算 + height_m = (y2 - y1) / 480.0 * dist_m * 2.0 + print(f"Person: dist={dist_m:.2f}m, height_est={height_m:.2f}m, conf={float(box.conf):.2f}") + +except KeyboardInterrupt: + pass + +color_cap.release() +depth_cap.release() diff --git a/explore/test_yolo_remote.py b/explore/test_yolo_remote.py new file mode 100644 index 0000000..a6443f9 --- /dev/null +++ b/explore/test_yolo_remote.py @@ -0,0 +1,117 @@ +""" +本机 YOLO + 深度融合调试脚本 v2(pyrealsense2 uint16 深度) +从拓展坞拉取彩色流 + 原始深度,YOLO 在本机推理 +运行: conda activate dark_sentinel && python3 test_yolo_remote.py +""" +import cv2 +import numpy as np +import urllib.request +import io +import threading +import base64 +from openai import OpenAI +from ultralytics import YOLO + +VL_CLIENT = OpenAI( + api_key='sk-1fdd0430c63e4f2d99db031b16e28f39', + base_url='https://dashscope.aliyuncs.com/compatible-mode/v1', +) +vl_result = {} # box_id -> 分析结果 +vl_lock = threading.Lock() + +def analyze_person(crop_bgr, box_id): + _, buf = cv2.imencode('.jpg', crop_bgr, [cv2.IMWRITE_JPEG_QUALITY, 85]) + b64 = base64.b64encode(buf).decode() + try: + resp = VL_CLIENT.chat.completions.create( + model='qwen-vl-plus', + messages=[{'role':'user','content':[ + {'type':'image_url','image_url':{'url':f'data:image/jpeg;base64,{b64}'}}, + {'type':'text','text':'分析图中人物:1)是否持有武器或枪支 2)姿态(站/蹲/跑/举手等)3)行为意图(一句话)。简洁回答。'} + ]}] + ) + text = resp.choices[0].message.content + except Exception as e: + text = f'API错误:{e}' + with vl_lock: + vl_result[box_id] = text + print(f'[VL] box{box_id}: {text}') + +COLOR_URL = 'http://192.168.123.18:8080/color' +DEPTH_RAW_URL = 'http://192.168.123.18:8080/depth_raw' +MODEL_PATH = '/home/darvin/dog/sdk/yolov8n.pt' +CONF = 0.5 + +model = YOLO(MODEL_PATH) +color_cap = cv2.VideoCapture(COLOR_URL) + +# 后台线程持续拉取最新深度帧(uint16, mm) +latest_depth = None +depth_lock = threading.Lock() + +def depth_fetch_loop(): + global latest_depth + while True: + try: + resp = urllib.request.urlopen(DEPTH_RAW_URL, timeout=2) + depth = np.load(io.BytesIO(resp.read())) + with depth_lock: + latest_depth = depth + except Exception: + pass + +threading.Thread(target=depth_fetch_loop, daemon=True).start() + +import time +vl_last_time = 0 +VL_INTERVAL = 5 # 每5秒触发一次 VL 分析 + +print('YOLO+深度融合+云端视觉分析,按 q 退出') +while True: + ret, color = color_cap.read() + if not ret: + continue + + with depth_lock: + depth = latest_depth.copy() if latest_depth is not None else None + + result = model.predict(color, classes=[0], conf=CONF, verbose=False)[0] + annotated = result.plot() + + now = time.time() + for i, box in enumerate(result.boxes): + x1, y1, x2, y2 = map(int, box.xyxy[0].tolist()) + + # 深度估算 + dist_m, height_m = -1, -1 + if depth is not None: + roi = depth[max(0,y1):y2, max(0,x1):x2] + valid = roi[roi > 0] + if len(valid) > 0: + dist_m = float(np.percentile(valid, 15)) / 1000.0 + height_m = (y2 - y1) * dist_m / 388.86 * 0.614 + + # 显示 VL 分析结果 + with vl_lock: + vl_text = vl_result.get(i, '') + if vl_text: + cv2.putText(annotated, vl_text[:50], (x1, y2 + 18), + cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 200, 255), 1) + + label = f'{dist_m:.2f}m {height_m:.2f}m' if dist_m > 0 else '' + cv2.putText(annotated, label, (x1, y1 - 8), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) + print(f'Person dist={dist_m:.2f}m height={height_m:.2f}m conf={float(box.conf):.2f}') + + # 每 VL_INTERVAL 秒对第一个检测到的人触发一次分析 + if i == 0 and now - vl_last_time > VL_INTERVAL: + crop = color[max(0,y1):y2, max(0,x1):x2] + threading.Thread(target=analyze_person, args=(crop.copy(), i), daemon=True).start() + vl_last_time = now + + cv2.imshow('YOLO+Depth+VL', annotated) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + +color_cap.release() +cv2.destroyAllWindows() diff --git a/explore/test_yolo_trt.py b/explore/test_yolo_trt.py new file mode 100644 index 0000000..9d0a912 --- /dev/null +++ b/explore/test_yolo_trt.py @@ -0,0 +1,109 @@ +""" +YOLOv8 TensorRT 推理 + D435i 深度融合(无需 pycuda,用 ctypes) +在拓展坞上运行: python3 test_yolo_trt.py +首次运行会转换 ONNX->TRT engine(约3-5分钟) +""" +import cv2 +import numpy as np +import tensorrt as trt +import ctypes +import os + +ONNX_PATH = '/home/unitree/yolov8n.onnx' +ENGINE_PATH = '/home/unitree/yolov8n.trt' +CONF = 0.5 +INPUT_SIZE = 640 +PERSON_ID = 0 + + +def build_engine(onnx_path, engine_path): + logger = trt.Logger(trt.Logger.WARNING) + builder = trt.Builder(logger) + network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + parser = trt.OnnxParser(network, logger) + with open(onnx_path, 'rb') as f: + parser.parse(f.read()) + config = builder.create_builder_config() + config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30) + data = builder.build_serialized_network(network, config) + with open(engine_path, 'wb') as f: + f.write(data) + print(f'Engine saved: {engine_path}') + return data + + +def load_engine(path): + logger = trt.Logger(trt.Logger.WARNING) + with open(path, 'rb') as f: + return trt.Runtime(logger).deserialize_cuda_engine(f.read()) + + +def preprocess(frame): + img = cv2.resize(frame, (INPUT_SIZE, INPUT_SIZE)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 + return np.ascontiguousarray(img.transpose(2, 0, 1)[np.newaxis]) + + +def postprocess(out, orig_h, orig_w): + preds = out[0].transpose(1, 0) # (8400, 84) + cls = np.argmax(preds[:, 4:], axis=1) + conf = preds[np.arange(len(preds)), 4 + cls] + mask = (cls == PERSON_ID) & (conf > CONF) + boxes, confs = preds[mask, :4], conf[mask] + sx, sy = orig_w / INPUT_SIZE, orig_h / INPUT_SIZE + results = [] + for b, c in zip(boxes, confs): + cx, cy, w, h = b + results.append((int((cx-w/2)*sx), int((cy-h/2)*sy), + int((cx+w/2)*sx), int((cy+h/2)*sy), float(c))) + return results + + +# ── 初始化 TRT ────────────────────────────────────────── +if not os.path.exists(ENGINE_PATH): + print('转换 ONNX -> TRT engine(约3-5分钟)...') + build_engine(ONNX_PATH, ENGINE_PATH) + +engine = load_engine(ENGINE_PATH) +context = engine.create_execution_context() + +# 用 numpy + ctypes 分配 pinned memory +h_in = np.zeros((1, 3, INPUT_SIZE, INPUT_SIZE), dtype=np.float32) +h_out = np.zeros((1, 84, 8400), dtype=np.float32) + +# 获取 binding 指针(TRT 8.x 用 get_tensor_address) +in_name = engine.get_tensor_name(0) +out_name = engine.get_tensor_name(1) + +color_cap = cv2.VideoCapture(4) +depth_cap = cv2.VideoCapture(2) +depth_cap.set(cv2.CAP_PROP_CONVERT_RGB, 0) + +print('TRT推理+深度融合,Ctrl+C 退出') +try: + while True: + ret_c, color = color_cap.read() + ret_d, depth = depth_cap.read() + if not ret_c or not ret_d: + continue + + orig_h, orig_w = color.shape[:2] + depth_s = cv2.resize(depth, (orig_w, orig_h)) + + h_in[:] = preprocess(color) + context.set_tensor_address(in_name, h_in.ctypes.data) + context.set_tensor_address(out_name, h_out.ctypes.data) + context.execute_v2([h_in.ctypes.data, h_out.ctypes.data]) + + for x1, y1, x2, y2, conf in postprocess(h_out, orig_h, orig_w): + roi = depth_s[max(0,y1):y2, max(0,x1):x2].astype(float) + valid = roi[roi > 0] + dist_m = 10.0 - float(np.median(valid)) / 255.0 * 9.7 if len(valid) else -1 + height_m = (y2 - y1) / orig_h * dist_m * 2.0 if dist_m > 0 else -1 + print(f'Person dist={dist_m:.2f}m height={height_m:.2f}m conf={conf:.2f}') + +except KeyboardInterrupt: + pass + +color_cap.release() +depth_cap.release() diff --git a/explore/教程.md b/explore/教程.md new file mode 100644 index 0000000..9f3c748 --- /dev/null +++ b/explore/教程.md @@ -0,0 +1,14 @@ +# Unitree Go2 教程 + +## 网络信息 + +- 有线网卡:`enx9c69d36ee634`,IP:`192.168.123.222` +- 机器人 IP:`192.168.123.161` + +## 运行 + +```bash +cd /home/darvin/dog +python main.py # YOLO 人体检测 +python get_video.py # 纯视频流 +``` -- 2.34.1