代码更改1

1 year ago · dee3e830fc
parent 286ef3f0ba
commit dee3e830fc
3 changed files with 103 additions and 124 deletions
--- a/src/image_recognition/train.py
+++ b/src/image_recognition/train.py
@ -55,32 +55,32 @@ RANK = int(os.getenv('RANK', -1))
 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
-def train(hyp,  # path/to/hyp.yaml or hyp dictionary
+def train(hyp,  # 'path/to/hyp.yaml' 或 hyp 字典
          opt,
          device,
          callbacks
          ):
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
+    # 定义训练过程中使用的变量
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
-    # Directories
+    # 创建目录
-    w = save_dir / 'weights'  # weights dir
+    w = save_dir / 'weights'  # 权重目录
-    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # 根据需要创建目录
-    last, best = w / 'last.pt', w / 'best.pt'
+    last, best = w / 'last.pt', w / 'best.pt'  # 最后保存和最佳权重文件
-    # Hyperparameters
+    # 超参数
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
-            hyp = yaml.safe_load(f)  # load hyps dict
+            hyp = yaml.safe_load(f)  # 加载超参数字典
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
-    # Save run settings
+    # 保存运行设置
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)
    data_dict = None
    # Loggers
    if RANK in [-1, 0]:
@ -437,7 +437,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
    return results
-# 明天把这些模型都试试效果先，一波波给他训练完毕，找个公开的数据集测试一下。
+
 def parse_opt(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'pretrained/yolov5s.pt', help='initial weights path')
--- a/src/image_recognition/val.py
+++ b/src/image_recognition/val.py
@ -35,49 +35,50 @@ from utils.torch_utils import select_device, time_sync
 def save_one_txt(predn, save_conf, shape, file):
-    # Save one txt result
+    # 保存单个txt结果
-    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # 归一化增益，格式为宽度、高度、宽度、高度
-    for *xyxy, conf, cls in predn.tolist():
+    for *xyxy, conf, cls in predn.tolist():  # 遍历预测结果
-        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # 将xyxy坐标转换为归一化的xywh格式
-        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # 根据是否保存置信度，确定标签格式
-        with open(file, 'a') as f:
+        with open(file, 'a') as f:  # 打开文件进行追加写入
-            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+            f.write(('%g ' * len(line)).rstrip() % line + '\n')  # 格式化写入结果
 def save_one_json(predn, jdict, path, class_map):
-    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    # 保存单个JSON结果
-    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem  # 根据文件名生成图像ID
-    box = xyxy2xywh(predn[:, :4])  # xywh
+    box = xyxy2xywh(predn[:, :4])  # 将xyxy坐标转换为xywh格式
-    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+    box[:, :2] -= box[:, 2:] / 2  # 将xy中心点转换为左上角点
-    for p, b in zip(predn.tolist(), box.tolist()):
+    for p, b in zip(predn.tolist(), box.tolist()):  # 遍历预测结果和转换后的坐标
-        jdict.append({'image_id': image_id,
+        jdict.append({
-                      'category_id': class_map[int(p[5])],
+            'image_id': image_id,  # 图像ID
-                      'bbox': [round(x, 3) for x in b],
+            'category_id': class_map[int(p[5])],  # 类别ID，根据类别映射
-                      'score': round(p[4], 5)})
+            'bbox': [round(x, 3) for x in b],  # 坐标值，保留三位小数
            'score': round(p[4], 5)  # 置信度，保留五位小数
        })
 def process_batch(detections, labels, iouv):
    """
-    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    返回正确的预测矩阵。两组框都使用(x1, y1, x2, y2)格式。
-    Arguments:
+    参数:
-        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+        detections (Array[N, 6])，x1, y1, x2, y2, conf, class
-        labels (Array[M, 5]), class, x1, y1, x2, y2
+        labels (Array[M, 5])，class, x1, y1, x2, y2
-    Returns:
+    返回:
-        correct (Array[N, 10]), for 10 IoU levels
+        correct (Array[N, 10])，10个IoU水平的正确预测
    """
-    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)  # 初始化正确预测矩阵
-    iou = box_iou(labels[:, 1:], detections[:, :4])
+    iou = box_iou(labels[:, 1:], detections[:, :4])  # 计算真实框和预测框之间的IoU
-    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]))  # IoU above threshold and classes match
+    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]))  # IoU高于阈值且类别匹配
-    if x[0].shape[0]:
+    if x[0].shape[0]:  # 如果存在匹配
-        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detection, iou]
+        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # 获取匹配的标签和检测框以及IoU值
-        if x[0].shape[0] > 1:
+        if x[0].shape[0] > 1:  # 如果匹配数量大于1
-            matches = matches[matches[:, 2].argsort()[::-1]]
+            matches = matches[matches[:, 2].argsort()[::-1]]  # 按IoU值降序排列
-            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # 去除重复的检测框
-            # matches = matches[matches[:, 2].argsort()[::-1]]
+            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # 去除重复的标签
-            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+        matches = torch.Tensor(matches).to(iouv.device)  # 转换为张量并移动到相应设备
-        matches = torch.Tensor(matches).to(iouv.device)
+        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv  # 更新正确预测矩阵
-        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+    return correct  # 返回正确预测矩阵
    return correct
@torch.no_grad()
--- a/src/image_recognition/window.py
+++ b/src/image_recognition/window.py
@ -8,7 +8,7 @@ Create Date: 2021/11/8
 Description：图形化界面，可以检测摄像头、视频和图片文件
 -------------------------------------------------
 """
-# 应该在界面启动的时候就将模型加载出来，设置tmp的目录来放中间的处理结果
+# 设置tmp的目录来放中间的处理结果
 import shutil
 import PyQt5.QtCore
 from PyQt5.QtGui import *
@ -223,7 +223,7 @@ class MainWindow(QTabWidget):
            suffix = fileName.split(".")[-1]
            save_path = osp.join("images/tmp", "tmp_upload." + suffix)
            shutil.copy(fileName, save_path)
-            # 应该调整一下图片的大小，然后统一防在一起
+            # 应该调整一下图片的大小，然后统一放置在一起
            im0 = cv2.imread(save_path)
            resize_scale = self.output_size / im0.shape[0]
            im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale)
@ -264,95 +264,73 @@ class MainWindow(QTabWidget):
        if source == "":
            QMessageBox.warning(self, "请上传", "请先上传图片再进行检测")
        else:
-            source = str(source)
+            source = str(source)  # 确保source是字符串类型
-            device = select_device(self.device)
+            device = select_device(self.device)  # 选择设备，可能是CPU或GPU
-            webcam = False
+            webcam = False  # 标记是否使用网络摄像头作为输入源
            stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
-            imgsz = check_img_size(imgsz, s=stride)  # check image size
+            imgsz = check_img_size(imgsz, s=stride)  # 检查图像尺寸是否符合模型要求
-            save_img = not nosave and not source.endswith('.txt')  # save inference images
+
-            # Dataloader
+            save_img = not nosave and not source.endswith('.txt')  # 仅当未禁用保存（nosave为False）且source不是.txt文件时，才保存推理后的图像
            # 数据加载器
            if webcam:
-                view_img = check_imshow()
+                view_img = check_imshow()  # 检查是否可以显示图像
-                cudnn.benchmark = True  # set True to speed up constant image size inference
+                cudnn.benchmark = True  # 设置为True以加速固定图像尺寸的推理
-                dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
+                dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)  # 加载网络摄像头数据
-                bs = len(dataset)  # batch_size
+                bs = len(dataset)  # 批处理大小
            else:
-                dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
+                dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)  # 加载图片数据
-                bs = 1  # batch_size
+                bs = 1  # 批处理大小为1
-            vid_path, vid_writer = [None] * bs, [None] * bs
+
-            # Run inference
+            vid_path, vid_writer = [None] * bs, [None] * bs  # 初始化视频路径和写入器
            # 运行推理
            if pt and device.type != 'cpu':
-                model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
+                model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # 预热模型
-            dt, seen = [0.0, 0.0, 0.0], 0
+
-            for path, im, im0s, vid_cap, s in dataset:
+            dt, seen = [0.0, 0.0, 0.0], 0  # 初始化计时器和已处理图像数量
-                t1 = time_sync()
+
-                im = torch.from_numpy(im).to(device)
+            for path, im, im0s, vid_cap, s in dataset:  # 遍历数据集
-                im = im.half() if half else im.float()  # uint8 to fp16/32
+                t1 = time_sync()  # 记录开始时间
-                im /= 255  # 0 - 255 to 0.0 - 1.0
+                im = torch.from_numpy(im).to(device)  # 将numpy数组转换为torch张量并移动到设备上
                im = im.half() if half else im.float()  # 根据half变量转换数据类型
                im /= 255  # 将像素值从0-255归一化到0.0-1.0
                if len(im.shape) == 3:
-                    im = im[None]  # expand for batch dim
+                    im = im[None]  # 增加批处理维度
-                t2 = time_sync()
+
-                dt[0] += t2 - t1
+                t2 = time_sync()  # 记录转换时间
-                # Inference
+                dt[0] += t2 - t1  # 累加时间
-                # visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+
-                pred = model(im, augment=augment, visualize=visualize)
+                # 推理过程
-                t3 = time_sync()
+                pred = model(im, augment=augment, visualize=visualize)  # 模型预测
-                dt[1] += t3 - t2
+
-                # NMS
+                t3 = time_sync()  # 记录预测时间
                dt[1] += t3 - t2  # 累加时间
                # 非极大值抑制（NMS）
                pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
-                dt[2] += time_sync() - t3
+
-                # Second-stage classifier (optional)
+                dt[2] += time_sync() - t3  # 累加NMS耗时
-                # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
+
-                # Process predictions
+                # 处理预测结果
-                for i, det in enumerate(pred):  # per image
+                for i, det in enumerate(pred):  # 遍历每张图像的预测结果
                    seen += 1
-                    if webcam:  # batch_size >= 1
+                    if webcam:  # 如果是网络摄像头输入
                        p, im0, frame = path[i], im0s[i].copy(), dataset.count
                        s += f'{i}: '
                    else:
                        p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
                    p = Path(p)  # to Path
                    s += '%gx%g ' % im.shape[2:]  # print string
                    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                    imc = im0.copy() if save_crop else im0  # for save_crop
                    annotator = Annotator(im0, line_width=line_thickness, example=str(names))
                    if len(det):
                        # Rescale boxes from img_size to im0 size
                        det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
-                        # Print results
+                    p = Path(p)  # 转换为Path对象
-                        for c in det[:, -1].unique():
+                    s += '%gx%g ' % im.shape[2:]  # 将图像尺寸追加到日志字符串
                            n = (det[:, -1] == c).sum()  # detections per class
                            s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
-                        # Write results
+                    # 以下是对检测结果的处理，包括绘制框、保存结果等
                        for *xyxy, conf, cls in reversed(det):
                            if save_txt:  # Write to file
                                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(
                                    -1).tolist()  # normalized xywh
                                line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                                # with open(txt_path + '.txt', 'a') as f:
                                #     f.write(('%g ' * len(line)).rstrip() % line + '\n')
-                            if save_img or save_crop or view_img:  # Add bbox to image
+                    # 保存推理结果图像
                                c = int(cls)  # integer class
                                label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                                annotator.box_label(xyxy, label, color=colors(c, True))
                                # if save_crop:
                                #     save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg',
                                #                  BGR=True)
                    # Print time (inference-only)
                    LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
                    # Stream results
                    im0 = annotator.result()
                    # if view_img:
                    #     cv2.imshow(str(p), im0)
                    #     cv2.waitKey(1)  # 1 millisecond
                    # Save results (image with detections)
                    resize_scale = output_size / im0.shape[0]
                    im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale)
-                    cv2.imwrite("images/tmp/single_result.jpg", im0)
+                    cv2.imwrite("images/tmp/single_result.jpg", im0)  # 保存图像
-                    # 目前的情况来看，应该只是ubuntu下会出问题，但是在windows下是完整的，所以继续
+                    # 更新界面显示的图像
                    self.right_img.setPixmap(QPixmap("images/tmp/single_result.jpg"))
    # 视频检测，逻辑基本一致，有两个功能，分别是检测摄像头的功能和检测视频文件的功能，先做检测摄像头的功能。