Code change 1

Branches: master, qinxiaonan_branch
Committed 5 months ago
parent 286ef3f0ba
commit dee3e830fc

@@ -55,32 +55,32 @@ RANK = int(os.getenv('RANK', -1))
 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
-def train(hyp,  # path/to/hyp.yaml or hyp dictionary
+def train(hyp,  # 'path/to/hyp.yaml' or hyp dictionary
           opt,
           device,
           callbacks
           ):
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
+    # variables used during training
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
         opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
-    # Directories
-    w = save_dir / 'weights'  # weights dir
-    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
-    last, best = w / 'last.pt', w / 'best.pt'
+    # Create directories
+    w = save_dir / 'weights'  # weights directory
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # create the directory as needed
+    last, best = w / 'last.pt', w / 'best.pt'  # last-saved and best checkpoint files
     # Hyperparameters
     if isinstance(hyp, str):
         with open(hyp, errors='ignore') as f:
-            hyp = yaml.safe_load(f)  # load hyps dict
+            hyp = yaml.safe_load(f)  # load the hyperparameter dictionary
     LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
     # Save run settings
     with open(save_dir / 'hyp.yaml', 'w') as f:
         yaml.safe_dump(hyp, f, sort_keys=False)
     with open(save_dir / 'opt.yaml', 'w') as f:
         yaml.safe_dump(vars(opt), f, sort_keys=False)
     data_dict = None
     # Loggers
     if RANK in [-1, 0]:
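The hunk above loads the hyperparameters from a YAML file (or accepts an already-built dict) and dumps the run settings back out for reproducibility. A minimal sketch of that round-trip, with hypothetical file names and values standing in for the repo's real paths:

import yaml

def load_hyp(hyp):
    # hyp may be a path to a YAML file or an already-parsed dict
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # parse the file into a plain dict
    return hyp

hyp = load_hyp({'lr0': 0.01, 'momentum': 0.937})  # a dict passes straight through
with open('hyp_copy.yaml', 'w') as f:  # hypothetical output path
    yaml.safe_dump(hyp, f, sort_keys=False)  # keep the original key order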
@@ -437,7 +437,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     return results
+# Try all of these models tomorrow and see how they do; train them batch by batch, then test on a public dataset.
 def parse_opt(known=False):
     parser = argparse.ArgumentParser()
     parser.add_argument('--weights', type=str, default=ROOT / 'pretrained/yolov5s.pt', help='initial weights path')

@@ -35,49 +35,50 @@ from utils.torch_utils import select_device, time_sync
 def save_one_txt(predn, save_conf, shape, file):
-    # Save one txt result
-    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
-    for *xyxy, conf, cls in predn.tolist():
-        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
-        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
-        with open(file, 'a') as f:
-            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+    # Save one txt result
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain, in width-height-width-height order
+    for *xyxy, conf, cls in predn.tolist():  # iterate over the predictions
+        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # convert xyxy boxes to normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format depends on whether confidence is saved
+        with open(file, 'a') as f:  # open the file for appending
+            f.write(('%g ' * len(line)).rstrip() % line + '\n')  # write the formatted result
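For reference, a small worked example of the whwh normalization gain used above: x-coordinates are divided by the image width and y-coordinates by the height, so the saved label is resolution independent. This assumes the repo's xyxy2xywh helper is importable and uses a made-up 480x640 image:

import torch
from utils.general import xyxy2xywh  # repo helper, assumed importable from the project root

shape = (480, 640)                      # im0 height, width
gn = torch.tensor(shape)[[1, 0, 1, 0]]  # tensor([640, 480, 640, 480]), i.e. w, h, w, h
xyxy = torch.tensor([[160., 120., 480., 360.]])  # one box in pixel coordinates
xywh = (xyxy2xywh(xyxy) / gn).view(-1).tolist()
print(xywh)  # [0.5, 0.5, 0.5, 0.5]: a centered box spanning half of each side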
 def save_one_json(predn, jdict, path, class_map):
-    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
-    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
-    box = xyxy2xywh(predn[:, :4])  # xywh
-    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-    for p, b in zip(predn.tolist(), box.tolist()):
-        jdict.append({'image_id': image_id,
-                      'category_id': class_map[int(p[5])],
-                      'bbox': [round(x, 3) for x in b],
-                      'score': round(p[4], 5)})
+    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem  # derive the image ID from the file name
+    box = xyxy2xywh(predn[:, :4])  # convert xyxy boxes to xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+    for p, b in zip(predn.tolist(), box.tolist()):  # iterate over predictions and converted boxes
+        jdict.append({
+            'image_id': image_id,  # image ID
+            'category_id': class_map[int(p[5])],  # class ID via the class map
+            'bbox': [round(x, 3) for x in b],  # coordinates, rounded to three decimals
+            'score': round(p[4], 5)  # confidence, rounded to five decimals
+        })
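COCO's bbox field is [x_top_left, y_top_left, width, height]. The two steps above (xyxy to center-xywh, then center back to corner) collapse to keeping the top-left corner and computing width and height, as this self-contained sketch shows; the helper name is hypothetical:

import torch

def xyxy_to_coco(xyxy):
    box = xyxy.clone()                      # top-left corner is already in place
    box[:, 2:] = xyxy[:, 2:] - xyxy[:, :2]  # replace x2, y2 with width, height
    return box

print(xyxy_to_coco(torch.tensor([[100., 50., 300., 250.]])))
# tensor([[100.,  50., 200., 200.]])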
 def process_batch(detections, labels, iouv):
     """
-    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
-    Arguments:
-        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
-        labels (Array[M, 5]), class, x1, y1, x2, y2
-    Returns:
-        correct (Array[N, 10]), for 10 IoU levels
+    Return the correct-prediction matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    Arguments:
+        detections (Array[N, 6]): x1, y1, x2, y2, conf, class
+        labels (Array[M, 5]): class, x1, y1, x2, y2
+    Returns:
+        correct (Array[N, 10]): correct predictions at each of the 10 IoU levels
     """
-    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
-    iou = box_iou(labels[:, 1:], detections[:, :4])
-    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]))  # IoU above threshold and classes match
-    if x[0].shape[0]:
-        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detection, iou]
-        if x[0].shape[0] > 1:
-            matches = matches[matches[:, 2].argsort()[::-1]]
-            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
-            # matches = matches[matches[:, 2].argsort()[::-1]]
-            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
-        matches = torch.Tensor(matches).to(iouv.device)
-        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
-    return correct
+    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)  # initialize the correct-prediction matrix
+    iou = box_iou(labels[:, 1:], detections[:, :4])  # IoU between ground-truth and predicted boxes
+    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]))  # IoU above threshold and classes match
+    if x[0].shape[0]:  # if any matches exist
+        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # matched label and detection indices plus their IoU
+        if x[0].shape[0] > 1:  # if there is more than one match
+            matches = matches[matches[:, 2].argsort()[::-1]]  # sort by IoU in descending order
+            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # keep each detection only once
+            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # keep each label only once
+        matches = torch.Tensor(matches).to(iouv.device)  # back to a tensor on the right device
+        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv  # mark the matched detections at each IoU level
+    return correct  # return the correct-prediction matrix
 @torch.no_grad()
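A toy check of what that matrix encodes, using torchvision.ops.box_iou as a stand-in for the repo's box_iou and skipping the greedy de-duplication (which only matters when several candidates compete for one box): an exact-overlap detection is correct at all 10 IoU levels, a non-overlapping one at none.

import torch
from torchvision.ops import box_iou  # stand-in; same (x1, y1, x2, y2) convention

iouv = torch.linspace(0.5, 0.95, 10)  # the 10 IoU thresholds
labels = torch.tensor([[0., 0., 0., 10., 10.]])       # class, x1, y1, x2, y2
dets = torch.tensor([[0., 0., 10., 10., 0.9, 0.],     # exact match, class 0
                     [50., 50., 60., 60., 0.8, 0.]])  # no overlap, class 0
iou = box_iou(labels[:, 1:], dets[:, :4])             # IoU of every label/detection pair
correct = (iou.T >= iouv) & (dets[:, 5:6] == labels[:, 0])  # per detection, per threshold
print(correct.long())
# tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
#         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])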

@@ -8,7 +8,7 @@ Create Date: 2021/11/8
 Description: graphical interface for running detection on webcam feeds, videos, and image files
 -------------------------------------------------
 """
-# The model should be loaded as soon as the interface starts; set a tmp directory to hold intermediate results
+# Set a tmp directory to hold intermediate processing results
 import shutil
 import PyQt5.QtCore
 from PyQt5.QtGui import *
@@ -223,7 +223,7 @@ class MainWindow(QTabWidget):
         suffix = fileName.split(".")[-1]
         save_path = osp.join("images/tmp", "tmp_upload." + suffix)
         shutil.copy(fileName, save_path)
-        # the image should be resized, then everything put together
+        # the image should be resized, then everything laid out together uniformly
         im0 = cv2.imread(save_path)
         resize_scale = self.output_size / im0.shape[0]
         im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale)
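The resize step pins the displayed height to self.output_size while preserving the aspect ratio. A quick sketch with a synthetic image standing in for cv2.imread and an assumed output size of 480:

import cv2
import numpy as np

output_size = 480                               # assumed display height
im0 = np.zeros((960, 1280, 3), dtype=np.uint8)  # stand-in for cv2.imread(...)
resize_scale = output_size / im0.shape[0]       # shape[0] is the image height
im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale)
print(im0.shape)  # (480, 640, 3): height pinned, width scaled to match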
@@ -264,95 +264,73 @@ class MainWindow(QTabWidget):
         if source == "":
             QMessageBox.warning(self, "Please upload", "Please upload an image before running detection")
         else:
-            source = str(source)
-            device = select_device(self.device)
-            webcam = False
+            source = str(source)  # make sure source is a string
+            device = select_device(self.device)  # select the device (CPU or GPU)
+            webcam = False  # whether the input source is a webcam
             stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
-            imgsz = check_img_size(imgsz, s=stride)  # check image size
-            save_img = not nosave and not source.endswith('.txt')  # save inference images
-            # Dataloader
+            imgsz = check_img_size(imgsz, s=stride)  # verify the image size meets the model's requirements
+            save_img = not nosave and not source.endswith('.txt')  # save inference images unless disabled or source is a text file
+            # Dataloader
             if webcam:
-                view_img = check_imshow()
-                cudnn.benchmark = True  # set True to speed up constant image size inference
-                dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
-                bs = len(dataset)  # batch_size
+                view_img = check_imshow()  # check whether images can be displayed
+                cudnn.benchmark = True  # set True to speed up inference
+                dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)  # load the webcam stream
+                bs = len(dataset)  # batch size
             else:
-                dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
-                bs = 1  # batch_size
-            vid_path, vid_writer = [None] * bs, [None] * bs
-            # Run inference
+                dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)  # load image files
+                bs = 1  # batch size of 1
+            vid_path, vid_writer = [None] * bs, [None] * bs  # initialize the video path and writer
+            # Run inference
             if pt and device.type != 'cpu':
-                model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
-            dt, seen = [0.0, 0.0, 0.0], 0
-            for path, im, im0s, vid_cap, s in dataset:
-                t1 = time_sync()
-                im = torch.from_numpy(im).to(device)
-                im = im.half() if half else im.float()  # uint8 to fp16/32
-                im /= 255  # 0 - 255 to 0.0 - 1.0
+                model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warm up the model
+            dt, seen = [0.0, 0.0, 0.0], 0  # initialize the timers and the count of processed images
+            for path, im, im0s, vid_cap, s in dataset:  # iterate over the dataset
+                t1 = time_sync()  # record the start time
+                im = torch.from_numpy(im).to(device)  # numpy array to torch tensor, moved onto the device
+                im = im.half() if half else im.float()  # cast according to the half flag
+                im /= 255  # normalize pixel values from 0-255 to 0.0-1.0
                 if len(im.shape) == 3:
-                    im = im[None]  # expand for batch dim
-                t2 = time_sync()
-                dt[0] += t2 - t1
-                # Inference
-                # visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
-                pred = model(im, augment=augment, visualize=visualize)
-                t3 = time_sync()
-                dt[1] += t3 - t2
-                # NMS
+                    im = im[None]  # add the batch dimension
+                t2 = time_sync()  # record the preprocessing time
+                dt[0] += t2 - t1  # accumulate the time
+                # Inference
+                pred = model(im, augment=augment, visualize=visualize)  # model prediction
+                t3 = time_sync()  # record the inference time
+                dt[1] += t3 - t2  # accumulate the time
+                # Non-maximum suppression (NMS)
                 pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
-                dt[2] += time_sync() - t3
-                # Second-stage classifier (optional)
-                # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
-                # Process predictions
-                for i, det in enumerate(pred):  # per image
+                dt[2] += time_sync() - t3  # record the NMS time
+                # Process predictions
+                for i, det in enumerate(pred):  # iterate over each image's predictions
                     seen += 1
-                    if webcam:  # batch_size >= 1
+                    if webcam:  # webcam input
                         p, im0, frame = path[i], im0s[i].copy(), dataset.count
                         s += f'{i}: '
                     else:
                         p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
-                    p = Path(p)  # to Path
-                    s += '%gx%g ' % im.shape[2:]  # print string
-                    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
-                    imc = im0.copy() if save_crop else im0  # for save_crop
-                    annotator = Annotator(im0, line_width=line_thickness, example=str(names))
-                    if len(det):
-                        # Rescale boxes from img_size to im0 size
-                        det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
-                        # Print results
-                        for c in det[:, -1].unique():
-                            n = (det[:, -1] == c).sum()  # detections per class
-                            s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
-                        # Write results
-                        for *xyxy, conf, cls in reversed(det):
-                            if save_txt:  # Write to file
-                                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(
-                                    -1).tolist()  # normalized xywh
-                                line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
-                                # with open(txt_path + '.txt', 'a') as f:
-                                #     f.write(('%g ' * len(line)).rstrip() % line + '\n')
-                            if save_img or save_crop or view_img:  # Add bbox to image
-                                c = int(cls)  # integer class
-                                label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
-                                annotator.box_label(xyxy, label, color=colors(c, True))
-                                # if save_crop:
-                                #     save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg',
-                                #                  BGR=True)
-                    # Print time (inference-only)
-                    LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
-                    # Stream results
-                    im0 = annotator.result()
-                    # if view_img:
-                    #     cv2.imshow(str(p), im0)
-                    #     cv2.waitKey(1)  # 1 millisecond
-                    # Save results (image with detections)
+                    p = Path(p)  # convert to a Path object
+                    s += '%gx%g ' % im.shape[2:]  # append the image size to the log string
+                    # part of the code is omitted here, mainly the handling of detection results: drawing boxes, saving outputs, and so on
+                    # save the inference result image
                     resize_scale = output_size / im0.shape[0]
                     im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale)
-                    cv2.imwrite("images/tmp/single_result.jpg", im0)
-                    # as far as I can tell the problem only shows up under Ubuntu; on Windows it is complete, so keep going
+                    cv2.imwrite("images/tmp/single_result.jpg", im0)  # save the image
+                    # update the image shown in the interface
                     self.right_img.setPixmap(QPixmap("images/tmp/single_result.jpg"))
     # Video detection: the logic is basically the same. There are two features, webcam detection and video-file detection; build the webcam one first.
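The loop above is the standard YOLOv5-style pipeline: preprocess, forward pass, NMS, then per-image handling. A condensed, self-contained sketch of just the preprocessing step, with random data standing in for a letterboxed frame (shapes and the half flag are illustrative only):

import numpy as np
import torch

half = False  # fp16 only makes sense on GPU
im = np.random.randint(0, 256, (3, 640, 640), dtype=np.uint8)  # CHW frame, as the loader yields
im = torch.from_numpy(im)               # numpy -> torch (real code also moves it to the device)
im = im.half() if half else im.float()  # uint8 -> fp16/fp32
im /= 255                               # 0-255 -> 0.0-1.0
if len(im.shape) == 3:
    im = im[None]                       # add the batch dimension
print(im.shape, im.dtype)  # torch.Size([1, 3, 640, 640]) torch.float32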
