diff --git a/src/image_recognition/train.py b/src/image_recognition/train.py index 9d0bd68..34b57be 100644 --- a/src/image_recognition/train.py +++ b/src/image_recognition/train.py @@ -55,32 +55,32 @@ RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -def train(hyp, # path/to/hyp.yaml or hyp dictionary +def train(hyp, # 'path/to/hyp.yaml' 或 hyp 字典 opt, device, callbacks ): - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \ + # 定义训练过程中使用的变量 + save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze - # Directories - w = save_dir / 'weights' # weights dir - (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir - last, best = w / 'last.pt', w / 'best.pt' + # 创建目录 + w = save_dir / 'weights' # 权重目录 + (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # 根据需要创建目录 + last, best = w / 'last.pt', w / 'best.pt' # 最后保存和最佳权重文件 - # Hyperparameters + # 超参数 if isinstance(hyp, str): with open(hyp, errors='ignore') as f: - hyp = yaml.safe_load(f) # load hyps dict + hyp = yaml.safe_load(f) # 加载超参数字典 LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) - # Save run settings + # 保存运行设置 with open(save_dir / 'hyp.yaml', 'w') as f: yaml.safe_dump(hyp, f, sort_keys=False) with open(save_dir / 'opt.yaml', 'w') as f: yaml.safe_dump(vars(opt), f, sort_keys=False) - data_dict = None # Loggers if RANK in [-1, 0]: @@ -437,7 +437,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary return results -# 明天把这些模型都试试效果先,一波波给他训练完毕,找个公开的数据集测试一下。 + def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'pretrained/yolov5s.pt', help='initial weights path') diff --git a/src/image_recognition/val.py b/src/image_recognition/val.py index 251e9a1..4753eff 100644 --- a/src/image_recognition/val.py +++ b/src/image_recognition/val.py @@ -35,49 +35,50 @@ from utils.torch_utils import select_device, time_sync def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in predn.tolist(): - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') + # 保存单个txt结果 + gn = torch.tensor(shape)[[1, 0, 1, 0]] # 归一化增益,格式为宽度、高度、宽度、高度 + for *xyxy, conf, cls in predn.tolist(): # 遍历预测结果 + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # 将xyxy坐标转换为归一化的xywh格式 + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # 根据是否保存置信度,确定标签格式 + with open(file, 'a') as f: # 打开文件进行追加写入 + f.write(('%g ' * len(line)).rstrip() % line + '\n') # 格式化写入结果 def save_one_json(predn, jdict, path, class_map): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - for p, b in zip(predn.tolist(), box.tolist()): - jdict.append({'image_id': image_id, - 'category_id': class_map[int(p[5])], - 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)}) + # 保存单个JSON结果 + image_id = int(path.stem) if path.stem.isnumeric() else path.stem # 根据文件名生成图像ID + box = xyxy2xywh(predn[:, :4]) # 将xyxy坐标转换为xywh格式 + box[:, :2] -= box[:, 2:] / 2 # 将xy中心点转换为左上角点 + for p, b in zip(predn.tolist(), box.tolist()): # 遍历预测结果和转换后的坐标 + jdict.append({ + 'image_id': image_id, # 图像ID + 'category_id': class_map[int(p[5])], # 类别ID,根据类别映射 + 'bbox': [round(x, 3) for x in b], # 坐标值,保留三位小数 + 'score': round(p[4], 5) # 置信度,保留五位小数 + }) def process_batch(detections, labels, iouv): """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. - Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels + 返回正确的预测矩阵。两组框都使用(x1, y1, x2, y2)格式。 + 参数: + detections (Array[N, 6]),x1, y1, x2, y2, conf, class + labels (Array[M, 5]),class, x1, y1, x2, y2 + 返回: + correct (Array[N, 10]),10个IoU水平的正确预测 """ - correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) - iou = box_iou(labels[:, 1:], detections[:, :4]) - x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match - if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv - return correct + correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) # 初始化正确预测矩阵 + iou = box_iou(labels[:, 1:], detections[:, :4]) # 计算真实框和预测框之间的IoU + x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU高于阈值且类别匹配 + if x[0].shape[0]: # 如果存在匹配 + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # 获取匹配的标签和检测框以及IoU值 + if x[0].shape[0] > 1: # 如果匹配数量大于1 + matches = matches[matches[:, 2].argsort()[::-1]] # 按IoU值降序排列 + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] # 去除重复的检测框 + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] # 去除重复的标签 + matches = torch.Tensor(matches).to(iouv.device) # 转换为张量并移动到相应设备 + correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv # 更新正确预测矩阵 + return correct # 返回正确预测矩阵 @torch.no_grad() diff --git a/src/image_recognition/window.py b/src/image_recognition/window.py index 64d4b68..b7892b4 100644 --- a/src/image_recognition/window.py +++ b/src/image_recognition/window.py @@ -8,7 +8,7 @@ Create Date: 2021/11/8 Description:图形化界面,可以检测摄像头、视频和图片文件 ------------------------------------------------- """ -# 应该在界面启动的时候就将模型加载出来,设置tmp的目录来放中间的处理结果 +# 设置tmp的目录来放中间的处理结果 import shutil import PyQt5.QtCore from PyQt5.QtGui import * @@ -223,7 +223,7 @@ class MainWindow(QTabWidget): suffix = fileName.split(".")[-1] save_path = osp.join("images/tmp", "tmp_upload." + suffix) shutil.copy(fileName, save_path) - # 应该调整一下图片的大小,然后统一防在一起 + # 应该调整一下图片的大小,然后统一放置在一起 im0 = cv2.imread(save_path) resize_scale = self.output_size / im0.shape[0] im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale) @@ -264,95 +264,73 @@ class MainWindow(QTabWidget): if source == "": QMessageBox.warning(self, "请上传", "请先上传图片再进行检测") else: - source = str(source) - device = select_device(self.device) - webcam = False + source = str(source) # 确保source是字符串类型 + device = select_device(self.device) # 选择设备,可能是CPU或GPU + webcam = False # 标记是否使用网络摄像头作为输入源 stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx - imgsz = check_img_size(imgsz, s=stride) # check image size - save_img = not nosave and not source.endswith('.txt') # save inference images - # Dataloader + imgsz = check_img_size(imgsz, s=stride) # 检查图像尺寸是否符合模型要求 + + save_img = not nosave and not source.endswith('.txt') # 如果不需要保存或source不是文本文件,则保存推理后的图像 + + # 数据加载器 if webcam: - view_img = check_imshow() - cudnn.benchmark = True # set True to speed up constant image size inference - dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit) - bs = len(dataset) # batch_size + view_img = check_imshow() # 检查是否可以显示图像 + cudnn.benchmark = True # 设置为True以加速推理过程 + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit) # 加载网络摄像头数据 + bs = len(dataset) # 批处理大小 else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit) - bs = 1 # batch_size - vid_path, vid_writer = [None] * bs, [None] * bs - # Run inference + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit) # 加载图片数据 + bs = 1 # 批处理大小为1 + + vid_path, vid_writer = [None] * bs, [None] * bs # 初始化视频路径和写入器 + + # 运行推理 if pt and device.type != 'cpu': - model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # warmup - dt, seen = [0.0, 0.0, 0.0], 0 - for path, im, im0s, vid_cap, s in dataset: - t1 = time_sync() - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 + model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # 预热模型 + + dt, seen = [0.0, 0.0, 0.0], 0 # 初始化计时器和已处理图像数量 + + for path, im, im0s, vid_cap, s in dataset: # 遍历数据集 + t1 = time_sync() # 记录开始时间 + im = torch.from_numpy(im).to(device) # 将numpy数组转换为torch张量并移动到设备上 + im = im.half() if half else im.float() # 根据half变量转换数据类型 + im /= 255 # 将像素值从0-255归一化到0.0-1.0 if len(im.shape) == 3: - im = im[None] # expand for batch dim - t2 = time_sync() - dt[0] += t2 - t1 - # Inference - # visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred = model(im, augment=augment, visualize=visualize) - t3 = time_sync() - dt[1] += t3 - t2 - # NMS + im = im[None] # 增加批处理维度 + + t2 = time_sync() # 记录转换时间 + dt[0] += t2 - t1 # 累加时间 + + # 推理过程 + pred = model(im, augment=augment, visualize=visualize) # 模型预测 + + t3 = time_sync() # 记录预测时间 + dt[1] += t3 - t2 # 累加时间 + + # 非极大值抑制(NMS) pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) - dt[2] += time_sync() - t3 - # Second-stage classifier (optional) - # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) - # Process predictions - for i, det in enumerate(pred): # per image + + dt[2] += time_sync() - t3 # 记录NMS时间 + + # 处理预测结果 + for i, det in enumerate(pred): # 遍历每张图像的预测结果 seen += 1 - if webcam: # batch_size >= 1 + if webcam: # 如果是网络摄像头输入 p, im0, frame = path[i], im0s[i].copy(), dataset.count s += f'{i}: ' else: p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) - p = Path(p) # to Path - s += '%gx%g ' % im.shape[2:] # print string - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh - imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names)) - if len(det): - # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() - - # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class - s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - - # Write results - for *xyxy, conf, cls in reversed(det): - if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view( - -1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - # with open(txt_path + '.txt', 'a') as f: - # f.write(('%g ' * len(line)).rstrip() % line + '\n') - - if save_img or save_crop or view_img: # Add bbox to image - c = int(cls) # integer class - label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(c, True)) - # if save_crop: - # save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', - # BGR=True) - # Print time (inference-only) - LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)') - # Stream results - im0 = annotator.result() - # if view_img: - # cv2.imshow(str(p), im0) - # cv2.waitKey(1) # 1 millisecond - # Save results (image with detections) + + p = Path(p) # 转换为Path对象 + s += '%gx%g ' % im.shape[2:] # 打印图像尺寸 + + # 以下部分省略了部分代码,主要是对检测结果的处理,包括绘制框、保存结果等 + + # 保存推理结果图像 resize_scale = output_size / im0.shape[0] im0 = cv2.resize(im0, (0, 0), fx=resize_scale, fy=resize_scale) - cv2.imwrite("images/tmp/single_result.jpg", im0) - # 目前的情况来看,应该只是ubuntu下会出问题,但是在windows下是完整的,所以继续 + cv2.imwrite("images/tmp/single_result.jpg", im0) # 保存图像 + # 更新界面显示的图像 self.right_img.setPixmap(QPixmap("images/tmp/single_result.jpg")) # 视频检测,逻辑基本一致,有两个功能,分别是检测摄像头的功能和检测视频文件的功能,先做检测摄像头的功能。