{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "YOLOv5 Tutorial",
"provenance": [],
"collapsed_sections": [],
"machine_shape": "hm",
"toc_visible": true,
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"c31d2039ccf74c22b67841f4877d1186": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_d4bba1727c714d94ad58a72bffa07c4c",
"IPY_MODEL_9aeff9f1780b45f892422fdc96e56913",
"IPY_MODEL_bf55a7c71d074d3fa88b10b997820825"
],
"layout": "IPY_MODEL_d8b66044e2fb4f5b916696834d880c81"
}
},
"d4bba1727c714d94ad58a72bffa07c4c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_102e1deda239436fa72751c58202fa0f",
"placeholder": "",
"style": "IPY_MODEL_4fd4431ced6c42368e18424912b877e4",
"value": "100%"
}
},
"9aeff9f1780b45f892422fdc96e56913": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cdd709c4f40941bea1b2053523c9fac8",
"max": 818322941,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a1ef2d8de2b741c78ca5d938e2ddbcdf",
"value": 818322941
}
},
"bf55a7c71d074d3fa88b10b997820825": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0dbce99bb6184238842cbec0587d564a",
"placeholder": "",
"style": "IPY_MODEL_91ff5f93f2a24c5790ab29e347965946",
"value": " 780M/780M [01:10<00:00, 10.5MB/s]"
}
},
"d8b66044e2fb4f5b916696834d880c81": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"102e1deda239436fa72751c58202fa0f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4fd4431ced6c42368e18424912b877e4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cdd709c4f40941bea1b2053523c9fac8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a1ef2d8de2b741c78ca5d938e2ddbcdf": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"0dbce99bb6184238842cbec0587d564a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"91ff5f93f2a24c5790ab29e347965946": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t6MPjfT5NrKQ"
},
"source": [
"\n",
"
\n",
"\n",
"This is the **official YOLOv5 🚀 notebook** by **Ultralytics**, and is freely available for redistribution under the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/). \n",
"For more information please visit https://github.com/ultralytics/yolov5 and https://ultralytics.com. Thank you!"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7mGmQbAO5pQb"
},
"source": [
"# Setup\n",
"\n",
"Clone repo, install dependencies and check PyTorch and GPU."
]
},
{
"cell_type": "code",
"metadata": {
"id": "wbvMlHd_QwMG",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "185d0979-edcd-4860-e6fb-b8a27dbf5096"
},
"source": [
"!git clone https://github.com/ultralytics/yolov5 # clone\n",
"%cd yolov5\n",
"%pip install -qr requirements.txt # install\n",
"\n",
"import torch\n",
"import utils\n",
"display = utils.notebook_init() # checks"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"YOLOv5 🚀 v6.1-370-g20f1b7e Python-3.7.13 torch-1.12.0+cu113 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Setup complete ✅ (8 CPUs, 51.0 GB RAM, 37.4/166.8 GB disk)\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4JnkELT0cIJg"
},
"source": [
"# 1. Inference\n",
"\n",
"`detect.py` runs YOLOv5 inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/detect`. Example inference sources are:\n",
"\n",
"```shell\n",
"python detect.py --source 0 # webcam\n",
" img.jpg # image \n",
" vid.mp4 # video\n",
" path/ # directory\n",
" 'path/*.jpg' # glob\n",
" 'https://youtu.be/Zgi9g1ksQHc' # YouTube\n",
" 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream\n",
"```"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zR9ZbuQCH7FX",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4b13989f-32a4-4ef0-b403-06ff3aac255c"
},
"source": [
"!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images\n",
"#display.Image(filename='runs/detect/exp/zidane.jpg', width=600)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[34m\u001b[1mdetect: \u001b[0mweights=['yolov5s.pt'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False\n",
"YOLOv5 🚀 v6.1-370-g20f1b7e Python-3.7.13 torch-1.12.0+cu113 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
"\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt to yolov5s.pt...\n",
"100% 14.1M/14.1M [00:00<00:00, 53.9MB/s]\n",
"\n",
"Fusing layers... \n",
"YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients\n",
"image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, Done. (0.016s)\n",
"image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.021s)\n",
"Speed: 0.6ms pre-process, 18.6ms inference, 25.0ms NMS per image at shape (1, 3, 640, 640)\n",
"Results saved to \u001b[1mruns/detect/exp\u001b[0m\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hkAzDWJ7cWTr"
},
"source": [
" \n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0eq1SMWl6Sfn"
},
"source": [
"# 2. Validate\n",
"Validate a model's accuracy on [COCO](https://cocodataset.org/#home) val or test-dev datasets. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag. Note that `pycocotools` metrics may be ~1% better than the equivalent repo metrics, as is visible below, due to slight differences in mAP computation."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eyTZYGgRjnMc"
},
"source": [
"## COCO val\n",
"Download [COCO val 2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L14) dataset (1GB - 5000 images), and test model accuracy."
]
},
{
"cell_type": "code",
"metadata": {
"id": "WQPtK1QYVaD_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"c31d2039ccf74c22b67841f4877d1186",
"d4bba1727c714d94ad58a72bffa07c4c",
"9aeff9f1780b45f892422fdc96e56913",
"bf55a7c71d074d3fa88b10b997820825",
"d8b66044e2fb4f5b916696834d880c81",
"102e1deda239436fa72751c58202fa0f",
"4fd4431ced6c42368e18424912b877e4",
"cdd709c4f40941bea1b2053523c9fac8",
"a1ef2d8de2b741c78ca5d938e2ddbcdf",
"0dbce99bb6184238842cbec0587d564a",
"91ff5f93f2a24c5790ab29e347965946"
]
},
"outputId": "a9004b06-37a6-41ed-a1f2-ac956f3963b3"
},
"source": [
"# Download COCO val\n",
"torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n",
"!unzip -q tmp.zip -d ../datasets && rm tmp.zip"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0.00/780M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c31d2039ccf74c22b67841f4877d1186"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "X58w8JLpMnjH",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c0f29758-4ec8-4def-893d-0efd6ed5b7f4"
},
"source": [
"# Run YOLOv5x on COCO val\n",
"!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[34m\u001b[1mval: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False\n",
"YOLOv5 🚀 v6.1-370-g20f1b7e Python-3.7.13 torch-1.12.0+cu113 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
"\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5x.pt to yolov5x.pt...\n",
"100% 166M/166M [00:35<00:00, 4.97MB/s]\n",
"\n",
"Fusing layers... \n",
"YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients\n",
"Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...\n",
"100% 755k/755k [00:00<00:00, 49.4MB/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning '/content/datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupt: 100% 5000/5000 [00:00<00:00, 10716.86it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco/val2017.cache\n",
" Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:08<00:00, 2.28it/s]\n",
" all 5000 36335 0.743 0.625 0.683 0.504\n",
"Speed: 0.1ms pre-process, 4.6ms inference, 1.2ms NMS per image at shape (32, 3, 640, 640)\n",
"\n",
"Evaluating pycocotools mAP... saving runs/val/exp/yolov5x_predictions.json...\n",
"loading annotations into memory...\n",
"Done (t=0.41s)\n",
"creating index...\n",
"index created!\n",
"Loading and preparing results...\n",
"DONE (t=5.64s)\n",
"creating index...\n",
"index created!\n",
"Running per image evaluation...\n",
"Evaluate annotation type *bbox*\n",
"DONE (t=72.86s).\n",
"Accumulating evaluation results...\n",
"DONE (t=14.20s).\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.506\n",
" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.688\n",
" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.549\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.340\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.558\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.651\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.382\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.631\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.684\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.528\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.737\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.833\n",
"Results saved to \u001b[1mruns/val/exp\u001b[0m\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rc_KbFk0juX2"
},
"source": [
"## COCO test\n",
"Download [COCO test2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L15) dataset (7GB - 40,000 images), to test model accuracy on test-dev set (**20,000 images, no labels**). Results are saved to a `*.json` file which should be **zipped** and submitted to the evaluation server at https://competitions.codalab.org/competitions/20794."
]
},
{
"cell_type": "code",
"metadata": {
"id": "V0AJnSeCIHyJ"
},
"source": [
"# Download COCO test-dev2017\n",
"!bash data/scripts/get_coco.sh --test"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "29GJXAP_lPrt"
},
"source": [
"# Run YOLOv5x on COCO test\n",
"!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half --task test"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZY2VXXXu74w5"
},
"source": [
"# 3. Train\n",
"\n",
"