master
fanchuana 4 years ago
parent 41dbf2e6fd
commit c572fda98e

2
.gitattributes vendored

@@ -0,0 +1,2 @@
# this drops notebooks from GitHub language stats
*.ipynb linguist-vendored

256
.gitignore vendored

@@ -0,0 +1,256 @@
# Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
*.jpg
*.jpeg
*.png
*.bmp
*.tif
*.tiff
*.heic
*.JPG
*.JPEG
*.PNG
*.BMP
*.TIF
*.TIFF
*.HEIC
*.mp4
*.mov
*.MOV
*.avi
*.data
*.json
*.cfg
!setup.cfg
!cfg/yolov3*.cfg
storage.googleapis.com
runs/*
data/*
data/images/*
!data/*.yaml
!data/hyps
!data/scripts
!data/images
!data/images/zidane.jpg
!data/images/bus.jpg
!data/*.sh
results*.csv
# Datasets -------------------------------------------------------------------------------------------------------------
coco/
coco128/
VOC/
# MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
*.m~
*.mat
!targets*.mat
# Neural Network weights -----------------------------------------------------------------------------------------------
*.weights
*.pt
*.pb
*.onnx
*.engine
*.mlmodel
*.torchscript
*.tflite
*.h5
*_saved_model/
*_web_model/
*_openvino_model/
darknet53.conv.74
yolov3-tiny.conv.15
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
/wandb/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv*
venv*/
ENV*/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
*.html # Bokeh plots
*.pb # TensorFlow frozen graphs
*.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

67
.pre-commit-config.yaml

@@ -0,0 +1,67 @@
# Define hooks for code formatting
# These are applied to updated files on each commit if a user has installed and linked the commit hooks
default_language_version:
python: python3.8
# Define bot property if installed via https://github.com/marketplace/pre-commit-ci
ci:
autofix_prs: true
autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
autoupdate_schedule: monthly
# submodules: true
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-case-conflict
- id: check-yaml
- id: check-toml
- id: pretty-format-json
- id: check-docstring-first
- repo: https://github.com/asottile/pyupgrade
rev: v2.32.0
hooks:
- id: pyupgrade
name: Upgrade code
args: [ --py37-plus ]
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
name: Sort imports
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.32.0
hooks:
- id: yapf
name: YAPF formatting
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.14
hooks:
- id: mdformat
name: MD formatting
additional_dependencies:
- mdformat-gfm
- mdformat-black
exclude: |
(?x)^(
README.md
)$
- repo: https://github.com/asottile/yesqa
rev: v1.3.0
hooks:
- id: yesqa
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
hooks:
- id: flake8
name: PEP8
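For reference, a minimal sketch of exercising these hooks locally (assumes `pre-commit` is installed, e.g. via `pip install pre-commit`, and that this is run from the repo root):

```python
import subprocess
import sys

# Link the git hook, then run every configured hook once against all files
# (equivalent to `pre-commit install && pre-commit run --all-files`).
subprocess.run([sys.executable, '-m', 'pre_commit', 'install'], check=True)
subprocess.run([sys.executable, '-m', 'pre_commit', 'run', '--all-files'], check=True)
```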

98
CONTRIBUTING.md

@@ -0,0 +1,98 @@
## Contributing to YOLOv5 🚀
We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible, whether it's:
- Reporting a bug
- Discussing the current state of the code
- Submitting a fix
- Proposing a new feature
- Becoming a maintainer
YOLOv5 works so well due to our combined community effort, and for every small improvement you contribute you will be
helping push the frontiers of what's possible in AI 😃!
## Submitting a Pull Request (PR) 🛠️
Submitting a PR is easy! This example shows how to submit a PR for updating `requirements.txt` in 4 steps:
### 1. Select File to Update
Select `requirements.txt` to update by clicking on it in GitHub.
<p align="center"><img width="800" alt="PR_step1" src="https://user-images.githubusercontent.com/26833433/122260847-08be2600-ced4-11eb-828b-8287ace4136c.png"></p>
### 2. Click 'Edit this file'
The button is in the top-right corner.
<p align="center"><img width="800" alt="PR_step2" src="https://user-images.githubusercontent.com/26833433/122260844-06f46280-ced4-11eb-9eec-b8a24be519ca.png"></p>
### 3. Make Changes
Change `matplotlib` version from `3.2.2` to `3.3`.
<p align="center"><img width="800" alt="PR_step3" src="https://user-images.githubusercontent.com/26833433/122260853-0a87e980-ced4-11eb-9fd2-3650fb6e0842.png"></p>
### 4. Preview Changes and Submit PR
Click on the **Preview changes** tab to verify your updates. At the bottom of the screen select 'Create a **new branch**
for this commit', assign your branch a descriptive name such as `fix/matplotlib_version` and click the green **Propose
changes** button. All done, your PR is now submitted to YOLOv5 for review and approval 😃!
<p align="center"><img width="800" alt="PR_step4" src="https://user-images.githubusercontent.com/26833433/122260856-0b208000-ced4-11eb-8e8e-77b6151cbcc3.png"></p>
### PR recommendations
To allow your work to be integrated as seamlessly as possible, we advise you to:
- ✅ Verify your PR is **up-to-date with upstream/master.** If your PR is behind upstream/master, an
automatic [GitHub Actions](https://github.com/ultralytics/yolov5/blob/master/.github/workflows/rebase.yml) merge may
be attempted by writing `/rebase` in a new comment, or by running the following code, replacing 'feature' with the name
of your local branch:
```bash
git remote add upstream https://github.com/ultralytics/yolov5.git
git fetch upstream
# git checkout feature # <--- replace 'feature' with local branch name
git merge upstream/master
git push -u origin -f
```
- ✅ Verify all Continuous Integration (CI) **checks are passing**.
- ✅ Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase
but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ — Bruce Lee
## Submitting a Bug Report 🐛
If you spot a problem with YOLOv5, please submit a Bug Report!
For us to start investigating a possible problem, we need to be able to reproduce it ourselves first. We've created a few
short guidelines below to help users provide what we need to get started.
When asking a question, people will be better able to provide help if you provide **code** that they can easily
understand and use to **reproduce** the problem. This is referred to by community members as creating
a [minimum reproducible example](https://stackoverflow.com/help/minimal-reproducible-example). Your code that reproduces
the problem should be:
- ✅ **Minimal** Use as little code as possible that still produces the same problem
- ✅ **Complete** Provide **all** parts someone else needs to reproduce your problem in the question itself
- ✅ **Reproducible** Test the code you're about to provide to make sure it reproduces the problem
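For instance, a minimal sketch of such an example (assuming a working PyTorch install; the model and image are just the standard repo samples):

```python
import torch

# Load pretrained YOLOv5s via torch.hub and run it on one sample image;
# trim everything not needed to trigger the behavior you are reporting.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
results = model('https://ultralytics.com/images/zidane.jpg')
results.print()  # point out where the unexpected output appears
```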
In addition to the above requirements, for [Ultralytics](https://ultralytics.com/) to provide assistance your code
should be:
- ✅ **Current** Verify that your code is up-to-date with current
GitHub [master](https://github.com/ultralytics/yolov5/tree/master), and if necessary `git pull` or `git clone` a new
copy to ensure your problem has not already been resolved by previous commits.
- ✅ **Unmodified** Your problem must be reproducible without any modifications to the codebase in this
repository. [Ultralytics](https://ultralytics.com/) does not provide support for custom code ⚠️.
If you believe your problem meets all of the above criteria, please raise a new issue using the 🐛
**Bug Report** [template](https://github.com/ultralytics/yolov5/issues/new/choose) and provide
a [minimum reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) to help us better
understand and diagnose your problem.
## License
By contributing, you agree that your contributions will be licensed under
the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/).

674
LICENSE

@@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.

65
README.md

@@ -1,2 +1,65 @@
# digitalprocessing
```
Test environment:
Machine: Lenovo Legion R7000 2020
CPU: AMD R7-4800H
GPU: RTX 2060 (6G) / GTX 1650 (6G)
Dependencies:
# pip install -r requirements.txt
# Base ----------------------------------------
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.2
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0
torchvision>=0.8.1
tqdm>=4.41.0
pyqt5
# Logging -------------------------------------
tensorboard>=2.4.1
# wandb
# Plotting ------------------------------------
pandas>=1.1.4
seaborn>=0.11.0
# Export --------------------------------------
# coremltools>=4.1 # CoreML export
# onnx>=1.9.0 # ONNX export
# onnx-simplifier>=0.3.6 # ONNX simplifier
# scikit-learn==0.19.2 # CoreML quantization
# tensorflow>=2.4.1 # TFLite export
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev # OpenVINO export
# Extras --------------------------------------
# albumentations>=1.0.3
# Cython # for pycocotools https://github.com/cocodataset/cocoapi/issues/172
# pycocotools>=2.0 # COCO mAP
# roboflow
thop # FLOPs computation
```
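A quick sanity check (a sketch, assuming the dependencies above are installed) that the GPU environment listed is actually visible to PyTorch:

```python
import torch

# Print the torch build and whether CUDA can see the RTX 2060 / GTX 1650.
print(torch.__version__, 'CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
```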

@@ -0,0 +1,52 @@
import json
import os

name2id = {'boy': 1, 'girl': 0}  # label names

def convert(img_size, box):
    # (x1, y1, x2, y2) pixel corners -> normalized YOLO (x_center, y_center, w, h)
    dw = 1. / img_size[0]
    dh = 1. / img_size[1]
    x = (box[0] + box[2]) / 2.0 - 1
    y = (box[1] + box[3]) / 2.0 - 1
    w = abs(box[2] - box[0])
    h = abs(box[3] - box[1])
    return x * dw, y * dh, w * dw, h * dh

def decode_json(json_folder_path, json_name):
    txt_name = 'mydata/Yololabels/' + json_name[0:-5] + '.txt'  # path where the txt labels are written
    json_path = os.path.join(json_folder_path, json_name)
    with open(json_path, 'r', encoding='gb2312', errors='ignore') as f:
        data = json.load(f)
    img_w = data['imageWidth']
    img_h = data['imageHeight']
    with open(txt_name, 'w') as txt_file:
        for shape in data['shapes']:
            label_name = shape['label']
            if shape['shape_type'] == 'rectangle':
                x1 = int(shape['points'][0][0])
                y1 = int(shape['points'][0][1])
                x2 = int(shape['points'][1][0])
                y2 = int(shape['points'][1][1])
                bbox = convert((img_w, img_h), (x1, y1, x2, y2))
                txt_file.write(str(name2id[label_name]) + " " + " ".join(str(a) for a in bbox) + '\n')

if __name__ == "__main__":
    json_folder_path = 'mydata/Annotations'  # folder holding the labelme json files
    for json_name in os.listdir(json_folder_path):
        decode_json(json_folder_path, json_name)
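A quick worked example of `convert()` above (illustrative numbers, not from any dataset): a 100×100-pixel box spanning (100, 100)–(200, 200) in a 640×480 image maps to normalized center/size values as follows.

```python
# x_center = (100 + 200) / 2 - 1 = 149 -> 149 / 640 ≈ 0.2328
# y_center = 149                       -> 149 / 480 ≈ 0.3104
# width    = 100 -> 100 / 640 = 0.1562; height = 100 -> 100 / 480 ≈ 0.2083
x, y, w, h = convert((640, 480), (100, 100, 200, 200))
print(f'{x:.4f} {y:.4f} {w:.4f} {h:.4f}')  # 0.2328 0.3104 0.1562 0.2083
```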

67
data/Argoverse.yaml

@@ -0,0 +1,67 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Example usage: python train.py --data Argoverse.yaml
# parent
# ├── yolov5
# └── datasets
# └── Argoverse ← downloads here (31.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
# Classes
nc: 8 # number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
from tqdm.auto import tqdm
from utils.general import download, Path
def argoverse2yolo(set):
labels = {}
a = json.load(open(set, "rb"))
for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
img_id = annot['image_id']
img_name = a['images'][img_id]['name']
img_label_name = img_name[:-3] + "txt"
cls = annot['category_id'] # instance class id
x_center, y_center, width, height = annot['bbox']
x_center = (x_center + width / 2) / 1920.0 # offset and scale
y_center = (y_center + height / 2) / 1200.0 # offset and scale
width /= 1920.0 # scale
height /= 1200.0 # scale
img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
if not img_dir.exists():
img_dir.mkdir(parents=True, exist_ok=True)
k = str(img_dir / img_label_name)
if k not in labels:
labels[k] = []
labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
for k in labels:
with open(k, "w") as f:
f.writelines(labels[k])
# Download
dir = Path('../datasets/Argoverse') # dataset root dir
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
download(urls, dir=dir, delete=False)
# Convert
annotations_dir = 'Argoverse-HD/annotations/'
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
for d in "train.json", "val.json":
argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels

54
data/GlobalWheat2020.yaml

@@ -0,0 +1,54 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
# Example usage: python train.py --data GlobalWheat2020.yaml
# parent
# ├── yolov5
# └── datasets
# └── GlobalWheat2020 ← downloads here (7.0 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
- images/arvalis_1
- images/arvalis_2
- images/arvalis_3
- images/ethz_1
- images/rres_1
- images/inrae_1
- images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
- images/ethz_1
test: # test images (optional) 1276 images
- images/utokyo_1
- images/utokyo_2
- images/nau_1
- images/uq_1
# Classes
nc: 1 # number of classes
names: ['wheat_head'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, Path
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
download(urls, dir=dir)
# Make Directories
for p in 'annotations', 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
# Move
for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
(dir / p).rename(dir / 'images' / p) # move to /images
f = (dir / p).with_suffix('.json') # json file
if f.exists():
f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations

114
data/Objects365.yaml

@@ -0,0 +1,114 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Example usage: python train.py --data Objects365.yaml
# parent
# ├── yolov5
# └── datasets
# └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)
# Classes
nc: 365 # number of classes
names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
'Pillow', 'Boots', 'Vase', 'Microphone', 'Necklace', 'Ring', 'SUV', 'Wine Glass', 'Belt', 'Monitor/TV',
'Backpack', 'Umbrella', 'Traffic Light', 'Speaker', 'Watch', 'Tie', 'Trash bin Can', 'Slippers', 'Bicycle',
'Stool', 'Barrel/bucket', 'Van', 'Couch', 'Sandals', 'Basket', 'Drum', 'Pen/Pencil', 'Bus', 'Wild Bird',
'High Heels', 'Motorcycle', 'Guitar', 'Carpet', 'Cell Phone', 'Bread', 'Camera', 'Canned', 'Truck',
'Traffic cone', 'Cymbal', 'Lifesaver', 'Towel', 'Stuffed Toy', 'Candle', 'Sailboat', 'Laptop', 'Awning',
'Bed', 'Faucet', 'Tent', 'Horse', 'Mirror', 'Power outlet', 'Sink', 'Apple', 'Air Conditioner', 'Knife',
'Hockey Stick', 'Paddle', 'Pickup Truck', 'Fork', 'Traffic Sign', 'Balloon', 'Tripod', 'Dog', 'Spoon', 'Clock',
'Pot', 'Cow', 'Cake', 'Dinning Table', 'Sheep', 'Hanger', 'Blackboard/Whiteboard', 'Napkin', 'Other Fish',
'Orange/Tangerine', 'Toiletry', 'Keyboard', 'Tomato', 'Lantern', 'Machinery Vehicle', 'Fan',
'Green Vegetables', 'Banana', 'Baseball Glove', 'Airplane', 'Mouse', 'Train', 'Pumpkin', 'Soccer', 'Skiboard',
'Luggage', 'Nightstand', 'Tea pot', 'Telephone', 'Trolley', 'Head Phone', 'Sports Car', 'Stop Sign',
'Dessert', 'Scooter', 'Stroller', 'Crane', 'Remote', 'Refrigerator', 'Oven', 'Lemon', 'Duck', 'Baseball Bat',
'Surveillance Camera', 'Cat', 'Jug', 'Broccoli', 'Piano', 'Pizza', 'Elephant', 'Skateboard', 'Surfboard',
'Gun', 'Skating and Skiing shoes', 'Gas stove', 'Donut', 'Bow Tie', 'Carrot', 'Toilet', 'Kite', 'Strawberry',
'Other Balls', 'Shovel', 'Pepper', 'Computer Box', 'Toilet Paper', 'Cleaning Products', 'Chopsticks',
'Microwave', 'Pigeon', 'Baseball', 'Cutting/chopping Board', 'Coffee Table', 'Side Table', 'Scissors',
'Marker', 'Pie', 'Ladder', 'Snowboard', 'Cookies', 'Radiator', 'Fire Hydrant', 'Basketball', 'Zebra', 'Grape',
'Giraffe', 'Potato', 'Sausage', 'Tricycle', 'Violin', 'Egg', 'Fire Extinguisher', 'Candy', 'Fire Truck',
'Billiards', 'Converter', 'Bathtub', 'Wheelchair', 'Golf Club', 'Briefcase', 'Cucumber', 'Cigar/Cigarette',
'Paint Brush', 'Pear', 'Heavy Truck', 'Hamburger', 'Extractor', 'Extension Cord', 'Tong', 'Tennis Racket',
'Folder', 'American Football', 'earphone', 'Mask', 'Kettle', 'Tennis', 'Ship', 'Swing', 'Coffee Machine',
'Slide', 'Carriage', 'Onion', 'Green beans', 'Projector', 'Frisbee', 'Washing Machine/Drying Machine',
'Chicken', 'Printer', 'Watermelon', 'Saxophone', 'Tissue', 'Toothbrush', 'Ice cream', 'Hot-air balloon',
'Cello', 'French Fries', 'Scale', 'Trophy', 'Cabbage', 'Hot dog', 'Blender', 'Peach', 'Rice', 'Wallet/Purse',
'Volleyball', 'Deer', 'Goose', 'Tape', 'Tablet', 'Cosmetics', 'Trumpet', 'Pineapple', 'Golf Ball',
'Ambulance', 'Parking meter', 'Mango', 'Key', 'Hurdle', 'Fishing Rod', 'Medal', 'Flute', 'Brush', 'Penguin',
'Megaphone', 'Corn', 'Lettuce', 'Garlic', 'Swan', 'Helicopter', 'Green Onion', 'Sandwich', 'Nuts',
'Speed Limit Sign', 'Induction Cooker', 'Broom', 'Trombone', 'Plum', 'Rickshaw', 'Goldfish', 'Kiwi fruit',
'Router/modem', 'Poker Card', 'Toaster', 'Shrimp', 'Sushi', 'Cheese', 'Notepaper', 'Cherry', 'Pliers', 'CD',
'Pasta', 'Hammer', 'Cue', 'Avocado', 'Hamimelon', 'Flask', 'Mushroom', 'Screwdriver', 'Soap', 'Recorder',
'Bear', 'Eggplant', 'Board Eraser', 'Coconut', 'Tape Measure/Ruler', 'Pig', 'Showerhead', 'Globe', 'Chips',
'Steak', 'Crosswalk Sign', 'Stapler', 'Camel', 'Formula 1', 'Pomegranate', 'Dishwasher', 'Crab',
'Hoverboard', 'Meat ball', 'Rice Cooker', 'Tuba', 'Calculator', 'Papaya', 'Antelope', 'Parrot', 'Seal',
'Butterfly', 'Dumbbell', 'Donkey', 'Lion', 'Urinal', 'Dolphin', 'Electric Drill', 'Hair Dryer', 'Egg tart',
'Jellyfish', 'Treadmill', 'Lighter', 'Grapefruit', 'Game board', 'Mop', 'Radish', 'Baozi', 'Target', 'French',
'Spring Rolls', 'Monkey', 'Rabbit', 'Pencil Case', 'Yak', 'Red Cabbage', 'Binoculars', 'Asparagus', 'Barbell',
'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Tennis paddle', 'Cosmetics Brush/Eyeliner Pencil',
'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis']
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from tqdm.auto import tqdm
from utils.general import Path, check_requirements, download, np, xyxy2xywhn
check_requirements(('pycocotools>=2.0',))
from pycocotools.coco import COCO
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val':
(dir / p / q).mkdir(parents=True, exist_ok=True)
# Train, Val Splits
for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
print(f"Processing {split} in {patches} patches ...")
images, labels = dir / 'images' / split, dir / 'labels' / split
# Download
url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
if split == 'train':
download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json
download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8)
elif split == 'val':
download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json
download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8)
download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8)
# Move
for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
f.rename(images / f.name) # move to /images/{split}
# Labels
coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
for cid, cat in enumerate(names):
catIds = coco.getCatIds(catNms=[cat])
imgIds = coco.getImgIds(catIds=catIds)
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
width, height = im["width"], im["height"]
path = Path(im["file_name"]) # image filename
try:
with open(labels / path.with_suffix('.txt').name, 'a') as file:
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
for a in coco.loadAnns(annIds):
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
except Exception as e:
print(e)
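
The download: block above is ordinary Python that YOLOv5 runs when the dataset is missing. Below is a minimal sketch of that dispatch, assuming it mirrors check_dataset() in utils/general.py at this revision (bare .zip URLs are fetched, 'bash ' strings run as shell commands, and anything else is exec'd with the free name 'yaml' bound to the parsed dataset dict, which is why the script above can read yaml['path']):

import subprocess
from pathlib import Path

def run_download(s, data):
    # s: the YAML 'download' field; data: the parsed dataset dict (a sketch, not the actual API)
    if s.startswith('http') and s.endswith('.zip'):  # bare URL: fetch and unzip
        f = Path(s).name
        subprocess.run(f'curl -L {s} -o {f} && unzip -q {f} && rm {f}', shell=True, check=True)
    elif s.startswith('bash '):  # shell script
        subprocess.run(s, shell=True, check=True)
    else:  # inline Python, like the script above
        exec(s, {'yaml': data})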

@ -0,0 +1,53 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Example usage: python train.py --data SKU-110K.yaml
# parent
# ├── yolov5
# └── datasets
# └── SKU-110K ← downloads here (13.6 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# Classes
nc: 1 # number of classes
names: ['object'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import shutil
from tqdm.auto import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
download(urls, dir=parent, delete=False)
# Rename directories
if dir.exists():
shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
# Convert labels
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
images, unique_images = x[:, 0], np.unique(x[:, 0])
with open(str((dir / d).with_suffix('.txt')).replace('annotations_', ''), 'w') as f:
f.writelines(f'./images/{s}\n' for s in unique_images)
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
cls = 0 # single-class dataset
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
for r in x[images == im]:
w, h = r[6], r[7] # image width, height
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label

@ -0,0 +1,81 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
# └── VOC ← downloads here (2.8 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007
# Classes
nc: 20 # number of classes
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET
from tqdm.auto import tqdm
from utils.general import download, Path
def convert_label(path, lb_path, year, image_id):
def convert_box(size, box):
dw, dh = 1. / size[0], 1. / size[1]
x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
return x * dw, y * dh, w * dw, h * dh
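# note: the -1 above compensates for VOC's 1-based pixel coordinates (the same convention as the classic darknet voc_label.py converter)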
in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
out_file = open(lb_path, 'w')
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
for obj in root.iter('object'):
cls = obj.find('name').text
if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
xmlbox = obj.find('bndbox')
bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
cls_id = yaml['names'].index(cls) # class id
out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
# Download
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)
# Convert
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
imgs_path = dir / 'images' / f'{image_set}{year}'
lbs_path = dir / 'labels' / f'{image_set}{year}'
imgs_path.mkdir(exist_ok=True, parents=True)
lbs_path.mkdir(exist_ok=True, parents=True)
with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
image_ids = f.read().strip().split()
for id in tqdm(image_ids, desc=f'{image_set}{year}'):
f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
f.rename(imgs_path / f.name) # move image
convert_label(path, lb_path, year, id) # convert labels to YOLO format

@ -0,0 +1,61 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Example usage: python train.py --data VisDrone.yaml
# parent
# ├── yolov5
# └── datasets
# └── VisDrone ← downloads here (2.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# Classes
nc: 10 # number of classes
names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, os, Path
def visdrone2yolo(dir):
from PIL import Image
from tqdm.auto import tqdm
def convert_box(size, box):
# Convert VisDrone box to YOLO xywh box
dw = 1. / size[0]
dh = 1. / size[1]
return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
(dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
for f in pbar:
img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
lines = []
with open(f, 'r') as file: # read annotation.txt
for row in [x.split(',') for x in file.read().strip().splitlines()]:
if row[4] == '0': # VisDrone 'ignored regions' class 0
continue
cls = int(row[5]) - 1
box = convert_box(img_size, tuple(map(int, row[:4])))
lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
fl.writelines(lines) # write label.txt
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir, curl=True, threads=4)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels
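
A quick sanity check of the convert_box logic above on hypothetical values (a VisDrone annotation row is bbox_left, bbox_top, bbox_width, bbox_height, score, category, truncation, occlusion):

# For a 1000x800 image and the row "100,200,50,40,1,4,0,0":
size, box = (1000, 800), (100, 200, 50, 40)
dw, dh = 1. / size[0], 1. / size[1]
print((box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh)
# -> 0.125 0.275 0.05 0.05, with category 4 mapping to YOLO class 3 ('car')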

@ -0,0 +1,45 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: python train.py --data coco.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Classes
nc: 80 # number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush'] # class names
# Download script/URL (optional)
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
download(urls, dir=dir.parent)
# Download data
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
download(urls, dir=dir / 'images', threads=3)

@ -0,0 +1,30 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: python train.py --data coco128.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
nc: 80 # number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush'] # class names
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128.zip

@ -0,0 +1,34 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Hyperparameters for Objects365 training
# python train.py --weights yolov5m.pt --data Objects365.yaml --evolve
# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
lr0: 0.00258
lrf: 0.17
momentum: 0.779
weight_decay: 0.00058
warmup_epochs: 1.33
warmup_momentum: 0.86
warmup_bias_lr: 0.0711
box: 0.0539
cls: 0.299
cls_pw: 0.825
obj: 0.632
obj_pw: 1.0
iou_t: 0.2
anchor_t: 3.44
anchors: 3.2
fl_gamma: 0.0
hsv_h: 0.0188
hsv_s: 0.704
hsv_v: 0.36
degrees: 0.0
translate: 0.0902
scale: 0.491
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 1.0
mixup: 0.0
copy_paste: 0.0

@ -0,0 +1,40 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Hyperparameters for VOC training
# python train.py --batch 128 --weights yolov5m6.pt --data VOC.yaml --epochs 50 --img 512 --hyp hyp.scratch-med.yaml --evolve
# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
# YOLOv5 Hyperparameter Evolution Results
# Best generation: 467
# Last generation: 996
# metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss
# 0.87729, 0.85125, 0.91286, 0.72664, 0.0076739, 0.0042529, 0.0013865
lr0: 0.00334
lrf: 0.15135
momentum: 0.74832
weight_decay: 0.00025
warmup_epochs: 3.3835
warmup_momentum: 0.59462
warmup_bias_lr: 0.18657
box: 0.02
cls: 0.21638
cls_pw: 0.5
obj: 0.51728
obj_pw: 0.67198
iou_t: 0.2
anchor_t: 3.3744
fl_gamma: 0.0
hsv_h: 0.01041
hsv_s: 0.54703
hsv_v: 0.27739
degrees: 0.0
translate: 0.04591
scale: 0.75544
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 0.85834
mixup: 0.04266
copy_paste: 0.0
anchors: 3.412

@ -0,0 +1,36 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Hyperparameters for high-augmentation COCO training from scratch
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.3 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.1 # image mixup (probability)
copy_paste: 0.1 # segment copy-paste (probability)
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple

@ -0,0 +1,36 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Hyperparameters for low-augmentation COCO training from scratch
# python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 1.0 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.5 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple

@ -0,0 +1,36 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Hyperparameters for medium-augmentation COCO training from scratch
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.3 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.1 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple


@ -0,0 +1,20 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Download latest models from https://github.com/ultralytics/yolov5/releases
# Example usage: bash path/to/download_weights.sh
# parent
# └── yolov5
# ├── yolov5s.pt ← downloads here
# ├── yolov5m.pt
# └── ...
python - <<EOF
from utils.downloads import attempt_download
models = ['n', 's', 'm', 'l', 'x']
models.extend([x + '6' for x in models]) # add P6 models
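# models -> ['n', 's', 'm', 'l', 'x', 'n6', 's6', 'm6', 'l6', 'x6']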
for x in models:
attempt_download(f'yolov5{x}.pt')
EOF

@ -0,0 +1,27 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Download COCO 2017 dataset http://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here
# Download/unzip labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
f1='train2017.zip' # 19G, 118k images
f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
for f in $f1 $f2; do
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
done
wait # finish background tasks

@ -0,0 +1,17 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here
# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco128-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
wait # finish background tasks

@ -0,0 +1,102 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Example usage: python train.py --data xView.yaml
# parent
# ├── yolov5
# └── datasets
# └── xView ← downloads here (20.7 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
# Classes
nc: 60 # number of classes
names: ['Fixed-wing Aircraft', 'Small Aircraft', 'Cargo Plane', 'Helicopter', 'Passenger Vehicle', 'Small Car', 'Bus',
'Pickup Truck', 'Utility Truck', 'Truck', 'Cargo Truck', 'Truck w/Box', 'Truck Tractor', 'Trailer',
'Truck w/Flatbed', 'Truck w/Liquid', 'Crane Truck', 'Railway Vehicle', 'Passenger Car', 'Cargo Car',
'Flat Car', 'Tank car', 'Locomotive', 'Maritime Vessel', 'Motorboat', 'Sailboat', 'Tugboat', 'Barge',
'Fishing Vessel', 'Ferry', 'Yacht', 'Container Ship', 'Oil Tanker', 'Engineering Vehicle', 'Tower crane',
'Container Crane', 'Reach Stacker', 'Straddle Carrier', 'Mobile Crane', 'Dump Truck', 'Haul Truck',
'Scraper/Tractor', 'Front loader/Bulldozer', 'Excavator', 'Cement Mixer', 'Ground Grader', 'Hut/Tent', 'Shed',
'Building', 'Aircraft Hangar', 'Damaged Building', 'Facility', 'Construction Site', 'Vehicle Lot', 'Helipad',
'Storage Tank', 'Shipping container lot', 'Shipping Container', 'Pylon', 'Tower'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
import os
from pathlib import Path
import numpy as np
from PIL import Image
from tqdm.auto import tqdm
from utils.datasets import autosplit
from utils.general import download, xyxy2xywhn
def convert_labels(fname=Path('xView/xView_train.geojson')):
# Convert xView geoJSON labels to YOLO format
path = fname.parent
with open(fname) as f:
print(f'Loading {fname}...')
data = json.load(f)
# Make dirs
labels = Path(path / 'labels' / 'train')
os.system(f'rm -rf {labels}')
labels.mkdir(parents=True, exist_ok=True)
# xView classes 11-94 to 0-59
xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
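# e.g. xview_class2index[11] -> 0 ('Fixed-wing Aircraft'); raw ids mapped to -1 have no class in this 60-class config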
shapes = {}
for feature in tqdm(data['features'], desc=f'Converting {fname}'):
p = feature['properties']
if p['bounds_imcoords']:
id = p['image_id']
file = path / 'train_images' / id
if file.exists(): # 1395.tif missing
try:
box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
cls = p['type_id']
cls = xview_class2index[int(cls)]  # map raw xView class 11-94 to contiguous 0-59
assert 59 >= cls >= 0, f'incorrect class index {cls}'
# Write YOLO label
if id not in shapes:
shapes[id] = Image.open(file).size
box = xyxy2xywhn(box[None].astype(np.float64), w=shapes[id][0], h=shapes[id][1], clip=True)
with open((labels / id).with_suffix('.txt'), 'a') as f:
f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
except Exception as e:
print(f'WARNING: skipping one label for {file}: {e}')
# Download manually from https://challenge.xviewdataset.org
dir = Path(yaml['path']) # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
# 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
# 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
# download(urls, dir=dir, delete=False)
# Convert labels
convert_labels(dir / 'xView_train.geojson')
# Move images
images = Path(dir / 'images')
images.mkdir(parents=True, exist_ok=True)
Path(dir / 'train_images').rename(dir / 'images' / 'train')
Path(dir / 'val_images').rename(dir / 'images' / 'val')
# Split
autosplit(dir / 'images' / 'train')

@ -0,0 +1,46 @@
import os
from shutil import copy
import random
def mkfile(file):
    # check whether a file or directory already exists at `file`; if not, create an empty directory there
    if not os.path.exists(file):
        os.makedirs(file)
def testjpg(image):
    # strip the file extension to get the image stem (handles .jpg, .jpeg, etc.)
    return os.path.splitext(image)[0]
file_path = 'mydata'
list1 = ['images', 'labels']
for cla in list1:
    mkfile('./mydata/' + cla + '/train')
    mkfile('./mydata/' + cla + '/val')
split_rate = 0.2  # fraction of images held out for the val split
path1 = file_path + '/Yoloimages/'
path2 = file_path + '/Yololabels/'
images = os.listdir(path1)
num = len(images)
eval_index = random.sample(images, k=int(num * split_rate))  # randomly sample k images to form the val set
for image in images:  # assign each image (and its matching label file) to val or train
if image in eval_index:
image_path = path1 + image
s_image = testjpg(image)
text_path = path2 + s_image + '.txt'
new_path1 = 'mydata/images/val/'
new_path2 = 'mydata/labels/val/'
copy(image_path, new_path1)
copy(text_path, new_path2)  # copy from the source path into the new split directory
else:
image_path = path1 + image
s_image = testjpg(image)
text_path = path2 + s_image + '.txt'
new_path1 = 'mydata/images/train/'
new_path2 = 'mydata/labels/train/'
copy(image_path, new_path1)
copy(text_path, new_path2)
print('All files processed')
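
The script above assumes a flat source layout (the Yoloimages/Yololabels directory names are the script's own convention):

# mydata/
# ├── Yoloimages/   # all source .jpg/.jpeg images
# └── Yololabels/   # one YOLO-format .txt label per image
#
# After running, roughly 20% of image/label pairs are copied to mydata/{images,labels}/val
# and the remaining 80% to mydata/{images,labels}/train.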

@ -0,0 +1,254 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Run inference on images, videos, directories, streams, etc.
Usage - sources:
$ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
path/ # directory
path/*.jpg # glob
'https://youtu.be/Zgi9g1ksQHc' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python path/to/detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s.xml # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
"""
import argparse
import os
import sys
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn
import unit7
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
@torch.no_grad()
def run(
weights=ROOT / 'yolov5s.pt', # model.pt path(s)
source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / 'runs/detect',  # save results to project/name
name='exp', # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
):
source = str(source)
save_img = not nosave and not source.endswith('.txt') # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
bs = len(dataset) # batch_size
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset:
t1 = time_sync()
im = torch.from_numpy(im).to(device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
t2 = time_sync()
dt[0] += t2 - t1
# Inference
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
t3 = time_sync()
dt[1] += t3 - t2
# NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
dt[2] += time_sync() - t3
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f'{i}: '
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
s += '%gx%g ' % im.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
annotator.box_label(xyxy, label, color=colors(c, True))
if save_crop:
save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
# Stream results
im0 = annotator.result()
if view_img:
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
print(im0.shape)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
# Print results
t = tuple(x / seen * 1E3 for x in dt) # speeds per image
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
def parse_opt(filepath, modelpath, savepath):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default=modelpath, help='model path(s)')
parser.add_argument('--source', type=str, default=filepath, help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--visualize', action='store_true', help='visualize features')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default=savepath, help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def init(opt):
print(ROOT)
check_requirements(exclude=('tensorboard', 'thop'))
run(**vars(opt))
def main(filename, modelname, savename):
opt = parse_opt(filename, modelname, savename)
init(opt)
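
A hypothetical call into this fork's programmatic entry point (the paths are illustrative; parse_opt maps them to --source, --weights and --project respectively):

if __name__ == "__main__":
    main('data/images/bus.jpg', 'yolov5s.pt', 'runs/detect')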

@ -0,0 +1,596 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
Format | `export.py --include` | Model
--- | --- | ---
PyTorch | - | yolov5s.pt
TorchScript | `torchscript` | yolov5s.torchscript
ONNX | `onnx` | yolov5s.onnx
OpenVINO | `openvino` | yolov5s_openvino_model/
TensorRT | `engine` | yolov5s.engine
CoreML | `coreml` | yolov5s.mlmodel
TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/
TensorFlow GraphDef | `pb` | yolov5s.pb
TensorFlow Lite | `tflite` | yolov5s.tflite
TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite
TensorFlow.js | `tfjs` | yolov5s_web_model/
Requirements:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
Usage:
$ python path/to/export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...
Inference:
$ python path/to/detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s.xml # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
TensorFlow.js:
$ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
$ npm install
$ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model
$ npm start
"""
import argparse
import json
import os
import platform
import subprocess
import sys
import time
import warnings
from pathlib import Path
import pandas as pd
import torch
from torch.utils.mobile_optimizer import optimize_for_mobile
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != 'Windows':
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.experimental import attempt_load
from models.yolo import Detect
from utils.datasets import LoadImages
from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_version, colorstr,
file_size, print_args, url2file)
from utils.torch_utils import select_device
def export_formats():
# YOLOv5 export formats
x = [
['PyTorch', '-', '.pt', True],
['TorchScript', 'torchscript', '.torchscript', True],
['ONNX', 'onnx', '.onnx', True],
['OpenVINO', 'openvino', '_openvino_model', False],
['TensorRT', 'engine', '.engine', True],
['CoreML', 'coreml', '.mlmodel', False],
['TensorFlow SavedModel', 'saved_model', '_saved_model', True],
['TensorFlow GraphDef', 'pb', '.pb', True],
['TensorFlow Lite', 'tflite', '.tflite', False],
['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False],
['TensorFlow.js', 'tfjs', '_web_model', False],]
return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'GPU'])
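# e.g. export_formats()['Argument'].tolist() -> ['-', 'torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs']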
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
# YOLOv5 TorchScript model export
try:
LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
f = file.with_suffix('.torchscript')
ts = torch.jit.trace(model, im, strict=False)
d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
extra_files = {'config.txt': json.dumps(d)} # torch._C.ExtraFilesMap()
if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
else:
ts.save(str(f), _extra_files=extra_files)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'{prefix} export failure: {e}')
def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')):
# YOLOv5 ONNX export
try:
check_requirements(('onnx',))
import onnx
LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
f = file.with_suffix('.onnx')
torch.onnx.export(
model,
im,
f,
verbose=False,
opset_version=opset,
training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
do_constant_folding=not train,
input_names=['images'],
output_names=['output'],
dynamic_axes={
'images': {
0: 'batch',
2: 'height',
3: 'width'}, # shape(1,3,640,640)
'output': {
0: 'batch',
1: 'anchors'} # shape(1,25200,85)
} if dynamic else None)
# Checks
model_onnx = onnx.load(f) # load onnx model
onnx.checker.check_model(model_onnx) # check onnx model
# Metadata
d = {'stride': int(max(model.stride)), 'names': model.names}
for k, v in d.items():
meta = model_onnx.metadata_props.add()
meta.key, meta.value = k, str(v)
onnx.save(model_onnx, f)
# Simplify
if simplify:
try:
check_requirements(('onnx-simplifier',))
import onnxsim
LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
model_onnx, check = onnxsim.simplify(model_onnx,
dynamic_input_shape=dynamic,
input_shapes={'images': list(im.shape)} if dynamic else None)
assert check, 'assert check failed'
onnx.save(model_onnx, f)
except Exception as e:
LOGGER.info(f'{prefix} simplifier failure: {e}')
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'{prefix} export failure: {e}')
def export_openvino(model, im, file, prefix=colorstr('OpenVINO:')):
# YOLOv5 OpenVINO export
try:
check_requirements(('openvino-dev',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/
import openvino.inference_engine as ie
LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
f = str(file).replace('.pt', '_openvino_model' + os.sep)
cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f}"
subprocess.check_output(cmd, shell=True)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
# YOLOv5 CoreML export
try:
check_requirements(('coremltools',))
import coremltools as ct
LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
f = file.with_suffix('.mlmodel')
ts = torch.jit.trace(model, im, strict=False) # TorchScript model
ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
if bits < 32:
if platform.system() == 'Darwin': # quantization only supported on macOS
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress numpy==1.20 float warning
ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
else:
print(f'{prefix} quantization only supported on macOS, skipping...')
ct_model.save(f)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return ct_model, f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
return None, None
def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
# YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
try:
assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
check_requirements(('nvidia-tensorrt',), cmds=('-U --index-url https://pypi.ngc.nvidia.com',))
import tensorrt as trt
if trt.__version__[0] == '7': # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
grid = model.model[-1].anchor_grid
model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
export_onnx(model, im, file, 12, train, False, simplify) # opset 12
model.model[-1].anchor_grid = grid
else: # TensorRT >= 8
check_version(trt.__version__, '8.0.0', hard=True) # require tensorrt>=8.0.0
export_onnx(model, im, file, 13, train, False, simplify) # opset 13
onnx = file.with_suffix('.onnx')
LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
assert onnx.exists(), f'failed to export ONNX file: {onnx}'
f = file.with_suffix('.engine') # TensorRT engine file
logger = trt.Logger(trt.Logger.INFO)
if verbose:
logger.min_severity = trt.Logger.Severity.VERBOSE
builder = trt.Builder(logger)
config = builder.create_builder_config()
config.max_workspace_size = workspace * 1 << 30
# config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice
flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
network = builder.create_network(flag)
parser = trt.OnnxParser(network, logger)
if not parser.parse_from_file(str(onnx)):
raise RuntimeError(f'failed to load ONNX file: {onnx}')
inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]
LOGGER.info(f'{prefix} Network Description:')
for inp in inputs:
LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
for out in outputs:
LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
if builder.platform_has_fast_fp16:
config.set_flag(trt.BuilderFlag.FP16)
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
t.write(engine.serialize())
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
def export_saved_model(model,
im,
file,
dynamic,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25,
keras=False,
prefix=colorstr('TensorFlow SavedModel:')):
# YOLOv5 TensorFlow SavedModel export
try:
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
from models.tf import TFDetect, TFModel
LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
f = str(file).replace('.pt', '_saved_model')
batch_size, ch, *imgsz = list(im.shape) # BCHW
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
im = tf.zeros((batch_size, *imgsz, ch)) # BHWC order for TensorFlow
_ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
keras_model.trainable = False
keras_model.summary()
if keras:
keras_model.save(f, save_format='tf')
else:
spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)
m = tf.function(lambda x: keras_model(x)) # full model
m = m.get_concrete_function(spec)
frozen_func = convert_variables_to_constants_v2(m)
tfm = tf.Module()
tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x)[0], [spec])
tfm.__call__(im)
tf.saved_model.save(tfm,
f,
options=tf.saved_model.SaveOptions(experimental_custom_gradients=False)
if check_version(tf.__version__, '2.6') else tf.saved_model.SaveOptions())
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return keras_model, f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
return None, None
def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
# YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow
try:
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
f = file.with_suffix('.pb')
m = tf.function(lambda x: keras_model(x)) # full model
m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
frozen_func = convert_variables_to_constants_v2(m)
frozen_func.graph.as_graph_def()
tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
# YOLOv5 TensorFlow Lite export
try:
import tensorflow as tf
LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
batch_size, ch, *imgsz = list(im.shape) # BCHW
f = str(file).replace('.pt', '-fp16.tflite')
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.supported_types = [tf.float16]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
if int8:
from models.tf import representative_dataset_gen
dataset = LoadImages(check_dataset(data)['train'], img_size=imgsz, auto=False) # representative data
converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.target_spec.supported_types = []
converter.inference_input_type = tf.uint8 # or tf.int8
converter.inference_output_type = tf.uint8 # or tf.int8
converter.experimental_new_quantizer = True
f = str(file).replace('.pt', '-int8.tflite')
if nms or agnostic_nms:
converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)
tflite_model = converter.convert()
open(f, "wb").write(tflite_model)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
def export_edgetpu(keras_model, im, file, prefix=colorstr('Edge TPU:')):
# YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
try:
cmd = 'edgetpu_compiler --version'
help_url = 'https://coral.ai/docs/edgetpu/compiler/'
assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
if subprocess.run(cmd + ' >/dev/null', shell=True).returncode != 0:
LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0 # sudo installed on system
for c in (
'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'):
subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True)
ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
f = str(file).replace('.pt', '-int8_edgetpu.tflite') # Edge TPU model
f_tfl = str(file).replace('.pt', '-int8.tflite') # TFLite model
cmd = f"edgetpu_compiler -s -o {file.parent} {f_tfl}"
subprocess.run(cmd, shell=True, check=True)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
# YOLOv5 TensorFlow.js export
try:
check_requirements(('tensorflowjs',))
import re
import tensorflowjs as tfjs
LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
f = str(file).replace('.pt', '_web_model') # js dir
f_pb = file.with_suffix('.pb') # *.pb path
f_json = f + '/model.json' # *.json path
cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \
f'--output_node_names="Identity,Identity_1,Identity_2,Identity_3" {f_pb} {f}'
subprocess.run(cmd, shell=True)
with open(f_json) as j:
json = j.read()
with open(f_json, 'w') as j: # sort JSON Identity_* in ascending order
subst = re.sub(
r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
r'"Identity.?.?": {"name": "Identity.?.?"}, '
r'"Identity.?.?": {"name": "Identity.?.?"}, '
r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, '
r'"Identity_1": {"name": "Identity_1"}, '
r'"Identity_2": {"name": "Identity_2"}, '
r'"Identity_3": {"name": "Identity_3"}}}', json)
j.write(subst)
LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
return f
except Exception as e:
LOGGER.info(f'\n{prefix} export failure: {e}')
@torch.no_grad()
def run(
data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=(640, 640), # image (height, width)
batch_size=1, # batch size
device='cpu', # cuda device, i.e. 0 or 0,1,2,3 or cpu
include=('torchscript', 'onnx'), # include formats
half=False, # FP16 half-precision export
inplace=False, # set YOLOv5 Detect() inplace=True
train=False, # model.train() mode
optimize=False, # TorchScript: optimize for mobile
int8=False, # CoreML/TF INT8 quantization
dynamic=False, # ONNX/TF: dynamic axes
simplify=False, # ONNX: simplify model
opset=12, # ONNX: opset version
verbose=False, # TensorRT: verbose log
workspace=4, # TensorRT: workspace size (GB)
nms=False, # TF: add NMS to model
agnostic_nms=False, # TF: add agnostic NMS to model
topk_per_class=100, # TF.js NMS: topk per class to keep
topk_all=100, # TF.js NMS: topk for all classes to keep
iou_thres=0.45, # TF.js NMS: IoU threshold
conf_thres=0.25, # TF.js NMS: confidence threshold
):
t = time.time()
include = [x.lower() for x in include] # to lowercase
formats = tuple(export_formats()['Argument'][1:]) # --include arguments
flags = [x in include for x in formats]
assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {formats}'
jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = flags # export booleans
file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights) # PyTorch weights
# Load PyTorch model
device = select_device(device)
if half:
assert device.type != 'cpu' or coreml, '--half only compatible with GPU export, i.e. use --device 0'
model = attempt_load(weights, map_location=device, inplace=True, fuse=True) # load FP32 model
nc, names = model.nc, model.names # number of classes, class names
# Checks
imgsz *= 2 if len(imgsz) == 1 else 1 # expand
assert nc == len(names), f'Model class count {nc} != len(names) {len(names)}'
# Input
gs = int(max(model.stride)) # grid size (max stride)
imgsz = [check_img_size(x, gs) for x in imgsz] # verify img_size are gs-multiples
im = torch.zeros(batch_size, 3, *imgsz).to(device) # image size(1,3,320,192) BCHW iDetection
# Update model
if half and not coreml:
im, model = im.half(), model.half() # to FP16
model.train() if train else model.eval() # training mode = no Detect() layer grid construction
for k, m in model.named_modules():
if isinstance(m, Detect):
m.inplace = inplace
m.onnx_dynamic = dynamic
m.export = True
for _ in range(2):
y = model(im) # dry runs
shape = tuple(y[0].shape) # model output shape
LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")
# Exports
f = [''] * 10 # exported filenames
warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) # suppress TracerWarning
if jit:
f[0] = export_torchscript(model, im, file, optimize)
if engine: # TensorRT required before ONNX
f[1] = export_engine(model, im, file, train, half, simplify, workspace, verbose)
if onnx or xml: # OpenVINO requires ONNX
f[2] = export_onnx(model, im, file, opset, train, dynamic, simplify)
if xml: # OpenVINO
f[3] = export_openvino(model, im, file)
if coreml:
_, f[4] = export_coreml(model, im, file, int8, half)
# TensorFlow Exports
if any((saved_model, pb, tflite, edgetpu, tfjs)):
if int8 or edgetpu: # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
check_requirements(('flatbuffers==1.12',)) # required before `import tensorflow`
assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
model, f[5] = export_saved_model(model.cpu(),
im,
file,
dynamic,
tf_nms=nms or agnostic_nms or tfjs,
agnostic_nms=agnostic_nms or tfjs,
topk_per_class=topk_per_class,
topk_all=topk_all,
conf_thres=conf_thres,
iou_thres=iou_thres) # keras model
if pb or tfjs: # pb prerequisite to tfjs
f[6] = export_pb(model, im, file)
if tflite or edgetpu:
f[7] = export_tflite(model, im, file, int8=int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms)
if edgetpu:
f[8] = export_edgetpu(model, im, file)
if tfjs:
f[9] = export_tfjs(model, im, file)
# Finish
f = [str(x) for x in f if x] # filter out '' and None
if any(f):
LOGGER.info(f'\nExport complete ({time.time() - t:.2f}s)'
f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
f"\nDetect: python detect.py --weights {f[-1]}"
f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}')"
f"\nValidate: python val.py --weights {f[-1]}"
f"\nVisualize: https://netron.app")
return f # return list of exported files/dirs
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
parser.add_argument('--train', action='store_true', help='model.train() mode')
parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')
parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')
parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
parser.add_argument('--nms', action='store_true', help='TF: add NMS to model')
parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
parser.add_argument('--include',
nargs='+',
default=['torchscript', 'onnx'],
help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs')
opt = parser.parse_args()
print_args(vars(opt))
return opt
def main(opt):
for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]):
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
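A minimal usage sketch for this export script, assuming a local yolov5s.pt checkpoint (paths and formats here are illustrative, not prescriptive):
# CLI: python export.py --weights yolov5s.pt --include torchscript onnx --imgsz 640 640
from export import run
run(weights='yolov5s.pt', include=('onnx',), imgsz=(640, 640), device='cpu')  # returns list of exported files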

@ -0,0 +1,145 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/
Usage:
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx') # file from branch
"""
import torch
def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
"""Creates or loads a YOLOv5 model
Arguments:
name (str): model name 'yolov5s' or path 'path/to/best.pt'
pretrained (bool): load pretrained weights into the model
channels (int): number of input channels
classes (int): number of model classes
autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
verbose (bool): print all information to screen
device (str, torch.device, None): device to use for model parameters
Returns:
YOLOv5 model
"""
from pathlib import Path
from models.common import AutoShape, DetectMultiBackend
from models.yolo import Model
from utils.downloads import attempt_download
from utils.general import LOGGER, check_requirements, intersect_dicts, logging
from utils.torch_utils import select_device
if not verbose:
LOGGER.setLevel(logging.WARNING)
check_requirements(exclude=('tensorboard', 'thop', 'opencv-python'))
name = Path(name)
path = name.with_suffix('.pt') if name.suffix == '' else name # checkpoint path
try:
device = select_device(('0' if torch.cuda.is_available() else 'cpu') if device is None else device)
if pretrained and channels == 3 and classes == 80:
model = DetectMultiBackend(path, device=device) # download/load FP32 model
# model = models.experimental.attempt_load(path, map_location=device) # download/load FP32 model
else:
cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path
model = Model(cfg, channels, classes) # create model
if pretrained:
ckpt = torch.load(attempt_download(path), map_location=device) # load
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect
model.load_state_dict(csd, strict=False) # load
if len(ckpt['model'].names) == classes:
model.names = ckpt['model'].names # set class names attribute
if autoshape:
model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS
return model.to(device)
except Exception as e:
help_url = 'https://github.com/ultralytics/yolov5/issues/36'
s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.'
raise Exception(s) from e
def custom(path='path/to/model.pt', autoshape=True, verbose=True, device=None):
# YOLOv5 custom or local model
return _create(path, autoshape=autoshape, verbose=verbose, device=device)
def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-nano model https://github.com/ultralytics/yolov5
return _create('yolov5n', pretrained, channels, classes, autoshape, verbose, device)
def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-small model https://github.com/ultralytics/yolov5
return _create('yolov5s', pretrained, channels, classes, autoshape, verbose, device)
def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-medium model https://github.com/ultralytics/yolov5
return _create('yolov5m', pretrained, channels, classes, autoshape, verbose, device)
def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-large model https://github.com/ultralytics/yolov5
return _create('yolov5l', pretrained, channels, classes, autoshape, verbose, device)
def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-xlarge model https://github.com/ultralytics/yolov5
return _create('yolov5x', pretrained, channels, classes, autoshape, verbose, device)
def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-nano-P6 model https://github.com/ultralytics/yolov5
return _create('yolov5n6', pretrained, channels, classes, autoshape, verbose, device)
def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-small-P6 model https://github.com/ultralytics/yolov5
return _create('yolov5s6', pretrained, channels, classes, autoshape, verbose, device)
def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-medium-P6 model https://github.com/ultralytics/yolov5
return _create('yolov5m6', pretrained, channels, classes, autoshape, verbose, device)
def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-large-P6 model https://github.com/ultralytics/yolov5
return _create('yolov5l6', pretrained, channels, classes, autoshape, verbose, device)
def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
# YOLOv5-xlarge-P6 model https://github.com/ultralytics/yolov5
return _create('yolov5x6', pretrained, channels, classes, autoshape, verbose, device)
if __name__ == '__main__':
model = _create(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True, verbose=True) # pretrained
# model = custom(path='path/to/model.pt') # custom
# Verify inference
from pathlib import Path
import numpy as np
from PIL import Image
from utils.general import cv2
imgs = [
'data/images/zidane.jpg', # filename
Path('data/images/zidane.jpg'), # Path
'https://ultralytics.com/images/zidane.jpg', # URI
cv2.imread('data/images/bus.jpg')[:, :, ::-1], # OpenCV
Image.open('data/images/bus.jpg'), # PIL
np.zeros((320, 640, 3))] # numpy
results = model(imgs, size=320) # batched inference
results.print()
results.save()
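As a side note, the Detections object returned above also exposes tabular and per-image views; a short sketch (method names come from the Detections class later in this commit):
results.pandas().xyxy[0]  # pandas DataFrame: xmin, ymin, xmax, ymax, confidence, class, name
for r in results.tolist():  # one Detections object per image
r.print()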

@ -0,0 +1,402 @@
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtCore import Qt
from PyQt5.Qt import QApplication, QWidget, QThread
import numpy as np
import ui
import unit1
import unit2
import unit3
import unit4
import unit5
import unit6
import unit7
class MainDialog(QMainWindow):
def __init__(self, parent=None):
super(MainDialog, self).__init__(parent)
self.ui = ui.Ui_MainWindow()
self.ui.setupUi(self)
self.setWindowTitle('CV lrioxh')
self.m_drag = False
self.img = np.ndarray(())
self.imgOrg = np.ndarray(())
self.imgShow = np.ndarray(())
self.fname = ''
self.w = 0
self.h = 0
self.c = 1
self.ui.pushButton.clicked.connect(self.unit1_img_load) # first button: choose the image for scene 1
self.ui.pushButton_2.clicked.connect(self.unit1_img_reset) # reset the image
self.ui.pushButton_3.clicked.connect(self.unit1_img_show)
self.ui.pushButton_4.clicked.connect(self.unit1_img_clear)
self.ui.pushButton_5.clicked.connect(self.unit1_img_save)
self.ui.pushButton_6.clicked.connect(self.trans_by_rate)
self.ui.pushButton_7.clicked.connect(self.trans_by_pixel)
self.ui.pushButton_8.clicked.connect(self.rotate)
self.ui.pushButton_9.clicked.connect(self.scale_by_rate)
self.ui.pushButton_10.clicked.connect(self.affine_trans)
unit2.init(self)
self.ui.pushButton_11.clicked.connect(self.unit2_img_load)
self.ui.pushButton_12.clicked.connect(self.unit2_globalH)
self.ui.pushButton_13.clicked.connect(self.unit2_localH)
self.ui.pushButton_14.clicked.connect(self.unit2_img_showNew)
self.ui.pushButton_15.clicked.connect(self.unit2_eqHist)
self.ui.pushButton_16.clicked.connect(self.unit2_clahe)
self.ui.pushButton_17.clicked.connect(self.unit2_clear)
self.ui.pushButton_18.clicked.connect(self.unit2_img_reset)
self.ui.pushButton_47.clicked.connect(self.unit2_img_save)
unit3.init(self)
self.ui.pushButton_64.clicked.connect(self.unit3_img_left_load1)
self.ui.pushButton_66.clicked.connect(self.unit3_img_left_load2)
self.ui.pushButton_21.clicked.connect(self.unit3_ADD)
self.ui.pushButton_19.clicked.connect(self.unit3_SUB)
self.ui.pushButton_24.clicked.connect(self.unit3_MULTI)
self.ui.pushButton_22.clicked.connect(self.unit3_DIVIDE)
self.ui.pushButton_20.clicked.connect(self.unit3_AND)
self.ui.pushButton_48.clicked.connect(self.unit3_OR)
self.ui.pushButton_49.clicked.connect(self.unit3_NOT)
self.ui.pushButton_57.clicked.connect(self.unit3_img_left_clear)
self.ui.pushButton_61.clicked.connect(self.unit3_img_left_save)
self.ui.pushButton_62.clicked.connect(self.unit3_img_left_show)
self.ui.pushButton_65.clicked.connect(self.unit3_img_right_load)
self.ui.pushButton_50.clicked.connect(self.unit3_erode)
self.ui.pushButton_51.clicked.connect(self.unit3_dilate)
self.ui.pushButton_52.clicked.connect(self.unit3_opening)
self.ui.pushButton_53.clicked.connect(self.unit3_closing)
self.ui.pushButton_55.clicked.connect(self.unit3_mean)
self.ui.pushButton_23.clicked.connect(self.unit3_guassian)
self.ui.pushButton_54.clicked.connect(self.unit3_Covfilter)
self.ui.pushButton_56.clicked.connect(self.unit3_median)
self.ui.pushButton_58.clicked.connect(self.unit3_img_right_clear)
self.ui.pushButton_59.clicked.connect(self.unit3_img_right_save)
self.ui.pushButton_63.clicked.connect(self.unit3_img_right_show)
self.ui.pushButton_67.clicked.connect(self.unit3_add_noise_Guass)
self.ui.pushButton_68.clicked.connect(self.unit3_add_noise_Jiaoyan)
self.ui.pushButton_60.clicked.connect(self.unit3_bilateralFilter)
unit4.init(self)
self.ui.pushButton_25.clicked.connect(self.unit4_img_load)
self.ui.pushButton_26.clicked.connect(self.unit4_action)
self.ui.pushButton_30.clicked.connect(self.unit4_img_clear)
self.ui.pushButton_27.clicked.connect(self.unit4_roberts_save)
self.ui.pushButton_28.clicked.connect(self.unit4_prewitt_save)
self.ui.pushButton_29.clicked.connect(self.unit4_sobel_save)
self.ui.pushButton_69.clicked.connect(self.unit4_laplacian_save)
self.ui.pushButton_70.clicked.connect(self.unit4_lough_save)
self.ui.pushButton_44.clicked.connect(self.unit4_log_save)
self.ui.pushButton_45.clicked.connect(self.unit4_canny_save)
unit5.init(self)
self.ui.pushButton_31.clicked.connect(self.unit5_img_load)
self.ui.pushButton_39.clicked.connect(self.unit5_img_clear)
self.ui.pushButton_37.clicked.connect(self.unit5_img_save)
self.ui.pushButton_40.clicked.connect(self.unit5_img_reset)
# self.ui.pushButton_32.clicked.connect(self.unit5_Butterworth)
self.ui.pushButton_35.clicked.connect(self.unit5_Gaussion)
self.ui.pushButton_33.clicked.connect(self.unit5_Idea)
self.ui.pushButton_36.clicked.connect(self.unit5_Laplacian)
self.ui.pushButton_46.clicked.connect(self.unit5_img_show)
unit6.init(self)
self.ui.pushButton_34.clicked.connect(self.unit6_img_load1)
self.ui.pushButton_38.clicked.connect(self.unit6_img_clear)
self.ui.pushButton_41.clicked.connect(self.unit6_img_load2)
self.ui.pushButton_43.clicked.connect(self.unit6_img_save)
self.ui.pushButton_42.clicked.connect(self.unit6_style_transfer)
unit7.init(self)
self.ui.pushButton_32.clicked.connect(self.unit7_img_load)
self.ui.pushButton_71.clicked.connect(self.unit7_model_load)
self.ui.pushButton_72.clicked.connect(self.unit7_object_detection)
self.ui.pushButton_73.clicked.connect(self.unit7_result_show)
self.ui.pushButton_74.clicked.connect(self.unit7_clear)
self.ui.pushButton_75.clicked.connect(self.unit7_result_save)
##unit7
def unit7_result_save(self):
return unit7.result_save(self)
def unit7_clear(self):
return unit7.clear(self)
def unit7_result_show(self):
return unit7.result_show(self)
def unit7_object_detection(self):
return unit7.object_detection(self)
def unit7_model_load(self):
return unit7.model_load(self)
def unit7_img_load(self):
return unit7.img_load(self)
##unit6
def unit6_style_transfer(self):
return unit6.style_transfer(self)
def unit6_img_save(self):
return unit6.img_save(self)
def unit6_img_load2(self):
return unit6.img_load2(self)
def unit6_img_clear(self):
return unit6.img_clear(self)
def unit6_img_load1(self):
return unit6.img_load1(self)
##unit5
def unit5_img_show(self):
return unit5.img_show(self)
def unit5_Laplacian(self):
return unit5.Laplacian(self)
def unit5_Idea(self):
return unit5.Idea(self)
def unit5_Gaussion(self):
return unit5.Gaussion(self)
# def unit5_Butterworth(self):
# return unit5.Butterworth(self)
def unit5_img_reset(self):
return unit5.img_reset(self)
def unit5_img_save(self):
return unit5.img_save(self)
def unit5_img_clear(self):
return unit5.img_clear(self)
def unit5_img_load(self):
return unit5.img_load(self)
##unit4
def unit4_canny_save(self):
return unit4.canny_save(self)
def unit4_log_save(self):
return unit4.log_save(self)
def unit4_lough_save(self):
return unit4.lough_save(self)
def unit4_laplacian_save(self):
return unit4.laplacian_save(self)
def unit4_sobel_save(self):
return unit4.sobel_save(self)
def unit4_prewitt_save(self):
return unit4.prewitt_save(self)
def unit4_roberts_save(self):
return unit4.roberts_save(self)
def unit4_img_clear(self):
return unit4.img_clear(self)
def unit4_action(self):
return unit4.action(self)
def unit4_img_load(self):
return unit4.img_load(self)
##unit3
def unit3_bilateralFilter(self):
return unit3.bilateralFilter(self)
def unit3_add_noise_Jiaoyan(self): # "Jiaoyan" = salt-and-pepper noise
return unit3.add_noise_Jiaoyan(self)
def unit3_add_noise_Guass(self):
return unit3.add_noise_Guass(self)
def unit3_img_left_load1(self):
return unit3.img_left_load1(self)
def unit3_img_left_load2(self):
return unit3.img_left_load2(self)
def unit3_ADD(self):
return unit3.ADD(self)
def unit3_SUB(self):
return unit3.SUB(self)
def unit3_MULTI(self):
return unit3.MULTI(self)
def unit3_DIVIDE(self):
return unit3.DIVIDE(self)
def unit3_AND(self):
return unit3.AND(self)
def unit3_OR(self):
return unit3.OR(self)
def unit3_NOT(self):
return unit3.NOT(self)
def unit3_img_left_clear(self):
return unit3.img_left_clear(self)
def unit3_img_left_save(self):
return unit3.img_left_save(self)
def unit3_img_left_show(self):
return unit3.img_left_show(self)
def unit3_img_right_load(self):
return unit3.img_right_load(self)
def unit3_erode(self):
return unit3.erode(self)
def unit3_dilate(self):
return unit3.dilate(self)
def unit3_opening(self):
return unit3.opening(self)
def unit3_closing(self):
return unit3.closing(self)
def unit3_mean(self):
return unit3.mean(self)
def unit3_guassian(self):
return unit3.guassian(self)
def unit3_Covfilter(self):
return unit3.Covfilter(self)
def unit3_median(self):
return unit3.median(self)
def unit3_img_right_clear(self):
return unit3.img_right_clear(self)
def unit3_img_right_save(self):
return unit3.img_right_save(self)
def unit3_img_right_show(self):
return unit3.img_right_show(self)
### U2
def unit2_img_load(self):
return unit2.unit2_img_load(self)
def unit2_img_reset(self):
return unit2.unit2_img_reset(self)
def unit2_img_showNew(self):
return unit2.unit2_img_showNew(self)
def unit2_img_save(self):
return unit2.unit2_img_save(self)
def unit2_clahe(self):
return unit2.clahe(self)
def unit2_eqHist(self):
return unit2.eqHist(self)
def unit2_globalH(self):
return unit2.globalH(self)
def unit2_localH(self):
return unit2.localH(self)
def unit2_enhance(self):
return unit2.enhance(self)
def unit2_clear(self):
return unit2.unit2_clear(self)
### U1
def unit1_img_load(self):
return unit1.unit1_img_load(self)
def unit1_img_reset(self):
return unit1.unit1_img_reset(self)
def unit1_img_show(self):
return unit1.unit1_img_show(self)
def unit1_img_clear(self):
return unit1.unit1_img_clear(self)
def unit1_img_save(self):
return unit1.unit1_img_save(self)
def trans_by_rate(self):
return unit1.trans_by_rate(self)
def trans_by_pixel(self):
return unit1.trans_by_pixel(self)
def scale_by_rate(self):
return unit1.scale_by_rate(self)
def rotate(self):
return unit1.rotate(self)
def affine_trans(self):
return unit1.affine_trans(self)
###
def mouseReleaseEvent(self, e):
if e.button() == Qt.LeftButton:
if self.ui.tabWidget.currentIndex() == 0:
return unit1.mouseReleaseEvent(self,e)
if self.ui.tabWidget.currentIndex() == 1:
return unit2.mouseReleaseEvent(self,e)
if self.ui.tabWidget.currentIndex() == 2:
return unit3.mouseReleaseEvent(self,e)
def mousePressEvent(self, e):
if e.button() == Qt.LeftButton:
if self.ui.tabWidget.currentIndex() == 1:
return unit2.mousePressEvent(self,e)
if self.ui.tabWidget.currentIndex() == 2:
return unit3.mousePressEvent(self,e)
def mouseMoveEvent(self, e):
if e.buttons() & Qt.LeftButton and self.m_drag:
if self.ui.tabWidget.currentIndex() == 1:
return unit2.mouseMoveEvent(self,e)
if self.ui.tabWidget.currentIndex() == 2:
return unit3.mouseMoveEvent(self,e)
if __name__ == '__main__':
app = QApplication(sys.argv)
MainWindow = QMainWindow()
Dlg = MainDialog()
Dlg.show()
sys.exit(app.exec_())

@ -0,0 +1,160 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
from collections import defaultdict
from utils import *
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class VGG(nn.Module):
def __init__(self, features):
super(VGG, self).__init__()
self.features = features
self.layer_name_mapping = {
'3': "relu1_2",
'8': "relu2_2",
'15': "relu3_3",
'22': "relu4_3"
}
for p in self.parameters():
p.requires_grad = False
def forward(self, x):
outs = []
for name, module in self.features._modules.items():
x = module(x)
if name in self.layer_name_mapping:
outs.append(x)
return outs
class MyConv2D(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1):
super(MyConv2D, self).__init__()
self.weight = torch.zeros((out_channels, in_channels, kernel_size, kernel_size)).to(device)
self.bias = torch.zeros(out_channels).to(device)
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = (kernel_size, kernel_size)
self.stride = (stride, stride)
def forward(self, x):
return F.conv2d(x, self.weight, self.bias, self.stride)
def extra_repr(self):
s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
', stride={stride}')
return s.format(**self.__dict__)
class ResidualBlock(nn.Module):
def __init__(self, channels):
super(ResidualBlock, self).__init__()
self.conv = nn.Sequential(
*ConvLayer(channels, channels, kernel_size=3, stride=1),
*ConvLayer(channels, channels, kernel_size=3, stride=1, relu=False)
)
def forward(self, x):
return self.conv(x) + x
def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1,
upsample=None, instance_norm=True, relu=True, trainable=False):
layers = []
if upsample:
layers.append(nn.Upsample(mode='nearest', scale_factor=upsample))
layers.append(nn.ReflectionPad2d(kernel_size // 2))
if trainable:
layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride))
else:
layers.append(MyConv2D(in_channels, out_channels, kernel_size, stride))
if instance_norm:
layers.append(nn.InstanceNorm2d(out_channels))
if relu:
layers.append(nn.ReLU())
return layers
class TransformNet(nn.Module):
def __init__(self, base=8):
super(TransformNet, self).__init__()
self.base = base
self.weights = []
self.downsampling = nn.Sequential(
*ConvLayer(3, base, kernel_size=9, trainable=True),
*ConvLayer(base, base*2, kernel_size=3, stride=2),
*ConvLayer(base*2, base*4, kernel_size=3, stride=2),
)
self.residuals = nn.Sequential(*[ResidualBlock(base*4) for i in range(5)])
self.upsampling = nn.Sequential(
*ConvLayer(base*4, base*2, kernel_size=3, upsample=2),
*ConvLayer(base*2, base, kernel_size=3, upsample=2),
*ConvLayer(base, 3, kernel_size=9, instance_norm=False, relu=False, trainable=True),
)
self.get_param_dict()
def forward(self, X):
y = self.downsampling(X)
y = self.residuals(y)
y = self.upsampling(y)
return y
def get_param_dict(self):
"""找出该网络所有 MyConv2D 层,计算它们需要的权值数量"""
param_dict = defaultdict(int)
def dfs(module, name):
for name2, layer in module.named_children():
dfs(layer, '%s.%s' % (name, name2) if name != '' else name2)
if module.__class__ == MyConv2D:
param_dict[name] += int(np.prod(module.weight.shape))
param_dict[name] += int(np.prod(module.bias.shape))
dfs(self, '')
return param_dict
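# (for reference, param_dict maps names like 'residuals.0.conv.1' to out_ch*in_ch*k*k + out_ch)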
def set_my_attr(self, name, value):
# walk a dotted name like residuals.0.conv.1 step by step to locate the target layer
target = self
for x in name.split('.'):
if x.isnumeric():
target = target.__getitem__(int(x))
else:
target = getattr(target, x)
# assign the corresponding weight and bias slices
n_weight = np.prod(target.weight.shape)
target.weight = value[:n_weight].view(target.weight.shape)
target.bias = value[n_weight:].view(target.bias.shape)
def set_weights(self, weights, i=0):
"""输入权值字典,对该网络所有的 MyConv2D 层设置权值"""
for name, param in weights.items():
self.set_my_attr(name, weights[name][i])
class MetaNet(nn.Module):
def __init__(self, param_dict):
super(MetaNet, self).__init__()
self.param_num = len(param_dict)
self.hidden = nn.Linear(1920, 128*self.param_num)
self.fc_dict = {}
for i, (name, params) in enumerate(param_dict.items()):
self.fc_dict[name] = i
setattr(self, 'fc{}'.format(i+1), nn.Linear(128, params))
def forward(self, mean_std_features):
hidden = F.relu(self.hidden(mean_std_features))
filters = {}
for name, i in self.fc_dict.items():
fc = getattr(self, 'fc{}'.format(i+1))
filters[name] = fc(hidden[:,i*128:(i+1)*128])
return filters
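A sketch of how MetaNet drives TransformNet, assuming mean_std_features is a (batch, 1920) tensor of per-channel means and stds from the four VGG outputs (2 x (64 + 128 + 256 + 512) = 1920); base=32 is an illustrative choice:
transform_net = TransformNet(base=32).to(device)
metanet = MetaNet(transform_net.get_param_dict()).to(device)
weights = metanet(mean_std_features)  # dict: MyConv2D layer name -> flat weight/bias tensor
transform_net.set_weights(weights, i=0)  # load the predicted weights for sample i
stylized = transform_net(content_images)  # (batch, 3, H, W) stylized output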

@ -0,0 +1,700 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""
import json
import math
import platform
import warnings
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
import yaml
from PIL import Image
from torch.cuda import amp
from utils.datasets import exif_transpose, letterbox
from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, time_sync
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
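# e.g. autopad(3) -> 1 and autopad((3, 5)) -> [1, 2], giving 'same' spatial size at stride 1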
class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
class DWConv(Conv):
# Depth-wise convolution class
def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2).permute(2, 0, 1)
return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
# self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
# C3 module with SPP()
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
class C3Ghost(C3):
# C3 module with GhostBottleneck()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
def __init__(self, c1, c2, k=(5, 9, 13)):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
# return self.conv(self.contract(x))
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat((y, self.cv2(y)), 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(
GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert h % s == 0 and w % s == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert c % s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super().__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
# Usage:
# PyTorch: weights = *.pt
# TorchScript: *.torchscript
# ONNX Runtime: *.onnx
# ONNX OpenCV DNN: *.onnx with --dnn
# OpenVINO: *.xml
# CoreML: *.mlmodel
# TensorRT: *.engine
# TensorFlow SavedModel: *_saved_model
# TensorFlow GraphDef: *.pb
# TensorFlow Lite: *.tflite
# TensorFlow Edge TPU: *_edgetpu.tflite
from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
stride, names = 32, [f'class{i}' for i in range(1000)] # assign defaults
w = attempt_download(w) # download if not local
fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
if data: # data.yaml path (optional)
with open(data, errors='ignore') as f:
names = yaml.safe_load(f)['names'] # class names
if pt: # PyTorch
model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files)
model.half() if fp16 else model.float()
if extra_files['config.txt']:
d = json.loads(extra_files['config.txt']) # extra_files dict
stride, names = int(d['stride']), d['names']
elif dnn: # ONNX OpenCV DNN
LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
check_requirements(('opencv-python>=4.5.4',))
net = cv2.dnn.readNetFromONNX(w)
elif onnx: # ONNX Runtime
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
cuda = torch.cuda.is_available()
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
import onnxruntime
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = onnxruntime.InferenceSession(w, providers=providers)
meta = session.get_modelmeta().custom_metadata_map # metadata
if 'stride' in meta:
stride, names = int(meta['stride']), eval(meta['names'])
elif xml: # OpenVINO
LOGGER.info(f'Loading {w} for OpenVINO inference...')
check_requirements(('openvino-dev',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/
import openvino.inference_engine as ie
core = ie.IECore()
if not Path(w).is_file(): # if not *.xml
w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
network = core.read_network(model=w, weights=Path(w).with_suffix('.bin')) # *.xml, *.bin paths
executable_network = core.load_network(network, device_name='CPU', num_requests=1)
elif engine: # TensorRT
LOGGER.info(f'Loading {w} for TensorRT inference...')
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read())
bindings = OrderedDict()
fp16 = False # default updated below
for index in range(model.num_bindings):
name = model.get_binding_name(index)
dtype = trt.nptype(model.get_binding_dtype(index))
shape = tuple(model.get_binding_shape(index))
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
if model.binding_is_input(index) and dtype == np.float16:
fp16 = True
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
context = model.create_execution_context()
batch_size = bindings['images'].shape[0]
elif coreml: # CoreML
LOGGER.info(f'Loading {w} for CoreML inference...')
import coremltools as ct
model = ct.models.MLModel(w)
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
if saved_model: # SavedModel
LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
import tensorflow as tf
keras = False # assume TF1 saved_model
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
import tensorflow as tf
def wrap_frozen_graph(gd, inputs, outputs):
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
ge = x.graph.as_graph_element
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
gd = tf.Graph().as_graph_def() # graph_def
with open(w, 'rb') as f:
gd.ParseFromString(f.read())
frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
from tflite_runtime.interpreter import Interpreter, load_delegate
except ImportError:
import tensorflow as tf
Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime
LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
delegate = {
'Linux': 'libedgetpu.so.1',
'Darwin': 'libedgetpu.1.dylib',
'Windows': 'edgetpu.dll'}[platform.system()]
interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
else: # Lite
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
interpreter = Interpreter(model_path=w) # load TFLite model
interpreter.allocate_tensors() # allocate
input_details = interpreter.get_input_details() # inputs
output_details = interpreter.get_output_details() # outputs
elif tfjs:
raise Exception('ERROR: YOLOv5 TF.js inference is not supported')
self.__dict__.update(locals()) # assign all variables to self
def forward(self, im, augment=False, visualize=False, val=False):
# YOLOv5 MultiBackend inference
b, ch, h, w = im.shape # batch, channel, height, width
if self.pt: # PyTorch
y = self.model(im, augment=augment, visualize=visualize)[0]
elif self.jit: # TorchScript
y = self.model(im)[0]
elif self.dnn: # ONNX OpenCV DNN
im = im.cpu().numpy() # torch to numpy
self.net.setInput(im)
y = self.net.forward()
elif self.onnx: # ONNX Runtime
im = im.cpu().numpy() # torch to numpy
y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
elif self.xml: # OpenVINO
im = im.cpu().numpy() # FP32
desc = self.ie.TensorDesc(precision='FP32', dims=im.shape, layout='NCHW') # Tensor Description
request = self.executable_network.requests[0] # inference request
request.set_blob(blob_name='images', blob=self.ie.Blob(desc, im)) # name=next(iter(request.input_blobs))
request.infer()
y = request.output_blobs['output'].buffer # name=next(iter(request.output_blobs))
elif self.engine: # TensorRT
assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
self.binding_addrs['images'] = int(im.data_ptr())
self.context.execute_v2(list(self.binding_addrs.values()))
y = self.bindings['output'].data
elif self.coreml: # CoreML
im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
im = Image.fromarray((im[0] * 255).astype('uint8'))
# im = im.resize((192, 320), Image.ANTIALIAS)
y = self.model.predict({'image': im}) # coordinates are xywh normalized
if 'confidence' in y:
box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float) # np.float is deprecated
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
else:
k = 'var_' + str(sorted(int(k.replace('var_', '')) for k in y)[-1]) # output key
y = y[k] # output
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
if self.saved_model: # SavedModel
y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im)).numpy()
else: # Lite or Edge TPU
input, output = self.input_details[0], self.output_details[0]
int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
if int8:
scale, zero_point = input['quantization']
im = (im / scale + zero_point).astype(np.uint8) # de-scale
self.interpreter.set_tensor(input['index'], im)
self.interpreter.invoke()
y = self.interpreter.get_tensor(output['index'])
if int8:
scale, zero_point = output['quantization']
y = (y.astype(np.float32) - zero_point) * scale # re-scale
y[..., :4] *= [w, h, w, h] # xywh normalized to pixels
if isinstance(y, np.ndarray):
y = torch.tensor(y, device=self.device)
return (y, []) if val else y
def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once
if any((self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb)): # warmup types
if self.device.type != 'cpu': # only warmup GPU models
im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
for _ in range(2 if self.jit else 1): #
self.forward(im) # warmup
@staticmethod
def model_type(p='path/to/model.pt'):
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
from export import export_formats
suffixes = list(export_formats().Suffix) + ['.xml'] # export suffixes
check_suffix(p, suffixes) # checks
p = Path(p).name # eliminate trailing separators
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes)
xml |= xml2 # *_openvino_model or *.xml
tflite &= not edgetpu # *.tflite
return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs
class AutoShape(nn.Module):
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
multi_label = False # NMS multiple labels per box
classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
max_det = 1000 # maximum number of detections per image
amp = False # Automatic Mixed Precision (AMP) inference
def __init__(self, model):
super().__init__()
LOGGER.info('Adding AutoShape... ')
copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
self.pt = not self.dmb or model.pt # PyTorch model
self.model = model.eval()
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
# Inference from various sources. For height=640, width=1280, RGB images example inputs are:
# file: imgs = 'data/images/zidane.jpg' # str or PosixPath
# URI: = 'https://ultralytics.com/images/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
t = [time_sync()]
p = next(self.model.parameters()) if self.pt else torch.zeros(1) # for device and type
autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
if isinstance(imgs, torch.Tensor): # torch
with amp.autocast(autocast):
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
# Pre-process
n, imgs = (len(imgs), list(imgs)) if isinstance(imgs, (list, tuple)) else (1, [imgs]) # number, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}' # filename
if isinstance(im, (str, Path)): # filename or uri
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)] # inf shape
x = [letterbox(im, shape1, auto=False)[0] for im in imgs] # pad
x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
t.append(time_sync())
with amp.autocast(autocast):
# Inference
y = self.model(x, augment, profile) # forward
t.append(time_sync())
# Post-process
y = non_max_suppression(y if self.dmb else y[0],
self.conf,
self.iou,
self.classes,
self.agnostic,
self.multi_label,
max_det=self.max_det) # NMS
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
t.append(time_sync())
return Detections(imgs, y, files, t, self.names, x.shape)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.times = times # profiling times
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
self.s = shape # inference BCHW shape
def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
crops = []
for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render or crop:
annotator = Annotator(im, example=str(self.names))
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
crops.append({
'box': box,
'conf': conf,
'cls': cls,
'label': label,
'im': save_one_box(box, im, file=file, save=save)})
else: # all others
annotator.box_label(box, label if labels else '', color=colors(cls))
im = annotator.im
else:
s += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if pprint:
LOGGER.info(s.rstrip(', '))
if show:
im.show(self.files[i]) # show
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
if render:
self.imgs[i] = np.asarray(im)
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')
return crops
def print(self):
self.display(pprint=True) # print results
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
self.t)
def show(self, labels=True):
self.display(show=True, labels=labels) # show results
def save(self, labels=True, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
self.display(save=True, labels=labels, save_dir=save_dir) # save results
def crop(self, save=True, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
return self.display(crop=True, save=save, save_dir=save_dir) # crop results
def render(self, labels=True):
self.display(render=True, labels=labels) # render results
return self.imgs
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
r = range(self.n) # iterable
x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
# for d in x:
# for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
# setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def __len__(self):
return self.n
class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = nn.Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)
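A hedged usage sketch for DetectMultiBackend (weights path, data yaml and input shape are illustrative):
import torch
from models.common import DetectMultiBackend
model = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'), data='data/coco128.yaml')
im = torch.zeros(1, 3, 640, 640)  # dummy BCHW input
pred = model(im)  # raw predictions; pass through non_max_suppression() for final boxes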

@ -0,0 +1,122 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Experimental modules
"""
import math
import numpy as np
import torch
import torch.nn as nn
from models.common import Conv
from utils.downloads import attempt_download
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super().__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
super().__init__()
n = len(k) # number of convolutions
if equal_ch: # equal c_ per group
i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(n)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * n
a = np.eye(n + 1, n, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([
nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU()
def forward(self, x):
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
y = []
for module in self:
y.append(module(x, augment, profile, visualize)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, map_location=None, inplace=True, fuse=True):
from models.yolo import Detect, Model
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location=map_location) # load
ckpt = (ckpt.get('ema') or ckpt['model']).float() # FP32 model
model.append(ckpt.fuse().eval() if fuse else ckpt.eval()) # fused or un-fused model in eval mode
# Compatibility updates
for m in model.modules():
t = type(m)
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
m.inplace = inplace # torch 1.7.0 compatibility
if t is Detect:
if not isinstance(m.anchor_grid, list): # new Detect Layer compatibility
delattr(m, 'anchor_grid')
setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
elif t is Conv:
m._non_persistent_buffers_set = set() # torch 1.6.0 compatibility
elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
if len(model) == 1:
return model[-1] # return model
else:
print(f'Ensemble created with {weights}\n')
for k in 'names', 'nc', 'yaml':
setattr(model, k, getattr(model[0], k))
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
return model # return ensemble
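# Usage sketch (illustrative; assumes the weight files exist locally or are downloadable):
# model = attempt_load('yolov5s.pt', map_location='cpu')  # single model
# ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'])  # Ensemble of two models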

@ -0,0 +1,59 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Default anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9,11, 21,19, 17,41] # P3/8
- [43,32, 39,70, 86,64] # P4/16
- [65,131, 134,130, 120,265] # P5/32
- [282,180, 247,354, 512,387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28,41, 67,59, 57,141] # P3/8
- [144,103, 129,227, 270,205] # P4/16
- [209,452, 455,396, 358,812] # P5/32
- [653,922, 1109,570, 1387,1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11,11, 13,30, 29,20] # P3/8
- [30,46, 61,38, 39,92] # P4/16
- [78,80, 146,66, 79,163] # P5/32
- [149,150, 321,143, 157,303] # P6/64
- [257,402, 359,290, 524,372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19,22, 54,36, 32,77] # P3/8
- [70,83, 138,71, 75,173] # P4/16
- [165,159, 148,334, 375,151] # P5/32
- [334,317, 251,626, 499,474] # P6/64
- [750,326, 534,814, 1079,818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29,34, 81,55, 47,115] # P3/8
- [105,124, 207,107, 113,259] # P4/16
- [247,238, 222,500, 563,227] # P5/32
- [501,476, 376,939, 749,711] # P6/64
- [1126,489, 801,1222, 1618,1227] # P7/128
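# Note: each pair above is an anchor (width, height) in pixels at the stated img_size; each row
# feeds one Detect output level (P3/8 = stride 8, ... P7/128 = stride 128). AutoAnchor may
# replace these at train time if the best possible recall (BPR) of the defaults is low.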

@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]

@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
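# Note: the extra input in [[-1, 14, 6], 1, Concat, [1]] above is the BiFPN cross-scale edge:
# the P4 node fuses the current feature with head P4 (layer 14) and backbone P4 (layer 6).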

@ -0,0 +1,42 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 FPN head
head:
[[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, C3, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, C3, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,54 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
]

@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[[17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4)
]

@ -0,0 +1,56 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,67 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 3, C3, [1280]],
[-1, 1, SPPF, [1280, 5]], # 13
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 2 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 5 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,498 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
from models.experimental import CrossConv, MixConv2d, attempt_load
from models.yolo import Detect
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
# TensorFlow BatchNormalization wrapper
def __init__(self, w=None):
super().__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
epsilon=w.eps)
def call(self, inputs):
return self.bn(inputs)
class TFPad(keras.layers.Layer):
def __init__(self, pad):
super().__init__()
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
def call(self, inputs):
return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
class TFConv(keras.layers.Layer):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups, activation, weights
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
assert isinstance(k, int), "Convolution with multiple kernels is not allowed."
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
conv = keras.layers.Conv2D(
c2,
k,
s,
'SAME' if s == 1 else 'VALID',
use_bias=False if hasattr(w, 'bn') else True,
kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
# YOLOv5 activations
if isinstance(w.act, nn.LeakyReLU):
self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity
elif isinstance(w.act, nn.Hardswish):
self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity
elif isinstance(w.act, (nn.SiLU, SiLU)):
self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity
else:
raise Exception(f'no matching TensorFlow activation found for {w.act}')
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFFocus(keras.layers.Layer):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
# inputs = inputs / 255 # normalize 0-255 to 0-1
return self.conv(
tf.concat(
[inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]],
3))
class TFBottleneck(keras.layers.Layer):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
# Substitution for PyTorch nn.Conv2d
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(
c2,
k,
s,
'VALID',
use_bias=bias,
kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None,
)
def call(self, inputs):
return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
self.bn = TFBN(w.bn)
self.act = lambda x: keras.activations.swish(x)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
y1 = self.cv3(self.m(self.cv1(inputs)))
y2 = self.cv2(inputs)
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFSPP(keras.layers.Layer):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
def call(self, inputs):
x = self.cv1(inputs)
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
# Spatial pyramid pooling-Fast layer
def __init__(self, c1, c2, k=5, w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
def call(self, inputs):
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
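# Note: three stacked 5x5 max-pools have effective receptive fields of 5, 9 and 13, so SPPF
# reproduces the SPP(k=(5, 9, 13)) outputs with fewer ops by reusing intermediate pools.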
class TFDetect(keras.layers.Layer):
# TF YOLOv5 Detect layer
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
super().__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [tf.zeros(1)] * self.nl # init grid
self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
self.training = False # set to False after building model
self.imgsz = imgsz
for i in range(self.nl):
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
self.grid[i] = self._make_grid(nx, ny)
def call(self, inputs):
z = [] # inference output
x = []
for i in range(self.nl):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = tf.sigmoid(x[i])
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
xy = (y[..., 0:2] * 2 + grid) * self.stride[i] # xy
wh = y[..., 2:4] ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, y[..., 4:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x)
@staticmethod
def _make_grid(nx=20, ny=20):
# yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
super().__init__()
assert scale_factor == 2, "scale_factor must be 2"
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
# with default arguments: align_corners=False, half_pixel_centers=False
# self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
# size=(x.shape[1] * 2, x.shape[2] * 2))
def call(self, inputs):
return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
# TF version of torch.concat()
def __init__(self, dimension=1, w=None):
super().__init__()
assert dimension == 1, "convert only NCHW to NHWC concat"
self.d = 3
def call(self, inputs):
return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m_str = m
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
elif m is Detect:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
args.append(imgsz)
else:
c2 = ch[f]
tf_m = eval('TF' + m_str.replace('nn.', ''))
m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
else tf_m(*args, w=model.model[i]) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return keras.Sequential(layers), sorted(save)
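# Note: keras.Sequential here is only an ordered container; TFModel.predict walks model.layers
# itself and routes multi-input layers via each layer's .f attribute instead of calling it directly.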
class TFModel:
# TF YOLOv5 model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
def predict(self,
inputs,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25):
y = [] # outputs
x = inputs
for i, m in enumerate(self.model.layers):
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
x = m(x) # run
y.append(x if m.i in self.savelist else None) # save output
# Add TensorFlow NMS
if tf_nms:
boxes = self._xywh2xyxy(x[0][..., :4])
probs = x[0][:, :, 4:5]
classes = x[0][:, :, 5:]
scores = probs * classes
if agnostic_nms:
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
return nms, x[1]
else:
boxes = tf.expand_dims(boxes, 2)
nms = tf.image.combined_non_max_suppression(boxes,
scores,
topk_per_class,
topk_all,
iou_thres,
conf_thres,
clip_boxes=False)
return nms, x[1]
return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0][0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
# return tf.concat([conf, cls, xywh], 1)
@staticmethod
def _xywh2xyxy(xywh):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
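# Worked example (illustrative): [x=0.5, y=0.5, w=0.2, h=0.4] -> [0.4, 0.3, 0.6, 0.7],
# i.e. top-left (0.4, 0.3) and bottom-right (0.6, 0.7) corners.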
class AgnosticNMS(keras.layers.Layer):
# TF Agnostic NMS
def call(self, input, topk_all, iou_thres, conf_thres):
# wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
input,
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
name='agnostic_nms')
@staticmethod
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
boxes, classes, scores = x
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
scores_inp = tf.reduce_max(scores, -1)
selected_inds = tf.image.non_max_suppression(boxes,
scores_inp,
max_output_size=topk_all,
iou_threshold=iou_thres,
score_threshold=conf_thres)
selected_boxes = tf.gather(boxes, selected_inds)
padded_boxes = tf.pad(selected_boxes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
mode="CONSTANT",
constant_values=0.0)
selected_scores = tf.gather(scores_inp, selected_inds)
padded_scores = tf.pad(selected_scores,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode="CONSTANT",
constant_values=-1.0)
selected_classes = tf.gather(class_inds, selected_inds)
padded_classes = tf.pad(selected_classes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode="CONSTANT",
constant_values=-1.0)
valid_detections = tf.shape(selected_inds)[0]
return padded_boxes, padded_scores, padded_classes, valid_detections
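# Note: outputs are padded to a fixed topk_all length (boxes with 0.0, scores/classes with -1.0)
# so the exported graph keeps static shapes; valid_detections gives the usable count per image.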
def representative_dataset_gen(dataset, ncalib=100):
# Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
input = np.transpose(img, [1, 2, 0])
input = np.expand_dims(input, axis=0).astype(np.float32)
input /= 255
yield [input]
if n >= ncalib:
break
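# Usage sketch (illustrative; converter quantization flags are elided):
# converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
# converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)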
def run(
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=(640, 640), # inference size h,w
batch_size=1, # batch size
dynamic=False, # dynamic batch size
):
# PyTorch model
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
_ = model(im) # inference
model.info()
# TensorFlow model
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
_ = tf_model.predict(im) # inference
# Keras model
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)

@ -0,0 +1,335 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
YOLO-specific modules
Usage:
$ python path/to/models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != 'Windows':
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
time_sync)
try:
import thop # for FLOPs computation
except ImportError:
thop = None
class Detect(nn.Module):
stride = None # strides computed during build
onnx_dynamic = False # ONNX export parameter
export = False # export mode
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.zeros(1)] * self.nl # init grid
self.anchor_grid = [torch.zeros(1)] * self.nl # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use in-place ops (e.g. slice assignment)
def forward(self, x):
z = [] # inference output
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
y = x[i].sigmoid()
if self.inplace:
y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, -1, self.no))
return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
def _make_grid(self, nx=20, ny=20, i=0):
d = self.anchors[i].device
t = self.anchors[i].dtype
shape = 1, self.na, ny, nx, 2 # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
if check_version(torch.__version__, '1.10.0'): # torch>=1.10.0 meshgrid requires the indexing argument to keep 'ij' behavior
yv, xv = torch.meshgrid(y, x, indexing='ij')
else:
yv, xv = torch.meshgrid(y, x)
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
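# Decode example (illustrative): at P3 (stride 8) the grid value for cell (10, 10) is 9.5 after
# the -0.5 offset, so a sigmoid output of 0.5 decodes to xy = (0.5 * 2 + 9.5) * 8 = 84 px and
# wh = (0.5 * 2) ** 2 * anchor = 1 * anchor px.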
class Model(nn.Module):
# YOLOv5 model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding='ascii', errors='ignore') as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
self.inplace = self.yaml.get('inplace', True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, Detect):
s = 256 # 2x min stride
m.inplace = self.inplace
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
check_anchor_order(m) # must be in pixel-space (not grid-space)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info('')
def forward(self, x, augment=False, profile=False, visualize=False):
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _forward_once(self, x, profile=False, visualize=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _descale_pred(self, p, flips, scale, img_size):
# de-scale predictions following augmented inference (inverse operation)
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
# Clip YOLOv5 augmented inference tails
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4 ** x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
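# Note: this drops the last-layer (large-object) tail of the full-scale pass y[0] and the
# first-layer (small-object) head of the most-downscaled pass y[-1], since those ranges are
# better covered by the other augmentation scales.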
def _profile_one_layer(self, m, x, dt):
c = isinstance(m, Detect) # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}")
LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
def _print_biases(self):
m = self.model[-1] # Detect() module
for mi in m.m: # from
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
LOGGER.info(
('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
# def _print_weights(self):
# for m in self.model.modules():
# if type(m) is Bottleneck:
# LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
LOGGER.info('Fusing layers... ')
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, Detect):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
def parse_model(d, ch): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost):
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
elif m is Detect:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--profile', action='store_true', help='profile model speed')
parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
_ = model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
try:
_ = Model(cfg)
except Exception as e:
print(f'Error in {cfg}: {e}')
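
As an aside for readers, here is a minimal sketch of driving the Model class above programmatically rather than through the __main__ block (the yaml path is illustrative; any of the configs below works):

import torch
from models.yolo import Model

model = Model('models/yolov5s.yaml', ch=3, nc=80)  # build from yaml; nc/anchors overrides are optional
model.eval()
im = torch.zeros(1, 3, 640, 640)  # dummy image, a multiple of the max stride (32)
with torch.no_grad():
    pred = model(im)                    # single-scale inference; eval Detect() returns (detections, raw outputs)
    pred_aug = model(im, augment=True)  # multi-scale/flip TTA via _forward_augment
model.fuse()  # fold BatchNorm2d into Conv2d for deployment
model.info()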

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
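
The five configs in this commit share one backbone/head layout and differ only in depth_multiple and width_multiple, which parse_model in models/yolo.py applies per layer. A small worked sketch of that scaling (make_divisible is re-implemented here for illustration; the repo's version lives in utils.general):

import math

def make_divisible(x, divisor=8):
    # round a channel count up to the nearest multiple of divisor, as utils.general.make_divisible does
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50  # depth/width multiples of the yolov5s-style config
n, c2 = 9, 512       # e.g. the backbone entry [-1, 9, C3, [512]]
print(max(round(n * gd), 1))       # 3 repeats instead of 9
print(make_divisible(c2 * gw, 8))  # 256 channels instead of 512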

@ -0,0 +1,31 @@
lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.2  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
anchors: 3  # anchors per output grid (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.1  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 1.0  # image mosaic (probability)
mixup: 0.0  # image mixup (probability)
copy_paste: 0.0  # segment copy-paste (probability)
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.5  # layer channel multiple
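
Note that train() below does not use the box/cls/obj gains verbatim; its "Model attributes" block rescales them to the number of detection layers, classes and image size. A quick sketch with the standard 3-layer, 80-class, 640 px setup (values unchanged in this default case):

nl, nc, imgsz = 3, 80, 640          # detection layers, classes, train image size
box, cls, obj = 0.05, 0.5, 1.0      # gains from the yaml above
box *= 3 / nl                        # -> 0.05
cls *= nc / 80 * 3 / nl              # -> 0.5 (scales linearly with a custom nc)
obj *= (imgsz / 640) ** 2 * 3 / nl   # -> 1.0 (scales with image area)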

@ -0,0 +1,39 @@
weights: yolov5s.pt
cfg: models/myWeight/yolov5s-test.yaml
data: mydata/mydata.yaml
hyp: data/hyps/hyp.scratch-low.yaml
epochs: 300
batch_size: 8
imgsz: 640
rect: false
resume: false
nosave: false
noval: false
noautoanchor: false
noplots: false
evolve: null
bucket: ''
cache: null
image_weights: false
device: ''
multi_scale: false
single_cls: false
optimizer: SGD
sync_bn: false
workers: 2
project: runs\train
name: exp
exist_ok: false
quad: false
cos_lr: false
label_smoothing: 0.0
patience: 100
freeze:
- 0
save_period: -1
local_rank: -1
entity: null
upload_dataset: false
bbox_interval: -1
artifact_alias: latest
save_dir: runs\train\exp9
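
This opt.yaml is what --resume reads back: main() in train.py replaces the CLI namespace with the saved options and reinstates the checkpoint. A sketch of that round-trip, using an illustrative checkpoint path:

import argparse
from pathlib import Path
import yaml

ckpt = Path(r'runs\train\exp9\weights\last.pt')  # illustrative; get_latest_run() locates it automatically
with open(ckpt.parent.parent / 'opt.yaml', errors='ignore') as f:
    opt = argparse.Namespace(**yaml.safe_load(f))  # replace CLI options with the saved ones
opt.cfg, opt.weights, opt.resume = '', str(ckpt), True  # reinstate, as train.main() does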

@ -0,0 +1,37 @@
# pip install -r requirements.txt
# Base ----------------------------------------
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.2
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0
torchvision>=0.8.1
tqdm>=4.41.0
# Logging -------------------------------------
tensorboard>=2.4.1
# wandb
# Plotting ------------------------------------
pandas>=1.1.4
seaborn>=0.11.0
# Export --------------------------------------
# coremltools>=4.1 # CoreML export
# onnx>=1.9.0 # ONNX export
# onnx-simplifier>=0.3.6 # ONNX simplifier
# scikit-learn==0.19.2 # CoreML quantization
# tensorflow>=2.4.1 # TFLite export
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev # OpenVINO export
# Extras --------------------------------------
# albumentations>=1.0.3
# Cython # for pycocotools https://github.com/cocodataset/cocoapi/issues/172
# pycocotools>=2.0 # COCO mAP
# roboflow
thop # FLOPs computation
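
thop is the only uncommented extra; _profile_one_layer in models/yolo.py uses it for the GFLOPs column. Standalone, the same call looks like the sketch below (thop.profile returns a (MACs, params) pair, and the code above doubles MACs to get FLOPs):

import torch
import thop

m = torch.nn.Conv2d(3, 16, 3, padding=1)
x = torch.zeros(1, 3, 64, 64)
macs, params = thop.profile(m, inputs=(x,), verbose=False)
gflops = macs / 1E9 * 2  # MACs -> FLOPs, as in _profile_one_layer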

@ -0,0 +1,59 @@
# Project-wide configuration file, can be used for package metadata and other tool configurations
# Example usage: global configuration for PEP8 (via flake8) settings or default pytest arguments
# Local usage: pip install pre-commit, pre-commit run --all-files
[metadata]
license_file = LICENSE
description_file = README.md
[tool:pytest]
norecursedirs =
.git
dist
build
addopts =
--doctest-modules
--durations=25
--color=yes
[flake8]
max-line-length = 120
exclude = .tox,*.egg,build,temp
select = E,W,F
doctests = True
verbose = 2
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
ignore =
E731 # Do not assign a lambda expression, use a def
F405 # name may be undefined, or defined from star imports: module
E402 # module level import not at top of file
F401 # module imported but unused
W504 # line break after binary operator
E127 # continuation line over-indented for visual indent
E231 # missing whitespace after ',', ';', or ':'
E501 # line too long
F403 # from module import * used; unable to detect undefined names
[isort]
# https://pycqa.github.io/isort/docs/configuration/options.html
line_length = 120
# see: https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html
multi_line_output = 0
[yapf]
based_on_style = pep8
spaces_before_comment = 2
COLUMN_LIMIT = 120
COALESCE_BRACKETS = True
SPACES_AROUND_POWER_OPERATOR = True
SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = False
SPLIT_BEFORE_CLOSING_BRACKET = False
SPLIT_BEFORE_FIRST_ARGUMENT = False
# EACH_DICT_ENTRY_ON_SEPARATE_LINE = False

@ -0,0 +1,670 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Train a YOLOv5 model on a custom dataset.
Models and datasets download automatically from the latest YOLOv5 release.
Models: https://github.com/ultralytics/yolov5/tree/master/models
Datasets: https://github.com/ultralytics/yolov5/tree/master/data
Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
Usage:
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED)
$ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch
"""
import argparse
import math
import os
import random
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import SGD, Adam, AdamW, lr_scheduler
from tqdm.auto import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import val # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks
from utils.datasets import create_dataloader
from utils.downloads import attempt_download
from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
intersect_dicts, is_ascii, labels_to_class_weights, labels_to_image_weights, methods,
one_cycle, print_args, print_mutation, strip_optimizer)
from utils.loggers import Loggers
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.loss import ComputeLoss
from utils.metrics import fitness
from utils.plots import check_font, plot_evolve, plot_labels
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
callbacks.run('on_pretrain_routine_start')
# Directories
w = save_dir / 'weights' # weights dir
(w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
last, best = w / 'last.pt', w / 'best.pt'
# Hyperparameters
if isinstance(hyp, str):
with open(hyp, encoding='utf-8') as f:
hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
# Save run settings
if not evolve:
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.safe_dump(hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.safe_dump(vars(opt), f, sort_keys=False)
# Loggers
data_dict = None
if RANK in [-1, 0]:
loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
if loggers.wandb:
data_dict = loggers.wandb.data_dict
if resume:
weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
# Register actions
for k in methods(loggers):
callbacks.register_action(k, callback=getattr(loggers, k))
# Config
plots = not evolve and not opt.noplots # create plots
cuda = device.type != 'cpu'
init_seeds(1 + RANK)
with torch_distributed_zero_first(LOCAL_RANK):
data_dict = data_dict or check_dataset(data) # check if None
if not is_ascii(data_dict['names']): # non-latin labels, i.e. asian, arabic, cyrillic
check_font('Arial.Unicode.ttf', progress=True)
train_path, val_path = data_dict['train'], data_dict['val']
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check
is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset
# Model
check_suffix(weights, '.pt') # check weights
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
# Freeze
freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
if any(x in k for x in freeze):
LOGGER.info(f'freezing {k}')
v.requires_grad = False
# Image size
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# Batch size
if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size
batch_size = check_train_batch_size(model, imgsz)
loggers.on_params_update({"batch_size": batch_size})
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
g = [], [], [] # optimizer parameter groups
bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d()
for v in model.modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias
g[2].append(v.bias)
if isinstance(v, bn): # weight (no decay)
g[1].append(v.weight)
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
g[0].append(v.weight)
if opt.optimizer == 'Adam':
optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
elif opt.optimizer == 'AdamW':
optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay
optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights)
LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
del g
# Scheduler
if opt.cos_lr:
lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
else:
lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
# EMA
ema = ModelEMA(model) if RANK in [-1, 0] else None
# Resume
start_epoch, best_fitness = 0, 0.0
if pretrained:
# Optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
# EMA
if ema and ckpt.get('ema'):
ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
ema.updates = ckpt['updates']
# Epochs
start_epoch = ckpt['epoch'] + 1
if resume:
assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
if epochs < start_epoch:
LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
epochs += ckpt['epoch'] # finetune additional epochs
del ckpt, csd
# DP mode
if cuda and RANK == -1 and torch.cuda.device_count() > 1:
LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and RANK != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
LOGGER.info('Using SyncBatchNorm()')
# Trainloader
train_loader, dataset = create_dataloader(train_path,
imgsz,
batch_size // WORLD_SIZE,
gs,
single_cls,
hyp=hyp,
augment=True,
cache=None if opt.cache == 'val' else opt.cache,
rect=opt.rect,
rank=LOCAL_RANK,
workers=workers,
image_weights=opt.image_weights,
quad=opt.quad,
prefix=colorstr('train: '),
shuffle=True)
mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class
nb = len(train_loader) # number of batches
assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
# Process 0
if RANK in [-1, 0]:
val_loader = create_dataloader(val_path,
imgsz,
batch_size // WORLD_SIZE * 2,
gs,
single_cls,
hyp=hyp,
cache=None if noval else opt.cache,
rect=True,
rank=-1,
workers=workers * 2,
pad=0.5,
prefix=colorstr('val: '))[0]
if not resume:
labels = np.concatenate(dataset.labels, 0)
# c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
if plots:
plot_labels(labels, names, save_dir)
# Anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
model.half().float() # pre-reduce anchor precision
callbacks.run('on_pretrain_routine_end')
# DDP mode
if cuda and RANK != -1:
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
# Model attributes
nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps)
hyp['box'] *= 3 / nl # scale to layers
hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers
hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
hyp['label_smoothing'] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names
# Start training
t0 = time.time()
nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model) # init loss class
callbacks.run('on_train_start')
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
callbacks.run('on_train_epoch_start')
model.train()
# Update image weights (optional, single-GPU only)
if opt.image_weights:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
# Update mosaic border (optional)
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(3, device=device) # mean losses
if RANK != -1:
train_loader.sampler.set_epoch(epoch)
pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if RANK in (-1, 0):
pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
callbacks.run('on_train_batch_start')
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
if RANK != -1:
loss *= WORLD_SIZE # gradient averaged between devices in DDP mode
if opt.quad:
loss *= 4.
# Backward
scaler.scale(loss).backward()
# Optimize
if ni - last_opt_step >= accumulate:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
# Log
if RANK in (-1, 0):
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB)
pbar.set_description(('%10s' * 2 + '%10.4g' * 5) %
(f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots)
if callbacks.stop_training:
return
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for loggers
scheduler.step()
if RANK in (-1, 0):
# mAP
callbacks.run('on_train_epoch_end', epoch=epoch)
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch: # Calculate mAP
results, maps, _ = val.run(data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=ema.ema,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
plots=False,
callbacks=callbacks,
compute_loss=compute_loss)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
if fi > best_fitness:
best_fitness = fi
log_vals = list(mloss) + list(results) + lr
callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
# Save model
if (not nosave) or (final_epoch and not evolve): # if save
ckpt = {
'epoch': epoch,
'best_fitness': best_fitness,
'model': deepcopy(de_parallel(model)).half(),
'ema': deepcopy(ema.ema).half(),
'updates': ema.updates,
'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
'date': datetime.now().isoformat()}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
torch.save(ckpt, w / f'epoch{epoch}.pt')
del ckpt
callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
# Stop Single-GPU
if RANK == -1 and stopper(epoch=epoch, fitness=fi):
break
# Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576
# stop = stopper(epoch=epoch, fitness=fi)
# if RANK == 0:
# dist.broadcast_object_list([stop], 0) # broadcast 'stop' to all ranks
# Stop DDP
# with torch_distributed_zero_first(RANK):
# if stop:
# break # must break all DDP ranks
# end epoch ----------------------------------------------------------------------------------------------------
# end training -----------------------------------------------------------------------------------------------------
if RANK in (-1, 0):
LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
for f in last, best:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is best:
LOGGER.info(f'\nValidating {f}...')
results, _, _ = val.run(
data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
save_json=is_coco,
verbose=True,
plots=plots,
callbacks=callbacks,
compute_loss=compute_loss) # val best model with plots
if is_coco:
callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
callbacks.run('on_train_end', last, best, plots, epoch, results)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache()
return results
def parse_opt(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='models/myWeight/yolov5s-test.yaml', help='model.yaml path')
parser.add_argument('--data', type=str, default='mydata/mydata.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default='data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=120)
parser.add_argument('--batch-size', type=int, default=20, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
parser.add_argument('--noplots', action='store_true', help='save no plot files')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=0, help='max dataloader workers (per RANK in DDP mode)')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
# Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
opt = parser.parse_known_args()[0] if known else parser.parse_args()
return opt
def main(opt, callbacks=Callbacks()):
# Checks
if RANK in (-1, 0):
print_args(vars(opt))
check_git_status()
check_requirements(exclude=['thop'])
# Resume
if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
opt = argparse.Namespace(**yaml.safe_load(f)) # replace
opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate
LOGGER.info(f'Resuming training from {ckpt}')
else:
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks
assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
if opt.evolve:
if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve
opt.project = str(ROOT / 'runs/evolve')
opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
if opt.name == 'cfg':
opt.name = Path(opt.cfg).stem # use model.yaml as name
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
assert not opt.image_weights, f'--image-weights {msg}'
assert not opt.evolve, f'--evolve {msg}'
assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
torch.cuda.set_device(LOCAL_RANK)
device = torch.device('cuda', LOCAL_RANK)
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
# Train
if not opt.evolve:
train(opt.hyp, opt, device, callbacks)
if WORLD_SIZE > 1 and RANK == 0:
LOGGER.info('Destroying process group... ')
dist.destroy_process_group()
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {
'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
'box': (1, 0.02, 0.2), # box loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mosaic': (1, 0.0, 1.0), # image mosaic (probability)
'mixup': (1, 0.0, 1.0), # image mixup (probability)
'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability)
with open(opt.hyp, errors='ignore') as f:
hyp = yaml.safe_load(f) # load hyps dict
if 'anchors' not in hyp: # anchors commented in hyp.yaml
hyp['anchors'] = 3
opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
if opt.bucket:
os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists
for _ in range(opt.evolve): # generations to evolve
if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate
# Select parent(s)
parent = 'single' # parent selection method: 'single' or 'weighted'
x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0)
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
mp, s = 0.8, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate
# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
results = train(hyp.copy(), opt, device, callbacks)
callbacks = Callbacks()
# Write mutation results
print_mutation(results, hyp.copy(), save_dir, opt.bucket)
# Plot results
plot_evolve(evolve_csv)
LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
f"Results saved to {colorstr('bold', save_dir)}\n"
f'Usage example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs):
# Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
return opt
if __name__ == "__main__":
opt = parse_opt()
main(opt)
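
For completeness, run() above allows training from Python, with kwargs overriding the parse_opt defaults; for example (mirroring this fork's default paths):

import train

# equivalent to: python train.py --data mydata/mydata.yaml --weights yolov5s.pt --epochs 120 --imgsz 640
opt = train.run(data='mydata/mydata.yaml', weights='yolov5s.pt', epochs=120, imgsz=640)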

1102
tutorial.ipynb vendored

File diff suppressed because it is too large

827
ui.py

@ -0,0 +1,827 @@
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'ui.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(1077, 761)
MainWindow.setMinimumSize(QtCore.QSize(1077, 761))
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
self.tabWidget.setEnabled(True)
self.tabWidget.setGeometry(QtCore.QRect(0, 0, 1080, 718))
self.tabWidget.setMinimumSize(QtCore.QSize(1080, 718))
self.tabWidget.setFocusPolicy(QtCore.Qt.TabFocus)
self.tabWidget.setContextMenuPolicy(QtCore.Qt.DefaultContextMenu)
self.tabWidget.setIconSize(QtCore.QSize(20, 20))
self.tabWidget.setObjectName("tabWidget")
self.tab = QtWidgets.QWidget()
self.tab.setObjectName("tab")
self.pushButton_3 = QtWidgets.QPushButton(self.tab)
self.pushButton_3.setGeometry(QtCore.QRect(150, 570, 93, 28))
self.pushButton_3.setObjectName("pushButton_3")
self.label_2 = QtWidgets.QLabel(self.tab)
self.label_2.setGeometry(QtCore.QRect(850, 320, 16, 21))
self.label_2.setObjectName("label_2")
self.label = QtWidgets.QLabel(self.tab)
self.label.setGeometry(QtCore.QRect(750, 320, 21, 21))
self.label.setObjectName("label")
self.lineEdit_4 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_4.setGeometry(QtCore.QRect(880, 370, 41, 20))
self.lineEdit_4.setObjectName("lineEdit_4")
self.label_3 = QtWidgets.QLabel(self.tab)
self.label_3.setGeometry(QtCore.QRect(850, 370, 16, 21))
self.label_3.setObjectName("label_3")
self.pushButton_5 = QtWidgets.QPushButton(self.tab)
self.pushButton_5.setGeometry(QtCore.QRect(150, 610, 93, 28))
self.pushButton_5.setObjectName("pushButton_5")
self.lineEdit_9 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_9.setGeometry(QtCore.QRect(810, 430, 91, 21))
self.lineEdit_9.setObjectName("lineEdit_9")
self.pushButton = QtWidgets.QPushButton(self.tab)
self.pushButton.setGeometry(QtCore.QRect(730, 20, 93, 28))
self.pushButton.setObjectName("pushButton")
self.lineEdit_2 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_2.setGeometry(QtCore.QRect(880, 320, 41, 20))
self.lineEdit_2.setObjectName("lineEdit_2")
self.label_4 = QtWidgets.QLabel(self.tab)
self.label_4.setGeometry(QtCore.QRect(750, 370, 21, 21))
self.label_4.setObjectName("label_4")
self.lineEdit_10 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_10.setGeometry(QtCore.QRect(810, 510, 91, 21))
self.lineEdit_10.setObjectName("lineEdit_10")
self.lineEdit = QtWidgets.QLineEdit(self.tab)
self.lineEdit.setGeometry(QtCore.QRect(790, 320, 41, 21))
self.lineEdit.setObjectName("lineEdit")
self.lineEdit_3 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_3.setGeometry(QtCore.QRect(790, 370, 41, 21))
self.lineEdit_3.setObjectName("lineEdit_3")
self.label_7 = QtWidgets.QLabel(self.tab)
self.label_7.setGeometry(QtCore.QRect(830, 30, 31, 16))
self.label_7.setObjectName("label_7")
self.label_8 = QtWidgets.QLabel(self.tab)
self.label_8.setGeometry(QtCore.QRect(810, 130, 71, 16))
self.label_8.setObjectName("label_8")
self.textBrowser = QtWidgets.QTextBrowser(self.tab)
self.textBrowser.setGeometry(QtCore.QRect(900, 20, 141, 31))
self.textBrowser.setObjectName("textBrowser")
self.textBrowser_3 = QtWidgets.QTextBrowser(self.tab)
self.textBrowser_3.setGeometry(QtCore.QRect(900, 120, 141, 31))
self.textBrowser_3.setObjectName("textBrowser_3")
self.label_10 = QtWidgets.QLabel(self.tab)
self.label_10.setEnabled(True)
self.label_10.setGeometry(QtCore.QRect(10, 10, 721, 501))
self.label_10.setCursor(QtGui.QCursor(QtCore.Qt.CrossCursor))
self.label_10.setMouseTracking(False)
self.label_10.setAutoFillBackground(True)
self.label_10.setFrameShape(QtWidgets.QFrame.Box)
self.label_10.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_10.setText("")
self.label_10.setObjectName("label_10")
self.label_9 = QtWidgets.QLabel(self.tab)
self.label_9.setGeometry(QtCore.QRect(750, 300, 121, 16))
self.label_9.setObjectName("label_9")
self.pushButton_6 = QtWidgets.QPushButton(self.tab)
self.pushButton_6.setGeometry(QtCore.QRect(950, 320, 93, 28))
self.pushButton_6.setObjectName("pushButton_6")
self.label_11 = QtWidgets.QLabel(self.tab)
self.label_11.setGeometry(QtCore.QRect(750, 350, 151, 16))
self.label_11.setObjectName("label_11")
self.pushButton_7 = QtWidgets.QPushButton(self.tab)
self.pushButton_7.setGeometry(QtCore.QRect(950, 370, 93, 28))
self.pushButton_7.setObjectName("pushButton_7")
self.label_12 = QtWidgets.QLabel(self.tab)
self.label_12.setGeometry(QtCore.QRect(750, 400, 121, 16))
self.label_12.setObjectName("label_12")
self.label_13 = QtWidgets.QLabel(self.tab)
self.label_13.setGeometry(QtCore.QRect(750, 470, 101, 16))
self.label_13.setObjectName("label_13")
self.pushButton_8 = QtWidgets.QPushButton(self.tab)
self.pushButton_8.setGeometry(QtCore.QRect(950, 420, 93, 28))
self.pushButton_8.setObjectName("pushButton_8")
self.pushButton_9 = QtWidgets.QPushButton(self.tab)
self.pushButton_9.setGeometry(QtCore.QRect(950, 510, 93, 28))
self.pushButton_9.setObjectName("pushButton_9")
self.textBrowser_4 = QtWidgets.QTextBrowser(self.tab)
self.textBrowser_4.setGeometry(QtCore.QRect(900, 70, 141, 31))
self.textBrowser_4.setObjectName("textBrowser_4")
self.label_15 = QtWidgets.QLabel(self.tab)
self.label_15.setGeometry(QtCore.QRect(830, 80, 31, 16))
self.label_15.setObjectName("label_15")
self.pushButton_2 = QtWidgets.QPushButton(self.tab)
self.pushButton_2.setGeometry(QtCore.QRect(30, 570, 93, 28))
self.pushButton_2.setObjectName("pushButton_2")
self.pushButton_4 = QtWidgets.QPushButton(self.tab)
self.pushButton_4.setGeometry(QtCore.QRect(30, 610, 93, 28))
self.pushButton_4.setObjectName("pushButton_4")
self.label_5 = QtWidgets.QLabel(self.tab)
self.label_5.setGeometry(QtCore.QRect(740, 540, 141, 16))
self.label_5.setObjectName("label_5")
self.lineEdit_5 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_5.setGeometry(QtCore.QRect(770, 570, 51, 21))
self.lineEdit_5.setObjectName("lineEdit_5")
self.lineEdit_6 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_6.setGeometry(QtCore.QRect(870, 570, 51, 21))
self.lineEdit_6.setObjectName("lineEdit_6")
self.lineEdit_7 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_7.setGeometry(QtCore.QRect(770, 610, 51, 21))
self.lineEdit_7.setObjectName("lineEdit_7")
self.lineEdit_8 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_8.setGeometry(QtCore.QRect(870, 610, 51, 21))
self.lineEdit_8.setObjectName("lineEdit_8")
self.label_6 = QtWidgets.QLabel(self.tab)
self.label_6.setGeometry(QtCore.QRect(740, 570, 16, 15))
self.label_6.setObjectName("label_6")
self.label_14 = QtWidgets.QLabel(self.tab)
self.label_14.setGeometry(QtCore.QRect(840, 570, 16, 16))
self.label_14.setObjectName("label_14")
self.label_64 = QtWidgets.QLabel(self.tab)
self.label_64.setGeometry(QtCore.QRect(740, 610, 16, 15))
self.label_64.setObjectName("label_64")
self.label_65 = QtWidgets.QLabel(self.tab)
self.label_65.setGeometry(QtCore.QRect(840, 610, 21, 16))
self.label_65.setObjectName("label_65")
self.pushButton_10 = QtWidgets.QPushButton(self.tab)
self.pushButton_10.setGeometry(QtCore.QRect(950, 610, 93, 28))
self.pushButton_10.setObjectName("pushButton_10")
self.label_66 = QtWidgets.QLabel(self.tab)
self.label_66.setGeometry(QtCore.QRect(740, 650, 21, 16))
self.label_66.setObjectName("label_66")
self.label_67 = QtWidgets.QLabel(self.tab)
self.label_67.setGeometry(QtCore.QRect(840, 650, 21, 16))
self.label_67.setObjectName("label_67")
self.lineEdit_43 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_43.setGeometry(QtCore.QRect(770, 650, 51, 21))
self.lineEdit_43.setObjectName("lineEdit_43")
self.lineEdit_44 = QtWidgets.QLineEdit(self.tab)
self.lineEdit_44.setGeometry(QtCore.QRect(870, 650, 51, 21))
self.lineEdit_44.setObjectName("lineEdit_44")
self.tabWidget.addTab(self.tab, "")
self.tab_2 = QtWidgets.QWidget()
self.tab_2.setObjectName("tab_2")
self.label_16 = QtWidgets.QLabel(self.tab_2)
self.label_16.setGeometry(QtCore.QRect(500, 20, 550, 360))
self.label_16.setCursor(QtGui.QCursor(QtCore.Qt.CrossCursor))
self.label_16.setFrameShape(QtWidgets.QFrame.Box)
self.label_16.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_16.setText("")
self.label_16.setObjectName("label_16")
self.label_18 = QtWidgets.QLabel(self.tab_2)
self.label_18.setGeometry(QtCore.QRect(550, 390, 471, 291))
self.label_18.setFrameShape(QtWidgets.QFrame.NoFrame)
self.label_18.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_18.setTextFormat(QtCore.Qt.AutoText)
self.label_18.setObjectName("label_18")
self.textBrowser_2 = QtWidgets.QTextBrowser(self.tab_2)
self.textBrowser_2.setGeometry(QtCore.QRect(90, 260, 141, 31))
self.textBrowser_2.setObjectName("textBrowser_2")
self.pushButton_11 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_11.setGeometry(QtCore.QRect(20, 40, 93, 28))
self.pushButton_11.setObjectName("pushButton_11")
self.pushButton_12 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_12.setGeometry(QtCore.QRect(340, 110, 93, 28))
self.pushButton_12.setObjectName("pushButton_12")
self.lineEdit9 = QtWidgets.QLineEdit(self.tab_2)
self.lineEdit9.setGeometry(QtCore.QRect(50, 150, 71, 31))
self.lineEdit9.setObjectName("lineEdit9")
self.lineEdit10 = QtWidgets.QLineEdit(self.tab_2)
self.lineEdit10.setGeometry(QtCore.QRect(140, 150, 71, 31))
self.lineEdit10.setObjectName("lineEdit10")
self.pushButton_14 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_14.setGeometry(QtCore.QRect(60, 400, 93, 28))
self.pushButton_14.setObjectName("pushButton_14")
self.pushButton_15 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_15.setGeometry(QtCore.QRect(60, 460, 93, 28))
self.pushButton_15.setObjectName("pushButton_15")
self.pushButton_16 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_16.setGeometry(QtCore.QRect(190, 400, 93, 28))
self.pushButton_16.setObjectName("pushButton_16")
self.pushButton_17 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_17.setGeometry(QtCore.QRect(190, 460, 93, 28))
self.pushButton_17.setObjectName("pushButton_17")
self.lineEdit_15 = QtWidgets.QLineEdit(self.tab_2)
self.lineEdit_15.setGeometry(QtCore.QRect(50, 190, 71, 31))
self.lineEdit_15.setObjectName("lineEdit_15")
self.lineEdit_16 = QtWidgets.QLineEdit(self.tab_2)
self.lineEdit_16.setGeometry(QtCore.QRect(140, 190, 71, 31))
self.lineEdit_16.setObjectName("lineEdit_16")
self.pushButton_18 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_18.setGeometry(QtCore.QRect(340, 280, 93, 28))
self.pushButton_18.setObjectName("pushButton_18")
self.label_19 = QtWidgets.QLabel(self.tab_2)
self.label_19.setGeometry(QtCore.QRect(50, 270, 41, 16))
self.label_19.setObjectName("label_19")
self.label_21 = QtWidgets.QLabel(self.tab_2)
self.label_21.setGeometry(QtCore.QRect(20, 150, 21, 16))
self.label_21.setObjectName("label_21")
self.label_22 = QtWidgets.QLabel(self.tab_2)
self.label_22.setGeometry(QtCore.QRect(20, 200, 21, 16))
self.label_22.setObjectName("label_22")
self.label_27 = QtWidgets.QLabel(self.tab_2)
self.label_27.setGeometry(QtCore.QRect(20, 100, 31, 16))
self.label_27.setObjectName("label_27")
self.line = QtWidgets.QFrame(self.tab_2)
self.line.setGeometry(QtCore.QRect(470, 60, 20, 581))
self.line.setFrameShape(QtWidgets.QFrame.VLine)
self.line.setFrameShadow(QtWidgets.QFrame.Sunken)
self.line.setObjectName("line")
self.pushButton_47 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_47.setGeometry(QtCore.QRect(340, 370, 93, 28))
self.pushButton_47.setObjectName("pushButton_47")
self.label_17 = QtWidgets.QLabel(self.tab_2)
self.label_17.setGeometry(QtCore.QRect(50, 340, 31, 16))
self.label_17.setObjectName("label_17")
self.textBrowser_5 = QtWidgets.QTextBrowser(self.tab_2)
self.textBrowser_5.setGeometry(QtCore.QRect(90, 330, 141, 31))
self.textBrowser_5.setObjectName("textBrowser_5")
self.pushButton_13 = QtWidgets.QPushButton(self.tab_2)
self.pushButton_13.setGeometry(QtCore.QRect(340, 190, 93, 28))
self.pushButton_13.setObjectName("pushButton_13")
self.tabWidget.addTab(self.tab_2, "")
self.tab_3 = QtWidgets.QWidget()
self.tab_3.setObjectName("tab_3")
self.label_29 = QtWidgets.QLabel(self.tab_3)
self.label_29.setGeometry(QtCore.QRect(420, 0, 200, 250))
self.label_29.setFrameShape(QtWidgets.QFrame.Box)
self.label_29.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_29.setObjectName("label_29")
self.label_31 = QtWidgets.QLabel(self.tab_3)
self.label_31.setGeometry(QtCore.QRect(850, 0, 200, 250))
self.label_31.setCursor(QtGui.QCursor(QtCore.Qt.CrossCursor))
self.label_31.setFrameShape(QtWidgets.QFrame.Box)
self.label_31.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_31.setObjectName("label_31")
self.label_33 = QtWidgets.QLabel(self.tab_3)
self.label_33.setGeometry(QtCore.QRect(640, 0, 200, 250))
self.label_33.setFrameShape(QtWidgets.QFrame.Box)
self.label_33.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_33.setObjectName("label_33")
self.label_35 = QtWidgets.QLabel(self.tab_3)
self.label_35.setGeometry(QtCore.QRect(0, 0, 200, 250))
self.label_35.setFrameShape(QtWidgets.QFrame.Box)
self.label_35.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_35.setObjectName("label_35")
self.label_36 = QtWidgets.QLabel(self.tab_3)
self.label_36.setGeometry(QtCore.QRect(210, 0, 200, 250))
self.label_36.setCursor(QtGui.QCursor(QtCore.Qt.CrossCursor))
self.label_36.setFrameShape(QtWidgets.QFrame.Box)
self.label_36.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_36.setObjectName("label_36")
self.lineEdit_20 = QtWidgets.QLineEdit(self.tab_3)
self.lineEdit_20.setGeometry(QtCore.QRect(790, 300, 30, 20))
self.lineEdit_20.setObjectName("lineEdit_20")
self.pushButton_19 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_19.setGeometry(QtCore.QRect(130, 290, 93, 28))
self.pushButton_19.setObjectName("pushButton_19")
self.pushButton_21 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_21.setGeometry(QtCore.QRect(10, 290, 93, 28))
self.pushButton_21.setObjectName("pushButton_21")
self.pushButton_22 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_22.setGeometry(QtCore.QRect(130, 330, 93, 28))
self.pushButton_22.setObjectName("pushButton_22")
self.lineEdit_27 = QtWidgets.QLineEdit(self.tab_3)
self.lineEdit_27.setGeometry(QtCore.QRect(835, 300, 30, 20))
self.lineEdit_27.setObjectName("lineEdit_27")
self.pushButton_24 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_24.setGeometry(QtCore.QRect(10, 330, 93, 28))
self.pushButton_24.setObjectName("pushButton_24")
self.pushButton_20 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_20.setGeometry(QtCore.QRect(10, 440, 93, 28))
self.pushButton_20.setObjectName("pushButton_20")
self.label_20 = QtWidgets.QLabel(self.tab_3)
self.label_20.setGeometry(QtCore.QRect(10, 260, 72, 15))
self.label_20.setObjectName("label_20")
self.label_23 = QtWidgets.QLabel(self.tab_3)
self.label_23.setGeometry(QtCore.QRect(10, 410, 72, 15))
self.label_23.setObjectName("label_23")
self.pushButton_48 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_48.setGeometry(QtCore.QRect(130, 440, 93, 28))
self.pushButton_48.setObjectName("pushButton_48")
self.pushButton_49 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_49.setGeometry(QtCore.QRect(10, 480, 93, 28))
self.pushButton_49.setObjectName("pushButton_49")
self.label_24 = QtWidgets.QLabel(self.tab_3)
self.label_24.setGeometry(QtCore.QRect(790, 260, 131, 16))
self.label_24.setObjectName("label_24")
self.label_25 = QtWidgets.QLabel(self.tab_3)
self.label_25.setGeometry(QtCore.QRect(825, 305, 10, 10))
self.label_25.setObjectName("label_25")
self.pushButton_50 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_50.setGeometry(QtCore.QRect(790, 340, 51, 28))
self.pushButton_50.setObjectName("pushButton_50")
self.pushButton_51 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_51.setGeometry(QtCore.QRect(790, 380, 51, 28))
self.pushButton_51.setObjectName("pushButton_51")
self.pushButton_52 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_52.setGeometry(QtCore.QRect(870, 340, 51, 28))
self.pushButton_52.setObjectName("pushButton_52")
self.pushButton_53 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_53.setGeometry(QtCore.QRect(870, 380, 51, 28))
self.pushButton_53.setObjectName("pushButton_53")
self.pushButton_23 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_23.setGeometry(QtCore.QRect(790, 480, 131, 28))
self.pushButton_23.setObjectName("pushButton_23")
self.pushButton_54 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_54.setGeometry(QtCore.QRect(630, 430, 131, 28))
self.pushButton_54.setObjectName("pushButton_54")
self.pushButton_55 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_55.setGeometry(QtCore.QRect(790, 430, 131, 28))
self.pushButton_55.setObjectName("pushButton_55")
self.pushButton_56 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_56.setGeometry(QtCore.QRect(630, 480, 131, 28))
self.pushButton_56.setObjectName("pushButton_56")
self.radioButton_2 = QtWidgets.QRadioButton(self.tab_3)
self.radioButton_2.setGeometry(QtCore.QRect(950, 310, 115, 19))
self.radioButton_2.setAutoRepeat(True)
self.radioButton_2.setAutoExclusive(False)
self.radioButton_2.setObjectName("radioButton_2")
self.pushButton_58 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_58.setGeometry(QtCore.QRect(950, 430, 93, 28))
self.pushButton_58.setObjectName("pushButton_58")
self.pushButton_59 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_59.setGeometry(QtCore.QRect(950, 480, 93, 28))
self.pushButton_59.setObjectName("pushButton_59")
self.pushButton_61 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_61.setGeometry(QtCore.QRect(140, 540, 93, 28))
self.pushButton_61.setObjectName("pushButton_61")
self.pushButton_62 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_62.setGeometry(QtCore.QRect(10, 590, 93, 28))
self.pushButton_62.setObjectName("pushButton_62")
self.pushButton_63 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_63.setGeometry(QtCore.QRect(950, 530, 93, 28))
self.pushButton_63.setObjectName("pushButton_63")
self.pushButton_64 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_64.setGeometry(QtCore.QRect(130, 250, 93, 28))
self.pushButton_64.setObjectName("pushButton_64")
self.pushButton_65 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_65.setGeometry(QtCore.QRect(640, 250, 93, 28))
self.pushButton_65.setObjectName("pushButton_65")
self.line_2 = QtWidgets.QFrame(self.tab_3)
self.line_2.setGeometry(QtCore.QRect(620, -30, 20, 731))
self.line_2.setFrameShape(QtWidgets.QFrame.VLine)
self.line_2.setFrameShadow(QtWidgets.QFrame.Sunken)
self.line_2.setObjectName("line_2")
self.label_26 = QtWidgets.QLabel(self.tab_3)
self.label_26.setGeometry(QtCore.QRect(310, 300, 241, 131))
self.label_26.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop)
self.label_26.setWordWrap(True)
self.label_26.setObjectName("label_26")
self.label_28 = QtWidgets.QLabel(self.tab_3)
self.label_28.setGeometry(QtCore.QRect(310, 440, 241, 211))
self.label_28.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop)
self.label_28.setWordWrap(True)
self.label_28.setObjectName("label_28")
self.pushButton_66 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_66.setGeometry(QtCore.QRect(250, 250, 93, 28))
self.pushButton_66.setObjectName("pushButton_66")
self.pushButton_57 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_57.setGeometry(QtCore.QRect(10, 540, 93, 28))
self.pushButton_57.setObjectName("pushButton_57")
self.pushButton_60 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_60.setGeometry(QtCore.QRect(630, 540, 131, 28))
self.pushButton_60.setObjectName("pushButton_60")
self.lineEdit_11 = QtWidgets.QLineEdit(self.tab_3)
self.lineEdit_11.setGeometry(QtCore.QRect(690, 590, 71, 21))
self.lineEdit_11.setText("")
self.lineEdit_11.setObjectName("lineEdit_11")
self.lineEdit_12 = QtWidgets.QLineEdit(self.tab_3)
self.lineEdit_12.setGeometry(QtCore.QRect(730, 650, 91, 21))
self.lineEdit_12.setText("")
self.lineEdit_12.setObjectName("lineEdit_12")
self.lineEdit_13 = QtWidgets.QLineEdit(self.tab_3)
self.lineEdit_13.setGeometry(QtCore.QRect(860, 590, 91, 21))
self.lineEdit_13.setText("")
self.lineEdit_13.setObjectName("lineEdit_13")
self.pushButton_67 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_67.setGeometry(QtCore.QRect(640, 290, 93, 28))
self.pushButton_67.setObjectName("pushButton_67")
self.pushButton_68 = QtWidgets.QPushButton(self.tab_3)
self.pushButton_68.setGeometry(QtCore.QRect(640, 330, 93, 28))
self.pushButton_68.setObjectName("pushButton_68")
self.label_61 = QtWidgets.QLabel(self.tab_3)
self.label_61.setGeometry(QtCore.QRect(640, 590, 31, 21))
self.label_61.setObjectName("label_61")
self.label_62 = QtWidgets.QLabel(self.tab_3)
self.label_62.setGeometry(QtCore.QRect(770, 590, 72, 21))
self.label_62.setObjectName("label_62")
self.label_63 = QtWidgets.QLabel(self.tab_3)
self.label_63.setGeometry(QtCore.QRect(640, 650, 72, 21))
self.label_63.setObjectName("label_63")
self.tabWidget.addTab(self.tab_3, "")
self.tab_4 = QtWidgets.QWidget()
self.tab_4.setObjectName("tab_4")
self.label_43 = QtWidgets.QLabel(self.tab_4)
self.label_43.setGeometry(QtCore.QRect(280, 60, 240, 240))
self.label_43.setFrameShape(QtWidgets.QFrame.Box)
self.label_43.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_43.setText("")
self.label_43.setObjectName("label_43")
self.pushButton_25 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_25.setGeometry(QtCore.QRect(0, 0, 80, 30))
self.pushButton_25.setObjectName("pushButton_25")
self.pushButton_30 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_30.setGeometry(QtCore.QRect(270, 0, 80, 30))
self.pushButton_30.setObjectName("pushButton_30")
self.pushButton_26 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_26.setGeometry(QtCore.QRect(110, 0, 131, 30))
self.pushButton_26.setObjectName("pushButton_26")
self.label_30 = QtWidgets.QLabel(self.tab_4)
self.label_30.setGeometry(QtCore.QRect(560, 60, 240, 240))
self.label_30.setFrameShape(QtWidgets.QFrame.Box)
self.label_30.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_30.setText("")
self.label_30.setObjectName("label_30")
self.label_32 = QtWidgets.QLabel(self.tab_4)
self.label_32.setGeometry(QtCore.QRect(280, 400, 240, 240))
self.label_32.setFrameShape(QtWidgets.QFrame.Box)
self.label_32.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_32.setText("")
self.label_32.setObjectName("label_32")
self.label_34 = QtWidgets.QLabel(self.tab_4)
self.label_34.setGeometry(QtCore.QRect(560, 400, 240, 240))
self.label_34.setFrameShape(QtWidgets.QFrame.Box)
self.label_34.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_34.setText("")
self.label_34.setObjectName("label_34")
self.label_37 = QtWidgets.QLabel(self.tab_4)
self.label_37.setGeometry(QtCore.QRect(100, 40, 72, 15))
self.label_37.setObjectName("label_37")
self.label_39 = QtWidgets.QLabel(self.tab_4)
self.label_39.setGeometry(QtCore.QRect(280, 40, 91, 15))
self.label_39.setObjectName("label_39")
self.label_38 = QtWidgets.QLabel(self.tab_4)
self.label_38.setGeometry(QtCore.QRect(560, 40, 101, 15))
self.label_38.setObjectName("label_38")
self.label_40 = QtWidgets.QLabel(self.tab_4)
self.label_40.setGeometry(QtCore.QRect(0, 380, 81, 15))
self.label_40.setObjectName("label_40")
self.label_41 = QtWidgets.QLabel(self.tab_4)
self.label_41.setGeometry(QtCore.QRect(280, 380, 131, 15))
self.label_41.setObjectName("label_41")
self.label_42 = QtWidgets.QLabel(self.tab_4)
self.label_42.setGeometry(QtCore.QRect(560, 380, 111, 15))
self.label_42.setObjectName("label_42")
self.pushButton_27 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_27.setGeometry(QtCore.QRect(420, 30, 101, 28))
self.pushButton_27.setObjectName("pushButton_27")
self.pushButton_28 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_28.setGeometry(QtCore.QRect(660, 30, 111, 30))
self.pushButton_28.setObjectName("pushButton_28")
self.pushButton_29 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_29.setGeometry(QtCore.QRect(90, 370, 93, 28))
self.pushButton_29.setObjectName("pushButton_29")
self.pushButton_69 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_69.setGeometry(QtCore.QRect(400, 370, 111, 28))
self.pushButton_69.setObjectName("pushButton_69")
self.pushButton_70 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_70.setGeometry(QtCore.QRect(660, 370, 93, 28))
self.pushButton_70.setObjectName("pushButton_70")
self.label_49 = QtWidgets.QLabel(self.tab_4)
self.label_49.setGeometry(QtCore.QRect(840, 60, 240, 240))
self.label_49.setFrameShape(QtWidgets.QFrame.Box)
self.label_49.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_49.setText("")
self.label_49.setObjectName("label_49")
self.label_50 = QtWidgets.QLabel(self.tab_4)
self.label_50.setGeometry(QtCore.QRect(840, 400, 240, 240))
self.label_50.setFrameShape(QtWidgets.QFrame.Box)
self.label_50.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_50.setText("")
self.label_50.setObjectName("label_50")
self.label_51 = QtWidgets.QLabel(self.tab_4)
self.label_51.setGeometry(QtCore.QRect(840, 380, 72, 15))
self.label_51.setObjectName("label_51")
self.label_52 = QtWidgets.QLabel(self.tab_4)
self.label_52.setGeometry(QtCore.QRect(840, 40, 72, 15))
self.label_52.setObjectName("label_52")
self.pushButton_44 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_44.setGeometry(QtCore.QRect(930, 30, 93, 28))
self.pushButton_44.setObjectName("pushButton_44")
self.pushButton_45 = QtWidgets.QPushButton(self.tab_4)
self.pushButton_45.setGeometry(QtCore.QRect(920, 370, 93, 28))
self.pushButton_45.setObjectName("pushButton_45")
self.label_53 = QtWidgets.QLabel(self.tab_4)
self.label_53.setGeometry(QtCore.QRect(0, 60, 240, 240))
self.label_53.setFrameShape(QtWidgets.QFrame.Box)
self.label_53.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_53.setText("")
self.label_53.setObjectName("label_53")
self.label_58 = QtWidgets.QLabel(self.tab_4)
self.label_58.setGeometry(QtCore.QRect(0, 400, 240, 240))
self.label_58.setFrameShape(QtWidgets.QFrame.Box)
self.label_58.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_58.setText("")
self.label_58.setObjectName("label_58")
self.tabWidget.addTab(self.tab_4, "")
self.tab_5 = QtWidgets.QWidget()
self.tab_5.setAccessibleDescription("")
self.tab_5.setObjectName("tab_5")
self.label_46 = QtWidgets.QLabel(self.tab_5)
self.label_46.setGeometry(QtCore.QRect(30, 80, 481, 471))
self.label_46.setFrameShape(QtWidgets.QFrame.Box)
self.label_46.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_46.setText("")
self.label_46.setObjectName("label_46")
self.label_48 = QtWidgets.QLabel(self.tab_5)
self.label_48.setGeometry(QtCore.QRect(530, 230, 531, 471))
self.label_48.setFrameShape(QtWidgets.QFrame.Box)
self.label_48.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_48.setText("")
self.label_48.setObjectName("label_48")
self.pushButton_31 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_31.setGeometry(QtCore.QRect(10, 20, 90, 28))
self.pushButton_31.setObjectName("pushButton_31")
self.pushButton_37 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_37.setGeometry(QtCore.QRect(230, 20, 90, 28))
self.pushButton_37.setObjectName("pushButton_37")
self.pushButton_39 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_39.setGeometry(QtCore.QRect(120, 20, 90, 28))
self.pushButton_39.setObjectName("pushButton_39")
self.pushButton_33 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_33.setGeometry(QtCore.QRect(530, 80, 160, 41))
self.pushButton_33.setObjectName("pushButton_33")
self.pushButton_35 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_35.setGeometry(QtCore.QRect(700, 80, 160, 41))
self.pushButton_35.setObjectName("pushButton_35")
self.pushButton_36 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_36.setGeometry(QtCore.QRect(870, 80, 160, 41))
self.pushButton_36.setObjectName("pushButton_36")
self.pushButton_40 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_40.setGeometry(QtCore.QRect(340, 20, 90, 28))
self.pushButton_40.setObjectName("pushButton_40")
self.pushButton_46 = QtWidgets.QPushButton(self.tab_5)
self.pushButton_46.setGeometry(QtCore.QRect(450, 20, 90, 28))
self.pushButton_46.setObjectName("pushButton_46")
self.tabWidget.addTab(self.tab_5, "")
self.tab_6 = QtWidgets.QWidget()
self.tab_6.setObjectName("tab_6")
self.label_44 = QtWidgets.QLabel(self.tab_6)
self.label_44.setGeometry(QtCore.QRect(10, 40, 350, 350))
self.label_44.setFrameShape(QtWidgets.QFrame.Box)
self.label_44.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_44.setObjectName("label_44")
self.label_45 = QtWidgets.QLabel(self.tab_6)
self.label_45.setGeometry(QtCore.QRect(380, 40, 350, 350))
self.label_45.setFrameShape(QtWidgets.QFrame.Box)
self.label_45.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_45.setObjectName("label_45")
self.label_47 = QtWidgets.QLabel(self.tab_6)
self.label_47.setGeometry(QtCore.QRect(750, 40, 350, 350))
self.label_47.setFrameShape(QtWidgets.QFrame.Box)
self.label_47.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_47.setObjectName("label_47")
self.pushButton_34 = QtWidgets.QPushButton(self.tab_6)
self.pushButton_34.setGeometry(QtCore.QRect(40, 480, 93, 28))
self.pushButton_34.setObjectName("pushButton_34")
self.pushButton_38 = QtWidgets.QPushButton(self.tab_6)
self.pushButton_38.setGeometry(QtCore.QRect(40, 550, 93, 28))
self.pushButton_38.setObjectName("pushButton_38")
self.pushButton_41 = QtWidgets.QPushButton(self.tab_6)
self.pushButton_41.setGeometry(QtCore.QRect(180, 480, 93, 28))
self.pushButton_41.setObjectName("pushButton_41")
self.pushButton_42 = QtWidgets.QPushButton(self.tab_6)
self.pushButton_42.setGeometry(QtCore.QRect(180, 550, 93, 28))
self.pushButton_42.setObjectName("pushButton_42")
self.pushButton_43 = QtWidgets.QPushButton(self.tab_6)
self.pushButton_43.setGeometry(QtCore.QRect(40, 620, 93, 28))
self.pushButton_43.setObjectName("pushButton_43")
self.progressBar = QtWidgets.QProgressBar(self.tab_6)
self.progressBar.setGeometry(QtCore.QRect(750, 420, 291, 31))
self.progressBar.setProperty("value", 24)
self.progressBar.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter)
self.progressBar.setTextVisible(True)
self.progressBar.setObjectName("progressBar")
self.label_60 = QtWidgets.QLabel(self.tab_6)
self.label_60.setGeometry(QtCore.QRect(330, 480, 371, 111))
self.label_60.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop)
self.label_60.setWordWrap(True)
self.label_60.setObjectName("label_60")
self.textBrowser_8 = QtWidgets.QTextBrowser(self.tab_6)
self.textBrowser_8.setGeometry(QtCore.QRect(190, 620, 411, 33))
self.textBrowser_8.setAcceptDrops(False)
self.textBrowser_8.setObjectName("textBrowser_8")
self.tabWidget.addTab(self.tab_6, "")
self.tab_7 = QtWidgets.QWidget()
self.tab_7.setEnabled(True)
self.tab_7.setObjectName("tab_7")
self.label_54 = QtWidgets.QLabel(self.tab_7)
self.label_54.setGeometry(QtCore.QRect(30, 60, 480, 500))
self.label_54.setFrameShape(QtWidgets.QFrame.Box)
self.label_54.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_54.setText("")
self.label_54.setObjectName("label_54")
self.label_55 = QtWidgets.QLabel(self.tab_7)
self.label_55.setGeometry(QtCore.QRect(550, 60, 480, 500))
self.label_55.setFrameShape(QtWidgets.QFrame.Box)
self.label_55.setFrameShadow(QtWidgets.QFrame.Sunken)
self.label_55.setText("")
self.label_55.setObjectName("label_55")
self.pushButton_32 = QtWidgets.QPushButton(self.tab_7)
self.pushButton_32.setEnabled(True)
self.pushButton_32.setGeometry(QtCore.QRect(50, 590, 93, 28))
self.pushButton_32.setObjectName("pushButton_32")
self.pushButton_71 = QtWidgets.QPushButton(self.tab_7)
self.pushButton_71.setEnabled(True)
self.pushButton_71.setGeometry(QtCore.QRect(180, 590, 93, 28))
self.pushButton_71.setAutoFillBackground(False)
self.pushButton_71.setCheckable(False)
self.pushButton_71.setObjectName("pushButton_71")
self.pushButton_72 = QtWidgets.QPushButton(self.tab_7)
self.pushButton_72.setGeometry(QtCore.QRect(50, 640, 93, 28))
self.pushButton_72.setObjectName("pushButton_72")
self.pushButton_73 = QtWidgets.QPushButton(self.tab_7)
self.pushButton_73.setGeometry(QtCore.QRect(310, 640, 93, 28))
self.pushButton_73.setObjectName("pushButton_73")
self.pushButton_74 = QtWidgets.QPushButton(self.tab_7)
self.pushButton_74.setGeometry(QtCore.QRect(180, 640, 93, 28))
self.pushButton_74.setObjectName("pushButton_74")
self.pushButton_75 = QtWidgets.QPushButton(self.tab_7)
self.pushButton_75.setGeometry(QtCore.QRect(310, 590, 93, 28))
self.pushButton_75.setObjectName("pushButton_75")
self.label_56 = QtWidgets.QLabel(self.tab_7)
self.label_56.setGeometry(QtCore.QRect(440, 590, 101, 28))
self.label_56.setObjectName("label_56")
self.textBrowser_6 = QtWidgets.QTextBrowser(self.tab_7)
self.textBrowser_6.setGeometry(QtCore.QRect(550, 590, 411, 28))
self.textBrowser_6.setAcceptDrops(False)
self.textBrowser_6.setObjectName("textBrowser_6")
self.label_57 = QtWidgets.QLabel(self.tab_7)
self.label_57.setGeometry(QtCore.QRect(440, 640, 111, 28))
self.label_57.setObjectName("label_57")
self.textBrowser_7 = QtWidgets.QTextBrowser(self.tab_7)
self.textBrowser_7.setGeometry(QtCore.QRect(550, 640, 411, 28))
self.textBrowser_7.setObjectName("textBrowser_7")
self.label_59 = QtWidgets.QLabel(self.tab_7)
self.label_59.setGeometry(QtCore.QRect(30, 10, 961, 41))
self.label_59.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop)
self.label_59.setWordWrap(True)
self.label_59.setObjectName("label_59")
self.tabWidget.addTab(self.tab_7, "")
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtWidgets.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 1077, 26))
self.menubar.setObjectName("menubar")
self.menuun1 = QtWidgets.QMenu(self.menubar)
self.menuun1.setObjectName("menuun1")
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.menubar.addAction(self.menuun1.menuAction())
self.retranslateUi(MainWindow)
self.tabWidget.setCurrentIndex(0)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.pushButton_3.setText(_translate("MainWindow", "查看图像"))
self.label_2.setText(_translate("MainWindow", "y"))
self.label.setText(_translate("MainWindow", "x"))
self.label_3.setText(_translate("MainWindow", "y"))
self.pushButton_5.setText(_translate("MainWindow", "保存图像"))
self.pushButton.setText(_translate("MainWindow", "选择图像"))
self.label_4.setText(_translate("MainWindow", "x"))
self.label_7.setText(_translate("MainWindow", "大小"))
self.label_8.setText(_translate("MainWindow", "RGB(灰度)"))
self.label_9.setText(_translate("MainWindow", "比例平移(%)"))
self.pushButton_6.setText(_translate("MainWindow", "应用"))
self.label_11.setText(_translate("MainWindow", "像素平移(pixel)"))
self.pushButton_7.setText(_translate("MainWindow", "应用"))
self.label_12.setText(_translate("MainWindow", "逆时针旋转(°)"))
self.label_13.setText(_translate("MainWindow", "比例缩放(%)"))
self.pushButton_8.setText(_translate("MainWindow", "应用"))
self.pushButton_9.setText(_translate("MainWindow", "应用"))
self.label_15.setText(_translate("MainWindow", "坐标"))
self.pushButton_2.setText(_translate("MainWindow", "重置图像"))
self.pushButton_4.setText(_translate("MainWindow", "清空图像"))
self.label_5.setText(_translate("MainWindow", "仿射变换(matrix)"))
self.label_6.setText(_translate("MainWindow", "x1"))
self.label_14.setText(_translate("MainWindow", "x2"))
self.label_64.setText(_translate("MainWindow", "y1"))
self.label_65.setText(_translate("MainWindow", "y2"))
self.pushButton_10.setText(_translate("MainWindow", "应用"))
self.label_66.setText(_translate("MainWindow", "z1"))
self.label_67.setText(_translate("MainWindow", "z2"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", "1.图像的基本操作"))
self.label_18.setText(_translate("MainWindow", " 直方图"))
self.pushButton_11.setText(_translate("MainWindow", "选择图像"))
self.pushButton_12.setText(_translate("MainWindow", "全局统计"))
self.pushButton_14.setText(_translate("MainWindow", "查看"))
self.pushButton_15.setText(_translate("MainWindow", "直方图均衡"))
self.pushButton_16.setText(_translate("MainWindow", "CLAHE"))
self.pushButton_17.setText(_translate("MainWindow", "清空"))
self.pushButton_18.setText(_translate("MainWindow", "重置"))
self.label_19.setText(_translate("MainWindow", "Mean"))
self.label_21.setText(_translate("MainWindow", "x"))
self.label_22.setText(_translate("MainWindow", "y"))
self.label_27.setText(_translate("MainWindow", "范围"))
self.pushButton_47.setText(_translate("MainWindow", "保存图像"))
self.label_17.setText(_translate("MainWindow", "Std"))
self.pushButton_13.setText(_translate("MainWindow", "局部统计"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_2), _translate("MainWindow", "2.直方图统计与均衡"))
self.label_29.setText(_translate("MainWindow", " result"))
self.label_31.setText(_translate("MainWindow", " result"))
self.label_33.setText(_translate("MainWindow", " pic 3"))
self.label_35.setText(_translate("MainWindow", " pic 1"))
self.label_36.setText(_translate("MainWindow", " pic 2"))
self.pushButton_19.setText(_translate("MainWindow", "SUB"))
self.pushButton_21.setText(_translate("MainWindow", "ADD"))
self.pushButton_22.setText(_translate("MainWindow", "DIVIDE"))
self.pushButton_24.setText(_translate("MainWindow", "MULTI"))
self.pushButton_20.setText(_translate("MainWindow", ""))
self.label_20.setText(_translate("MainWindow", "算术运算"))
self.label_23.setText(_translate("MainWindow", "逻辑运算"))
self.pushButton_48.setText(_translate("MainWindow", ""))
self.pushButton_49.setText(_translate("MainWindow", ""))
self.label_24.setText(_translate("MainWindow", "结构元参数初始化"))
self.label_25.setText(_translate("MainWindow", "*"))
self.pushButton_50.setText(_translate("MainWindow", "腐蚀"))
self.pushButton_51.setText(_translate("MainWindow", "膨胀"))
self.pushButton_52.setText(_translate("MainWindow", ""))
self.pushButton_53.setText(_translate("MainWindow", ""))
self.pushButton_23.setText(_translate("MainWindow", "高斯滤波"))
self.pushButton_54.setText(_translate("MainWindow", "拉普拉斯滤波"))
self.pushButton_55.setText(_translate("MainWindow", "均值滤波"))
self.pushButton_56.setText(_translate("MainWindow", "中值滤波"))
self.radioButton_2.setText(_translate("MainWindow", "作用于结果图"))
self.pushButton_58.setText(_translate("MainWindow", "清空"))
self.pushButton_59.setText(_translate("MainWindow", "保存"))
self.pushButton_61.setText(_translate("MainWindow", "保存"))
self.pushButton_62.setText(_translate("MainWindow", "查看"))
self.pushButton_63.setText(_translate("MainWindow", "查看"))
self.pushButton_64.setText(_translate("MainWindow", "选择图像1"))
self.pushButton_65.setText(_translate("MainWindow", "选择图像"))
self.label_26.setText(_translate("MainWindow", " 左侧部分包括了图片的算术运算和逻辑运算:其中算术运算包括了加减乘除四种运算,逻辑运算包括了与、或、非三种运算。对于其中的双目运算符,需要选择PIC1和PIC2,合成出新的图片RESULT;对于单目运算符,会忽略PIC2,直接操作PIC1。"))
self.label_28.setText(_translate("MainWindow", " 右侧部分包括了图像结构元和算子、滤波的单目运算。运算前需要导入图像,并输入结构元初始化参数规格,选择是否添加padding,以及在原图还是在现有图片上操作。最后选择滤波按钮,便可得到RESULT图片。"))
self.pushButton_66.setText(_translate("MainWindow", "选择图像2"))
self.pushButton_57.setText(_translate("MainWindow", "清空"))
self.pushButton_60.setText(_translate("MainWindow", "双边滤波"))
self.pushButton_67.setText(_translate("MainWindow", "高斯噪声"))
self.pushButton_68.setText(_translate("MainWindow", "椒盐噪声"))
self.label_61.setText(_translate("MainWindow", "半径"))
self.label_62.setText(_translate("MainWindow", "色彩标准差"))
self.label_63.setText(_translate("MainWindow", "空间标准差"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_3), _translate("MainWindow", "3.基本运算与空域运算"))
self.pushButton_25.setText(_translate("MainWindow", "选择图像"))
self.pushButton_30.setText(_translate("MainWindow", "清空"))
self.pushButton_26.setText(_translate("MainWindow", "提取边缘与直线"))
self.label_37.setText(_translate("MainWindow", "原图"))
self.label_39.setText(_translate("MainWindow", "Roberts算子"))
self.label_38.setText(_translate("MainWindow", "Prewitt算子"))
self.label_40.setText(_translate("MainWindow", "Sobel算子"))
self.label_41.setText(_translate("MainWindow", "Laplacian算子"))
self.label_42.setText(_translate("MainWindow", "Hough直线提取"))
self.pushButton_27.setText(_translate("MainWindow", "保存Roberts"))
self.pushButton_28.setText(_translate("MainWindow", "保存Prewitt"))
self.pushButton_29.setText(_translate("MainWindow", "保存Sobel"))
self.pushButton_69.setText(_translate("MainWindow", "保存Laplacian"))
self.pushButton_70.setText(_translate("MainWindow", "保存Hough"))
self.label_51.setText(_translate("MainWindow", "Canny算子"))
self.label_52.setText(_translate("MainWindow", "Log算子"))
self.pushButton_44.setText(_translate("MainWindow", "保存Log"))
self.pushButton_45.setText(_translate("MainWindow", "保存Canny"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_4), _translate("MainWindow", "4.直线检测与边缘检测"))
self.pushButton_31.setText(_translate("MainWindow", "选择图像"))
self.pushButton_37.setText(_translate("MainWindow", "保存图像"))
self.pushButton_39.setText(_translate("MainWindow", "清空"))
self.pushButton_33.setText(_translate("MainWindow", "理想低通滤波器"))
self.pushButton_35.setText(_translate("MainWindow", "高斯高通滤波器"))
self.pushButton_36.setText(_translate("MainWindow", "拉普拉斯低通滤波器"))
self.pushButton_40.setText(_translate("MainWindow", "原图"))
self.pushButton_46.setText(_translate("MainWindow", "查看图像"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_5), _translate("MainWindow", "5.图像频域运算"))
self.label_44.setText(_translate("MainWindow", "原图"))
self.label_45.setText(_translate("MainWindow", "风格图"))
self.label_47.setText(_translate("MainWindow", "结果图"))
self.pushButton_34.setText(_translate("MainWindow", "选择图像"))
self.pushButton_38.setText(_translate("MainWindow", "清空"))
self.pushButton_41.setText(_translate("MainWindow", "选择风格图"))
self.pushButton_42.setText(_translate("MainWindow", "风格迁移"))
self.pushButton_43.setText(_translate("MainWindow", "保存"))
self.progressBar.setFormat(_translate("MainWindow", "loading...%"))
self.label_60.setText(_translate("MainWindow", " 在进行风格迁移前,请先选择图像和风格图,并点击“风格迁移”。由于网络骨干为VGG16,规模较为庞大,训练速度较慢,请等待半分钟到一分钟即可(RTX2060 6G)。"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_6), _translate("MainWindow", "6.风格迁移"))
self.pushButton_32.setText(_translate("MainWindow", "选择图像"))
self.pushButton_71.setText(_translate("MainWindow", "选择模型"))
self.pushButton_72.setText(_translate("MainWindow", "目标检测"))
self.pushButton_73.setText(_translate("MainWindow", "查看结果"))
self.pushButton_74.setText(_translate("MainWindow", "清空"))
self.pushButton_75.setText(_translate("MainWindow", "选择保存路径"))
self.label_56.setText(_translate("MainWindow", "当前模型路径:"))
self.label_57.setText(_translate("MainWindow", "当前保存路径:"))
self.label_59.setText(_translate("MainWindow", " 操作指南:先选择一幅需要进行目标检测和分类的图片(性别检测),然后选择本地模型(pt_file中的.pt文件是已训练好的预训练模型)。此外还需要选择一个本地文件夹用于保存图片(图片会以/exp/originalname的形式保存)。等待一小会后便会自动刷新结果。"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_7), _translate("MainWindow", "7.目标检测"))
self.menuun1.setTitle(_translate("MainWindow", "数字图像处理"))

2535
ui.ui

File diff suppressed because it is too large

@ -0,0 +1,246 @@
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
import os
# Unit 1: basic image operations (load/save, translate, scale, rotate, affine) for the first tab.
def unit1_img_load(self):
self.img = np.ndarray(())
self.imgOrg = np.ndarray(())
self.imgShow = np.ndarray(())
self.w = 0
self.h = 0
self.channel = 1
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.img = cv2.imread(fileName, -1)
if self.img is None or self.img.size <= 1:  # cv2.imread returns None on failure
self.img = np.ndarray(())
return
print(self.img.shape)
self.imgOrg = self.img.copy()
if len(self.img.shape) == 3:
self.channel = 3
if self.img.shape[2] == 4:
self.img = cv2.cvtColor(self.img, cv2.COLOR_BGRA2BGR)
print(self.img.shape)
img_refresh(self)
def unit1_img_reset(self):
if self.img.size > 1:
self.img = self.imgOrg.copy()  # restore a copy so the pristine original is preserved
img_refresh(self)
else:
msgbox = QMessageBox(QMessageBox.Warning, "没有图像", "请选择图像")
msgbox.exec_()
def unit1_img_show(self):
if self.img.size > 1:
cv2.imshow('Original pic', self.img)
cv2.waitKey(0)
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像 ')
msg_box.exec_()
def unit1_img_clear(self):
self.img = np.ndarray(())
self.imgOrg = np.ndarray(())
self.imgShow = np.ndarray(())
self.fname = ''
self.w = 0
self.h = 0
self.channel = 1
self.ui.textBrowser.setText('')
self.ui.textBrowser_3.setText('')
self.ui.textBrowser_4.setText('')
self.ui.label_10.setPixmap(QtGui.QPixmap(''))
def unit1_img_save(self):
if self.img.size>1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.img)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:'+fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def trans_by_rate(self):
if self.img.size>1:
x=self.ui.lineEdit.text()
y=self.ui.lineEdit_2.text()
if x and y:
x=float(x)/100
y=float(y)/100
M = np.float32([[1, 0, x * self.w], [0, 1, y * self.h]])
self.img = cv2.warpAffine(self.img, M, (self.w, self.h))
img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请正确输入')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def trans_by_pixel(self):
if self.img.size > 1:
x = self.ui.lineEdit_3.text()
y = self.ui.lineEdit_4.text()
if x and y:
try:
x = int(x)
y = int(y)
M = np.float32([[1, 0, x], [0, 1, y]])
self.img = cv2.warpAffine(self.img, M, (self.w, self.h))
img_refresh(self)
except:
msg_box = QMessageBox(QMessageBox.Warning, '异常', '请输入整数')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请正确输入')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def scale_by_rate(self):
if self.img.size>1:
scale = self.ui.lineEdit_10.text()
if scale:
scale = float(scale) / 100
x = int(self.w * scale)
y = int(self.h * scale)
self.img = cv2.resize(self.img, (x, y))
img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请正确输入')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def rotate(self):
if self.img.size > 1:
theta = self.ui.lineEdit_9.text()
if theta:
try:
theta = float(theta)
M = cv2.getRotationMatrix2D((self.w / 2, self.h / 2), theta, 1)  # rotation center is an (x, y) point
self.img = cv2.warpAffine(self.img, M, (self.w, self.h))
img_refresh(self)
except:
msg_box = QMessageBox(QMessageBox.Warning, '异常', '旋转异常,请检查参数')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请正确输入')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def affine_trans(self):
if self.img.size>1:
x1 = self.ui.lineEdit_5.text()
x2 = self.ui.lineEdit_6.text()
y1 = self.ui.lineEdit_7.text()
y2 = self.ui.lineEdit_8.text()
z1 = self.ui.lineEdit_43.text()
z2 = self.ui.lineEdit_44.text()
if x1 and x2 and y1 and y2 and z1 and z2:
try:
x1 = float(x1)
y1 = float(y1)
z1 = float(z1)
x2 = float(x2)
y2 = float(y2)
z2 = float(z2)
M = np.float32([[x1, y1, z1], [x2, y2, z2]])
self.img = cv2.warpAffine(self.img, M, (self.w, self.h))
img_refresh(self)
except:
msg_box = QMessageBox(QMessageBox.Warning, '异常', '仿射变换异常,请检查参数')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请正确输入')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
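# Letterbox self.img onto the 72:50 aspect ratio of label_10 (720x500 px), then display it as a scaled QPixmap.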
def img_refresh(self):
self.imgShow = self.img
self.h = self.imgShow.shape[0]
self.w = self.imgShow.shape[1]
self.ui.textBrowser.setText('%s×%s×%s' % (self.w, self.h, self.channel))
M = np.float32([[1, 0, 0], [0, 1, 0]])
if self.h / self.w == 50/72:
data = self.imgShow.tobytes()
if self.channel == 3:
image = QtGui.QImage(data, self.w, self.h, self.w * self.channel, QtGui.QImage.Format_BGR888)
else:
image = QtGui.QImage(data, self.w, self.h, self.w * self.channel, QtGui.QImage.Format_Grayscale8)
w_label = self.ui.label_10.width()
h_label = self.ui.label_10.height()
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(w_label, h_label)
self.ui.label_10.setPixmap(scale_pix)
return
elif self.h / self.w > 50 / 72:
h_ = self.h
w_ = int(self.h * 72 / 50 + 0.5)
M[0, 2] += (w_ - self.w) / 2
M[1, 2] += (h_ - self.h) / 2
else:
h_ = int(self.w * 50 / 72 + 0.5)
w_ = self.w
M[0, 2] += (w_ - self.w) / 2
M[1, 2] += (h_ - self.h) / 2
self.imgShow = cv2.warpAffine(self.imgShow, M, (w_, h_))
data = self.imgShow.tobytes()
if self.channel == 3:
image = QtGui.QImage(data, w_, h_, w_ * self.channel, QtGui.QImage.Format_BGR888)
else:
image = QtGui.QImage(data, w_, h_, w_ * self.channel, QtGui.QImage.Format_Grayscale8)
w_label = self.ui.label_10.width()
h_label = self.ui.label_10.height()
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(w_label, h_label)
self.ui.label_10.setPixmap(scale_pix)
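# Map a click on label_10 back to image pixel coordinates and report the position and its RGB/gray value.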
def mouseReleaseEvent(self, e):
if self.imgShow.size > 1:
h = self.imgShow.shape[0]
w = self.imgShow.shape[1]
c = self.channel
globalpos = e.globalPos()
pos = self.ui.label_10.mapFromGlobal(globalpos)
if pos.y() < 500 and pos.y() > 0 and pos.x() > 0 and pos.x() < 720:
x = int(pos.x() / 720 * w)
y = int(pos.y() / 500 * h)
self.ui.textBrowser_4.setText(' (%s, %s)' % (x, y))
if c == 3:
rgb = self.imgShow[y, x]
self.ui.textBrowser_3.setText(' R%s G%s B%s' % (rgb[2], rgb[1], rgb[0]))
else:
gray = self.imgShow[y, x]
self.ui.textBrowser_3.setText(' G %s' % gray)

@ -0,0 +1,251 @@
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qt5 import NavigationToolbar2QT as NavigationToolbar
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
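# Matplotlib canvas embedded as a Qt widget; SimHei is configured so the histogram figure can render Chinese labels.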
class pltFigure(FigureCanvas):
def __init__(self, parent=None, width=5, height=3, dpi=100):
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
self.fig = Figure(figsize=(width, height), dpi=dpi)
FigureCanvas.__init__(self, self.fig)
self.setParent(parent)
self.axes = self.fig.add_subplot(111)
FigureCanvas.setSizePolicy(self,
QSizePolicy.Expanding,
QSizePolicy.Expanding)
FigureCanvas.updateGeometry(self)
def init(self):
self.img2 = np.ndarray(())
self.img2Org = np.ndarray(())
self.img2Show = np.ndarray(())
self.channel2 = 1
self.fig1 = pltFigure(width=5, height=3, dpi=80)
self.fig_ntb1 = NavigationToolbar(self.fig1, self)
self.gridlayout1 = QGridLayout(self.ui.label_18)
self.gridlayout1.addWidget(self.fig1)
self.gridlayout1.addWidget(self.fig_ntb1)
def reinit(self):
self.img2 = np.ndarray(())
self.img2Org = np.ndarray(())
self.img2Show = np.ndarray(())
self.channel2 = 1
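# Load a grayscale image for the histogram tab and precompute its global mean and standard deviation.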
def unit2_img_load(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return  # do not wipe the current state when the dialog is cancelled
reinit(self)
self.img2 = cv2.imread(fileName, cv2.IMREAD_GRAYSCALE)
if self.img2 is None or self.img2.size <= 1:  # check before copying: imread returns None on failure
self.img2 = np.ndarray(())
return
self.img2Org = self.img2.copy()
self.gMean, self.gStd = cv2.meanStdDev(self.img2)
self.gMean = round(self.gMean[0][0], 3)
self.gStd = round(self.gStd[0][0], 3)
self.h2, self.w2 = self.img2.shape
print(self.img2.shape)
unit2_img_refresh(self)
def unit2_img_reset(self):
if self.img2.size > 1:
temp_img = self.img2Org
reinit(self)
self.img2 = temp_img
self.gMean, self.gStd = cv2.meanStdDev(self.img2)
self.gMean = round(self.gMean[0][0], 3)
self.gStd = round(self.gStd[0][0], 3)
self.h2, self.w2 = self.img2.shape
self.img2Org = self.img2.copy()
unit2_img_refresh(self)
hist = np.bincount(self.img2.ravel(), minlength=256)
hist_refresh(self, hist)
else:
msgbox = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msgbox.exec_()
def unit2_img_showNew(self):
if self.img2.size > 1:
cv2.imshow('Original pic', self.img2)
cv2.waitKey(0)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def unit2_img_save(self):
if self.img2.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.img2)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
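# Redraw the embedded histogram figure from a 256-bin count array.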
def hist_refresh(self,hist):
self.fig1.axes.cla()
self.fig1.axes.plot(hist)
self.fig1.draw()
def clahe(self):
if self.img2.size>1:
clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
self.img2 = clahe.apply(self.img2Org)
unit2_img_refresh(self)
hist = np.bincount(self.img2.ravel(), minlength=256)
hist_refresh(self, hist)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def eqHist(self):
if self.img2.size > 1:
self.img2 = cv2.equalizeHist(self.img2Org)
unit2_img_refresh(self)
hist = np.bincount(self.img2.ravel(), minlength=256)
hist_refresh(self, hist)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def globalH(self):
if self.img2.size>1:
hist = np.bincount(self.img2.ravel(), minlength=256)
hist_refresh(self,hist)
Mean, Std = cv2.meanStdDev(self.img2)
Mean = round(Mean[0][0], 3)
Std = round(Std[0][0], 3)
self.ui.textBrowser_2.setText('%s' % Mean)
self.ui.textBrowser_5.setText('%s' % Std)
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像 ')
msg_box.exec_()
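# Local statistics: histogram, mean and std over the rectangle whose corners were written into the x/y line edits by dragging.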
def localH(self):
x1 = self.ui.lineEdit9.text()
x2 = self.ui.lineEdit10.text()
y1 = self.ui.lineEdit_15.text()
y2 = self.ui.lineEdit_16.text()
if x1 and x2 and y1 and y2:
try:
x1 = int(x1)
x2 = int(x2)
y1 = int(y1)
y2 = int(y2)
a1, a2 = max(x1, x2), min(x1, x2)
b1, b2 = max(y1, y2), min(y1, y2)
img = self.img2[b2:b1 + 1, a2:a1 + 1]  # numpy images are indexed [row(y), col(x)]
hist = np.bincount(img.ravel(), minlength=256)
hist_refresh(self, hist)
Mean, Std = cv2.meanStdDev(img)
Mean = round(Mean[0][0], 3)
Std = round(Std[0][0],3)
self.ui.textBrowser_2.setText('%s' % Mean)
self.ui.textBrowser_5.setText('%s' % Std)
except:
msg_box = QMessageBox(QMessageBox.Warning, '参数异常', '请重新输入参数 ')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请正确输入')
msg_box.exec_()
def unit2_clear(self):
if self.img2.size > 1:
reinit(self)  # reset the image state only; keep the existing matplotlib canvas and layout
self.ui.label_16.setPixmap(QtGui.QPixmap(''))
msg_box = QMessageBox(QMessageBox.Information, '清空完成', '可以重新添加图片 ')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图片', '无需清除 ')
msg_box.exec_()
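# Letterbox the grayscale image onto the 550x360 label_16 before display (same padding scheme as unit 1).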
def unit2_img_refresh(self):
self.img2Show = self.img2
M = np.float32([[1, 0, 0], [0, 1, 0]])
if self.h2 / self.w2 == 360 / 550:
data = self.img2Show.tobytes()
image = QtGui.QImage(data, self.w2, self.h2, self.w2 * self.channel2, QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(550, 360)
self.ui.label_16.setPixmap(scale_pix)
return
elif self.h2 / self.w2 > 360 / 550:
h_ = self.h2
w_ = int(self.h2 * 550 / 360 + 0.5)
M[0, 2] += (w_ - self.w2) / 2
M[1, 2] += (h_ - self.h2) / 2
else:
h_ = int(self.w2 * 360 / 550 + 0.5)
w_ = self.w2
M[0, 2] += (w_ - self.w2) / 2
M[1, 2] += (h_ - self.h2) / 2
self.img2Show = cv2.warpAffine(self.img2Show, M, (w_, h_))
data = self.img2Show.tobytes()
image = QtGui.QImage(data, w_, h_, w_ * self.channel2, QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(550, 360)
self.ui.label_16.setPixmap(scale_pix)
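# The three mouse handlers below implement drag selection on label_16: press stores the start corner, move writes both corners into the x/y line edits, release ends the drag.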
def mouseReleaseEvent(self, e):
if self.img2.size > 1:
globalpos = e.globalPos()
pos = self.ui.label_16.mapFromGlobal(globalpos)
if pos.y() < 360 and pos.y() > 0 and pos.x() > 0 and pos.x() < 550:
self.m_drag = False
e.accept()
else:
e.accept()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像 ')
msg_box.exec_()
def mousePressEvent(self, e):
if self.img2.size > 1:
globalpos = e.globalPos()
pos = self.ui.label_16.mapFromGlobal(globalpos)
if pos.y() < 360 and pos.y() > 0 and pos.x() > 0 and pos.x() < 550:
self.m_drag = True
self.m_DragPosition = pos
e.accept()
else:
e.accept()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像 ')
msg_box.exec_()
def mouseMoveEvent(self, e):
# ignore moves when no image is loaded or no drag was started on label_16
if self.img2.size <= 1 or not getattr(self, 'm_drag', False):
e.accept()
return
globalpos = e.globalPos()
pos = self.ui.label_16.mapFromGlobal(globalpos)
if pos.y() < 360 and pos.y() > 0 and pos.x() > 0 and pos.x() < 550:
h = self.img2.shape[0]
w = self.img2.shape[1]
self.ui.lineEdit9.setText('%s' % round(self.m_DragPosition.x()/550*w))
self.ui.lineEdit10.setText('%s' % round(pos.x()/550*w))
self.ui.lineEdit_15.setText('%s' % round(self.m_DragPosition.y()/ 360 * h))
self.ui.lineEdit_16.setText('%s' % round(pos.y()/ 360 * h))
e.accept()
else:
e.accept()

@ -0,0 +1,777 @@
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
def init(self):
self.unit3_img1 = np.ndarray(())
self.unit3_img2 = np.ndarray(())
self.unit3_result1 = np.ndarray(())
self.unit3_img1_channel = 1
self.unit3_img2_channel = 1
self.unit3_result1_channel = 1
self.unit3_img3 = np.ndarray(())
self.unit3_result2 = np.ndarray(())
self.unit3_img3_channel = 1
self.unit3_result2_channel = 1
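# Repaint all five unit-3 image slots (pic1, pic2, result, pic3, result2), letterboxing each onto its 200x250 label.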
def unit3_img_refresh(self):
array = [self.unit3_img1, self.unit3_img2, self.unit3_result1, self.unit3_img3, self.unit3_result2]
array2 = [self.ui.label_35, self.ui.label_36, self.ui.label_29, self.ui.label_33, self.ui.label_31]
channel = [self.unit3_img1_channel,
self.unit3_img2_channel,
self.unit3_result1_channel,
self.unit3_img3_channel, self.unit3_result2_channel]
for index in range(len(array)):
M = np.float32([[1, 0, 0], [0, 1, 0]])
if array[index].size <=1 :
continue
print(array[index].shape)
index_h = array[index].shape[0]
index_w = array[index].shape[1]
if index_h / index_w == 250 / 200:
img = array[index].tobytes()
if channel[index] == 1:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(200, 250)
array2[index].setPixmap(scale_pix)
continue
elif channel[index] == 3:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(200, 250)
array2[index].setPixmap(scale_pix)
continue
elif index_h / index_w > 250 / 200:
h_ = index_h
w_ = int(index_h * 200 / 250 + 0.5)
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
else:
h_ = int(index_w * 250 / 200 + 0.5)
w_ = index_w
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
img = cv2.warpAffine(array[index], M, (w_, h_))
data = img.tobytes()
if channel[index] == 1:
image = QtGui.QImage(data, w_, h_, w_* channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(200, 250)
array2[index].setPixmap(scale_pix)
continue
else:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(200, 250)
array2[index].setPixmap(scale_pix)
continue
return
def img_left_load1(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.unit3_img1 = np.ndarray(())
self.unit3_result1 = np.ndarray(())
self.unit3_img1_channel = 1
self.unit3_result1_channel = 1
self.unit3_img1 = cv2.imread(fileName, -1)
if self.unit3_img1 is None or self.unit3_img1.size <= 1:  # imread returns None on failure
self.unit3_img1 = np.ndarray(())
return
if len(self.unit3_img1.shape)==3:
self.unit3_img1_channel =3
if self.unit3_img1.shape[2]==4:
self.unit3_img1 = cv2.cvtColor(self.unit3_img1, cv2.COLOR_BGRA2BGR)
print(self.unit3_img1.shape)
unit3_img_refresh(self)
def img_left_load2(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.unit3_img2 = np.ndarray(())
self.unit3_img2_channel = 1
self.unit3_img2 = cv2.imread(fileName, -1)
if self.unit3_img2 is None or self.unit3_img2.size <= 1:  # imread returns None on failure
self.unit3_img2 = np.ndarray(())
return
if len(self.unit3_img2.shape) == 3:
self.unit3_img2_channel = 3
if self.unit3_img2.shape[2] == 4:
self.unit3_img2 = cv2.cvtColor(self.unit3_img2, cv2.COLOR_BGRA2BGR)
print(self.unit3_img2.shape)
unit3_img_refresh(self)
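# The arithmetic handlers below (ADD/SUB/MULTI/DIVIDE) share the same guards: both operands loaded, matching channel count, matching shape.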
def ADD(self):
if self.unit3_img1.size>1 and self.unit3_img2.size>1 \
and self.unit3_img1_channel == self.unit3_img2_channel\
and self.unit3_img1.shape == self.unit3_img2.shape:
try:
self.unit3_result1 = cv2.add(src1=self.unit3_img1, src2=self.unit3_img2)
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '请重新选择图片进行加操作')
msg_box.exec_()
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
elif self.unit3_img1.size<=1 or self.unit3_img2.size<=1:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择两张图片后再相加')
msg_box.exec_()
elif self.unit3_img1_channel != self.unit3_img2_channel:
msg_box = QMessageBox(QMessageBox.Warning, '通道不同', '彩色图与灰度图不能相加')
msg_box.exec_()
elif self.unit3_img1.shape != self.unit3_img2.shape:
msg_box = QMessageBox(QMessageBox.Warning, '图片尺寸不同', '尺寸不同的图像不能相加')
msg_box.exec_()
unit3_img_refresh(self)
def SUB(self):
if self.unit3_img1.size > 1 and self.unit3_img2.size > 1 \
and self.unit3_img1_channel == self.unit3_img2_channel \
and self.unit3_img1.shape == self.unit3_img2.shape:
try:
self.unit3_result1 = cv2.subtract(src1=self.unit3_img1, src2=self.unit3_img2)
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '请重新选择图片进行减操作')
msg_box.exec_()
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
elif self.unit3_img1.size <= 1 or self.unit3_img2.size <= 1:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择两张图片后再相减')
msg_box.exec_()
elif self.unit3_img1_channel != self.unit3_img2_channel:
msg_box = QMessageBox(QMessageBox.Warning, '通道不同', '彩色图与灰度图不能相减')
msg_box.exec_()
elif self.unit3_img1.shape != self.unit3_img2.shape:
msg_box = QMessageBox(QMessageBox.Warning, '图片尺寸不同', '尺寸不同的图像不能相减')
msg_box.exec_()
unit3_img_refresh(self)
def MULTI(self):
if self.unit3_img1.size > 1 and self.unit3_img2.size > 1 \
and self.unit3_img1_channel == self.unit3_img2_channel \
and self.unit3_img1.shape == self.unit3_img2.shape:
try:
self.unit3_result1 = cv2.multiply(src1=self.unit3_img1, src2=self.unit3_img2)
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '请重新选择图片进行乘操作')
msg_box.exec_()
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
elif self.unit3_img1.size <= 1 or self.unit3_img2.size <= 1:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择两张图片后再相乘')
msg_box.exec_()
elif self.unit3_img1_channel != self.unit3_img2_channel:
msg_box = QMessageBox(QMessageBox.Warning, '通道不同', '彩色图与灰度图不能相乘')
msg_box.exec_()
elif self.unit3_img1.shape != self.unit3_img2.shape:
msg_box = QMessageBox(QMessageBox.Warning, '图片尺寸不同', '尺寸不同的图像不能相乘')
msg_box.exec_()
unit3_img_refresh(self)
def DIVIDE(self):
if self.unit3_img1.size > 1 and self.unit3_img2.size > 1 \
and self.unit3_img1_channel == self.unit3_img2_channel \
and self.unit3_img1.shape == self.unit3_img2.shape:
try:
self.unit3_result1 = cv2.divide(src1=self.unit3_img1, src2=self.unit3_img2)
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '图片中有灰度值为0的像素点')
msg_box.exec_()
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
elif self.unit3_img1.size <= 1 or self.unit3_img2.size <= 1:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择两张图片后再相除')
msg_box.exec_()
elif self.unit3_img1_channel != self.unit3_img2_channel:
msg_box = QMessageBox(QMessageBox.Warning, '通道不同', '彩色图与灰度图不能相除')
msg_box.exec_()
elif self.unit3_img1.shape != self.unit3_img2.shape:
msg_box = QMessageBox(QMessageBox.Warning, '图片尺寸不同', '尺寸不同的图像不能相除')
msg_box.exec_()
unit3_img_refresh(self)
def AND(self):
if self.unit3_img1.size > 1 and self.unit3_img2.size > 1 \
and self.unit3_img1_channel == self.unit3_img2_channel \
and self.unit3_img1.shape == self.unit3_img2.shape:
try:
self.unit3_result1 = self.unit3_img1&self.unit3_img2
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '请重新选择图片进行与操作')
msg_box.exec_()
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
elif self.unit3_img1.size <= 1 or self.unit3_img2.size <= 1:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择两张图片再进行与操作')
msg_box.exec_()
elif self.unit3_img1_channel != self.unit3_img2_channel:
msg_box = QMessageBox(QMessageBox.Warning, '通道不同', '彩色图与灰度图不能进行与操作')
msg_box.exec_()
elif self.unit3_img1.shape != self.unit3_img2.shape:
msg_box = QMessageBox(QMessageBox.Warning, '图片尺寸不同', '尺寸不同的图像不能进行与操作')
msg_box.exec_()
unit3_img_refresh(self)
def OR(self):
if self.unit3_img1.size > 1 and self.unit3_img2.size > 1 \
and self.unit3_img1_channel == self.unit3_img2_channel \
and self.unit3_img1.shape == self.unit3_img2.shape:
try:
self.unit3_result1 = self.unit3_img1 | self.unit3_img2
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '请重新选择图片进行或操作')
msg_box.exec_()
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
elif self.unit3_img1.size <= 1 or self.unit3_img2.size <= 1:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择两张图片后再进行或操作')
msg_box.exec_()
elif self.unit3_img1_channel != self.unit3_img2_channel:
msg_box = QMessageBox(QMessageBox.Warning, '通道不同', '彩色图与灰度图不能进行或操作')
msg_box.exec_()
elif self.unit3_img1.shape != self.unit3_img2.shape:
msg_box = QMessageBox(QMessageBox.Warning, '图片尺寸不同', '尺寸不同的图像不能进行或操作')
msg_box.exec_()
unit3_img_refresh(self)
def NOT(self):
if self.unit3_img1.size>1:
try:
self.unit3_result1 = cv2.bitwise_not(self.unit3_img1)
if len(self.unit3_result1.shape) == 3:
self.unit3_result1_channel = 3
else:
self.unit3_result1_channel = 1
except:
msg_box = QMessageBox(QMessageBox.Warning, '图片异常', '请重新选择图片进行非操作')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请选择图片后再进行非操作')
msg_box.exec_()
unit3_img_refresh(self)
def img_left_clear(self):
if self.unit3_img1.size > 1 or self.unit3_img2.size > 1:
self.unit3_img1 = np.ndarray(())
self.unit3_img2 = np.ndarray(())
self.unit3_result1 = np.ndarray(())
self.unit3_img1_channel = 1
self.unit3_img2_channel = 1
self.unit3_result1_channel = 1
for label in [self.ui.label_35, self.ui.label_36, self.ui.label_29]:
label.setPixmap(QtGui.QPixmap(''))
else:
msg_box = QMessageBox(QMessageBox.Warning, '无需清空', '没有图片')
msg_box.exec_()
unit3_img_refresh(self)
def img_left_save(self):
if self.unit3_result1.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit3_result1)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请生成图像')
msg_box.exec_()
def img_left_show(self):
if self.unit3_result1.size > 1:
cv2.imshow('Original pic', self.unit3_result1)
cv2.waitKey(0)
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '没有生成图像')
msg_box.exec_()
def img_right_load(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.unit3_img3 = cv2.imread(fileName, cv2.IMREAD_GRAYSCALE)
if self.unit3_img3 is None or self.unit3_img3.size <= 1:  # imread returns None on failure
self.unit3_img3 = np.ndarray(())
return
print(self.unit3_img3.shape)
unit3_img_refresh(self)
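# The morphology/filter handlers below read the structuring-element size from lineEdit_20/lineEdit_27; radioButton_2 decides whether to chain the operation onto the current result or start again from pic3.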
def erode(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size>1:
if x and y:
x = int(x)
y = int(y)
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (x, y), (-1, -1))
if checked:
if self.unit3_result2.size>1:
self.unit3_result2 = cv2.erode(self.unit3_result2, kernel)
else:
self.unit3_result2 = cv2.erode(self.unit3_img3, kernel)
else:
self.unit3_result2 = cv2.erode(self.unit3_img3, kernel)
if len(self.unit3_result2.shape) == 3:
self.unit3_result2_channel = 3  # track channels in the attribute read by unit3_img_refresh
else:
self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def dilate(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (x, y), (-1, -1))
if checked:
if self.unit3_result2.size > 1:
self.unit3_result2 = cv2.dilate(self.unit3_result2, kernel)
else:
self.unit3_result2 = cv2.dilate(self.unit3_img3, kernel)
else:
self.unit3_result2 = cv2.dilate(self.unit3_img3, kernel)
if len(self.unit3_result2.shape) == 3:
self.unit3_result2_channel = 3
else:
self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def opening(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (x, y), (-1, -1))
if checked:
if self.unit3_result2.size > 1:
self.unit3_result2 = cv2.morphologyEx(self.unit3_result2, cv2.MORPH_OPEN, kernel)
else:
self.unit3_result2 = cv2.morphologyEx(self.unit3_img3,cv2.MORPH_OPEN, kernel)
else:
self.unit3_result2 = cv2.morphologyEx(self.unit3_img3, cv2.MORPH_OPEN, kernel)
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def closing(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (x, y), (-1, -1))
if checked:
if self.unit3_result2.size > 1:
self.unit3_result2 = cv2.morphologyEx(self.unit3_result2, cv2.MORPH_CLOSE, kernel)
else:
self.unit3_result2 = cv2.morphologyEx(self.unit3_img3, cv2.MORPH_CLOSE, kernel)
else:
self.unit3_result2 = cv2.morphologyEx(self.unit3_img3, cv2.MORPH_CLOSE, kernel)
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def mean(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
            if x <= 0 or y <= 0 or x != y or x % 2 != 1:
                msg_box = QMessageBox(QMessageBox.Warning, '均值滤波核长宽需为相等的正奇数', '请重新输入')
msg_box.exec_()
return
            if checked:
                if self.unit3_result2.size > 1:
                    self.unit3_result2 = cv2.blur(self.unit3_result2, (x, y), anchor=(-1, -1))  # anchor passed by keyword; positionally it would land on dst
                else:
                    self.unit3_result2 = cv2.blur(self.unit3_img3, (x, y), anchor=(-1, -1))
            else:
                self.unit3_result2 = cv2.blur(self.unit3_img3, (x, y), anchor=(-1, -1))
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def guassian(self):  # Gaussian blur handler (misspelled name kept so existing wiring keeps working)
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
            if x <= 0 or y <= 0 or x != y or x % 2 != 1:
                msg_box = QMessageBox(QMessageBox.Warning, '高斯滤波核长宽需为相等的正奇数', '请重新输入')
msg_box.exec_()
return
            if checked:
                if self.unit3_result2.size > 1:
                    self.unit3_result2 = cv2.GaussianBlur(self.unit3_result2, (x, y), 0, sigmaY=0)  # sigmaY passed by keyword; positionally it would land on dst
                else:
                    self.unit3_result2 = cv2.GaussianBlur(self.unit3_img3, (x, y), 0, sigmaY=0)
            else:
                self.unit3_result2 = cv2.GaussianBlur(self.unit3_img3, (x, y), 0, sigmaY=0)
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def Covfilter(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
            if x <= 0 or y <= 0 or x % 2 != 1 or y % 2 != 1:
msg_box = QMessageBox(QMessageBox.Warning, '卷积核长宽均为正奇数', '请重新输入')
msg_box.exec_()
return
kernel = np.ones((x, y), np.float32) / (x*y)
if checked:
if self.unit3_result2.size > 1:
self.unit3_result2 = cv2.filter2D(self.unit3_result2, -1, kernel)
else:
self.unit3_result2 = cv2.filter2D(self.unit3_img3, -1, kernel)
else:
self.unit3_result2 = cv2.filter2D(self.unit3_img3, -1, kernel)
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
def median(self):
checked = self.ui.radioButton_2.isChecked()
try:
x = self.ui.lineEdit_20.text()
y = self.ui.lineEdit_27.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if x and y:
x = int(x)
y = int(y)
if x<=0 or y<=0 or x!=y or x%2!=1:
msg_box = QMessageBox(QMessageBox.Warning, '中值滤波算子长宽均为正奇数且相等', '请重新输入')
msg_box.exec_()
return
print(x,y)
if checked:
if self.unit3_result2.size > 1:
self.unit3_result2 = cv2.medianBlur(self.unit3_result2, x)
else:
self.unit3_result2 = cv2.medianBlur(self.unit3_img3, x)
else:
self.unit3_result2 = cv2.medianBlur(self.unit3_img3, x)
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元为空', '请输入结构元大小')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请选择图像')
msg_box.exec_()
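# Note: cv2.medianBlur takes a single odd aperture size (its window is always square),
# which is why median() above requires x == y and passes only x. A minimal sketch:
def _median_demo():
    noisy = np.full((5, 5), 100, np.uint8)
    noisy[2, 2] = 255  # isolated impulse
    print(cv2.medianBlur(noisy, 3)[2, 2])  # prints 100: the impulse is removed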
def img_right_clear(self):
if self.unit3_img3.size > 1 or self.unit3_result2.size > 1:
self.unit3_img3 = np.ndarray(())
self.unit3_result2 = np.ndarray(())
self.unit3_img3_channel = 1
self.unit3_result2_channel = 1
for label in [self.ui.label_33, self.ui.label_31]:
label.setPixmap(QtGui.QPixmap(''))
else:
msg_box = QMessageBox(QMessageBox.Warning, '无需清空', '没有图片')
msg_box.exec_()
unit3_img_refresh(self)
def bilateralFilter(self):
checked = self.ui.radioButton_2.isChecked()
try:
d = self.ui.lineEdit_11.text()
sigmaspace = self.ui.lineEdit_12.text()
sigmacolor = self.ui.lineEdit_13.text()
except:
msg_box = QMessageBox(QMessageBox.Warning, '结构元不能为空', '请重新输入')
msg_box.exec_()
return
if self.unit3_img3.size > 1:
if d and sigmaspace and sigmacolor:
d = int(d)
sigmaspace = int(sigmaspace)
sigmacolor = int(sigmacolor)
if d <= 0 or sigmacolor <= 0 or sigmaspace <= 0:
                msg_box = QMessageBox(QMessageBox.Warning, '双边滤波参数需为正数', '请重新输入')
msg_box.exec_()
return
if checked:
if self.unit3_result2.size > 1:
self.unit3_result2 = cv2.bilateralFilter(self.unit3_result2, d, sigmacolor, sigmaspace)
else:
self.unit3_result2 = cv2.bilateralFilter(self.unit3_img3, d, sigmacolor, sigmaspace)
else:
self.unit3_result2 = cv2.bilateralFilter(self.unit3_img3, d, sigmacolor, sigmaspace)
            if len(self.unit3_result2.shape) == 3:
                self.unit3_result2_channel = 3
            else:
                self.unit3_result2_channel = 1
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '结构元或方差为空', '请输入结构元大小和方差')
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def add_noise_Guass(self):  # add Gaussian noise
if self.unit3_img3.size>1:
mu = 0.0
sigma = 0.1
self.unit3_result2 = np.array(self.unit3_img3 / 255, dtype=float)
noise = np.random.normal(mu, sigma, self.unit3_result2.shape)
self.unit3_result2 = self.unit3_result2 + noise
if self.unit3_result2.min() < 0:
low_clip = -1.
else:
low_clip = 0.
self.unit3_result2 = np.clip(self.unit3_result2, low_clip, 1.0)
self.unit3_result2 = np.uint8(self.unit3_result2 * 255)
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
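# Note: the Gaussian noise above is added in the normalized 0-1 domain, so sigma = 0.1
# corresponds to a standard deviation of roughly 25 grey levels on the 0-255 image.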
def add_noise_Jiaoyan(self):  # add salt-and-pepper noise
if self.unit3_img3.size>1:
        SNR = 0.9  # keep 90% of pixels clean
        print("Salt-and-pepper noise: start")
        self.unit3_result2 = self.unit3_img3.copy()
        h, w = self.unit3_result2.shape[:2]  # shape is (rows, cols)
        noisy_size = int(self.unit3_result2.size * (1 - SNR))
        print(noisy_size)
        for k in range(noisy_size):
            t = np.random.rand()  # np.random.randint(0, 1) always returned 0, so salt was never added
            x = int(np.random.uniform(0, h))
            y = int(np.random.uniform(0, w))
            if t < 0.5:
                self.unit3_result2[x, y] = 0  # pepper
            else:
                self.unit3_result2[x, y] = 255  # salt (256 overflowed uint8 back to 0)
        print("Salt-and-pepper noise: done")
unit3_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
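# A vectorized alternative to the loop above (a sketch, not wired to the UI): draw all
# noisy coordinates at once and split them roughly half salt / half pepper.
def _salt_pepper_demo(img, snr=0.9):
    out = img.copy()
    n = int(out.size * (1 - snr))
    rows = np.random.randint(0, out.shape[0], n)
    cols = np.random.randint(0, out.shape[1], n)
    salt = np.random.rand(n) < 0.5
    out[rows[salt], cols[salt]] = 255
    out[rows[~salt], cols[~salt]] = 0
    return out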
def img_right_save(self):
if self.unit3_result2.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit3_result2)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请生成图像')
msg_box.exec_()
def img_right_show(self):
if self.unit3_result2.size > 1:
        cv2.imshow('Result pic', self.unit3_result2)
cv2.waitKey(0)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def mousePressEvent(self, e):
    globalpos = e.globalPos()
    # Hit-test each preview label and start a drag from the first one under the cursor.
    for label in [self.ui.label_35, self.ui.label_36, self.ui.label_29, self.ui.label_33, self.ui.label_31]:
        pos = label.mapFromGlobal(globalpos)
        if 0 < pos.x() < 200 and 0 < pos.y() < 250:
            self.m_drag = True
            self.m_DragPosition = pos
            e.accept()
            break
def mouseReleaseEvent(self, e):
    globalpos = e.globalPos()
    for label in [self.ui.label_35, self.ui.label_36, self.ui.label_29, self.ui.label_33, self.ui.label_31]:
        pos = label.mapFromGlobal(globalpos)
        if 0 < pos.x() < 200 and 0 < pos.y() < 250:
            self.m_drag = True
            e.accept()
            break
def mouseMoveEvent(self, e):
e.accept()

@ -0,0 +1,355 @@
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
def init(self):
self.unit4_img = np.ndarray(())
self.unit4_robertsimg = np.ndarray(())
self.unit4_prewittimg = np.ndarray(())
self.unit4_logimg = np.ndarray(())
self.unit4_sobelimg = np.ndarray(())
self.unit4_laplacianimg = np.ndarray(())
self.unit4_loughimg = np.ndarray(())
self.unit4_cannyimg = np.ndarray(())
self.unit4_img_channel = 1
self.unit4_robertsimg_channel = 1
self.unit4_prewittimg_channel = 1
self.unit4_logimg_channel = 1
self.unit4_sobelimg_channel = 1
self.unit4_laplacianimg_channel = 1
self.unit4_loughimg_channel = 1
self.unit4_cannyimg_channel = 1
def img_load(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
init(self)
self.unit4_img = cv2.imread(fileName, -1)
    if self.unit4_img is None:  # cv2.imread returns None on failure
        self.unit4_img = np.ndarray(())
        return
if len(self.unit4_img.shape) == 3:
self.unit4_img_channel = 3
if self.unit4_img.shape[2] == 4:
self.unit4_img = cv2.cvtColor(self.unit4_img, cv2.COLOR_BGRA2BGR)
print(self.unit4_img.shape)
unit4_img_refresh(self)
def unit4_img_refresh(self):
array = \
[self.unit4_img,
self.unit4_robertsimg,
self.unit4_prewittimg,
self.unit4_logimg,
self.unit4_sobelimg,
self.unit4_laplacianimg,
self.unit4_loughimg,
self.unit4_cannyimg]
array2 = [self.ui.label_53,
self.ui.label_43,
self.ui.label_30,
self.ui.label_49,
self.ui.label_58,
self.ui.label_32,
self.ui.label_34,
self.ui.label_50]
channel = [self.unit4_img_channel,
self.unit4_robertsimg_channel,
self.unit4_prewittimg_channel,
self.unit4_logimg_channel,
self.unit4_sobelimg_channel,
self.unit4_laplacianimg_channel,
self.unit4_loughimg_channel,
self.unit4_cannyimg_channel]
height = 240
weight = 240
for index in range(len(array)):
M = np.float32([[1, 0, 0], [0, 1, 0]])
if array[index].size <= 1:
array2[index].setPixmap(QtGui.QPixmap(''))
continue
print(array[index].shape)
index_h = array[index].shape[0]
index_w = array[index].shape[1]
if index_h / index_w == height / weight:
img = array[index].tobytes()
if channel[index] == 1:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
elif channel[index] == 3:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
elif index_h / index_w > height / weight:
h_ = index_h
w_ = int(index_h * weight / height+ 0.5)
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
else:
h_ = int(index_w * height / weight + 0.5)
w_ = index_w
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
img = cv2.warpAffine(array[index], M, (w_, h_))
data = img.tobytes()
if channel[index] == 1:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
else:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
return
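# Note on the refresh above: instead of cropping, the image is translated onto a canvas
# whose aspect ratio matches the 240x240 preview ('weight' is used for width throughout),
# so the final scaled() call never distorts the picture.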
def img_clear(self):
if self.unit4_img.size > 1:
init(self)
unit4_img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '无需清空', '没有图片')
msg_box.exec_()
def action(self):
if self.unit4_img.size>1:
#try:
Roberts(self)
print("Roberts over!")
Prewitt(self)
print("Prewitt over!")
Log(self)
print("Log over!")
Sobel(self)
print("Sobel over!")
Laplacian(self)
print("Laplacian over!")
Lough(self)
print("Lough over!")
Canny(self)
print("Canny over!")
unit4_img_refresh(self)
#except:
#msg_box = QMessageBox(QMessageBox.Warning, '算子执行异常', '请更换图片')
#msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图片')
msg_box.exec_()
def Roberts(self):
img = self.unit4_img
if(self.unit4_img_channel == 3):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Roberts operator kernels
    kernelx = np.array([[-1, 0], [0, 1]], dtype=int)
    kernely = np.array([[0, -1], [1, 0]], dtype=int)
    # convolve with each kernel (16-bit output keeps negative responses)
    x = cv2.filter2D(img, cv2.CV_16S, kernelx)
    y = cv2.filter2D(img, cv2.CV_16S, kernely)
    # convert back to uint8
    absX = cv2.convertScaleAbs(x)
    absY = cv2.convertScaleAbs(y)
self.unit4_robertsimg = cv2.addWeighted(absX,0.5,absY,0.5,0)
def Prewitt(self):
grayImage = self.unit4_img
if (self.unit4_img_channel == 3):
grayImage = cv2.cvtColor(grayImage, cv2.COLOR_BGR2GRAY)
kernelx = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=int)
kernely = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=int)
x = cv2.filter2D(grayImage, cv2.CV_16S, kernelx)
y = cv2.filter2D(grayImage, cv2.CV_16S, kernely)
absX = cv2.convertScaleAbs(x)
absY = cv2.convertScaleAbs(y)
self.unit4_prewittimg = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
def Log(self):
img = self.unit4_img
if (self.unit4_img_channel == 3):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # pad the border by 2 pixels, then Gaussian-smooth the padded image
    image = cv2.copyMakeBorder(img, 2, 2, 2, 2, borderType=cv2.BORDER_REPLICATE)
    image = cv2.GaussianBlur(image, (3, 3), 0)
    # define the 5x5 LoG kernel with NumPy
    m1 = np.array(
        [[0, 0, -1, 0, 0], [0, -1, -2, -1, 0], [-1, -2, 16, -2, -1], [0, -1, -2, -1, 0], [0, 0, -1, 0, 0]])
    # direct convolution
rows = image.shape[0]
cols = image.shape[1]
image1 = np.zeros(image.shape)
for i in range(2, rows - 2):
for j in range(2, cols - 2):
image1[i, j] = np.sum((m1 * image[i - 2:i + 3, j - 2:j + 3]))
self.unit4_logimg = cv2.convertScaleAbs(image1)
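    # The double loop above is a didactic direct convolution; cv2.filter2D computes the
    # same LoG response far faster. An equivalent sketch with the same m1 kernel:
    # self.unit4_logimg = cv2.convertScaleAbs(cv2.filter2D(image, cv2.CV_64F, m1.astype(np.float32)))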
def Sobel(self):
grayImage = self.unit4_img
if (self.unit4_img_channel == 3):
grayImage = cv2.cvtColor(grayImage, cv2.COLOR_BGR2GRAY)
    # Sobel gradients in x and y
    x = cv2.Sobel(grayImage, cv2.CV_16S, 1, 0)
    y = cv2.Sobel(grayImage, cv2.CV_16S, 0, 1)
    # convert back to uint8
absX = cv2.convertScaleAbs(x)
absY = cv2.convertScaleAbs(y)
self.unit4_sobelimg = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
def Laplacian(self):
grayImage = self.unit4_img
if (self.unit4_img_channel == 3):
grayImage = cv2.cvtColor(grayImage, cv2.COLOR_BGR2GRAY)
    # Gaussian smoothing
    grayImage = cv2.GaussianBlur(grayImage, ksize=(5, 5), sigmaX=0, sigmaY=0)
    # Laplacian operator
    dst = cv2.Laplacian(grayImage, cv2.CV_16S, ksize=3)
    # convert back to uint8
self.unit4_laplacianimg = cv2.convertScaleAbs(dst)
def Lough(self):  # Hough line detection ('Lough' kept, as action() calls it by this name)
img = self.unit4_img
if (self.unit4_img_channel == 3):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.GaussianBlur(img, (3, 3), 0)
edges = cv2.Canny(img, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 118)  # a theta step of pi/2 could only find axis-aligned lines
    result = img.copy()
    for i_line in lines if lines is not None else []:
for line in i_line:
rho = line[0]
theta = line[1]
            if (theta < (np.pi / 4.)) or (theta > (3. * np.pi / 4.0)):  # near-vertical line
pt1 = (int(rho / np.cos(theta)), 0)
pt2 = (int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta)), result.shape[0])
cv2.line(result, pt1, pt2, (0, 0, 255))
else:
pt1 = (0, int(rho / np.sin(theta)))
pt2 = (result.shape[1], int((rho - result.shape[1] * np.cos(theta)) / np.sin(theta)))
cv2.line(result, pt1, pt2, (0, 0, 255), 1)
    linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 80, minLineLength=200, maxLineGap=15)  # keywords needed: positionally, 200 landed on the unused 'lines' slot
    result_P = img.copy()
    for i_P in linesP if linesP is not None else []:
for x1, y1, x2, y2 in i_P:
cv2.line(result_P, (x1, y1), (x2, y2), (0, 255, 0), 3)
self.unit4_loughimg = result_P
def Canny(self):
image = self.unit4_img
if (self.unit4_img_channel == 3):
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Gaussian smoothing
    image = cv2.GaussianBlur(image, (3, 3), 0)
    # 16-bit Sobel gradients in x and y, as required by the gradient form of cv2.Canny
    gradx = cv2.Sobel(image, cv2.CV_16SC1, 1, 0)
    grady = cv2.Sobel(image, cv2.CV_16SC1, 0, 1)
    # Canny on the precomputed gradients: low threshold 50, high threshold 150
self.unit4_cannyimg = cv2.Canny(gradx, grady, 50, 150)
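    # Note: this is the cv2.Canny overload that takes precomputed 16-bit gradients
    # (dx, dy, threshold1, threshold2). The common single-image form would be roughly
    # equivalent here: self.unit4_cannyimg = cv2.Canny(image, 50, 150, apertureSize=3)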
def canny_save(self):
if self.unit4_cannyimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit4_cannyimg)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def log_save(self):
if self.unit4_logimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit4_logimg)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def lough_save(self):
if self.unit4_loughimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit4_loughimg)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def laplacian_save(self):
if self.unit4_laplacianimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit4_laplacianimg)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def sobel_save(self):
if self.unit4_sobelimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
        cv2.imwrite(fileName, self.unit4_sobelimg)  # was unit4_laplacianimg: copy-paste bug
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def prewitt_save(self):
if self.unit4_prewittimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit4_prewittimg)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def roberts_save(self):
if self.unit4_robertsimg.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit4_robertsimg)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()

@ -0,0 +1,239 @@
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qt5 import NavigationToolbar2QT as NavigationToolbar
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from math import sqrt,pow
class pltFigure(FigureCanvas):
def __init__(self, parent=None, width=5, height=3, dpi=100):
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
self.fig = Figure(figsize=(width, height), dpi=dpi)
FigureCanvas.__init__(self, self.fig)
self.setParent(parent)
self.axes = self.fig.add_subplot(111)
FigureCanvas.setSizePolicy(self,
QSizePolicy.Expanding,
QSizePolicy.Expanding)
FigureCanvas.updateGeometry(self)
def init(self):
self.unit5_img = np.ndarray(())
self.unit5_imgOrg = np.ndarray(())
self.unit5_img_channel = 1
self.fig3 = pltFigure(width=5, height=3, dpi=80)
self.fig_ntb3 = NavigationToolbar(self.fig3, self)
self.gridlayout2 = QGridLayout(self.ui.label_48)
self.gridlayout2.addWidget(self.fig3)
self.gridlayout2.addWidget(self.fig_ntb3)
self.unit5_img_channel=1
def hist_refresh(self):
hist = np.bincount(self.unit5_img.ravel(), minlength=256)
self.fig3.axes.cla()
self.fig3.axes.plot(hist)
self.fig3.draw()
def img_refresh(self):
imgShow = self.unit5_img
if self.unit5_img.size<=1:
self.ui.label_46.setPixmap(QtGui.QPixmap(''))
return
h = 471
w = 481
M = np.float32([[1, 0, 0], [0, 1, 0]])
h2, w2 = imgShow.shape
print(h2)
print(w2)
if h2 / w2 == h / w:
data = imgShow.tobytes()
image = QtGui.QImage(data, w2, h2, w2 , QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(w, h)
self.ui.label_46.setPixmap(scale_pix)
print("branch1")
return
elif h2 / w2 > h / w:
print("branch2")
h_ = h2
w_ = int(h2 * w / h + 0.5)
M[0, 2] += (w_ - w2) / 2
M[1, 2] += (h_ - h2) / 2
print(M)
else:
print("branch3")
h_ = int(w2 * h /w + 0.5)
w_ = w2
M[0, 2] += (w_ - w2) / 2
M[1, 2] += (h_ - h2) / 2
imgShow = cv2.warpAffine(imgShow, M, (w_, h_))
data = imgShow.tobytes()
image = QtGui.QImage(data, w_, h_, w_ , QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(w, h)
self.ui.label_46.setPixmap(scale_pix)
def img_save(self):
if self.unit5_img.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit5_img)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def img_clear(self):
if self.unit5_img.size > 1:
init(self)
img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '无需清空', '没有图片')
msg_box.exec_()
def img_load(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
init(self)
    self.unit5_img = cv2.imread(fileName, cv2.IMREAD_GRAYSCALE)
    if self.unit5_img is None:  # cv2.imread returns None on failure; .copy() on None would crash
        self.unit5_img = np.ndarray(())
        return
    self.unit5_imgOrg = self.unit5_img.copy()
    print(self.unit5_img.shape)
    img_refresh(self)
    hist_refresh(self)
def img_show(self):
if self.unit5_img.size > 1:
cv2.imshow('Original pic', self.unit5_img)
cv2.waitKey(0)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像')
msg_box.exec_()
def Laplacian(self):
if self.unit5_img.size>1:
imarr = self.unit5_img
height, width = imarr.shape
fft = np.fft.fft2(imarr)
fft = np.fft.fftshift(fft)
for i in range(height):
for j in range(width):
fft[i, j] *= -((i - (height - 1) / 2) ** 2 + (j - (width - 1) / 2) ** 2)
fft = np.fft.ifftshift(fft)
ifft = np.fft.ifft2(fft)
ifft = np.real(ifft)
        vmax = np.max(ifft)  # renamed from max/min to avoid shadowing the builtins
        vmin = np.min(ifft)
        res = np.zeros((height, width), dtype="uint8")
        for i in range(height):
            for j in range(width):
                res[i, j] = 255 * (ifft[i, j] - vmin) / (vmax - vmin)
self.unit5_img = res
img_refresh(self)
hist_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像 ')
msg_box.exec_()
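# The per-pixel loops in Laplacian() can be vectorized with a coordinate grid; a sketch
# that follows the same centred-spectrum convention:
def _freq_laplacian(imarr):
    h, w = imarr.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))  # u indexes columns, v indexes rows
    H = -((v - (h - 1) / 2) ** 2 + (u - (w - 1) / 2) ** 2)
    f = np.fft.fftshift(np.fft.fft2(imarr)) * H
    out = np.real(np.fft.ifft2(np.fft.ifftshift(f)))
    return np.uint8(255 * (out - out.min()) / (out.max() - out.min()))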
def Idea(self):
if self.unit5_img.size>1:
new_img = self.unit5_img
        # Fourier transform via OpenCV
        dft = cv2.dft(np.float32(new_img), flags=cv2.DFT_COMPLEX_OUTPUT)
        dtf_shift = np.fft.fftshift(dft)
        # np.fft.fftshift() moves the DC component to the centre of the spectrum
        rows, cols = new_img.shape
        crow, ccol = int(rows / 2), int(cols / 2)  # spectrum centre
        mask = np.zeros((rows, cols, 2), np.uint8)  # rows x cols x 2 uint8 mask
        mask[crow - 30:crow + 30, ccol - 30:ccol + 30] = 1  # keep a 60x60 window around the centre: an ideal low-pass filter
fshift = dtf_shift * mask
        # inverse Fourier transform
f_ishift = np.fft.ifftshift(fshift)
img_back = cv2.idft(f_ishift)
        img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])  # magnitude of the complex output
img_back = np.abs(img_back)
img_back = (img_back - np.amin(img_back)) / (np.amax(img_back) - np.amin(img_back))
self.unit5_img = (img_back * 255).astype(np.uint8)
img_refresh(self)
hist_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像 ')
msg_box.exec_()
def Gaussion(self):
if self.unit5_img.size > 1:
sigma =1
imarr = self.unit5_img
height, width = imarr.shape
fft = np.fft.fft2(imarr)
fft = np.fft.fftshift(fft)
for i in range(height):
for j in range(width):
fft[i, j] *= (1 - np.exp(-((i - (height - 1) / 2) ** 2 + (j - (width - 1) / 2) ** 2) / 2 / sigma ** 2))
fft = np.fft.ifftshift(fft)
ifft = np.fft.ifft2(fft)
ifft = np.real(ifft)
        vmax = np.max(ifft)  # renamed from max/min to avoid shadowing the builtins
        vmin = np.min(ifft)
        res = np.zeros((height, width), dtype="uint8")
        for i in range(height):
            for j in range(width):
                res[i, j] = 255 * (ifft[i, j] - vmin) / (vmax - vmin)
self.unit5_img = res
img_refresh(self)
hist_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '请选择图像 ')
msg_box.exec_()
#def Butterworth(self):
#if self.unit5_img.size>1:
#image = self.unit5_img
# d = 40
# f = np.fft.fft2(image)
#fshift = np.fft.fftshift(f)
# transfor_matrix = np.zeros(image.shape)
# M = transfor_matrix.shape[0]
# N = transfor_matrix.shape[1]
# for u in range(M):
# for v in range(N):
# D = sqrt((u - M / 2) ** 2 + (v - N / 2) ** 2)
# transfor_matrix[u, v] = 1 / (1 + pow(D / d, 16))
# print(transfor_matrix.dtype)
# new_img = np.abs(np.fft.ifft2(np.fft.ifftshift(fshift * transfor_matrix)))
# new_img = new_img .astype(np.float32)
# self.unit5_img = (new_img * 255).astype(np.uint8)
# self.unit5_img = new_img
# print(self.unit5_img.shape)
# img_refresh(self)
# #hist_refresh(self)
# else:
# msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '请先选择一副图像 ')
# msg_box.exec_()
def img_reset(self):
if self.unit5_img.size>1:
self.unit5_img = self.unit5_imgOrg
img_refresh(self)
hist_refresh(self)
else:
msgbox = QMessageBox(QMessageBox.Warning, "提示", "请选择图像")
msgbox.exec_()

@ -0,0 +1,254 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.models as models
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
from PyQt5 import QtWidgets, QtCore
import sys
from PyQt5.QtCore import *
import time
import util
from util import *
from model import *
#images = np.ndarray(())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#class Runthread(QtCore.QThread):
# signal object defined as a class attribute
# signal = pyqtSignal(int)
# filename2 = ''
# def __init__(self):
# super(Runthread, self).__init__()
# def __del__(self):
# self.wait()
# def setParam(self, file1, file2):
# self.filename1 = file1
# self.filename2 = file2
def init(self):
self.unit6_img1 = np.ndarray(())
self.unit6_img2 = np.ndarray(())
self.unit6_result = np.ndarray(())
self.unit6_img1_channel = 3
self.unit6_img2_channel = 3
self.unit6_result_channel = 3
self.filepath1 =''
self.filepath2 =''
self.ui.progressBar.setValue(0)
self.ui.progressBar.setMaximum(100)
    self.ui.textBrowser_8.setText('您的设备有cuda可以运行' if torch.cuda.is_available() else '您的设备没有cuda不建议运行')
def img_load1(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.unit6_img1 = np.ndarray(())
self.filepath1 = fileName
self.unit6_img1 = cv2.imread(fileName, -1)
    if self.unit6_img1 is None:  # cv2.imread returns None on failure
        init(self)
        return
    if len(self.unit6_img1.shape) == 3:
self.unit6_img1_channel = 3
if self.unit6_img1.shape[2] == 4:
self.unit6_img1 = cv2.cvtColor(self.unit6_img1, cv2.COLOR_BGRA2BGR)
else:
msg_box = QMessageBox(QMessageBox.Warning, "不是彩图", '请选择彩图进行风格迁移 ')
msg_box.exec_()
init(self)
return
print(self.unit6_img1.shape)
img_refresh(self)
def img_load2(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.unit6_img2 = np.ndarray(())
self.filepath2 = fileName
self.unit6_img2 = cv2.imread(fileName, -1)
    if self.unit6_img2 is None:  # cv2.imread returns None on failure
        init(self)
        return
    if len(self.unit6_img2.shape) != 3:
msg_box = QMessageBox(QMessageBox.Warning, "不是彩图", '请选择彩图进行风格迁移 ')
msg_box.exec_()
init(self)
return
else:
self.unit6_img2_channel = 3
if self.unit6_img2.shape[2] == 4:
self.unit6_img2 = cv2.cvtColor(self.unit6_img2, cv2.COLOR_BGRA2BGR)
    print(self.unit6_img2.shape)  # was img1: report the image just loaded
img_refresh(self)
def img_refresh(self):
array = \
[self.unit6_img1,
self.unit6_img2,
self.unit6_result]
array2 = [self.ui.label_44,
self.ui.label_45,
self.ui.label_47
]
channel = [self.unit6_img1_channel,
self.unit6_img2_channel,
self.unit6_result_channel]
height = 350
weight = 350
for index in range(len(array)):
M = np.float32([[1, 0, 0], [0, 1, 0]])
if array[index].size <= 1:
array2[index].setPixmap(QtGui.QPixmap(''))
continue
print(array[index].shape)
index_h = array[index].shape[0]
index_w = array[index].shape[1]
if index_h / index_w == height / weight:
img = array[index].tobytes()
if channel[index] == 1:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
elif channel[index] == 3:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
elif index_h / index_w > height / weight:
h_ = index_h
w_ = int(index_h * weight / height + 0.5)
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
else:
h_ = int(index_w * height / weight + 0.5)
w_ = index_w
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
img = cv2.warpAffine(array[index], M, (w_, h_))
data = img.tobytes()
if channel[index] == 1:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
else:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
return
def gram_matrix(y):
(b, ch, h, w) = y.size()
features = y.view(b, ch, w * h)
features_t = features.transpose(1, 2)
gram = features.bmm(features_t) / (ch * h * w)
return gram
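# Shape check (a sketch): for VGG features of shape (1, 64, 128, 128), gram_matrix
# returns a (1, 64, 64) channel-correlation matrix normalized by ch*h*w, which is what
# the style loss in style_transfer() compares against style_grams.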
def style_transfer(self):
    if self.unit6_img1.size > 1 and self.unit6_img2.size > 1:  # both content and style images are required
style_img = read_image(self.filepath2, target_width=512).to(device)
print(torch.cuda.is_available())
content_img = read_image(self.filepath1, target_width=512).to(device)
print(style_img.shape)
print(content_img.shape)
vgg16 = models.vgg16(pretrained=True)
vgg16 = VGG(vgg16.features[:23]).to(device).eval()
style_features = vgg16(style_img)
content_features = vgg16(content_img)
style_grams = [gram_matrix(x) for x in style_features]
input_img = content_img.clone()
optimizer = optim.LBFGS([input_img.requires_grad_()])
print("Yes2")
style_weight = 1e6
content_weight = 1
run = [0]
print("Yes3")
while run[0] <= 300:
QApplication.processEvents()
self.ui.progressBar.setValue(int(run[0]/3))
# if(run[0]%3==0):
# self.signal.emit(int(run[0]/3))
def f():
optimizer.zero_grad()
features = vgg16(input_img)
content_loss = F.mse_loss(features[2], content_features[2]) * content_weight
style_loss = 0
grams = [gram_matrix(x) for x in features]
for a, b in zip(grams, style_grams):
style_loss += F.mse_loss(a, b) * style_weight
loss = style_loss + content_loss
if run[0] % 50 == 0:
print('Step {}: Style Loss: {:4f} Content Loss: {:4f}'.format(
run[0], style_loss.item(), content_loss.item()))
run[0] += 1
loss.backward()
return loss
optimizer.step(f)
        self.unit6_result = cv2.cvtColor(util.recover_image(input_img), cv2.COLOR_RGB2BGR)  # recover_image returns RGB; display and imwrite expect BGR
img_refresh(self)
print("Train over!")
# thread = Runthread()
# thread.setParam(self.filepath1, self.filepath2)
# print("Yes0")
# try:
# thread.signal.connect(self.progressBar_refresh)
# except:
# print("Yes1")
# thread.start()
# print("Yes2")
# thread.wait()
# print("Yes3")
else:
        msg_box = QMessageBox(QMessageBox.Warning, '缺失图片', '请先选择内容图片和风格图片')
msg_box.exec_()
#def progressBar_refresh(self, msg):
# self.ui.progressBar.setValue(int(msg))
def img_save(self):
if self.unit6_result.size > 1:
fileName, tmp = QFileDialog.getSaveFileName(self, '保存图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
cv2.imwrite(fileName, self.unit6_result)
msg_box = QMessageBox(QMessageBox.Information, '成功', '图像保存成功,保存路径为:' + fileName)
msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '提示', '没有生成图像')
msg_box.exec_()
def img_clear(self):
if self.unit6_img1.size > 1 or self.unit6_img2.size > 1:
init(self)
img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '无需清空', '没有图片')
msg_box.exec_()

@ -0,0 +1,191 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.models as models
from PyQt5 import QtGui
from PyQt5.QtWidgets import *
import cv2
import numpy as np
from PyQt5 import QtWidgets, QtCore
import sys
from PyQt5.QtCore import *
import time
import utils
from utils import *
from models import *
import detect
import utils.general
from pathlib import Path
def init(self):
self.unit7_img = np.ndarray(())
self.unit7_img_channel = 1
self.unit7_result = np.ndarray(())
self.unit7_result_channel = 1
self.unit7_filepath = ''
self.unit7_imgpath = ''
self.unit7_savepath = ''
self.unit7_suffix = ''
self.ui.textBrowser_6.setText('')
self.ui.textBrowser_7.setText('')
def img_load(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '打开图像', 'Image', '*.png *.jpg *.bmp *.jpeg')
if fileName == '':
return
self.unit7_img = np.ndarray(())
self.unit7_img_channel = 1
self.unit7_result = np.ndarray(())
self.unit7_result_channel = 1
self.unit7_img = cv2.imread(fileName, -1)
self.unit7_suffix = fileName.split('/')[-1]
print(self.unit7_suffix)
self.unit7_imgpath = fileName
    if self.unit7_img is None:  # cv2.imread returns None on failure
        self.unit7_img = np.ndarray(())
        return
if len(self.unit7_img.shape) == 3:
self.unit7_img_channel = 3
if self.unit7_img.shape[2] == 4:
self.unit7_img = cv2.cvtColor(self.unit7_img, cv2.COLOR_BGRA2BGR)
print(self.unit7_img.shape)
img_refresh(self)
def img_refresh(self):
array = \
[self.unit7_img,
self.unit7_result]
array2 = [self.ui.label_54,
self.ui.label_55]
channel = [self.unit7_img_channel,
self.unit7_result_channel]
height = 480
weight = 500
for index in range(len(array)):
M = np.float32([[1, 0, 0], [0, 1, 0]])
if array[index].size <= 1:
array2[index].setPixmap(QtGui.QPixmap(''))
continue
print(array[index].shape)
index_h = array[index].shape[0]
index_w = array[index].shape[1]
if index_h / index_w == height / weight:
img = array[index].tobytes()
if channel[index] == 1:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
elif channel[index] == 3:
image = QtGui.QImage(img, index_w, index_h, index_w * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
elif index_h / index_w > height / weight:
h_ = index_h
w_ = int(index_h * weight / height + 0.5)
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
else:
h_ = int(index_w * height / weight + 0.5)
w_ = index_w
M[0, 2] += (w_ - index_w) / 2
M[1, 2] += (h_ - index_h) / 2
img = cv2.warpAffine(array[index], M, (w_, h_))
data = img.tobytes()
if channel[index] == 1:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_Grayscale8)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
else:
image = QtGui.QImage(data, w_, h_, w_ * channel[index], QtGui.QImage.Format_BGR888)
pix = QtGui.QPixmap.fromImage(image)
scale_pix = pix.scaled(weight, height)
array2[index].setPixmap(scale_pix)
continue
return
def result_save(self):
fileName= QFileDialog.getExistingDirectory(self, '保存图像')
if fileName == '':
return
self.unit7_savepath = fileName
self.ui.textBrowser_7.setText(fileName.split('/')[-2]+'/'+fileName.split('/')[-1])
msg_box = QMessageBox(QMessageBox.Information, '成功', '选择路径成功,保存路径为:' + fileName)
msg_box.exec_()
def clear(self):
if self.unit7_img.size > 1:
init(self)
img_refresh(self)
else:
msg_box = QMessageBox(QMessageBox.Warning, '无需清空', '没有图片')
msg_box.exec_()
def result_show(self):
if self.unit7_result.size > 1:
        cv2.imshow('Result pic', self.unit7_result)
cv2.waitKey(0)
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有图像', '没有生成图像')
msg_box.exec_()
def object_detection(self):
if self.unit7_filepath !='' and self.unit7_img.size>1 and self.unit7_savepath!='':
modelpath = self.unit7_filepath
imgpath = self.unit7_imgpath
savepath = self.unit7_savepath
detect.main(imgpath, modelpath,savepath)
name ='exp'
z = utils.general.increment_path_num(Path(savepath) / name, exist_ok=False)
num = str(z) if z!=1 else ''
path = savepath +'/exp'+ num+'/'+self.unit7_suffix
print(path)
self.unit7_result = cv2.imread(path, -1)
        if self.unit7_result is not None and self.unit7_result.size > 1:
if len(self.unit7_result.shape) == 3:
self.unit7_result_channel = 3
if self.unit7_result.shape[2] == 4:
self.unit7_result = cv2.cvtColor(self.unit7_result, cv2.COLOR_BGRA2BGR)
print(self.unit7_result.shape)
img_refresh(self)
        else:
            self.unit7_result = np.ndarray(())  # keep the sentinel so later size checks don't crash
            msg_box = QMessageBox(QMessageBox.Warning, '读取失败', '未能读取检测结果图像')
            msg_box.exec_()
else:
msg_box = QMessageBox(QMessageBox.Warning, '没有导入模型或图片', '请导入模型和图片后再进行尝试')
msg_box.exec_()
def model_load(self):
fileName, tmp = QFileDialog.getOpenFileName(self, '选择模型路径', 'Model', '*.pt')
if fileName == '':
return
self.unit7_filepath = fileName
self.ui.textBrowser_6.setText(fileName.split('/')[-2]+'/'+fileName.split('/')[-1])
print(self.unit7_filepath)
    # fileName was already validated above, so no second empty-path check is needed
    msg_box = QMessageBox(QMessageBox.Information, '已检测到模型', '模型导入成功')
    msg_box.exec_()

@ -0,0 +1,106 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
cnn_normalization_mean = [0.485, 0.456, 0.406]
cnn_normalization_std = [0.229, 0.224, 0.225]
tensor_normalizer = transforms.Normalize(mean=cnn_normalization_mean, std=cnn_normalization_std)
epsilon = 1e-5
def preprocess_image(image, target_width=None):
"""输入 PIL.Image 对象,输出标准化后的四维 tensor"""
if target_width:
t = transforms.Compose([
transforms.Resize(target_width),
transforms.CenterCrop(target_width),
transforms.ToTensor(),
tensor_normalizer,
])
else:
t = transforms.Compose([
transforms.ToTensor(),
tensor_normalizer,
])
return t(image).unsqueeze(0)
def image_to_tensor(image, target_width=None):
"""输入 OpenCV 图像,范围 0~255BGR 顺序,输出标准化后的四维 tensor"""
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = Image.fromarray(image)
return preprocess_image(image, target_width)
def read_image(path, target_width=None):
"""输入图像路径,输出标准化后的四维 tensor"""
image = Image.open(path)
return preprocess_image(image, target_width)
def recover_image(tensor):
"""输入 GPU 上的四维 tensor输出 0~255 范围的三维 numpy 矩阵RGB 顺序"""
image = tensor.detach().cpu().numpy()
image = image * np.array(cnn_normalization_std).reshape((1, 3, 1, 1)) + \
np.array(cnn_normalization_mean).reshape((1, 3, 1, 1))
return (image.transpose(0, 2, 3, 1) * 255.).clip(0, 255).astype(np.uint8)[0]
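# Round-trip sketch: read_image -> recover_image approximately reproduces the input
# (up to normalization and uint8 rounding); 'example.jpg' is a hypothetical path.
# rgb = recover_image(read_image('example.jpg', target_width=256))  # (256, 256, 3) uint8, RGB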
def recover_tensor(tensor):
m = torch.tensor(cnn_normalization_mean).view(1, 3, 1, 1).to(tensor.device)
s = torch.tensor(cnn_normalization_std).view(1, 3, 1, 1).to(tensor.device)
tensor = tensor * s + m
return tensor.clamp(0, 1)
def imshow(tensor, title=None):
"""输入 GPU 上的四维 tensor然后绘制该图像"""
image = recover_image(tensor)
print(image.shape)
plt.imshow(image)
if title is not None:
plt.title(title)
def mean_std(features):
"""输入 VGG16 计算的四个特征输出每张特征图的均值和标准差长度为1920"""
mean_std_features = []
for x in features:
x = x.view(*x.shape[:2], -1)
x = torch.cat([x.mean(-1), torch.sqrt(x.var(-1) + epsilon)], dim=-1)
n = x.shape[0]
        x2 = x.view(n, 2, -1).transpose(2, 1).contiguous().view(n, -1)  # [mean, ..., std, ...] to [mean, std, ...]
mean_std_features.append(x2)
mean_std_features = torch.cat(mean_std_features, dim=-1)
return mean_std_features
class Smooth:
    # sliding-window average of the values fed in
def __init__(self, windowsize=100):
self.window_size = windowsize
self.data = np.zeros((self.window_size, 1), dtype=np.float32)
self.index = 0
def __iadd__(self, x):
if self.index == 0:
self.data[:] = x
self.data[self.index % self.window_size] = x
self.index += 1
return self
def __float__(self):
return float(self.data.mean())
def __format__(self, f):
return self.__float__().__format__(f)
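# Usage sketch for Smooth: keep a sliding window of the last 100 loss values.
# s = Smooth()
# s += 0.5
# s += 0.7
# print(f'{s:.3f}')  # formats the windowed mean via __format__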

@ -0,0 +1,36 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
utils/initialization
"""
def notebook_init(verbose=True):
# Check system software and hardware
print('Checking setup...')
import os
import shutil
from utils.general import check_requirements, emojis, is_colab
from utils.torch_utils import select_device # imports
check_requirements(('psutil', 'IPython'))
import psutil
from IPython import display # to display images and clear console output
if is_colab():
shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory
# System info
if verbose:
gb = 1 << 30 # bytes to GiB (1024 ** 3)
ram = psutil.virtual_memory().total
total, used, free = shutil.disk_usage("/")
display.clear_output()
s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)'
else:
s = ''
select_device(newline=False)
print(emojis(f'Setup complete ✅ {s}'))
return display

@ -0,0 +1,103 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Activation functions
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class SiLU(nn.Module):
# SiLU activation https://arxiv.org/pdf/1606.08415.pdf
@staticmethod
def forward(x):
return x * torch.sigmoid(x)
class Hardswish(nn.Module):
# Hard-SiLU activation
@staticmethod
def forward(x):
# return x * F.hardsigmoid(x) # for TorchScript and CoreML
return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX
class Mish(nn.Module):
# Mish activation https://github.com/digantamisra98/Mish
@staticmethod
def forward(x):
return x * F.softplus(x).tanh()
class MemoryEfficientMish(nn.Module):
# Mish activation memory-efficient
class F(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
@staticmethod
def backward(ctx, grad_output):
x = ctx.saved_tensors[0]
sx = torch.sigmoid(x)
fx = F.softplus(x).tanh()
return grad_output * (fx + x * sx * (1 - fx * fx))
def forward(self, x):
return self.F.apply(x)
class FReLU(nn.Module):
# FReLU activation https://arxiv.org/abs/2007.11824
def __init__(self, c1, k=3): # ch_in, kernel
super().__init__()
self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
self.bn = nn.BatchNorm2d(c1)
def forward(self, x):
return torch.max(x, self.bn(self.conv(x)))
class AconC(nn.Module):
r""" ACON activation (activate or not)
AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
"""
def __init__(self, c1):
super().__init__()
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))
def forward(self, x):
dpx = (self.p1 - self.p2) * x
return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
class MetaAconC(nn.Module):
r""" ACON activation (activate or not)
MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
"""
def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r
super().__init__()
c2 = max(r, c1 // r)
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
# self.bn1 = nn.BatchNorm2d(c2)
# self.bn2 = nn.BatchNorm2d(c1)
def forward(self, x):
y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
# batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
# beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable
beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed
dpx = (self.p1 - self.p2) * x
return dpx * torch.sigmoid(beta * dpx) + self.p2 * x

@ -0,0 +1,284 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Image augmentation functions
"""
import math
import random
import cv2
import numpy as np
from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box
from utils.metrics import bbox_ioa
class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self):
self.transform = None
try:
import albumentations as A
check_version(A.__version__, '1.0.3', hard=True) # version requirement
T = [
A.Blur(p=0.01),
A.MedianBlur(p=0.01),
A.ToGray(p=0.01),
A.CLAHE(p=0.01),
A.RandomBrightnessContrast(p=0.0),
A.RandomGamma(p=0.0),
A.ImageCompression(quality_lower=75, p=0.0)] # transforms
self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
LOGGER.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
except ImportError: # package not installed, skip
pass
except Exception as e:
LOGGER.info(colorstr('albumentations: ') + f'{e}')
def __call__(self, im, labels, p=1.0):
if self.transform and random.random() < p:
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
return im, labels
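# Usage sketch (only active when the albumentations package is installed):
# aug = Albumentations()
# im, labels = aug(im, labels, p=1.0)  # labels: nx5 array of [cls, x, y, w, h] in YOLO format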
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
# HSV color-space augmentation
if hgain or sgain or vgain:
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
dtype = im.dtype # uint8
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
def hist_equalize(im, clahe=True, bgr=False):
# Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
yuv[:, :, 0] = c.apply(yuv[:, :, 0])
else:
yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
def replicate(im, labels):
# Replicate labels
h, w = im.shape[:2]
boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
x1b, y1b, x2b, y2b = boxes[i]
bh, bw = y2b - y1b, x2b - x1b
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax]
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
return im, labels
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
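# Usage sketch: pad a BGR frame to a stride-aligned 640 input while keeping the scale
# ratio and padding so detections can later be mapped back to the original frame.
# im_lb, ratio, (dw, dh) = letterbox(im0, new_shape=640, stride=32)  # im0: HxWx3 uint8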
def random_perspective(im,
targets=(),
segments=(),
degrees=10,
translate=.1,
scale=.1,
shear=10,
perspective=0.0,
border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(im[:, :, ::-1]) # base
# ax[1].imshow(im2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
if n:
use_segments = any(x.any() for x in segments)
new = np.zeros((n, 4))
if use_segments: # warp segments
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
else: # warp boxes
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i]
targets[:, 1:5] = new[i]
return im, targets
def copy_paste(im, labels, segments, p=0.5):
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
n = len(segments)
if p and n:
h, w, c = im.shape # height, width, channels
im_new = np.zeros(im.shape, np.uint8)
for j in random.sample(range(n), k=round(p * n)):
l, s = labels[j], segments[j]
box = w - l[3], l[2], w - l[1], l[4]
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
if (ioa < 0.30).all(): # allow 30% obscuration of existing labels
labels = np.concatenate((labels, [[l[0], *box]]), 0)
segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
result = cv2.bitwise_and(src1=im, src2=im_new)
result = cv2.flip(result, 1) # augment segments (flip left-right)
i = result > 0 # pixels to replace
# i[:, :] = result.max(2).reshape(h, w, 1) # act over ch
im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
return im, labels, segments
def cutout(im, labels, p=0.5):
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
if random.random() < p:
h, w = im.shape[:2]
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
for s in scales:
mask_h = random.randint(1, int(h * s)) # create random masks
mask_w = random.randint(1, int(w * s))
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
# apply random color mask
im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
# return unobscured labels
if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels
def mixup(im, labels, im2, labels2):
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
return im, labels
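# Note: Beta(32, 32) concentrates the mixup ratio r tightly around 0.5, so both images
# contribute roughly equally; the label sets are concatenated rather than re-weighted.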
def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates

@ -0,0 +1,170 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
AutoAnchor utils
"""
import random
import numpy as np
import torch
import yaml
from tqdm.auto import tqdm
from utils.general import LOGGER, colorstr, emojis
PREFIX = colorstr('AutoAnchor: ')
def check_anchor_order(m):
# Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer
da = a[-1] - a[0] # delta a
ds = m.stride[-1] - m.stride[0] # delta s
if da and (da.sign() != ds.sign()): # anchor order does not match stride order
LOGGER.info(f'{PREFIX}Reversing anchor order')
m.anchors[:] = m.anchors.flip(0)
def check_anchors(dataset, model, thr=4.0, imgsz=640):
# Check anchor fit to data, recompute if necessary
m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
def metric(k): # compute metric
r = wh[:, None] / k[None]
x = torch.min(r, 1 / r).min(2)[0] # ratio metric
best = x.max(1)[0] # best_x
aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold
bpr = (best > 1 / thr).float().mean() # best possible recall
return bpr, aat
stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides
anchors = m.anchors.clone() * stride # current anchors
bpr, aat = metric(anchors.cpu().view(-1, 2))
s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). '
if bpr > 0.98: # threshold to recompute
LOGGER.info(emojis(f'{s}Current anchors are a good fit to dataset ✅'))
else:
LOGGER.info(emojis(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...'))
na = m.anchors.numel() // 2 # number of anchors
try:
anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
except Exception as e:
LOGGER.info(f'{PREFIX}ERROR: {e}')
new_bpr = metric(anchors)[0]
if new_bpr > bpr: # replace anchors
anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
m.anchors[:] = anchors.clone().view_as(m.anchors)
check_anchor_order(m) # must be in pixel-space (not grid-space)
m.anchors /= stride
s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)'
else:
s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)'
LOGGER.info(emojis(s))
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
""" Creates kmeans-evolved anchors from training dataset
Arguments:
dataset: path to data.yaml, or a loaded dataset
n: number of anchors
img_size: image size used for training
thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
gen: generations to evolve anchors using genetic algorithm
verbose: print all results
Return:
k: kmeans evolved anchors
Usage:
from utils.autoanchor import *; _ = kmean_anchors()
"""
from scipy.cluster.vq import kmeans
npr = np.random
thr = 1 / thr
def metric(k, wh): # compute metrics
r = wh[:, None] / k[None]
x = torch.min(r, 1 / r).min(2)[0] # ratio metric
# x = wh_iou(wh, torch.tensor(k)) # iou metric
return x, x.max(1)[0] # x, best_x
def anchor_fitness(k): # mutation fitness
_, best = metric(torch.tensor(k, dtype=torch.float32), wh)
return (best * (best > thr).float()).mean() # fitness
def print_results(k, verbose=True):
k = k[np.argsort(k.prod(1))] # sort small to large
x, best = metric(k, wh0)
bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
f'past_thr={x[x > thr].mean():.3f}-mean: '
for i, x in enumerate(k):
s += '%i,%i, ' % (round(x[0]), round(x[1]))
if verbose:
LOGGER.info(s[:-2])
return k
if isinstance(dataset, str): # *.yaml file
with open(dataset, errors='ignore') as f:
data_dict = yaml.safe_load(f) # model dict
from utils.datasets import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
# Get label wh
shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
# Filter
i = (wh0 < 3.0).any(1).sum()
if i:
LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size')
wh = wh0[(wh0 >= 2.0).any(1)] # keep labels with at least one side >= 2 pixels
# wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
# Kmeans init
try:
LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
assert n <= len(wh) # apply overdetermined constraint
s = wh.std(0) # sigmas for whitening
k = kmeans(wh / s, n, iter=30)[0] * s # points
assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
except Exception:
LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init')
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
k = print_results(k, verbose=False)
# Plot
# k, d = [None] * 20, [None] * 20
# for i in tqdm(range(1, 21)):
# k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
# fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
# ax = ax.ravel()
# ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
# fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
# ax[0].hist(wh[wh[:, 0]<100, 0],400)
# ax[1].hist(wh[wh[:, 1]<100, 1],400)
# fig.savefig('wh.png', dpi=200)
# Evolve
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, anchor shape, mutation probability, sigma
pbar = tqdm(range(gen), bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
for _ in pbar:
v = np.ones(sh)
while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
kg = (k.copy() * v).clip(min=2.0)
fg = anchor_fitness(kg)
if fg > f:
f, k = fg, kg.copy()
pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
if verbose:
print_results(k, verbose)
return print_results(k)
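The ratio metric at the heart of both `metric` helpers can be reproduced in isolation; a toy example with invented label and anchor sizes:

```python
# Standalone illustration of the wh/anchor ratio metric; all sizes invented.
import torch

wh = torch.tensor([[30., 60.], [120., 90.]])  # label widths and heights (pixels)
k = torch.tensor([[32., 64.], [128., 96.]])   # candidate anchors
r = wh[:, None] / k[None]                     # pairwise wh ratios
x = torch.min(r, 1 / r).min(2)[0]             # worst-case ratio per label/anchor pair
print(x.max(1)[0])                            # best match per label; 1.0 would be perfect
```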

@ -0,0 +1,58 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Auto-batch utils
"""
from copy import deepcopy
import numpy as np
import torch
from torch.cuda import amp
from utils.general import LOGGER, colorstr
from utils.torch_utils import profile
def check_train_batch_size(model, imgsz=640):
# Check YOLOv5 training batch size
with amp.autocast():
return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
# Automatically estimate best batch size to use `fraction` of available CUDA memory
# Usage:
# import torch
# from utils.autobatch import autobatch
# model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
# print(autobatch(model))
prefix = colorstr('AutoBatch: ')
LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
device = next(model.parameters()).device # get model device
if device.type == 'cpu':
LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
return batch_size
gb = 1 << 30 # bytes to GiB (1024 ** 3)
d = str(device).upper() # 'CUDA:0'
properties = torch.cuda.get_device_properties(device) # device properties
t = properties.total_memory / gb # (GiB)
r = torch.cuda.memory_reserved(device) / gb # (GiB)
a = torch.cuda.memory_allocated(device) / gb # (GiB)
f = t - (r + a) # free memory (GiB)
LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
batch_sizes = [1, 2, 4, 8, 16]
try:
img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes]
y = profile(img, model, n=3, device=device)
except Exception as e:
LOGGER.warning(f'{prefix}{e}')
return batch_size # profiling failed, fall back to the default batch size
y = [x[2] for x in y if x] # memory [2]
batch_sizes = batch_sizes[:len(y)]
p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit
b = int((f * fraction - p[1]) / p[0]) # solve the fit for the batch size at the target memory fraction
LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%)')
return b
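The batch-size selection reduces to a first-degree polynomial fit of profiled memory against batch size, then solving for the batch that fills the target fraction of free memory. A standalone sketch with made-up numbers:

```python
# Sketch of the linear fit behind autobatch(); the memory figures are invented.
import numpy as np

batch_sizes = [1, 2, 4, 8, 16]
mem = [1.2, 1.9, 3.3, 6.1, 11.8]          # profiled GiB per batch size (hypothetical)
p = np.polyfit(batch_sizes, mem, deg=1)   # mem ≈ p[0] * batch_size + p[1]
free, fraction = 14.5, 0.9                # free GiB and target utilisation (hypothetical)
print(int((free * fraction - p[1]) / p[0]))  # largest batch size within budget
```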

@ -0,0 +1,26 @@
# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
# This script will run on every instance restart, not only on first start
# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0
--//
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"
#cloud-config
cloud_final_modules:
- [scripts-user, always]
--//
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"
#!/bin/bash
# --- paste contents of userdata.sh here ---
--//

@ -0,0 +1,40 @@
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
# Usage: $ python utils/aws/resume.py
import os
import sys
from pathlib import Path
import torch
import yaml
FILE = Path(__file__).resolve()
ROOT = FILE.parents[2] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
port = 0 # --master_port
path = Path('').resolve()
for last in path.rglob('*/**/last.pt'):
ckpt = torch.load(last)
if ckpt['optimizer'] is None:
continue
# Load opt.yaml
with open(last.parent.parent / 'opt.yaml', errors='ignore') as f:
opt = yaml.safe_load(f)
# Get device count
d = opt['device'].split(',') # devices
nd = len(d) # number of devices
ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
if ddp: # multi-GPU
port += 1
cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
else: # single-GPU
cmd = f'python train.py --resume {last}'
cmd += ' > /dev/null 2>&1 &' # redirect output to /dev/null and run in background
print(cmd)
os.system(cmd)

@ -0,0 +1,27 @@
#!/bin/bash
# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
# This script will run only once on first instance start (for a re-start script see mime.sh)
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
# Use >300 GB SSD
cd /home/ubuntu
if [ ! -d yolov5 ]; then
echo "Running first-time script." # install dependencies, download COCO, pull Docker
git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
cd yolov5
bash data/scripts/get_coco.sh && echo "COCO done." &
sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
wait && echo "All tasks done." # finish background tasks
else
echo "Running re-start script." # resume interrupted runs
i=0
list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
while IFS= read -r id; do
((i++))
echo "restarting container $i: $id"
sudo docker start $id
# sudo docker exec -it $id python train.py --resume # single-GPU
sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
done <<<"$list"
fi

@ -0,0 +1,144 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Run YOLOv5 benchmarks on all supported export formats
Format | `export.py --include` | Model
--- | --- | ---
PyTorch | - | yolov5s.pt
TorchScript | `torchscript` | yolov5s.torchscript
ONNX | `onnx` | yolov5s.onnx
OpenVINO | `openvino` | yolov5s_openvino_model/
TensorRT | `engine` | yolov5s.engine
CoreML | `coreml` | yolov5s.mlmodel
TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/
TensorFlow GraphDef | `pb` | yolov5s.pb
TensorFlow Lite | `tflite` | yolov5s.tflite
TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite
TensorFlow.js | `tfjs` | yolov5s_web_model/
Requirements:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
$ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT
Usage:
$ python utils/benchmarks.py --weights yolov5s.pt --img 640
"""
import argparse
import sys
import time
from pathlib import Path
import pandas as pd
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import export
import val
from utils import notebook_init
from utils.general import LOGGER, print_args
from utils.torch_utils import select_device
def run(
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=640, # inference size (pixels)
batch_size=1, # batch size
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
half=False, # use FP16 half-precision inference
test=False, # test exports only
):
y, t = [], time.time()
formats = export.export_formats()
device = select_device(device)
for i, (name, f, suffix, gpu) in formats.iterrows(): # index, (name, file, suffix, gpu-capable)
try:
assert i != 9, 'Edge TPU not supported'
assert i != 10, 'TF.js not supported'
if device.type != 'cpu':
assert gpu, f'{name} inference not supported on GPU'
# Export
if f == '-':
w = weights # PyTorch format
else:
w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # all others
assert suffix in str(w), 'export failed'
# Validate
result = val.run(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half)
metrics = result[0] # metrics (mp, mr, map50, map, *losses(box, obj, cls))
speeds = result[2] # times (preprocess, inference, postprocess)
y.append([name, round(metrics[3], 4), round(speeds[1], 2)]) # mAP, t_inference
except Exception as e:
LOGGER.warning(f'WARNING: Benchmark failure for {name}: {e}')
y.append([name, None, None]) # mAP, t_inference
# Print results
LOGGER.info('\n')
parse_opt()
notebook_init() # print system info
py = pd.DataFrame(y, columns=['Format', 'mAP@0.5:0.95', 'Inference time (ms)'])
LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)')
LOGGER.info(str(py))
return py
def test(
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=640, # inference size (pixels)
batch_size=1, # batch size
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
half=False, # use FP16 half-precision inference
test=False, # test exports only
):
y, t = [], time.time()
formats = export.export_formats()
device = select_device(device)
for i, (name, f, suffix, gpu) in formats.iterrows(): # index, (name, file, suffix, gpu-capable)
try:
w = weights if f == '-' else \
export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # weights
assert suffix in str(w), 'export failed'
y.append([name, True])
except Exception:
y.append([name, False]) # export failed
# Print results
LOGGER.info('\n')
parse_opt()
notebook_init() # print system info
py = pd.DataFrame(y, columns=['Format', 'Export'])
LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)')
LOGGER.info(str(py))
return py
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--test', action='store_true', help='test exports only')
opt = parser.parse_args()
print_args(vars(opt))
return opt
def main(opt):
test(**vars(opt)) if opt.test else run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)

@ -0,0 +1,71 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Callback utils
"""
class Callbacks:
""""
Handles all registered callbacks for YOLOv5 Hooks
"""
def __init__(self):
# Define the available callbacks
self._callbacks = {
'on_pretrain_routine_start': [],
'on_pretrain_routine_end': [],
'on_train_start': [],
'on_train_epoch_start': [],
'on_train_batch_start': [],
'optimizer_step': [],
'on_before_zero_grad': [],
'on_train_batch_end': [],
'on_train_epoch_end': [],
'on_val_start': [],
'on_val_batch_start': [],
'on_val_image_end': [],
'on_val_batch_end': [],
'on_val_end': [],
'on_fit_epoch_end': [], # fit = train + val
'on_model_save': [],
'on_train_end': [],
'on_params_update': [],
'teardown': [],}
self.stop_training = False # set True to interrupt training
def register_action(self, hook, name='', callback=None):
"""
Register a new action to a callback hook
Args:
hook: The callback hook name to register the action to
name: The name of the action for later reference
callback: The callback to fire
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
assert callable(callback), f"callback '{callback}' is not callable"
self._callbacks[hook].append({'name': name, 'callback': callback})
def get_registered_actions(self, hook=None):
""""
Returns all the registered actions by callback hook
Args:
hook: The name of the hook to check, defaults to all
"""
return self._callbacks[hook] if hook else self._callbacks
def run(self, hook, *args, **kwargs):
"""
Loop through the registered actions and fire all callbacks
Args:
hook: The name of the hook to fire actions on
args: Arguments to receive from YOLOv5
kwargs: Keyword Arguments to receive from YOLOv5
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
for action in self._callbacks[hook]:
action['callback'](*args, **kwargs)
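A minimal usage sketch (the action below is hypothetical):

```python
callbacks = Callbacks()

def log_epoch(epoch):  # hypothetical action
    print(f'epoch {epoch} finished')

callbacks.register_action('on_train_epoch_end', name='log_epoch', callback=log_epoch)
callbacks.run('on_train_epoch_end', epoch=0)  # fires every action on the hook
```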

File diff suppressed because it is too large

@ -0,0 +1,222 @@
# Repo-specific DockerIgnore -------------------------------------------------------------------------------------------
#.git
.cache
.idea
runs
output
coco
storage.googleapis.com
data/samples/*
**/results*.csv
*.jpg
# Neural Network weights -----------------------------------------------------------------------------------------------
**/*.pt
**/*.pth
**/*.onnx
**/*.engine
**/*.mlmodel
**/*.torchscript
**/*.torchscript.pt
**/*.tflite
**/*.h5
**/*.pb
*_saved_model/
*_web_model/
*_openvino_model/
# Below Copied From .gitignore -----------------------------------------------------------------------------------------
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
wandb/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv*
venv*/
ENV*/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.html # Bokeh Plots
.pg # TensorFlow Frozen Graphs
.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

@ -0,0 +1,65 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
FROM nvcr.io/nvidia/pytorch:21.10-py3
# Install linux packages
RUN apt update && apt install -y zip htop screen libgl1-mesa-glx
# Install python dependencies
COPY requirements.txt .
RUN python -m pip install --upgrade pip
RUN pip uninstall -y torch torchvision torchtext
RUN pip install --no-cache -r requirements.txt albumentations wandb gsutil notebook \
torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# RUN pip install --no-cache -U torch torchvision
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
COPY . /usr/src/app
RUN git clone https://github.com/ultralytics/yolov5 /usr/src/yolov5
# Downloads to user config dir
ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
# Set environment variables
ENV OMP_NUM_THREADS=8
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest && sudo docker build -f utils/docker/Dockerfile -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t
# Pull and Run with local directory access
# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t
# Kill all
# sudo docker kill $(sudo docker ps -q)
# Kill all image-based
# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest)
# Bash into running container
# sudo docker exec -it 5a9b5863d93d bash
# Bash into stopped container
# id=$(sudo docker ps -qa) && sudo docker start $id && sudo docker exec -it $id bash
# Clean up
# docker system prune -a --volumes
# Update Ubuntu drivers
# https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/
# DDP test
# python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3
# GCP VM from Image
# docker.io/ultralytics/yolov5:latest

@ -0,0 +1,37 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
FROM ubuntu:latest
# Install linux packages
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata
RUN apt install -y python3-pip git zip curl htop screen libgl1-mesa-glx libglib2.0-0
RUN ln -sf /usr/bin/python3 /usr/bin/python # 'alias' does not persist across RUN layers; symlink instead
# Install python dependencies
COPY requirements.txt .
RUN python3 -m pip install --upgrade pip
RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \
coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu tensorflowjs \
torch==1.11.0+cpu torchvision==0.12.0+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
COPY . /usr/src/app
RUN git clone https://github.com/ultralytics/yolov5 /usr/src/yolov5
# Downloads to user config dir
ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t

@ -0,0 +1,157 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Download utils
"""
import os
import platform
import subprocess
import time
import urllib
from pathlib import Path
from zipfile import ZipFile
import requests
import torch
def gsutil_getsize(url=''):
# gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
return int(s.split(' ')[0]) if len(s) else 0 # bytes
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file)
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f"ERROR: {assert_msg}\n{error_msg}")
print('')
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.downloads import *; attempt_download()
# Attempt file download if does not exist
file = Path(str(file).strip().replace("'", ''))
if not file.exists():
# URL specified
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
if Path(file).is_file():
print(f'Found {url} locally at {file}') # file already exists
else:
safe_download(file=file, url=url, min_bytes=1E5)
return file
# GitHub assets
file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
try:
response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api
assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
tag = response['tag_name'] # i.e. 'v1.0'
except Exception: # fallback plan
assets = [
'yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov5n6.pt', 'yolov5s6.pt',
'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
try:
tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
except Exception:
tag = 'v6.1' # current release
if name in assets:
url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror
safe_download(
file,
url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
url2=f'https://storage.googleapis.com/{repo}/{tag}/{name}', # backup url (optional)
min_bytes=1E5,
error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
return str(file)
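Typical usage, assuming a standard release asset name:

```python
# attempt_download() returns a local path, downloading the asset on first use.
weights = attempt_download('yolov5s.pt')
print(weights)  # 'yolov5s.pt'
```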
def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
# Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
t = time.time()
file = Path(file)
cookie = Path('cookie') # gdrive cookie
print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
file.unlink(missing_ok=True) # remove existing file
cookie.unlink(missing_ok=True) # remove existing cookie
# Attempt file download
out = "NUL" if platform.system() == "Windows" else "/dev/null"
os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
if os.path.exists('cookie'): # large file
s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
else: # small file
s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
r = os.system(s) # execute, capture return
cookie.unlink(missing_ok=True) # remove existing cookie
# Error check
if r != 0:
file.unlink(missing_ok=True) # remove partial
print('Download error ') # raise Exception('Download error')
return r
# Unzip if archive
if file.suffix == '.zip':
print('unzipping... ', end='')
ZipFile(file).extractall(path=file.parent) # unzip
file.unlink() # remove zip
print(f'Done ({time.time() - t:.1f}s)')
return r
def get_token(cookie="./cookie"):
with open(cookie) as f:
for line in f:
if "download" in line:
return line.split()[-1]
return ""
# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
#
#
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# # Uploads a file to a bucket
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(destination_blob_name)
#
# blob.upload_from_filename(source_file_name)
#
# print('File {} uploaded to {}.'.format(
# source_file_name,
# destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
# # Uploads a blob from a bucket
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(source_blob_name)
#
# blob.download_to_filename(destination_file_name)
#
# print('Blob {} downloaded to {}.'.format(
# source_blob_name,
# destination_file_name))

@ -0,0 +1,73 @@
# Flask REST API
[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are
commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API
created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
## Requirements
[Flask](https://palletsprojects.com/p/flask/) is required. Install with:
```shell
$ pip install Flask
```
## Run
After installing Flask, run:
```shell
$ python3 restapi.py --port 5000
```
Then use [curl](https://curl.se/) to perform a request:
```shell
$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
```
The model inference results are returned as a JSON response:
```json
[
{
"class": 0,
"confidence": 0.8900438547,
"height": 0.9318675399,
"name": "person",
"width": 0.3264600933,
"xcenter": 0.7438579798,
"ycenter": 0.5207948685
},
{
"class": 0,
"confidence": 0.8440024257,
"height": 0.7155083418,
"name": "person",
"width": 0.6546785235,
"xcenter": 0.427829951,
"ycenter": 0.6334488392
},
{
"class": 27,
"confidence": 0.3771208823,
"height": 0.3902671337,
"name": "tie",
"width": 0.0696444362,
"xcenter": 0.3675483763,
"ycenter": 0.7991207838
},
{
"class": 27,
"confidence": 0.3527112305,
"height": 0.1540903747,
"name": "tie",
"width": 0.0336618312,
"xcenter": 0.7814827561,
"ycenter": 0.5065554976
}
]
```
An example Python script that performs inference using [requests](https://docs.python-requests.org/en/master/) is given in `example_request.py`.
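A minimal sketch of such a script (endpoint and form field taken from the curl example above; the image path is a placeholder):

```python
import pprint

import requests

DETECTION_URL = 'http://localhost:5000/v1/object-detection/yolov5s'
IMAGE = 'zidane.jpg'  # placeholder test image

with open(IMAGE, 'rb') as f:
    response = requests.post(DETECTION_URL, files={'image': f}).json()

pprint.pprint(response)  # list of detections as shown above
```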

Some files were not shown because too many files have changed in this diff
