Merge branch 'develop' of https://bdgit.educoder.net/pbyhqr72x/exercise_2 into develop
@@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 DAMIÀ FUENTES ESCOTÉ

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -0,0 +1,89 @@
# DJITelloPy
## [中文文档 (Chinese version of this readme)](README_CN.md)

DJI Tello drone python interface using the official [Tello SDK](https://dl-cdn.ryzerobotics.com/downloads/tello/20180910/Tello%20SDK%20Documentation%20EN_1.3.pdf) and [Tello EDU SDK](https://dl-cdn.ryzerobotics.com/downloads/Tello/Tello%20SDK%202.0%20User%20Guide.pdf). This library has the following features:

- implementation of all tello commands
- easily retrieve a video stream
- receive and parse state packets
- control a swarm of drones
- support for python >= 3.6

Feel free to contribute!

## Install using pip
```
pip install djitellopy
```

For Linux distributions with both python2 and python3 (e.g. Debian, Ubuntu, ...) you need to run
```
pip3 install djitellopy
```

## Install in developer mode
Using the commands below you can install the repository in an _editable_ way. This allows you to modify the library and use the modified version as if you had installed it regularly.

```
git clone https://github.com/damiafuentes/DJITelloPy.git
cd DJITelloPy
pip install -e .
```

## Usage
### API Reference
See [djitellopy.readthedocs.io](https://djitellopy.readthedocs.io/en/latest/) for a full reference of all classes and methods available.

### Simple example
```python
from djitellopy import Tello

tello = Tello()

tello.connect()
tello.takeoff()

tello.move_left(100)
tello.rotate_counter_clockwise(90)
tello.move_forward(100)

tello.land()
```

### More examples
The [examples](examples/) directory contains some code examples.
Comments in the examples are mostly in both English and Chinese.

- [taking a picture](examples/take-picture.py)
- [recording a video](examples/record-video.py)
- [flying a swarm (multiple Tellos at once)](examples/simple-swarm.py)
- [simple controlling using your keyboard](examples/manual-control-opencv.py)
- [mission pad detection](examples/mission-pads.py)
- [fully featured manual control using pygame](examples/manual-control-pygame.py)

### Notes
- If you are using the `streamon` command and the response is `Unknown command`, you have to update the Tello firmware. That can be done through the Tello app.
- Mission pad detection and navigation is only supported by the Tello EDU.
- A bright environment is necessary for successful use of mission pads.
- Connecting to an existing wifi network is only supported by the Tello EDU.
- When connected to an existing wifi network, video streaming is not available (TODO: needs confirmation with the new SDK3 `port` commands).

## DJITelloPy in the media and in the wild
- \>1.5 million views on YouTube: [Drone Programming With Python Course](https://youtu.be/LmEcyQnfpDA?t=1282)
- German magazine "Make": ["KI steuert Follow-Me-Drohne" (paywall)](https://www.heise.de/select/make/2021/6/2116016361503211330), [author's notes](https://www.jentsch.io/ki-artikel-im-aktuellen-make-magazin-6-21/), [github repo](https://github.com/msoftware/tello-tracking)
- Webinar on learn.droneblocks.io: ["DJITelloPy Drone Coding"](https://learn.droneblocks.io/p/djitellopy), [github repo](https://learn.droneblocks.io/p/djitellopy)
- Universities & Schools using DJITelloPy in projects or in class:
  - [Ball State University in Muncie, Indiana](https://www.bsu.edu/)
  - [Technical University Kaiserslautern](https://www.uni-kl.de/)
  - [Sha Tin College, Hong Kong](https://shatincollege.edu.hk/)
  - [add yours...](https://github.com/damiafuentes/DJITelloPy/edit/master/README.md)

## Authors

* **Damià Fuentes Escoté**
* **Jakob Löw**
* [and more](https://github.com/damiafuentes/DJITelloPy/graphs/contributors)

## License

This project is licensed under the MIT License - see the [LICENSE.txt](LICENSE.txt) file for details.
@@ -0,0 +1,2 @@
from .tello import Tello, TelloException, BackgroundFrameRead
from .swarm import TelloSwarm
@@ -0,0 +1,65 @@
"""
This file is based on a StackOverflow post by @301_Moved_Permanently.
See https://stackoverflow.com/a/50622643

The code was adapted to be able to wrap all methods of a class by simply
adding the decorator to the class itself.
"""

import inspect
import typing
from contextlib import suppress
from functools import wraps


def _is_unparameterized_special_typing(type_hint):
    # Check for typing.Any, typing.Union, typing.ClassVar (without parameters)
    if hasattr(typing, "_SpecialForm"):
        return isinstance(type_hint, typing._SpecialForm)
    elif hasattr(type_hint, "__origin__"):
        return type_hint.__origin__ is None
    else:
        return False


def enforce_types(target):
    """Class decorator adding type checks to all member functions
    """
    def check_types(spec, *args, **kwargs):
        parameters = dict(zip(spec.args, args))
        parameters.update(kwargs)
        for name, value in parameters.items():
            with suppress(KeyError):  # Assume un-annotated parameters can be any type
                type_hint = spec.annotations[name]
                if _is_unparameterized_special_typing(type_hint):
                    continue

                if hasattr(type_hint, "__origin__") and type_hint.__origin__ is not None:
                    actual_type = type_hint.__origin__
                elif hasattr(type_hint, "__args__") and type_hint.__args__ is not None:
                    actual_type = type_hint.__args__
                else:
                    actual_type = type_hint

                if not isinstance(value, actual_type):
                    raise TypeError("Unexpected type for '{}' (expected {} but found {})"
                                    .format(name, type_hint, type(value)))

    def decorate(func):
        spec = inspect.getfullargspec(func)

        @wraps(func)
        def wrapper(*args, **kwargs):
            check_types(spec, *args, **kwargs)
            return func(*args, **kwargs)

        return wrapper

    if inspect.isclass(target):
        members = inspect.getmembers(target, predicate=inspect.isfunction)
        for name, func in members:
            setattr(target, name, decorate(func))

        return target
    else:
        return decorate(target)
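The decorator above adds runtime type checks to every method of the class it is applied to. A minimal usage sketch, with a hypothetical class that is not part of this library and assuming the module is importable as `djitellopy.enforce_types`:

```python
from djitellopy.enforce_types import enforce_types


@enforce_types
class Greeter:
    # annotated parameters are checked at call time; un-annotated ones are skipped
    def greet(self, name: str, times: int = 1) -> str:
        return ", ".join(["Hello " + name] * times)


g = Greeter()
g.greet("Tello", times=2)  # passes the checks
g.greet(42)                # raises TypeError: Unexpected type for 'name' ...
```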
@@ -0,0 +1,159 @@
"""Library for controlling multiple DJI Ryze Tello drones.
"""

from threading import Thread, Barrier
from queue import Queue
from typing import List, Callable

from .tello import Tello, TelloException
from .enforce_types import enforce_types


@enforce_types
class TelloSwarm:
    """Swarm library for controlling multiple Tellos simultaneously
    """

    tellos: List[Tello]
    barrier: Barrier
    funcBarrier: Barrier
    funcQueues: List[Queue]
    threads: List[Thread]

    @staticmethod
    def fromFile(path: str):
        """Create TelloSwarm from file. The file should contain one IP address per line.

        Arguments:
            path: path to the file
        """
        with open(path, 'r') as fd:
            ips = fd.readlines()

        return TelloSwarm.fromIps(ips)

    @staticmethod
    def fromIps(ips: list):
        """Create TelloSwarm from a list of IP addresses.

        Arguments:
            ips: list of IP Addresses
        """
        if not ips:
            raise TelloException("No ips provided")

        tellos = []
        for ip in ips:
            tellos.append(Tello(ip.strip()))

        return TelloSwarm(tellos)

    def __init__(self, tellos: List[Tello]):
        """Initialize a TelloSwarm instance

        Arguments:
            tellos: list of [Tello][tello] instances
        """
        self.tellos = tellos
        self.barrier = Barrier(len(tellos))
        self.funcBarrier = Barrier(len(tellos) + 1)
        self.funcQueues = [Queue() for tello in tellos]

        def worker(i):
            queue = self.funcQueues[i]
            tello = self.tellos[i]

            while True:
                func = queue.get()
                self.funcBarrier.wait()
                func(i, tello)
                self.funcBarrier.wait()

        self.threads = []
        for i, _ in enumerate(tellos):
            thread = Thread(target=worker, daemon=True, args=(i,))
            thread.start()
            self.threads.append(thread)

    def sequential(self, func: Callable[[int, Tello], None]):
        """Call `func` for each tello sequentially. The function receives
        two arguments: the index `i` of the current drone and `tello`, the
        current [Tello][tello] instance.

        ```python
        swarm.sequential(lambda i, tello: tello.land())
        ```
        """

        for i, tello in enumerate(self.tellos):
            func(i, tello)

    def parallel(self, func: Callable[[int, Tello], None]):
        """Call `func` for each tello in parallel. The function receives
        two arguments: the index `i` of the current drone and `tello`, the
        current [Tello][tello] instance.

        You can use `swarm.sync()` for syncing between threads.

        ```python
        swarm.parallel(lambda i, tello: tello.move_up(50 + i * 10))
        ```
        """

        for queue in self.funcQueues:
            queue.put(func)

        self.funcBarrier.wait()
        self.funcBarrier.wait()

    def sync(self, timeout: float = None):
        """Sync parallel tello threads. The code continues when all threads
        have called `swarm.sync`.

        ```python
        def doStuff(i, tello):
            tello.move_up(50 + i * 10)
            swarm.sync()

            if i == 2:
                tello.flip_back()
            # make all other drones wait for one to complete its flip
            swarm.sync()

        swarm.parallel(doStuff)
        ```
        """
        return self.barrier.wait(timeout)

    def __getattr__(self, attr):
        """Call a standard tello function in parallel on all tellos.

        ```python
        swarm.command()
        swarm.takeoff()
        swarm.move_up(50)
        ```
        """
        def callAll(*args, **kwargs):
            self.parallel(lambda i, tello: getattr(tello, attr)(*args, **kwargs))

        return callAll

    def __iter__(self):
        """Iterate over all drones in the swarm.

        ```python
        for tello in swarm:
            print(tello.get_battery())
        ```
        """
        return iter(self.tellos)

    def __len__(self):
        """Return the number of tellos in the swarm.

        ```python
        print("Tello count: {}".format(len(swarm)))
        ```
        """
        return len(self.tellos)
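A minimal sketch of `TelloSwarm.fromFile` in use, assuming a hypothetical `ips.txt` that contains one IP address per line as described in the docstring:

```python
from djitellopy import TelloSwarm

# ips.txt (hypothetical) might contain:
#   192.168.178.42
#   192.168.178.43
swarm = TelloSwarm.fromFile("ips.txt")

swarm.connect()
swarm.takeoff()
swarm.land()
swarm.end()
```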
@@ -0,0 +1,5 @@
mkdocs>=1.1.2
mkdocs-material>=5.2.2
mkdocstrings>=0.11.2
numpy==1.15.4
opencv-python==3.4.3.18
@@ -0,0 +1,28 @@
# DJITelloPy

This documentation is the API reference of the DJITelloPy Library.

For more information on the project please see the [readme on github](https://github.com/damiafuentes/DJITelloPy/blob/master/README.md).

## API

Currently the library contains the following classes:

- [Tello][tello] for controlling a single tello drone.
- [Swarm][swarm] for controlling multiple Tello EDUs in parallel.

## Example Code

Please see the [example directory](https://github.com/damiafuentes/DJITelloPy/tree/master/examples) on github.

## Installation

```bash
pip install djitellopy
```

For Linux distributions with both python2 and python3 (e.g. Debian, Ubuntu, ...) you need to run

```bash
pip3 install djitellopy
```
@@ -0,0 +1,5 @@
# Swarm

::: djitellopy.TelloSwarm
    :docstring:
    :members:
@@ -0,0 +1,5 @@
# Tello

::: djitellopy.Tello
    :docstring:
    :members:
@@ -0,0 +1,34 @@
from djitellopy import Tello

# create and connect
# 创建Tello对象并连接
tello = Tello()
tello.connect()

# configure drone
# 设置无人机
tello.enable_mission_pads()
tello.set_mission_pad_detection_direction(1)  # forward detection only 只识别前方

tello.takeoff()

pad = tello.get_mission_pad_id()

# detect and react to pads until we see pad #1
# 发现并识别挑战卡直到看见1号挑战卡
while pad != 1:
    if pad == 3:
        tello.move_back(30)
        tello.rotate_clockwise(90)

    if pad == 4:
        tello.move_up(30)
        tello.flip_forward()

    pad = tello.get_mission_pad_id()

# graceful termination
# 安全结束程序
tello.disable_mission_pads()
tello.land()
tello.end()
@@ -0,0 +1,16 @@
# Simply import "panoramaModule.py" and you can use each of its functions by passing the drone object as the argument.
from djitellopy import Tello
import cv2
import time
import panoramaModule


tello = Tello()
tello.connect()

print(tello.get_battery())

tello.takeoff()
tello.move_up(500)
panoramaModule.panorama_half_clockwise(tello)
tello.land()
@@ -0,0 +1,88 @@
# Module with individual panorama types defined. You can just import it and use it however you like.
#
# It will save photos from the Tello inside the folder it is run from. You can change this by changing the path inside every function.
from djitellopy import Tello
import cv2
import time


def panorama_full_clockwise(tello_name):
    tello = tello_name
    tello.streamoff()
    tello.streamon()

    for i in range(4):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-full-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_clockwise(80)

    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-full-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_clockwise(40)

    tello.streamoff()


def panorama_half_clockwise(tello_name):
    tello = tello_name
    tello.streamoff()
    tello.streamon()

    tello.rotate_counter_clockwise(90)

    for i in range(3):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-half-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_clockwise(60)

    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-half-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_counter_clockwise(90)

    tello.streamoff()


def panorama_full_counter_clockwise(tello_name):
    tello = tello_name
    tello.streamoff()
    tello.streamon()

    for i in range(4):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-full-counter-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_counter_clockwise(80)

    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-full-counter-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_counter_clockwise(40)

    tello.streamoff()


def panorama_half_counter_clockwise(tello_name):
    tello = tello_name
    tello.streamoff()
    tello.streamon()

    tello.rotate_clockwise(90)

    for i in range(3):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-half-counter-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_counter_clockwise(60)

    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-half-counter-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_clockwise(90)

    tello.streamoff()
@@ -0,0 +1,25 @@
from djitellopy import TelloSwarm

swarm = TelloSwarm.fromIps([
    "192.168.178.42",
    "192.168.178.43",
    "192.168.178.44"
])

swarm.connect()
swarm.takeoff()

# run in parallel on all tellos
# 同时在所有Tello上执行
swarm.move_up(100)

# run by one tello after the other
# 让Tello一个接一个执行
swarm.sequential(lambda i, tello: tello.move_forward(i * 20 + 20))

# making each tello do something unique in parallel
# 让每一架Tello单独执行不同的操作
swarm.parallel(lambda i, tello: tello.move_left(i * 100 + 20))

swarm.land()
swarm.end()
@@ -0,0 +1,12 @@
from djitellopy import Tello

tello = Tello()

tello.connect()
tello.takeoff()

tello.move_left(100)
tello.rotate_clockwise(90)
tello.move_forward(100)

tello.land()
@@ -0,0 +1,13 @@
import cv2
from djitellopy import Tello

tello = Tello()
tello.connect()

tello.streamon()
frame_read = tello.get_frame_read()

tello.takeoff()
cv2.imwrite("picture.png", frame_read.frame)

tello.land()
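The same `get_frame_read()` stream can also be written to disk frame by frame. A rough sketch for saving a short clip follows; the file name, codec and 30 FPS rate are assumptions, not part of this example file:

```python
import time
import cv2
from djitellopy import Tello

tello = Tello()
tello.connect()

tello.streamon()
frame_read = tello.get_frame_read()

# derive the frame size from the first frame; 30 FPS is an assumption
height, width, _ = frame_read.frame.shape
writer = cv2.VideoWriter("video.avi", cv2.VideoWriter_fourcc(*"XVID"), 30, (width, height))

start = time.time()
while time.time() - start < 5:  # record roughly five seconds
    writer.write(frame_read.frame)
    time.sleep(1 / 30)

writer.release()
tello.streamoff()
```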
@@ -0,0 +1,22 @@
site_name: DJITelloPy API Reference
site_url: "https://djitellopy.readthedocs.io/en/latest/"
repo_url: "https://github.com/damiafuentes/DJITelloPy"
repo_name: "damiafuentes/DJITelloPy"

theme:
  name: "material"

markdown_extensions:
  - admonition
  - codehilite

plugins:
  - search
  - mkdocstrings:
      default_handler: python
      handlers:
        python:
          rendering:
            show_source: true
      watch:
        - djitellopy/
@@ -0,0 +1,3 @@
numpy==1.20.1
av==8.0.3
pillow==8.4.0
@@ -0,0 +1,3 @@
# Inside of setup.cfg
[metadata]
description-file = README.md
@@ -0,0 +1,38 @@
import setuptools

with open("README.md", "r", encoding="utf-8") as fd:
    long_description = fd.read()

# replace relative urls to example files with absolute urls to the main git repo
repo_code_url = "https://github.com/damiafuentes/DJITelloPy/tree/master"
long_description = long_description.replace("](examples/", "]({}/examples/".format(repo_code_url))

setuptools.setup(
    name='djitellopy',
    packages=['djitellopy'],
    version='2.4.0',
    license='MIT',
    description='Tello drone library including support for video streaming, swarms, state packets and more',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='Jakob Löw',
    author_email='djitellopy@m4gnus.de',
    url='https://github.com/damiafuentes/DJITelloPy',
    download_url='https://github.com/damiafuentes/DJITelloPy/archive/2.4.0.tar.gz',
    keywords=['tello', 'dji', 'drone', 'sdk', 'official sdk'],
    install_requires=[
        'numpy',
        'opencv-python',
    ],
    python_requires='>=3.6',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
    ],
)
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
@@ -0,0 +1,7 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="PROJECT_PROFILE" value="Default" />
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (myTelloProject-master)" project-jdk-type="Python SDK" />
</project>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/myTelloProject.iml" filepath="$PROJECT_DIR$/.idea/myTelloProject.iml" />
    </modules>
  </component>
</project>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.10 (myTelloProject-master)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PySciProjectComponent">
    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
  </component>
</project>
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
@@ -0,0 +1,29 @@
human_detection/output
examples/results
examples/res
PoseFlow/__pycache__
PoseFlow/*.npy
PoseFlow/alpha-pose-results-test.json
PoseFlow/alpha-pose-results-val.json
PoseFlow/test-predict
PoseFlow/val-predict
train_sppe/coco-minival500_images.txt
train_sppe/person_keypoints_val2014.json

ssd/examples
images

*.npy
*.so
*.pyc
.ipynb_checkpoints
*/.ipynb_checkpoints/
*/.tensorboard/*
*/exp

*.pth
*.h5
*.zip
*.weights

coco-minival/
|
||||
ALPHAPOSE: MULTIPERSON KEYPOINT DETECTION
|
||||
SOFTWARE LICENSE AGREEMENT
|
||||
ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
|
||||
BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
|
||||
This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Shanghai Jiao Tong University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
|
||||
|
||||
RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
|
||||
Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
|
||||
non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
|
||||
|
||||
CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
|
||||
|
||||
PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
|
||||
|
||||
DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
|
||||
|
||||
BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
|
||||
|
||||
USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark “AlphaPose", "Shanghai Jiao Tong" or any renditions thereof without the prior written permission of Licensor.
|
||||
|
||||
You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
|
||||
|
||||
ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
|
||||
|
||||
TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
|
||||
|
||||
The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
|
||||
|
||||
FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
|
||||
|
||||
DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
|
||||
|
||||
SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
|
||||
|
||||
EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
|
||||
|
||||
EXPORT REGULATION: Licensee agrees to comply with any and all applicable
|
||||
U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
|
||||
|
||||
SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
|
||||
|
||||
NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
|
||||
|
||||
ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
|
||||
|
||||
|
||||
|
||||
************************************************************************
|
||||
|
||||
THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
|
||||
|
||||
This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
|
||||
|
||||
1. Torch, (https://github.com/torch/distro)
|
||||
|
||||
Copyright (c) 2016, Soumith Chintala, Ronan Collobert, Koray Kavukcuoglu, Clement Farabet All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of distro nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
2. TensorFlow (https://github.com/tensorflow/tensorflow)
|
||||
Copyright 2018 The TensorFlow Authors. All rights reserved.
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2017, The TensorFlow Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
3. tf-faster-rcnn (https://github.com/endernewton/tf-faster-rcnn)
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Xinlei Chen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
4.PyraNet (https://github.com/bearpaw/PyraNet)
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
5. pose-hg-demo (https://github.com/umich-vl/pose-hg-demo)
|
||||
Copyright (c) 2016, University of Michigan
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
|
@ -0,0 +1,141 @@
# Pose Flow

Official implementation of [Pose Flow: Efficient Online Pose Tracking](https://arxiv.org/abs/1802.00977).

<p align='center'>
    <img src="posetrack1.gif" width="360">
    <img src="posetrack2.gif" width="344">
</p>

Results on PoseTrack Challenge validation set:

1. Task2: Multi-Person Pose Estimation (mAP)
<center>

| Method | Head mAP | Shoulder mAP | Elbow mAP | Wrist mAP | Hip mAP | Knee mAP | Ankle mAP | Total mAP |
|:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|
| Detect-and-Track(FAIR) | **67.5** | 70.2 | 62 | 51.7 | 60.7 | 58.7 | 49.8 | 60.6 |
| **AlphaPose** | 66.7 | **73.3** | **68.3** | **61.1** | **67.5** | **67.0** | **61.3** | **66.5** |

</center>

2. Task3: Pose Tracking (MOTA)
<center>

| Method | Head MOTA | Shoulder MOTA | Elbow MOTA | Wrist MOTA | Hip MOTA | Knee MOTA | Ankle MOTA | Total MOTA | Total MOTP| Speed(FPS) |
|:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|
| Detect-and-Track(FAIR) | **61.7** | 65.5 | 57.3 | 45.7 | 54.3 | 53.1 | 45.7 | 55.2 | 61.5 |Unknown|
| **PoseFlow(DeepMatch)** | 59.8 | **67.0** | 59.8 | 51.6 | **60.0** | **58.4** | **50.5** | **58.3** | **67.8**|8|
| **PoseFlow(OrbMatch)** | 59.0 | 66.8 | **60.0** | **51.8** | 59.4 | **58.4** | 50.3 | 58.0 | 62.2|24|

</center>

## Latest Features
- Dec 2018: <strong>PoseFlow (General Version)</strong> released! Supports ANY dataset and visualization of pose tracking results.
- Oct 2018: Support for generating correspondence files with ORB (OpenCV): 3X faster, and no need to compile the DeepMatching library.

## Requirements

- Python 2.7.13
- OpenCV 3.4.2.16
- OpenCV-contrib 3.4.2.16
- tqdm 4.19.8

## Installation

1. Download the PoseTrack dataset from [PoseTrack](https://posetrack.net/) to `AlphaPose/PoseFlow/posetrack_data/`.
2. (Optional) Use [DeepMatching](http://lear.inrialpes.fr/src/deepmatching/) to extract dense correspondences between adjacent frames in every video; please refer to [DeepMatching Compile Error](https://github.com/MVIG-SJTU/AlphaPose/issues/97) to compile DeepMatching correctly.

```shell
pip install -r requirements.txt

cd deepmatching
make clean all
make
cd ..
```

## For Any Datasets (General Version)

1. Use [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results.

```shell
# pytorch version
python demo.py --indir ${image_dir}$ --outdir ${results_dir}$

# torch version
./run.sh --indir ${image_dir}$ --outdir ${results_dir}$
```

2. Run pose tracking:

```shell
# pytorch version
python tracker-general.py --imgdir ${image_dir}$ \
                          --in_json ${results_dir}$/alphapose-results.json \
                          --out_json ${results_dir}$/alphapose-results-forvis-tracked.json \
                          --visdir ${render_dir}$

# torch version
python tracker-general.py --imgdir ${image_dir}$ \
                          --in_json ${results_dir}$/POSE/alpha-pose-results-forvis.json \
                          --out_json ${results_dir}$/POSE/alpha-pose-results-forvis-tracked.json \
                          --visdir ${render_dir}$
```
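
Before (or after) running the tracker it can help to eyeball the pose-estimation JSON. The snippet below is a minimal sketch, not part of PoseFlow itself; it assumes the layout of `alpha-pose-results-sample.json` shipped with this repo (a dict mapping each image path to a list of `{"score", "keypoints"}` entries, with `keypoints` stored as flat `[x, y, confidence]` triplets). Field names in the tracked output file may differ, so adapt as needed.

```python
import json

# File name is an assumption: point this at whatever --in_json / --out_json you used above.
with open("alphapose-results.json") as f:
    results = json.load(f)

for image_path, people in results.items():
    for person in people:
        kps = person["keypoints"]
        # Flat [x, y, confidence] triplets -> list of (x, y, confidence) tuples.
        joints = [tuple(kps[i:i + 3]) for i in range(0, len(kps), 3)]
        mean_conf = sum(j[2] for j in joints) / len(joints)
        print("{}: score={:.2f}, {} joints, mean confidence={:.2f}".format(
            image_path, person["score"], len(joints), mean_conf))
```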

## For PoseTrack Dataset Evaluation (Paper Baseline)

1. Use [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results on videos with a format like `alpha-pose-results-sample.json`.
2. Use DeepMatching/ORB to generate correspondence files.

```shell
# Generate correspondences by DeepMatching
# (more robust but slower)
python matching.py --orb=0

# or generate correspondences by ORB
# (faster but less robust)
python matching.py --orb=1
```

3. Run pose tracking:

```shell
python tracker-baseline.py --dataset=val/test --orb=1/0
```

4. Evaluation

The original [poseval](https://github.com/leonid-pishchulin/poseval) repo has instructions on how to convert annotation files from MAT to JSON.

Evaluate pose tracking results on the validation dataset:

```shell
git clone https://github.com/leonid-pishchulin/poseval.git --recursive
cd poseval/py && export PYTHONPATH=$PWD/../py-motmetrics:$PYTHONPATH
cd ../../
python poseval/py/evaluate.py --groundTruth=./posetrack_data/annotations/val \
                              --predictions=./${track_result_dir}/ \
                              --evalPoseTracking --evalPoseEstimation
```

## Citation

Please cite this paper in your publications if it helps your research:

    @inproceedings{xiu2018poseflow,
      author = {Xiu, Yuliang and Li, Jiefeng and Wang, Haoyu and Fang, Yinghong and Lu, Cewu},
      title = {{Pose Flow}: Efficient Online Pose Tracking},
      booktitle={BMVC},
      year = {2018}
    }

@ -0,0 +1,169 @@
|
||||
{
|
||||
"images/bonn_mpii_test_5sec/24621_mpii/00000103.jpg": [
|
||||
{
|
||||
"score": 8.385687289228619,
|
||||
"keypoints": [
|
||||
606.1139178059441,
|
||||
1055.7866630683084,
|
||||
0.1285074118632463,
|
||||
622,
|
||||
1016,
|
||||
1.24690842628479,
|
||||
701,
|
||||
785,
|
||||
1.3190804719924927,
|
||||
919,
|
||||
798,
|
||||
1.0360052585601807,
|
||||
622,
|
||||
1003,
|
||||
0.7249196767807007,
|
||||
600.5921057594508,
|
||||
1043.7039471202747,
|
||||
0.07363978983288405,
|
||||
747,
|
||||
785,
|
||||
1.1118680238723755,
|
||||
754,
|
||||
362,
|
||||
1.351969599723816,
|
||||
761,
|
||||
362,
|
||||
1.329826831817627,
|
||||
735.0299835119931,
|
||||
148.28635614181508,
|
||||
1.2460612274594385,
|
||||
616.8225141507821,
|
||||
775.3727265996391,
|
||||
4.076232522035756,
|
||||
648,
|
||||
613,
|
||||
2.3751518726348877,
|
||||
651.8582324380334,
|
||||
341.53551239931363,
|
||||
3.683300004030267,
|
||||
880,
|
||||
435,
|
||||
2.192237377166748,
|
||||
946,
|
||||
607,
|
||||
4.622312943140666,
|
||||
993.0232720577997,
|
||||
779,
|
||||
3.9823181915094947
|
||||
]
|
||||
},
|
||||
{
|
||||
"score": 10.950873801541226,
|
||||
"keypoints": [
|
||||
1079.745663413901,
|
||||
1057.876310361107,
|
||||
0.05478342392744616,
|
||||
1085.7446022663407,
|
||||
927.8509247239244,
|
||||
3.989027662754409,
|
||||
1076.4168091495721,
|
||||
681.0010309293239,
|
||||
3.69318636501652,
|
||||
1175.751233049613,
|
||||
675.4609653408796,
|
||||
3.894998808909425,
|
||||
1168.367434746748,
|
||||
925.6795830692723,
|
||||
4.041951319921906,
|
||||
1246.736699044823,
|
||||
1057.6171141024415,
|
||||
0.35268874869071126,
|
||||
1124.9376542870104,
|
||||
674.6766129035676,
|
||||
3.103561346457346,
|
||||
1135.6961084323723,
|
||||
314.70914186846545,
|
||||
3.308468804589743,
|
||||
1127.340462592704,
|
||||
258.60926488886156,
|
||||
4.206135445215616,
|
||||
1104.7237517457497,
|
||||
110.1842839789316,
|
||||
3.7932232834089974,
|
||||
1021.6023155423281,
|
||||
685.7139033202061,
|
||||
2.1369253795349024,
|
||||
1037.0987900834948,
|
||||
514.4666027032713,
|
||||
1.3758957654789534,
|
||||
1053.185942829918,
|
||||
324.0023196992991,
|
||||
3.6865770542425436,
|
||||
1219.9910902145912,
|
||||
313.78280708471095,
|
||||
4.968025243674319,
|
||||
1242.666019724613,
|
||||
508.9999999999999,
|
||||
5.531640558590693,
|
||||
1199.6364698448594,
|
||||
672.3816554867356,
|
||||
5.188543576240203
|
||||
]
|
||||
},
|
||||
{
|
||||
"score": 8.796343223208792,
|
||||
"keypoints": [
|
||||
892,
|
||||
1057,
|
||||
0.0642801970243454,
|
||||
879,
|
||||
960,
|
||||
2.7418549060821533,
|
||||
873,
|
||||
709,
|
||||
1.4490729570388794,
|
||||
976,
|
||||
709,
|
||||
2.326153039932251,
|
||||
976,
|
||||
947,
|
||||
1.8430407047271729,
|
||||
1018.3358121883978,
|
||||
1056.6669765235497,
|
||||
0.21173024539211077,
|
||||
924,
|
||||
709,
|
||||
1.6106798648834229,
|
||||
931,
|
||||
342,
|
||||
1.8503456115722656,
|
||||
944.5304231025389,
|
||||
289.8346541279278,
|
||||
2.84533776915699,
|
||||
911.8320538351469,
|
||||
133.33264423713035,
|
||||
2.8966951072816554,
|
||||
847,
|
||||
709,
|
||||
0.2632869780063629,
|
||||
1055.3791695827251,
|
||||
530.2416608345495,
|
||||
0.052683703823322964,
|
||||
868.2297245132042,
|
||||
343.1756749304577,
|
||||
1.56991625917443,
|
||||
1041.1065908708715,
|
||||
345.8934091291283,
|
||||
4.5100791598212595,
|
||||
1057.7401496489065,
|
||||
539.3480299297813,
|
||||
4.881036537158492,
|
||||
1025.9666633264292,
|
||||
694.5222411502352,
|
||||
4.941733110537884
|
||||
]
|
||||
}
|
||||
],
|
||||
"images/bonn_mpii_test_5sec/24621_mpii/00000104.jpg": [
|
||||
{"score": ,"keypoints":[]},
|
||||
...
|
||||
,{"score": ,"keypoints":[]}
|
||||
]
|
||||
...
|
||||
}
|
@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
@ -0,0 +1,42 @@
CC=g++

OS_NAME=$(shell uname -s)
ifeq ($(OS_NAME),Linux)
LAPACKLDFLAGS=/usr/local/atlas/lib/libsatlas.so # single-threaded blas
#LAPACKLDFLAGS=/usr/lib64/atlas/libtatlas.so # multi-threaded blas
#BLAS_THREADING=-D MULTITHREADED_BLAS # remove this if wrong
endif
ifeq ($(OS_NAME),Darwin) # Mac OS X
LAPACKLDFLAGS=-framework Accelerate # for OS X
endif
LAPACKCFLAGS=-Dinteger=int $(BLAS_THREADING)
STATICLAPACKLDFLAGS=-fPIC -Wall -g -fopenmp -static -static-libstdc++ /home/lear/douze/tmp/jpeg-6b/libjpeg.a /usr/lib64/libpng.a /usr/lib64/libz.a /usr/lib64/libblas.a /usr/lib/gcc/x86_64-redhat-linux/4.9.2/libgfortran.a /usr/lib/gcc/x86_64-redhat-linux/4.9.2/libquadmath.a # statically linked version

CFLAGS= -fPIC -Wall -g -std=c++11 $(LAPACKCFLAGS) -fopenmp -DUSE_OPENMP -O3
LDFLAGS=-fPIC -Wall -g -ljpeg -lpng -fopenmp
CPYTHONFLAGS=-I/usr/include/python2.7

SOURCES := $(shell find . -name '*.cpp' ! -name 'deepmatching_matlab.cpp')
OBJ := $(SOURCES:%.cpp=%.o)
HEADERS := $(shell find . -name '*.h')


all: deepmatching

.cpp.o: %.cpp %.h
	$(CC) -o $@ $(CFLAGS) -c $+

deepmatching: $(HEADERS) $(OBJ)
	$(CC) -o $@ $^ $(LDFLAGS) $(LAPACKLDFLAGS) -I/home/ibal_109/atlas/build/include

deepmatching-static: $(HEADERS) $(OBJ)
	$(CC) -o $@ $^ $(STATICLAPACKLDFLAGS)

python: $(HEADERS) $(OBJ)
	# swig -python $(CPYTHONFLAGS) deepmatching.i # not necessary, only do if you have swig compiler
	g++ $(CFLAGS) -c deepmatching_wrap.c $(CPYTHONFLAGS)
	g++ -shared $(LDFLAGS) $(LAPACKLDFLAGS) deepmatching_wrap.o $(OBJ) -o _deepmatching.so $(LIBFLAGS)

clean:
	rm -f $(OBJ) deepmatching *~ *.pyc .gdb_history deepmatching_wrap.o _deepmatching.so deepmatching.mex???

@ -0,0 +1,185 @@
Implementation of the Deep Matching algorithm, published at ICCV 2013 in
"DeepFlow: Large displacement optical flow with deep matching" by Philippe
Weinzaepfel, Jerome Revaud, Zaid Harchaoui and Cordelia Schmid.
Code and idea by Jerome Revaud, INRIA. The code is only for scientific
or personal use. Please contact me/INRIA for commercial use.
Email: jerome.revaud@inria.fr

Copyright (C) 2015 Jerome Revaud

Version 1.2.2

License:

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>


Installation:

  make clean all

This program has been built on a fedora18 x64 machine and tested on Mac OS X.
*No assistance* will be given to compile the code on other OS. However, if
you are able to successfully adapt the code for other platforms (Windows),
please notify me so that I can release these versions on the webpage:

  http://lear.inrialpes.fr/src/deepmatching/


Matlab wrapper:
|
||||
[Prerequisite: to have compiled the executable, see above.]
|
||||
|
||||
1) Launch matlab by preloading the same 'libatlas' than the one used to compile ./deepmatching:
|
||||
LD_PRELOAD=/usr/lib64/atlas/libtatlas.so.3 matlab
|
||||
|
||||
2) Compile the MEX file:
|
||||
mex deepmatching_matlab.cpp deep_matching.o conv.o hog.o image.o io.o main.o maxfilter.o pixel_desc.o -output deepmatching '-DUSEOMP' CFLAGS="-fPIC -Wall -g -std=c++11 -O3 -fopenmp" LDFLAGS="-fopenmp" -lpng -ljpeg -lm /usr/local/atlas/lib/libsatlas.so
|
||||
|
||||
3) Try executing the code:
|
||||
>> help deepmatching
|
||||
>> deepmatching() % show some help about options
|
||||
>> img1 = single(imread('liberty1.png'));
|
||||
>> img2 = single(imread('liberty2.png'));
|
||||
>> matches = deepmatching( img1, img2, '-downscale 2 -v' );
|
||||
>> matches % print matches; they should match the listing shown below
|
||||
|
||||
Python wrapper:
|
||||
1) Compile the python module:
|
||||
make python
|
||||
|
||||
2) Try executing the code:
|
||||
>> import deepmatching as dm
|
||||
>> help(dm.deepmatching)
|
||||
>> dm.deepmatching() # show some help about options
|
||||
>> from PIL import Image
|
||||
>> import numpy as np
|
||||
>> img1 = np.array(Image.open('liberty1.png'))
|
||||
>> img2 = np.array(Image.open('liberty2.png'))
|
||||
>> matches = dm.deepmatching( img1, img2, '-downscale 2 -v' )
|
||||
>> matches # print matches; they should match the listing shown below
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Example usages and explanations:
|
||||
|
||||
To get detailed information on parameters:
|
||||
./deepmatching -h
|
||||
./deepmatching --help
|
||||
|
||||
|
||||
* Build verification:
|
||||
./deepmatching liberty1.png liberty2.png -downscale 2 -v
|
||||
|
||||
should produce the following output:
|
||||
layer 0, patch_size = 16x16
|
||||
remaining 16 big cells (actually, 16 are unique)
|
||||
layer 1, patch_size = 32x32
|
||||
remaining 25 big cells (actually, 25 are unique)
|
||||
layer 2, patch_size = 64x64
|
||||
remaining 25 big cells (actually, 25 are unique)
|
||||
found 625 local matches
|
||||
gathering correspondences 96%...
|
||||
8 8 0 12 2.6554 10
|
||||
8 40 4 48 2.65679 11
|
||||
8 24 8 32 2.5486 11
|
||||
40 40 40 32 2.64178 0
|
||||
40 56 44 52 2.58631 0
|
||||
40 24 40 12 2.65065 0
|
||||
56 40 56 28 2.64225 0
|
||||
56 24 56 12 2.68497 0
|
||||
24 40 24 32 2.62045 3
|
||||
24 56 28 60 2.5849 12
|
||||
|
||||
* To visualize the output correspondences:
|
||||
Use the "viz.py" python script provided.
|
||||
./deepmatching climb1.png climb2.png -nt 0 | python viz.py climb1.png climb2.png
|
||||
|
||||
* To restrict matching to local neighborhood:
|
||||
The "-ngh_rad <D>" option restricts the matching to a radius of <D> pixels.
|
||||
It uses less memory and is faster. For instance, the following should produce
roughly the same output as before while using about half the memory and CPU:
|
||||
|
||||
./deepmatching climb1.png climb2.png -nt 0 -ngh_rad 192 | python viz.py climb1.png climb2.png
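The same option string can presumably also be passed through the Python wrapper (a sketch, reusing the img1/img2 arrays loaded in the wrapper example above):

>> matches = dm.deepmatching( img1, img2, '-ngh_rad 192 -v' )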
|
||||
|
||||
* To rescore matches prior to calling deepflow / epicflow:
|
||||
simply pipe the output correspondences into 'rescore.py':
|
||||
./deepmatching img1 img2 [args] | python rescore.py img1 img2
|
||||
|
||||
|
||||
* Scale and rotation invariant version: (see the --help)
|
||||
./deepmatching dino1.jpg dino2.jpg -nt 0 -downscale 1 -max_scale 2 -rot_range -45 +45 -v | python viz.py dino1.jpg dino2.jpg
|
||||
|
||||
param -max_scale: maximum scale factor (here x2, default = x5)
|
||||
param -rot_range: rotation range in degrees (default = from 0 to 360)
|
||||
|
||||
|
||||
For details about the options, please refer to the help, the papers or the code.
|
||||
|
||||
|
||||
Important tip:
|
||||
If the program stops with "segmentation fault", then it means that your machine
|
||||
does not have enough memory. In this case, you should consider increasing the
|
||||
"-downscale" parameter.
|
||||
|
||||
|
||||
Version history:
|
||||
|
||||
version 1.0.2:
|
||||
Many thanks to Bowen Zhang from Tongji University for reporting an issue with the makefile
|
||||
|
||||
version 1.1:
|
||||
- New mode added for "fully scale & rotation invariant DeepMatching".
|
||||
- Improved visualisation (viz.py)
|
||||
- Removed useless/suboptimal options (-iccv_settings)
|
||||
- Fixed a bug related to memory allocation for large images
|
||||
|
||||
version 1.2:
|
||||
- Added a new option "-ngh_rad" to restrict the matching to a local neighborhood, which allows
|
||||
much reduced memory usage and computations.
|
||||
- statically-compiled version is now fully multi-threaded with BLAS
|
||||
- a few minor bug fixes, code cleanup and updates.
|
||||
|
||||
version 1.2.1:
|
||||
- Now performing the maxpooling and subsampling steps jointly,
|
||||
which reduces memory usage to about two thirds of what it was before. It is also a bit faster.
|
||||
- Removed some useless/confusing options in the executable.
|
||||
|
||||
version 1.2.2:
|
||||
- Now includes a Matlab and a Python wrapper!
|
||||
@ -0,0 +1,246 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___ARRAY_TYPES_H___
|
||||
#define ___ARRAY_TYPES_H___
|
||||
|
||||
typedef unsigned char UBYTE;
|
||||
typedef unsigned int UINT;
|
||||
|
||||
|
||||
/************************
|
||||
* 1D Array
|
||||
|
||||
Equivalences:
|
||||
|
||||
C/Python/numpy: array.shape = (tx,)
|
||||
array[x] := array->pixels[x]
|
||||
|
||||
Matlab/Fortran: [1, tx] = size(array)
|
||||
array(x, 1) := array->pixels[x-1]
|
||||
*/
|
||||
|
||||
#define DEFINE_ARRAY(type) \
|
||||
typedef struct { \
|
||||
type* pixels; \
|
||||
int tx; \
|
||||
} type##_array;
|
||||
|
||||
DEFINE_ARRAY(UBYTE)
|
||||
DEFINE_ARRAY(int)
|
||||
DEFINE_ARRAY(UINT)
|
||||
DEFINE_ARRAY(float)
|
||||
|
||||
#define ASSERT_ARRAY_ZEROS(arr) {int size=arr->tx; assert((arr->pixels[0]==0 && arr->pixels[size/2]==0 && arr->pixels[size-1]==0) || !"error: matrix " #arr "is supposed to be zeros");}
|
||||
|
||||
|
||||
/************************
|
||||
* 2D Image
|
||||
|
||||
Equivalences:
|
||||
|
||||
C/Python/numpy: array.shape = (ty, tx)
|
||||
array[y, x] := array->pixels[x + y*tx]
|
||||
|
||||
Matlab/Fortran: [tx, ty] = size(array)
|
||||
array(x, y) := array->pixels[(x-1) + (y-1)*tx]
|
||||
*/
|
||||
|
||||
#define DEFINE_IMG(type) \
|
||||
typedef struct { \
|
||||
type* pixels;\
|
||||
int tx,ty;\
|
||||
} type##_image;
|
||||
|
||||
DEFINE_IMG(UBYTE)
|
||||
DEFINE_IMG(int)
|
||||
DEFINE_IMG(UINT)
|
||||
DEFINE_IMG(float)
|
||||
|
||||
#define ASSERT_SAME_SIZE ASSERT_SAME_IMG_SIZE
|
||||
#define ASSERT_IMG_SIZE ASSERT_SAME_IMG_SIZE
|
||||
#define ASSERT_SAME_IMG_SIZE(im1,im2) if(im1 && im2) assert(im1->tx==im2->tx && im1->ty==im2->ty);
|
||||
|
||||
#define ASSERT_IMAGE_ZEROS
|
||||
#define ASSERT_IMG_ZEROS(img) {int size=img->tx*img->ty; assert((img->pixels[0]==0 && img->pixels[size/2]==0 && img->pixels[size-1]==0) || !"error: matrix " #img "is supposed to be zeros");}
|
||||
#define IMG_SIZE(img) (long((img)->tx)*(img)->ty)
|
||||
|
||||
|
||||
|
||||
/************************
|
||||
* 3D Image = Cube (Z coordinates are contiguous)
|
||||
|
||||
Equivalences:
|
||||
|
||||
C/Python/numpy: array.shape = (ty, tx, tz)
|
||||
array[y, x, z] := array->pixels[z + x*tz + y*tx*tz]
|
||||
|
||||
Matlab/Fortran: [tz, tx, ty] = size(array)
|
||||
array(z, x, y) := array->pixels[(z-1) + (x-1)*tz + (y-1)*tx*tz]
|
||||
*/
|
||||
|
||||
#define DEFINE_CUBE(type) \
|
||||
typedef struct { \
|
||||
type* pixels; \
|
||||
int tx,ty,tz; \
|
||||
} type##_cube;
|
||||
|
||||
DEFINE_CUBE(UBYTE)
|
||||
DEFINE_CUBE(short)
|
||||
DEFINE_CUBE(int)
|
||||
DEFINE_CUBE(UINT)
|
||||
DEFINE_CUBE(float)
|
||||
|
||||
#define ASSERT_SAME_CUBE_SIZE(im1, im2) \
|
||||
if((im1) && (im2)) assert((im1)->tx==(im2)->tx && (im1)->ty==(im2)->ty && (im1)->tz==(im2)->tz);
|
||||
|
||||
#define ASSERT_CUBE_ZEROS(img) {int size=img->tx*img->ty*img->tz; assert((img->pixels[0]==0 && img->pixels[size/2]==0 && img->pixels[size-1]==0) || !"error: matrix " #img "is supposed to be zeros");}
|
||||
#define CUBE_SIZE(cube) (long((cube)->tx)*(cube)->ty*(cube)->tz)
|
||||
|
||||
|
||||
|
||||
/************************
|
||||
* 3D Image = concatenation of XY layers
|
||||
|
||||
Equivalences:
|
||||
|
||||
C/Python/numpy: array.shape = (tz, ty, tx)
|
||||
array[z, y, x] := array->pixels[x + y*tx + z*tx*ty]
|
||||
|
||||
Matlab/Fortran: [tx, ty, tz] = size(array)
|
||||
array(x, y, z) := array->pixels[(x-1) + (y-1)*tx + (z-1)*tx*ty]
|
||||
*/
|
||||
|
||||
#define DEFINE_LAYERS(type) \
|
||||
typedef struct { \
|
||||
type* pixels; \
|
||||
int tx,ty,tz; \
|
||||
} type##_layers; \
|
||||
|
||||
DEFINE_LAYERS(UBYTE)
|
||||
DEFINE_LAYERS(int)
|
||||
DEFINE_LAYERS(UINT)
|
||||
DEFINE_LAYERS(float)
|
||||
|
||||
|
||||
#define ASSERT_SAME_LAYERS_SIZE(im1,im2) ASSERT_SAME_CUBE_SIZE(im1,im2)
|
||||
#define ASSERT_LAYERS_ZEROS ASSERT_CUBE_ZEROS
|
||||
#define LAYERS_SIZE(layers) CUBE_SIZE(layers)
|
||||
|
||||
|
||||
|
||||
/*****************
|
||||
creation, reshaping macros
|
||||
*/
|
||||
|
||||
// Because a random bug caused by uninitialized memory was difficult to locate,
// all malloc(...) calls have been replaced by calloc(...) ( = malloc + memset(0) ),
// which does not really cost more time anyway and seems to solve the issue.
// This is a somewhat crude technique, but it works well.
|
||||
|
||||
#define empty_array(type,tx) ((type##_array){NEWAC(type,long(tx)),tx})
|
||||
#define empty_image(type,tx,ty) ((type##_image){NEWAC(type,long(tx)*(ty)),tx,ty})
|
||||
#define empty_cube(type,tx,ty,tz) ((type##_cube ){NEWAC(type,long(tx)*(ty)*long(tz)),tx,ty,tz})
|
||||
#define empty_layers(type,tx,ty,tz) ((type##_layers){NEWAC(type,long(tx)*(ty)*(tz)),tx,ty,tz})
|
||||
|
||||
#define zeros_array(type,tx) ((type##_array){NEWAC(type,long(tx)),tx})
|
||||
#define zeros_image(type,tx,ty) ((type##_image){NEWAC(type,long(tx)*(ty)),tx,ty})
|
||||
#define zeros_cube(type,tx,ty,tz) ((type##_cube ){NEWAC(type,long(tx)*(ty)*(tz)),tx,ty,tz})
|
||||
#define zeros_layers(type,tx,ty,tz) ((type##_layers){NEWAC(type,long(tx)*(ty)*(tz)),tx,ty,tz})
|
||||
|
||||
#define array_like(type,l) ((type##_array){NEWAC(type,long((l)->tx)),(l)->tx})
|
||||
#define image_like(type,l) ((type##_image){NEWAC(type,long((l)->tx)*(l)->ty),(l)->tx,(l)->ty})
|
||||
#define cube_like(type,l) ((type##_cube ){NEWAC(type,long((l)->tx)*(l)->ty*(l)->tz),(l)->tx,(l)->ty,(l)->tz})
|
||||
#define layers_like(type,l) ((type##_layers){NEWAC(type,long((l)->tx)*(l)->ty*(l)->tz),(l)->tx,(l)->ty,(l)->tz})
|
||||
|
||||
|
||||
#define reshape_xy(type, arr) ((type##_array){(arr)->pixels, (arr)->tx*(arr)->ty})
|
||||
#define reshape_xyz(type, arr) ((type##_array){(arr)->pixels, (arr)->tx*(arr)->ty*(arr)->tz})
|
||||
#define reshape_xy_z(type, arr) ((type##_image){(arr)->pixels, (arr)->tx*(arr)->ty, (arr)->tz})
|
||||
#define reshape_z_xy(type, arr) ((type##_image){(arr)->pixels, (arr)->tz, (arr)->tx*(arr)->ty})
|
||||
#define reshape_x_yz(type, arr) ((type##_image){(arr)->pixels, (arr)->tx, (arr)->ty*(arr)->tz})
|
||||
|
||||
|
||||
#define free_image(img) if(img){free(img->pixels); free(img); img=NULL;}
|
||||
#define free_cube(cube) free_image(cube)
|
||||
#define free_layers(cube) free_cube(cube)
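// Minimal usage sketch of the macros above (illustration only; assumes NEWAC() from std.h,
// which the creation macros rely on):
//   float_image im = empty_image(float, 64, 48);   // 64x48 image, zero-initialized
//   im.pixels[x + y*im.tx] = 1.f;                  // row-major indexing, as documented above
//   free(im.pixels);                               // a struct built this way only owns 'pixels'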
|
||||
|
||||
|
||||
// debugging only
|
||||
//#include <stdio.h>
|
||||
//inline long hash_arr(char* ptr, int nb, bool show) {
|
||||
// long res = 0;
|
||||
// if(show) printf("hashing [");
|
||||
// for(int i=0; i<nb; i++) {
|
||||
// res = 1000003*res + ((UBYTE*)ptr)[i];
|
||||
// if(show) printf("%d, ",((UBYTE*)ptr)[i]);
|
||||
// res = (res>>17) | (res<<47);
|
||||
// }
|
||||
// if(show) printf("]\n");
|
||||
// return res;
|
||||
//}
|
||||
//#define H(arr,val) printf("hash(" #arr ") = %ld\n",val);
|
||||
//#define hash_array(arr) H(arr,hash_arr((char*)(arr)->pixels,(arr)->tx*sizeof(*(arr)->pixels),0))
|
||||
//#define hash_image(arr) H(arr,hash_arr((char*)(arr)->pixels,(arr)->tx*(arr)->ty*sizeof(*(arr)->pixels),0))
|
||||
//#define hash_cube(arr) H(arr,hash_arr((char*)(arr)->pixels,(arr)->tx*(arr)->ty*(arr)->tz*sizeof(*(arr)->pixels),0))
|
||||
//#define hash_layers(arr) hash_cube(arr)
|
||||
|
||||
//inline void save_raw(const char* fname, int* shape, int ndim, char* ptr, int size) {
|
||||
// FILE* f = fopen(fname, "w");
|
||||
// fwrite( &ndim, sizeof(int), 1, f);
|
||||
// fwrite( shape, sizeof(int), ndim, f);
|
||||
// fwrite( ptr, sizeof(*ptr), size, f);
|
||||
// fclose(f);
|
||||
//}
|
||||
//#define save_cube(fname,cube) {int sh[3] = {(cube)->ty, (cube)->tx, (cube)->tz}; save_raw(fname, sh, 3, (char*)(cube)->pixels, sizeof(*(cube)->pixels)*CUBE_SIZE(cube));}
|
||||
//#define save_layers(fname,layers) {int sh[3] = {(layers)->tz, (layers)->ty, (layers)->tx}; save_raw(fname, sh, 3, (char*)(layers)->pixels, sizeof(*(layers)->pixels)*LAYERS_SIZE(layers));}
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,988 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "conv.h"
|
||||
#include "std.h"
|
||||
#include "omp.h"
|
||||
#include "maxfilter.h"
|
||||
|
||||
extern "C" {
|
||||
#include <immintrin.h>
|
||||
#define integer int
|
||||
#define real float
|
||||
extern int saxpy_(integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy);
|
||||
extern int sscal_(integer *n, real *sa, real *sx, integer *incx);
|
||||
}
|
||||
|
||||
|
||||
static inline void fast_set_val( float * __restrict__ a, long d, const float val) {
|
||||
if(val) {
|
||||
int j;
|
||||
for(j=0; j<d; j++)
|
||||
a[j] = val;
|
||||
} else
|
||||
memset(a,0,d*sizeof(float));
|
||||
}
|
||||
static inline void fast_add_val( float * __restrict__ a, long d, const float val) {
|
||||
int j;
|
||||
for(j=0; j<d; j++)
|
||||
a[j] += val;
|
||||
}
|
||||
static inline void fast_set_vec( float * __restrict__ dest,
|
||||
const float * __restrict__ src, int d, const float mul) {
|
||||
if( mul==1)
|
||||
memcpy(dest,src,d*sizeof(float));
|
||||
else {
|
||||
int j;
|
||||
for(j=0; j<d; j++)
|
||||
dest[j] = mul*src[j];
|
||||
}
|
||||
}
|
||||
static inline void fast_add_vec( float * __restrict__ dest,
|
||||
const float * __restrict__ add, int d, float mul) {
|
||||
if(d<=4) {
|
||||
int j;
|
||||
for(j=0; j<d; j++)
|
||||
dest[j] += mul*add[j];
|
||||
} else {
|
||||
int inc = 1;
|
||||
saxpy_( &d, &mul, (float*)add, &inc, (float*)dest, &inc );
|
||||
}
|
||||
}
|
||||
static inline void fast_div( float * __restrict__ a, long d, const float div) {
|
||||
const float divi = 1/div;
|
||||
// assert( ((long)a & 15) == 0 && (d & 3) == 0 );
|
||||
// const float _divi4[] = {divi,divi,divi,divi};
|
||||
// __v4sf *a4 = (__v4sf*)a;
|
||||
// __v4sf *divi4 = (__v4sf*)_divi4;
|
||||
// int e = d>>2;
|
||||
// while(e--) *a4++ *= (*divi4);
|
||||
int j;
|
||||
for(j=0; j<d; j++)
|
||||
a[j] *= divi;
|
||||
}
|
||||
|
||||
static inline float* fast_set_trans( float * dest, const float * src, const float mul,
|
||||
int dx, int dy, const int tx, const int ty, const int ex, const float def ) {
|
||||
if(mul==0) {
|
||||
memset(dest,0,sizeof(float)*(tx+ex)*(ty+ex));
|
||||
return dest+(tx+ex)*(ty+ex);
|
||||
}
|
||||
if(dx>tx) dx=tx; // beyond these values, nothing happens anyway
|
||||
if(dy>ty) dy=ty;
|
||||
if(-dx>tx) dx=-tx;
|
||||
if(-dy>ty) dy=-ty;
|
||||
|
||||
#define add_default(n) {fast_set_val(dest,(n),mul*def); dest+=(n);}
|
||||
float* _dest = dest;
|
||||
|
||||
// paste -v zeros rows
|
||||
if(dy<0) add_default(-dy*(tx+ex));
|
||||
|
||||
src += MAX(0,dx);
|
||||
const int row_len = MIN(tx,tx+dx+ex) - MAX(0,dx);
|
||||
int j;
|
||||
for(j=MAX(0,dy); j<MIN(ty,ty+dy+ex); j++) {
|
||||
|
||||
// paste -u zeros cols
|
||||
if(dx<0) add_default(-dx);
|
||||
|
||||
// paste image
|
||||
fast_set_vec(dest,src+j*tx,row_len,mul);
|
||||
dest += row_len;
|
||||
|
||||
// paste +u zeros cols
|
||||
if(dx>=0) {add_default(dx)
|
||||
if(ex) add_default(ex)}
|
||||
}
|
||||
|
||||
// paste +v zeros rows
|
||||
if(dy>=0){add_default(dy*(tx+ex))
|
||||
if(ex) add_default(ex*(tx+ex))}
|
||||
|
||||
#undef add_default
|
||||
assert( dest-_dest == (tx+ex)*(ty+ex) );
|
||||
return dest;
|
||||
}
|
||||
|
||||
static inline float* fast_add_trans( float * dest, const float * src, const float mul,
|
||||
int dx, int dy, const int tx, const int ty, const int ex, const float def ) {
|
||||
if(mul==0) return dest+(tx+ex)*(ty+ex);
|
||||
if(dx>tx) dx=tx; // beyond these values, nothing happens anyway
|
||||
if(dy>ty) dy=ty;
|
||||
if(-dx>tx) dx=-tx;
|
||||
if(-dy>ty) dy=-ty;
|
||||
#define add_default(n) {fast_add_val(dest,n,def*mul); dest+=n;}
|
||||
float* _dest = dest;
|
||||
|
||||
// paste -v zeros rows
|
||||
if(dy<0) add_default(-dy*(tx+ex));
|
||||
|
||||
src += MAX(0,dx);
|
||||
const int row_len = MIN(tx,tx+dx+ex) - MAX(0,dx);
|
||||
int j;
|
||||
for(j=MAX(0,dy); j<MIN(ty,ty+dy+ex); j++) {
|
||||
|
||||
// paste -u zeros cols
|
||||
if(dx<0) add_default(-dx);
|
||||
|
||||
// paste image
|
||||
fast_add_vec(dest,src+j*tx,row_len,mul);
|
||||
dest += row_len;
|
||||
|
||||
// paste +u zeros cols
|
||||
if(dx>=0) {add_default(dx)
|
||||
if(ex) add_default(ex)}
|
||||
}
|
||||
|
||||
// paste +v zeros rows
|
||||
if(dy>=0){add_default(dy*(tx+ex))
|
||||
if(ex) add_default(ex*(tx+ex))}
|
||||
|
||||
#undef add_default
|
||||
assert( dest-_dest == (tx+ex)*(ty+ex) );
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
static inline void norm_norm( float* norms, int nb, float mode ) {
|
||||
int i;
|
||||
if( mode < 0 )
|
||||
assert(!"error: unknown norm mode");
|
||||
else if( mode == 0.5 ) {
|
||||
for(i=0; i<nb; i++)
|
||||
norms[i] = sqrt(sqrt(norms[i]));
|
||||
} else if( mode < 1 ) {
|
||||
mode *= 0.5; // cumulate with initial 1/sqrt(.)
|
||||
for(i=0; i<nb; i++)
|
||||
norms[i] = pow(norms[i], mode);
|
||||
} else if( mode == 1 ) {
|
||||
for(i=0; i<nb; i++)
|
||||
norms[i] = sqrt(norms[i]);
|
||||
} else if( mode > 1 )
|
||||
assert(!"error: unknown norm mode");
|
||||
}
|
||||
|
||||
|
||||
/* normalize each pixel of a multi-layers image
|
||||
norm = {0:nothing, 1:L2-normalization, 0-1: normalization by (L2-norm)**<norm> }
|
||||
*/
|
||||
void norm_layers( float_layers* res, float norm, int n_thread ) {
|
||||
if(norm==0) return;
|
||||
|
||||
const int layer_size = res->tx*res->ty;
|
||||
const int n_layers = res->tz;
|
||||
float* norms = NEWAC(float,layer_size);
|
||||
long l;
|
||||
|
||||
for(l=0; l<n_layers; l++) {
|
||||
float* r = res->pixels + l*layer_size;
|
||||
int i;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(i=0; i<layer_size; i++)
|
||||
norms[i] += r[i]*r[i];
|
||||
}
|
||||
norm_norm( norms, layer_size, norm );
|
||||
|
||||
for(l=0; l<n_layers; l++) {
|
||||
float* r = res->pixels + l*layer_size;
|
||||
int i;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(i=0; i<layer_size; i++)
|
||||
r[i] /= norms[i]+1e-8;
|
||||
}
|
||||
|
||||
free(norms);
|
||||
}
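// Sketch of the per-pixel computation performed above (illustration only): for every pixel p,
//   n(p)      = ( sum_l res[l,p]^2 ) ^ (norm/2)    // i.e. (L2-norm)**norm, computed via norm_norm()
//   res[l,p] /= n(p) + 1e-8                        // applied to every layer l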
|
||||
|
||||
|
||||
/* Return the vectorized dimension of a HOG patch
|
||||
*/
|
||||
int get_patch_desc_dim( float_layers* hog, int patch_size )
|
||||
{
|
||||
return patch_size*patch_size * hog->tz; // number of dimensions of an atomic patch descriptor
|
||||
}
|
||||
|
||||
|
||||
/* Sample a set of patches from a HOG image.
|
||||
grid : array of (x,y) position of the patches
|
||||
size: size of the patches, ie. [x,x+size[ x [y,y+size[
|
||||
res: result array, n_patches x desc_dim
|
||||
desc_dim = n_layers * size**2
|
||||
norms: result, n_patches x 1, norm of each patch
|
||||
*/
|
||||
void _sample_patches( float_layers* hog, float_layers* color, int_image* grid, int size, float norm,
|
||||
float_image* res, float_array* norms, int n_thread ) {
|
||||
const int tx = hog->tx;
|
||||
const long npix = tx*hog->ty;
|
||||
assert( grid->tx == 2 );
|
||||
const int n_patches = grid->ty;
|
||||
assert( res->ty == n_patches );
|
||||
const int n_layers = hog->tz;
|
||||
const int n_colors = (color? color->tz: 0);
|
||||
const int color_npix = (color? color->tx*color->ty: 0);
|
||||
const int desc_size = size*size*n_layers + (color? color->tz: 0);
|
||||
assert(res->tx == desc_size );
|
||||
|
||||
int n;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(n=0; n<n_patches; n++) {
|
||||
float *r = res->pixels + desc_size*n;
|
||||
int *p = grid->pixels + 2*n;
|
||||
// copy hog
|
||||
int x=p[0],y=p[1];
|
||||
assert(0<=x && x+size<=tx);
|
||||
assert(0<=y && y+size<=hog->ty);
|
||||
int l,j;
|
||||
for(l=0; l<n_layers; l++) {
|
||||
float* h = hog->pixels + l*npix + y*tx + x;
|
||||
for(j=0; j<size; j++) {
|
||||
memcpy(r, h, size*sizeof(float));
|
||||
h += tx;
|
||||
r += size;
|
||||
}
|
||||
}
|
||||
if(!color) continue;
|
||||
// copy color
|
||||
float* c = color->pixels + (y+size/2)*color->ty + (x+size/2);
|
||||
for(l=0; l<n_colors; l++)
|
||||
*r++ = c[l*color_npix];
|
||||
}
|
||||
|
||||
if(norm) {
|
||||
float* normp = norms ? norms->pixels : NEWAC(float, n_patches);
|
||||
if(norms) {
|
||||
assert(norms->tx==n_patches);
|
||||
memset(normp,0,n_patches*sizeof(float));
|
||||
}
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(n=0; n<n_patches; n++) {
|
||||
float *r = res->pixels + desc_size*n;
|
||||
int l;
|
||||
for(l=0; l<desc_size; l++)
|
||||
normp[n] += r[l]*r[l];
|
||||
}
|
||||
norm_norm( normp, n_patches, norm );
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(n=0; n<n_patches; n++) {
|
||||
float *r = res->pixels + desc_size*n;
|
||||
int l;
|
||||
float nn = normp[n]+1e-8;
|
||||
for(l=0; l<desc_size; l++)
|
||||
r[l] /= nn;
|
||||
}
|
||||
|
||||
if(!norms) free(normp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static inline int retrieve_children( const int x, const int y, const int_cube* child_grid ) {
|
||||
const int size0_div2 = child_grid->pixels[0];
|
||||
const int step0 = child_grid->tx==1 && child_grid->ty==1 ? 1 :
|
||||
MAX( child_grid->pixels[2]-child_grid->pixels[0],
|
||||
child_grid->pixels[1+2*child_grid->tx]-child_grid->pixels[1] );
|
||||
int i = (x-size0_div2)/step0;
|
||||
int j = (y-size0_div2)/step0;
|
||||
assert( x==(i*step0+size0_div2) || !"error: child_grid does not match current grid" );
|
||||
assert( y==(j*step0+size0_div2) || !"error: child_grid does not match current grid" );
|
||||
if( i<0 || i>=child_grid->tx ) return -1;
|
||||
if( j<0 || j>=child_grid->ty ) return -1;
|
||||
return i+j*child_grid->tx;
|
||||
}
|
||||
|
||||
/* Prepare a grid of cell positions in the first image for a given scale. Big cells inherit the cell at the previous scale.
|
||||
size = size of cells at current scale
|
||||
offset, step = grid generator: (offset + i*step, offset + j*step)
|
||||
child_grid = grid of the previous layer (or None if first layer)
|
||||
child_norms = image containing the norms of the patch at the previous level
|
||||
grid = result center positions of cells in current scale
|
||||
children = index of cells in previous scale used to construct big cells
|
||||
norms = norms of the cells of this level
|
||||
*/
|
||||
void _prepare_big_cells( int size, int offset, int step,
|
||||
int_cube* child_grid, float_image* child_norms,
|
||||
int_cube* grid, int_cube* children, float_image* norms ) {
|
||||
assert(grid->tz==2);
|
||||
const int ntx = grid->tx; // should be == 1+(tx-size)/step so that patches do not pass the border
|
||||
const int nty = grid->ty; // should be == 1+(ty-size)/step so that patches do not pass the border
|
||||
|
||||
/* grid[i,j] = ( offset + i*step, offset + j*step )
|
||||
|
||||
connection between two scales:
|
||||
x cell position in lower scale == x position of children in upper scale
|
||||
child_offset + child_i*child_step = offset + i*step + (2*u/(nc-1)-1)*size/4
|
||||
*/
|
||||
|
||||
int i,j,u,v;
|
||||
int* r = grid->pixels;
|
||||
|
||||
if( !child_grid ) {
|
||||
// this is the first scale:
|
||||
// we just return a grid of step size*(1-overlap/2) in [0, tx[ x [0, ty[
|
||||
|
||||
for(j=0; j<nty; j++)
|
||||
for(i=0; i<ntx; i++) {
|
||||
*r++ = offset + i*step;
|
||||
*r++ = offset + j*step;
|
||||
}
|
||||
} else {
|
||||
assert(child_grid->tz==2);
|
||||
ASSERT_SAME_SIZE( child_grid, child_norms );
|
||||
assert( children );
|
||||
const int nc = sqrt(children->tz); // number of children per row or col
|
||||
assert( children->tz==pow2(nc) );
|
||||
ASSERT_SAME_SIZE( grid, children );
|
||||
ASSERT_SAME_SIZE( grid, norms );
|
||||
// this is at least second scale
|
||||
// we return a grid of step size*(1-overlap/2) in [0, tx[ x [0, ty[
|
||||
|
||||
const int quarter = size/4;
|
||||
assert(4*quarter==size);
|
||||
int* c = children->pixels;
|
||||
float *n = norms->pixels;
|
||||
memset(n,0,ntx*nty*sizeof(float));
|
||||
for(j=0; j<nty; j++)
|
||||
for(i=0; i<ntx; i++) {
|
||||
int x = offset + i*step;
|
||||
int y = offset + j*step;
|
||||
*r++ = x;
|
||||
*r++ = y;
|
||||
|
||||
// accumulate norms from 2x2 or 3x3 neighbors
|
||||
for(v=0; v<nc; v++)
|
||||
for(u=0; u<nc; u++,c++) {
|
||||
// we want to index the children at position:
|
||||
// ( center_x + (2*u/(nc-1)-1)*size/4, center_y + (2*v/(nc-1)-1)*size/4 )
|
||||
*c = retrieve_children( x+(2*u/(nc-1)-1)*quarter, y+(2*v/(nc-1)-1)*quarter, child_grid );
|
||||
if(*c>=0) *n += child_norms->pixels[*c];
|
||||
}
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Prepare image for dotprod : dot(patches, res)
|
||||
where patches is n_patches x patch_dim
|
||||
pixels outside of the image are set to (0,...,ninth_val)
|
||||
*/
|
||||
void _prepare_dotprod_convolution( float_layers* img, int patch_size, float ninth_val, int extend,
|
||||
float_layers* res, int n_thread ) {
|
||||
assert( img->tx+extend == res->tx );
|
||||
assert( img->ty+extend == res->ty );
|
||||
const int n_layers = img->tz;
|
||||
const int tx = img->tx;
|
||||
const int ty = img->ty;
|
||||
const int npix = tx*ty;
|
||||
const int npixex = (tx+extend)*(ty+extend);
|
||||
assert( res->tz==patch_size*patch_size*img->tz );
|
||||
|
||||
long l;
|
||||
const int first_half = patch_size/2; // half-size
|
||||
const int second_half = patch_size - first_half;
|
||||
const int layer_size = patch_size*patch_size*npixex;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
float* img_pix = img->pixels + l*npix;
|
||||
float* r = res->pixels + l*layer_size;
|
||||
int u,v;
|
||||
// copy translated version of the image into res
|
||||
for(v=-first_half; v<second_half; v++)
|
||||
for(u=-first_half; u<second_half; u++)
|
||||
r = fast_set_trans( r, img_pix, 1, u, v, tx, ty, extend, l+1<n_layers? 0 : ninth_val );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
float_layers* prepare_dotprod_convolution( float_layers* hog, int patch_size, int extend, float norm, int nt )
|
||||
{
|
||||
assert(0<=extend and extend<=1);
|
||||
const int nh = get_patch_desc_dim(hog,patch_size);
|
||||
const int etx = hog->tx+extend; // extend a bit the image
|
||||
const int ety = hog->ty+extend;
|
||||
|
||||
float_layers* res = NEW(float_layers);
|
||||
*res = empty_layers(float,etx,ety,nh);
|
||||
|
||||
float ninth_val = 0;
|
||||
_prepare_dotprod_convolution( hog, patch_size, ninth_val, extend, res, nt );
|
||||
|
||||
if( norm ) norm_layers( res, norm, nt );
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
inline float sum_array_f(const float* a, int n) {
|
||||
int i=n;
|
||||
double res = 0;
|
||||
while(i--) res+=a[i];
|
||||
return (float)res;
|
||||
}
|
||||
|
||||
|
||||
extern "C" {
|
||||
int sgemm_(char *transa, char *transb, integer *m, integer *
|
||||
n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *
|
||||
ldb, float *beta, float *c, integer *ldc);
|
||||
}
|
||||
|
||||
/* matrix-matrix multiplication with several SGEMM (each is single-threaded)
|
||||
res = dot(patches, convolved_hog)
|
||||
P*npix P * nh nh * npix
|
||||
*/
|
||||
void _dotprod( float_image* patches, float_layers* convolved_hog, float_layers* res, int n_thread ) {
|
||||
int nh = patches->tx;
|
||||
assert( nh == convolved_hog->tz );
|
||||
ASSERT_SAME_IMG_SIZE( convolved_hog, res );
|
||||
int P = patches->ty;
|
||||
assert( res->tz == P );
|
||||
int threadP = 1 + (P-1) / n_thread; // how many patches per thread
|
||||
int npix = (int)IMG_SIZE(convolved_hog);
|
||||
|
||||
int l;
|
||||
#if (defined(USE_OPENMP) && !defined(MULTITHREADED_BLAS))
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#else
|
||||
n_thread = 1; // BLAS is already multithreaded
|
||||
threadP = P;
|
||||
#endif
|
||||
for(l=0; l<n_thread; l++) {
|
||||
// we do dotprod( patches[l*threadP : (l+1)*threadP], convolved_hog )
|
||||
long start = l*threadP;
|
||||
long end = MIN(P,(l+1)*threadP);
|
||||
int np = int(end - start);
|
||||
float* p = patches->pixels + nh*start;
|
||||
float* r = res->pixels + npix*start;
|
||||
|
||||
// blas fast matrix-matrix product
|
||||
char T='n'; float alpha = 1, beta = 0;
|
||||
sgemm_( &T, &T, &npix, &np, &nh, &alpha,
|
||||
convolved_hog->pixels, &npix,
|
||||
p, &nh, &beta, r, &npix);
|
||||
}
|
||||
}
|
||||
|
||||
inline void transpose_scalar_block(const float *A, float *B, const int lda, const int ldb,
|
||||
const int block_row, const int block_col) {
|
||||
for(int i=0; i<block_row; i++)
|
||||
for(int j=0; j<block_col; j++)
|
||||
B[j*ldb + i] = A[i*lda +j];
|
||||
}
|
||||
|
||||
// Transpose A (N rows by M cols) into B (M by N)
|
||||
void transpose_matrix(const float_image* A, float_image* B, int nt) {
|
||||
const int n = A->ty, m = A->tx;
|
||||
assert( n==B->tx && m==B->ty );
|
||||
const int block_size = 16;
|
||||
const float* pA = A->pixels;
|
||||
float* pB = B->pixels;
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
#pragma omp parallel for num_threads(nt)
|
||||
#endif
|
||||
for(int i=0; i<n; i+=block_size)
|
||||
for(int j=0; j<m; j+=block_size)
|
||||
transpose_scalar_block(&pA[i*m +j], &pB[j*n + i], m, n, MIN(block_size, n-i), MIN(block_size, m-j));
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
int sgemv_(char *transa, integer *m, integer * n,
|
||||
float *alpha, float *a, integer *lda,
|
||||
float *b, integer * ldb, float *beta,
|
||||
float *c, integer * ldc);
|
||||
}
|
||||
|
||||
/* convolution of each patch within a local neighborhood
|
||||
ngh_rad = max translation
|
||||
neighborhood has size 2*ngh_rad
|
||||
patch at (x,y) is compared to patches in [y-ngh_rad : y+ngh_rad,
|
||||
x-ngh_rad : x+ngh_rad]
|
||||
*/
|
||||
void _dotprod_ngh_rad_T( int_cube* grid, float_image* patches, int ngh_rad,
|
||||
float_cube* convolved_hog, float_layers* res_out,
|
||||
int_image* offsets, int n_thread ) {
|
||||
int nh = patches->tx;
|
||||
assert( nh == convolved_hog->tz );
|
||||
const int P = patches->ty;
|
||||
assert( IMG_SIZE(grid)==P && grid->tz==2 );
|
||||
const int tx = convolved_hog->tx;
|
||||
const int ty = convolved_hog->ty;
|
||||
|
||||
// neighborhood size
|
||||
int res_tx = MIN(tx,2*ngh_rad);
|
||||
int res_ty = MIN(ty,2*ngh_rad);
|
||||
assert(res_tx<tx-1 || res_ty<ty-1 || !"ngh_rad is too large and results in loss of perf. Set ngh_rad=0 instead.");
|
||||
int res_npix = res_tx * res_ty;
|
||||
// allocate result
|
||||
*res_out = empty_layers(float, res_tx, res_ty, P);
|
||||
assert(res_out->pixels || !"error: ran out of memory before sgemm");
|
||||
*offsets = empty_image(int, 2, P);
|
||||
|
||||
char T='t'; float alpha=1, beta=0; int one=1;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(int j=0; j<res_ty; ++j) {
|
||||
// By organizing loops this way,
|
||||
// we exploit overlap between patches.
|
||||
|
||||
for(int l=0; l<P; l++) {
|
||||
float* p = patches->pixels + l*nh;
|
||||
float* r = res_out->pixels + l*res_npix;
|
||||
int left = MAX(0, MIN(grid->pixels[2*l+0] - ngh_rad, tx-2*ngh_rad));
|
||||
int top = MAX(0, MIN(grid->pixels[2*l+1] - ngh_rad, ty-2*ngh_rad));
|
||||
if(j==0) {
|
||||
offsets->pixels[2*l+0] = left;
|
||||
offsets->pixels[2*l+1] = top;
|
||||
}
|
||||
float* c = convolved_hog->pixels + (left + top*tx)*nh;
|
||||
|
||||
// blas fast matrix-vector product
|
||||
sgemv_( &T, &nh, &res_tx, &alpha, c + j*tx*nh, &nh,
|
||||
p, &one, &beta, r + j*res_tx, &one);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* correct the convolution on the boundaries of the image
|
||||
ttx, tty: true shape of the res_map (in case of using offsets)
|
||||
*/
|
||||
void rectify_conv( int patch_size, int nori, float_image* patches, int_image* offsets,
|
||||
const int ttx, const int tty, int extend, float_layers* res, int n_thread ) {
|
||||
const int n_patches = patches->ty;
|
||||
assert( n_patches == res->tz );
|
||||
//const int nori = patches->tx/pow2(patch_size);
|
||||
assert( patches->tx >= nori*pow2(patch_size) );
|
||||
const int tx = res->tx; // real true shape because it has been extended
|
||||
const int ty = res->ty;
|
||||
const int first_half = patch_size/2;
|
||||
const int second_half = patch_size - first_half; // in case patch_size is odd
|
||||
assert( offsets || (ttx==tx && tty==ty) );
|
||||
assert( !offsets || (ttx>=tx && tty>=ty) );
|
||||
assert( !offsets || (offsets->ty==res->tz && offsets->tx==2) );
|
||||
const long npix = IMG_SIZE(res);
|
||||
|
||||
int l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_patches; l++) {
|
||||
// load offsets
|
||||
const int offi = offsets ? offsets->pixels[2*l+0] : 0;
|
||||
const int offj = offsets ? offsets->pixels[2*l+1] : 0;
|
||||
|
||||
float sums[8]; // temporary norm of columns or rows
|
||||
assert( patch_size <= (int)(sizeof(sums)/sizeof(sums[0])) );
|
||||
int o,i,j;
|
||||
|
||||
// horizontal boundaries
|
||||
memset(sums,0,sizeof(sums));
|
||||
float* p = patches->pixels + l*patches->tx;
|
||||
for(o=0; o<nori; o++)
|
||||
for(j=0; j<patch_size; j++)
|
||||
for(i=0; i<patch_size; i++)
|
||||
sums[j] += pow2(*p++);
|
||||
|
||||
float old_norm = sqrt(sum_array_f(sums,patch_size));
|
||||
if( old_norm==0 ) continue;
|
||||
|
||||
// upper boundary
|
||||
for(j=offj; j<first_half; j++) {
|
||||
float new_norm = sqrt(sum_array_f(sums+(first_half-j),second_half+j)); // sums to patch_size
|
||||
float mul = old_norm / (new_norm + 1e-8);
|
||||
float* r = res->pixels + l*npix + (j-offj)*tx;
|
||||
for(i=0; i<tx; i++) {
|
||||
r[i] *= mul;
|
||||
//assert(r[i]<1.1);
|
||||
}
|
||||
}
|
||||
// lower boundary
|
||||
for(j=tty-extend+1-second_half; j<offj+ty; j++) {
|
||||
float new_norm = sqrt(sum_array_f(sums,first_half+tty-extend-j)); // sums to patch_size
|
||||
float mul = old_norm / (new_norm + 1e-8);
|
||||
float* r = res->pixels + l*npix + (j-offj)*tx;
|
||||
for(i=0; i<tx; i++) {
|
||||
r[i] *= mul;
|
||||
//assert(r[i]<1.1);
|
||||
}
|
||||
}
|
||||
|
||||
// vertical boundaries
|
||||
memset(sums,0,sizeof(sums));
|
||||
p = patches->pixels + l*patches->tx;
|
||||
for(o=0; o<nori; o++)
|
||||
for(j=0; j<patch_size; j++)
|
||||
for(i=0; i<patch_size; i++)
|
||||
sums[i] += pow2(*p++);
|
||||
|
||||
// left boundary
|
||||
for(i=offi; i<first_half; i++) {
|
||||
float new_norm = sqrt(sum_array_f(sums+(first_half-i),second_half+i));
|
||||
float mul = old_norm / (new_norm + 1e-8);
|
||||
float* r = res->pixels + l*npix + (i-offi);
|
||||
for(j=0; j<ty; j++) {
|
||||
r[j*tx] *= mul;
|
||||
//assert(r[j*tx]<1.1);
|
||||
}
|
||||
}
|
||||
// right boundary
|
||||
for(i=ttx-extend+1-second_half; i<offi+tx; i++) {
|
||||
float new_norm = sqrt(sum_array_f(sums,first_half+ttx-extend-i));
|
||||
float mul = old_norm / (new_norm + 1e-8);
|
||||
float* r = res->pixels + l*npix + (i-offi);
|
||||
for(j=0; j<ty; j++) {
|
||||
r[j*tx] *= mul;
|
||||
//assert(r[j*tx]<1.1);
|
||||
}
|
||||
}
|
||||
|
||||
// because we over-estimated the rectification for the corners, check that they do not overpass old_norm
|
||||
float* r = res->pixels + l*npix;
|
||||
for(j=offj; j<first_half; j++) {
|
||||
for(i=offi; i<first_half; i++)
|
||||
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
|
||||
for(i=ttx-extend+1-second_half; i<offi+tx; i++)
|
||||
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
|
||||
}
|
||||
for(j=tty-extend+1-second_half; j<offj+ty; j++) {
|
||||
for(i=offi; i<first_half; i++)
|
||||
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
|
||||
for(i=ttx-extend+1-second_half; i<offi+tx; i++)
|
||||
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Compute the correlation of all patches with the second image (hog).
|
||||
In case of ngh_rad>0, the correlation is only computed in a small local neighborhood
|
||||
(whose size is parameterized by ngh_rad).
|
||||
if extend: width and height of output maps are extended
|
||||
if norm: correlation are normalized afterwards.
|
||||
*/
|
||||
void fastconv( float_image* patches, float_layers* hog, int patch_size, int ngh_rad,
|
||||
int extend, float norm, int nt, res_scale* res ) {
|
||||
|
||||
assert(0<=extend and extend<=1);
|
||||
float_layers* convolved_hog = prepare_dotprod_convolution( hog, patch_size, extend, norm, nt );
|
||||
assert( patches->tx==convolved_hog->tz);
|
||||
res->true_shape[0] = convolved_hog->tx;
|
||||
res->true_shape[1] = convolved_hog->ty;
|
||||
//hash_layers(convolved_hog)
|
||||
|
||||
int_image* offsets = NULL;
|
||||
if( ngh_rad == 0 ) { // no limit on translation
|
||||
// allocate result
|
||||
res->res_map = empty_layers(float, convolved_hog->tx, convolved_hog->ty, patches->ty);
|
||||
assert(res->res_map.pixels || !"error: ran out of memory before sgemm");
|
||||
|
||||
// multi-threaded fast matrix product
|
||||
_dotprod( patches, convolved_hog, &res->res_map, nt );
|
||||
|
||||
} else { // ngh_rad>0: cropping res_map
|
||||
offsets = &res->offsets;
|
||||
|
||||
// transpose hog: _dotprod is much faster this way
|
||||
float_cube convolved_hog_T = empty_cube(float, convolved_hog->tx, convolved_hog->ty, convolved_hog->tz);
|
||||
{ float_image A = reshape_xy_z(float, convolved_hog); // cast to 2D matrix without copy
|
||||
float_image B = reshape_z_xy(float, &convolved_hog_T);
|
||||
transpose_matrix( &A, &B, nt);
|
||||
}
|
||||
//hash_cube(&convolved_hog_T)
|
||||
|
||||
// resized grid
|
||||
int_cube fgrid = cube_like(int, &res->grid);
|
||||
for(int i=0; i<CUBE_SIZE(&fgrid); i++)
|
||||
fgrid.pixels[i] = res->grid.pixels[i]/res->f;
|
||||
//hash_cube(&fgrid)
|
||||
|
||||
// multi-threaded fast matrix product
|
||||
_dotprod_ngh_rad_T( &fgrid, patches, ngh_rad, &convolved_hog_T, &res->res_map, offsets, nt );
|
||||
|
||||
free(fgrid.pixels);
|
||||
free(convolved_hog_T.pixels);
|
||||
//hash_image(offsets)
|
||||
}
|
||||
free_layers(convolved_hog);
|
||||
|
||||
// correct border effects on the correlation maps
|
||||
rectify_conv( patch_size, hog->tz, patches, offsets, res->true_shape[0], res->true_shape[1],
|
||||
extend, &res->res_map, nt );
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Compute: arr **= p
|
||||
*/
|
||||
void fastipow( float_layers* arr, const float p, int n_thread ) {
|
||||
const int n_layers = arr->tz;
|
||||
const long npix = arr->tx*arr->ty;
|
||||
int l;
|
||||
|
||||
// optimization: precompute some values of pow(x,p)
|
||||
const int npc = 64;
|
||||
float precom[npc+1];
|
||||
for(l=0; l<=npc; l++) precom[l]= pow(l/(float)npc,p);
|
||||
const float maxindex = npc - 0.001;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
float* a = arr->pixels + l*npix;
|
||||
int i;
|
||||
for(i=0; i<npix; i++) {
|
||||
// arr[i] = pow(arr[i],p);
|
||||
float v = a[i]*npc;
|
||||
assert( v>=0 && v<npc+1 );
|
||||
if(v>maxindex) v=maxindex;
|
||||
int n = int(v);
|
||||
float w = v-n;
|
||||
a[i] = (1-w)*precom[n] + w*precom[n+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute: arr = max(0,(arr-p)/(1-p))
|
||||
*/
|
||||
void fasthinge( float_layers* arr, const float p, int n_thread ) {
|
||||
const int n_layers = arr->tz;
|
||||
const long npix = arr->tx*arr->ty;
|
||||
int l;
|
||||
const float f = 1/(1-p);
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
float* a = arr->pixels + l*npix;
|
||||
int i;
|
||||
for(i=0; i<npix; i++) {
|
||||
float v = a[i];
|
||||
a[i] = MAX(0,f*(v-p));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline int max_array_i(const int* a, int n) {
|
||||
int i=n;
|
||||
int res = INT_MIN;
|
||||
while(i--) if(a[i]>res) res=a[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Normalize weights in border areas of width <gap>.
|
||||
There are 9 areas: top-left, top-middle, top-right, ..., bottom-right.
|
||||
sum_divf indicates the current weight in those areas, i.e. values in the area
|
||||
should be divided by the weight. But trans_inv allows controlling the amount of
|
||||
normalization: 0=no normalization, 1=normal
|
||||
*/
|
||||
static inline void normalize_trans(const int tx, const int ty, const int gap, float* rmap,
|
||||
const float trans_inv, float sum_divf[9] ) {
|
||||
if( trans_inv == 0 ) return;
|
||||
int i,j;
|
||||
for(i=0; i<9; i++) {
|
||||
if( sum_divf[i]>0 )
|
||||
sum_divf[i] = 1/pow(sum_divf[i], trans_inv); // if trans_inv==1, no effect
|
||||
}
|
||||
for(j=0; j<gap; j++) {
|
||||
if(sum_divf[0])
|
||||
for(i=0; i<gap; i++)
|
||||
rmap[j*tx+i] *= sum_divf[0];
|
||||
if(sum_divf[1])
|
||||
for(i=gap; i<tx-gap; i++)
|
||||
rmap[j*tx+i] *= sum_divf[1];
|
||||
if(sum_divf[2])
|
||||
for(i=tx-gap; i<tx; i++)
|
||||
rmap[j*tx+i] *= sum_divf[2];
|
||||
}
|
||||
for(; j<ty-gap; j++) {
|
||||
if(sum_divf[3])
|
||||
for(i=0; i<gap; i++)
|
||||
rmap[j*tx+i] *= sum_divf[3];
|
||||
if(sum_divf[5])
|
||||
for(i=tx-gap; i<tx; i++)
|
||||
rmap[j*tx+i] *= sum_divf[5];
|
||||
}
|
||||
for(; j<ty; j++) {
|
||||
if(sum_divf[6])
|
||||
for(i=0; i<gap; i++)
|
||||
rmap[j*tx+i] *= sum_divf[6];
|
||||
if(sum_divf[7])
|
||||
for(i=gap; i<tx-gap; i++)
|
||||
rmap[j*tx+i] *= sum_divf[7];
|
||||
if(sum_divf[8])
|
||||
for(i=tx-gap; i<tx; i++)
|
||||
rmap[j*tx+i] *= sum_divf[8];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Compute the (sparse) convolutions specified by <children> on <map> and put the result in <res>.
|
||||
A standard order is assumed on the children:
|
||||
a response map #p is built from the children[p] at positions
|
||||
[(gap*dx,gap*dy) for dy in dys for dx in dxs]
|
||||
where dxs = [-1,1] or [-1,0,1]
|
||||
dys = [-1,1] or [-1,0,1]
|
||||
child_assign denotes the assignment of the children level, while assign is for the next level
|
||||
child_norms contain the norms of small patches and norms for big new cells
|
||||
*/
|
||||
int _sparse_conv( int_image* children, int_array* child_assign, int gap, float trans_inv,
|
||||
float_layers* child_map, int_image* offsets, float_array* child_norms, float_array* norms,
|
||||
int_array* assign, float_layers* res, int_image* res_offsets, int n_thread ) {
|
||||
const int nconv = children->ty; // number of convolutions to perform
|
||||
const int nc2 = children->tx;
|
||||
const int nc = sqrt(nc2);
|
||||
assert( nc*nc == nc2 );
|
||||
assert( res->tz == nconv );
|
||||
const int tx = child_map->tx;
|
||||
const int ty = child_map->ty;
|
||||
const long npix = tx*ty;
|
||||
ASSERT_SAME_SIZE( child_map, res );
|
||||
const int n_lower_conv = max_array_i(children->pixels,nconv*nc2)+1;
|
||||
int* cass = child_assign ? child_assign->pixels : NEWA(int,n_lower_conv);
|
||||
if(!child_assign) {for(int i=0; i<n_lower_conv; i++) cass[i]=i;}
|
||||
assert( !offsets || (offsets->pixels && offsets->tx==2 && offsets->ty==n_lower_conv &&
|
||||
res_offsets && res_offsets->tx==2 && res_offsets->ty==nconv) );
|
||||
|
||||
if(assign) {
|
||||
assert(0); // not supposed to happen
|
||||
} else {
|
||||
// normal case: no redundancy to exploit in response maps
|
||||
|
||||
int l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<nconv; l++) {
|
||||
float *rmap = res->pixels + l*npix;
|
||||
|
||||
int u,v,c,ncall=0; // children number
|
||||
const int* const child = children->pixels + l*nc2;
|
||||
|
||||
float sum_divf[9];
|
||||
memset(sum_divf,0,sizeof(sum_divf));
|
||||
int i,j;
|
||||
|
||||
// first, choose an offset for the result rmap from the child offsets
|
||||
int offx=0, offy=0;
|
||||
if( offsets ) {
|
||||
int sum_ox=0, sum_oy=0, w=0;
|
||||
for(c=v=0; v<nc; v++) {
|
||||
int dy = (2*v/(nc-1)-1);
|
||||
for(u=0; u<nc; u++,c++) {
|
||||
int dx = (2*u/(nc-1)-1);
|
||||
|
||||
if(child[c]<0 || cass[child[c]]<0) continue;
|
||||
|
||||
sum_ox += offsets->pixels[2*child[c]+0] - dx*gap;
|
||||
sum_oy += offsets->pixels[2*child[c]+1] - dy*gap;
|
||||
w++;
|
||||
}
|
||||
}
|
||||
if(w==0) w++; // just in case
|
||||
offx = (int)floor(0.5 + sum_ox/float(w));
|
||||
offy = (int)floor(0.5 + sum_oy/float(w));
|
||||
|
||||
// store result for later
|
||||
res_offsets->pixels[2*l+0] = offx;
|
||||
res_offsets->pixels[2*l+1] = offy;
|
||||
}
|
||||
|
||||
for(c=v=0; v<nc; v++) {
|
||||
int dy = (2*v/(nc-1)-1);
|
||||
for(u=0; u<nc; u++,c++) {
|
||||
int dx = (2*u/(nc-1)-1);
|
||||
|
||||
if(child[c]<0 || cass[child[c]]<0) continue;
|
||||
float divf = child_norms->pixels[child[c]]/norms->pixels[l];
|
||||
|
||||
// difference with rmap's offset
|
||||
const int trans_x = dx*gap + (offsets? offx - offsets->pixels[2*child[c]+0] : 0);
|
||||
const int trans_y = dy*gap + (offsets? offy - offsets->pixels[2*child[c]+1] : 0);
|
||||
|
||||
// count the sum of weights in every image area
|
||||
for(i=-1; i<=1; i++)for(j=-1; j<=1; j++)
|
||||
if(i*trans_x<=0 && j*trans_y<=0)
|
||||
sum_divf[4+j*3+i] += divf;
|
||||
|
||||
// add a translated version of map[children[c]] by (ox-dx,oy-dy)
|
||||
if(ncall++==0) // first call
|
||||
fast_set_trans( rmap, child_map->pixels + cass[child[c]]*npix, divf, trans_x,trans_y, tx,ty, 0, 0 );
|
||||
else
|
||||
fast_add_trans( rmap, child_map->pixels + cass[child[c]]*npix, divf, trans_x,trans_y, tx,ty, 0, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
if( ncall == 0) // default = zeros
|
||||
memset(rmap, 0, npix*sizeof(float));
|
||||
|
||||
// now we are supposed to rectify the boundaries (to perfect convolution)
|
||||
normalize_trans(tx, ty, gap, rmap, trans_inv, sum_divf );
|
||||
|
||||
//assert(min_array_f(rmap,npix)>=0 && max_array_f(rmap,npix)<=1.001);
|
||||
}
|
||||
}
|
||||
if(!child_assign) free(cass);
|
||||
|
||||
#define CHECK_MAPS(rmaps) assert(min_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))>=0 && \
|
||||
max_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))<=1.001)
|
||||
//CHECK_MAPS(res);
|
||||
|
||||
return nconv;
|
||||
}
|
||||
@ -0,0 +1,144 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___CONV_H___
|
||||
#define ___CONV_H___
|
||||
#include "array_types.h"
|
||||
#include "deep_matching.h"
|
||||
|
||||
|
||||
/* Return the vectorized dimension of a HOG patch
|
||||
*/
|
||||
int get_patch_desc_dim( float_layers* hog, int patch_size );
|
||||
|
||||
|
||||
/* Sample a set of patches from a HOG image.
|
||||
pos : array of (x,y) position of the patches
|
||||
size: size of the patches, ie. [x,x+size[ x [y,y+size[
|
||||
res: result array, n_patches x desc_dim
|
||||
desc_dim = n_layers * size**2
|
||||
norms: result, n_patches x 1, norm of each patch
|
||||
*/
|
||||
void _sample_patches( float_layers* hog, float_layers* color, int_image* pos, int size, float norm,
|
||||
float_image* res, float_array* norms, int n_thread );
|
||||
|
||||
|
||||
/* normalize each pixel of a multi-layers image
|
||||
norm = {0:nothing, 1:L2-normalization, 0-1: normalization by (L2-norm)**<norm> }
|
||||
*/
|
||||
void norm_layers( float_layers* res, float norm, int n_thread );
|
||||
|
||||
|
||||
/* Prepare a grid of cell positions in the first image for a given scale. Big cells inherit the cell at the previous scale.
|
||||
size = size of cells at current scale
|
||||
offset, step = grid generator: (offset + i*step, offset + j*step)
|
||||
child_grid = grid of the previous layer (or None if first layer)
|
||||
child_norms = image containing the norms of the patch at the previous level
|
||||
grid = result center positions of cells in current scale
|
||||
children = index of cells in previous scale used to construct big cells
|
||||
norms = norms of the cells of this level
|
||||
*/
|
||||
void _prepare_big_cells( int size, int offset, int step,
|
||||
int_cube* child_grid, float_image* child_norms,
|
||||
int_cube* grid, int_cube* children, float_image* norms );
|
||||
|
||||
|
||||
|
||||
/* Compute the correlation of all patches with the second image (hog).
|
||||
In case of ngh_rad>0, the correlation is only computed in a small local neighborhood
|
||||
(whose size is parameterized by ngh_rad).
|
||||
if extend: width and height of output maps are extended
|
||||
if norm: correlation are normalized afterwards.
|
||||
*/
|
||||
void fastconv( float_image* patches, float_layers* hog, int patch_size, int ngh_rad,
|
||||
int extend, float norm, int nt, res_scale* res );
|
||||
|
||||
|
||||
|
||||
/* Compute the (sparse) convolutions specified by <children> on <map> and put the result in <res>.
|
||||
A standard order is assumed on the children:
|
||||
a response map #p is built from the children[p] at positions
|
||||
[(gap*dx,gap*dy) for dy in dys for dx in dxs]
|
||||
where dxs = [-1,1] or [-1,0,1]
|
||||
dys = [-1,1] or [-1,0,1]
|
||||
child_assign denotes the assignment of the children level, while assign is for the next level
|
||||
child_norms contain the norms of small patches and norms for big new cells
|
||||
*/
|
||||
int _sparse_conv( int_image* children, int_array* child_assign, int gap, float trans_inv,
|
||||
float_layers* child_map, int_image* offsets, float_array* child_norms, float_array* norms,
|
||||
int_array* assign, float_layers* res, int_image* res_offsets, int n_thread );
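/* Worked example of the child ordering above (illustration only): with nc=2 children per
   row/column, i.e. dxs = dys = [-1,1], response map #p is assembled from children[p] taken
   at the offsets (-gap,-gap), (+gap,-gap), (-gap,+gap), (+gap,+gap), in that order. */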
|
||||
|
||||
|
||||
|
||||
/* Compute: arr **= p
|
||||
*/
|
||||
void fastipow( float_layers* arr, const float p, int n_thread );
|
||||
|
||||
/* Compute: arr = max(0,(arr-p)/(1-p))
|
||||
*/
|
||||
void fasthinge( float_layers* arr, const float p, int n_thread );
|
||||
|
||||
/* Compute: arr = exp(-arr)
|
||||
*/
|
||||
void fastnegexp( float_image* arr );
|
||||
|
||||
|
||||
|
||||
/* incorporate the color difference between patches into existing patch similarity
|
||||
|
||||
formula: new_response = ( color_sim*addw + old_response*(1-addw) ) * ( mulw*color_sim + 1-mulw )
|
||||
|
||||
if mulw=1, addw=0,   then: new_response = old_response * color_sim
if mulw=0, addw=0.5, then: new_response = (old_response + color_sim)/2
|
||||
*/
|
||||
void incorporate_color( int_cube* grid, int_array* assign, float_layers* lab0, float_layers* var0,
|
||||
float_layers* lab1, float_layers* var1,
|
||||
float_layers* res_maps, float L_std, float ab_std, int sym_dist, int n_opening,
|
||||
const float addw, const float mulw, int n_thread );
|
||||
#endif
|
@ -0,0 +1,936 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "deep_matching.h"
|
||||
#include "std.h"
|
||||
#include "conv.h"
|
||||
#include "maxfilter.h"
|
||||
|
||||
|
||||
|
||||
// return size of atomic patches
|
||||
int get_atomic_patch_size( const dm_params_t* params )
|
||||
{
|
||||
int upsize = (1 << params->prior_img_downscale);
|
||||
return 4*upsize;
|
||||
}
|
||||
|
||||
// crop dimensions to a multiple of patch_size
|
||||
void get_source_shape( const int width, const int height, const int patch_size, int* res ) {
|
||||
// crop the reference image to a multiple of patch size
|
||||
res[0] = patch_size * int(width / patch_size);
|
||||
res[1] = patch_size * int(height / patch_size);
|
||||
}
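// Worked example of the crop above (illustration only):
//   width=1023, height=771, patch_size=16  ->  res = {1008, 768}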
|
||||
|
||||
// extract pixel descriptor for both images
|
||||
void extract_image_desc( image_t* img0, image_t* img1, const dm_params_t* params,
|
||||
float_layers** desc0, float_layers** desc1 )
|
||||
{
|
||||
// slightly reduce img0 size to fit the patch tiling
|
||||
int patch_size = get_atomic_patch_size( params );
|
||||
|
||||
int size[2]; // = {width, height}
|
||||
get_source_shape( img0->width, img0->height, patch_size, size );
|
||||
image_crop(img0, size[0], size[1]);
|
||||
|
||||
// extract gradient-based information
|
||||
*desc0 = extract_desc( img0, ¶ms->desc_params, params->n_thread );
|
||||
*desc1 = extract_desc( img1, ¶ms->desc_params, params->n_thread );
|
||||
}
|
||||
|
||||
|
||||
void avgpool2( float_layers* hog, const dm_params_t* params )
|
||||
{
|
||||
int niter = params->prior_img_downscale;
|
||||
while(niter--) {
|
||||
float_layers res = empty_layers(float,hog->tx/2,hog->ty/2,hog->tz);
|
||||
_avgpool2(hog,&res,params->n_thread);
|
||||
|
||||
// replace hog by res
|
||||
free(hog->pixels);
|
||||
*hog = res;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* compute the grid of parent cell position, and their connection to children cells
|
||||
cells can be half-overlapping if <overlap>=1
|
||||
<dense_step> forces the grid spacing if >0
|
||||
*/
|
||||
void prepare_big_cells( const int imshape[2], int cell_size, int overlap, int child_overlap,
|
||||
int_cube* child_grid, float_image* child_norms, int dense_step,
|
||||
int_cube* grid, int_cube* children, float_image* norms )
|
||||
{
|
||||
int offset, step, gtx, gty;
|
||||
if( dense_step ) {
|
||||
step = dense_step;
|
||||
offset = 0;
|
||||
// we do not care if the patches are overlapping outside the image
|
||||
#define grid_size(imsize) (1+imsize/step)
|
||||
gtx = grid_size(imshape[0]);
|
||||
gty = grid_size(imshape[1]);
|
||||
#undef grid_size
|
||||
} else {
|
||||
// we want patches fully included in the image
|
||||
offset = cell_size/2;
|
||||
step = cell_size/(overlap+1);
|
||||
#define grid_size(imsize) (1+MAX(0,imsize-2*offset)/step)
|
||||
gtx = grid_size(imshape[0]);
|
||||
gty = grid_size(imshape[1]);
|
||||
#undef grid_size
|
||||
}
|
||||
|
||||
assert(!grid->pixels);
|
||||
*grid = empty_cube(int,gtx,gty,2);
|
||||
|
||||
assert(0<=overlap && overlap<=1);
|
||||
int nc = pow2(2+child_overlap); // number of children per cell
|
||||
if(child_grid) {
|
||||
assert(!norms->pixels);
|
||||
*norms = image_like(float,grid);
|
||||
assert(!children->pixels);
|
||||
*children = empty_cube(int,gtx,gty,nc);
|
||||
}
|
||||
|
||||
_prepare_big_cells( cell_size, offset, step, child_grid, child_norms, grid, children, norms );
|
||||
}
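// Worked example (illustrative only): for imshape[0] = 64, cell_size = 16,
// overlap = 1 and dense_step = 0, the second branch above gives
// offset = 16/2 = 8 and step = 16/(1+1) = 8, hence
// gtx = 1 + MAX(0, 64 - 2*8)/8 = 7 half-overlapping cell centers along x.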
|
||||
|
||||
|
||||
void sample_patches( float_layers* hog, int_cube* pos, int patch_size, int f, float norm, int n_thread,
|
||||
float_image* patches, float_array* norms )
|
||||
{
|
||||
assert(norm>0);
|
||||
const int npos = pos->tx*pos->ty;
|
||||
int_image new_pos = empty_image(int,2,npos);
|
||||
for(int i=0; i<2*npos; i++)
|
||||
new_pos.pixels[i] = (pos->pixels[i]-patch_size/2)/f;
|
||||
|
||||
patch_size /= f;
|
||||
const int nh = get_patch_desc_dim(hog,patch_size);
|
||||
|
||||
assert(!patches->pixels);
|
||||
*patches = empty_image(float,nh,npos);
|
||||
assert(norms->tx==npos);
|
||||
|
||||
_sample_patches( hog, NULL, &new_pos, patch_size, norm, patches, norms, n_thread );
|
||||
|
||||
free(new_pos.pixels);
|
||||
}
|
||||
|
||||
|
||||
const float trans_inv = 0.9f;
|
||||
|
||||
void convolve_atomic_patches( float_layers* source, float_layers* target,
|
||||
const dm_params_t* params, res_scale* first_level )
|
||||
{
|
||||
const int extend = 1; // slightly spatially extend response maps
|
||||
const float norm = 1; // renorm patches
|
||||
|
||||
const int f = first_level->f; // scale factor w.r.t. original image
|
||||
const int psize = first_level->patch_size; // current patch size
|
||||
|
||||
// first, sample patches
|
||||
float_image patches = {0};
|
||||
assert(!first_level->norms.pixels);
|
||||
first_level->norms = image_like(float, &first_level->grid);
|
||||
float_array norms_arr = {first_level->norms.pixels, (int)IMG_SIZE(&first_level->norms)};
|
||||
sample_patches( source, &first_level->grid, psize, f, norm, params->n_thread, &patches, &norms_arr );
|
||||
//hash_image(&patches)
|
||||
|
||||
// rectify the norm to a boolean (0 or 1) (useless ?)
|
||||
first_level->assign = empty_array(int,norms_arr.tx);
|
||||
int n=0, tx = patches.tx;
|
||||
for(int i=0; i<norms_arr.tx; i++) {
|
||||
norms_arr.pixels[i] = norms_arr.pixels[i]>0;
|
||||
|
||||
// eliminate zero-norm patches
|
||||
if( norms_arr.pixels[i] ) {
|
||||
if( n < i ) // copy
|
||||
memcpy( patches.pixels + n*tx, patches.pixels + i*tx, tx*sizeof(float));
|
||||
first_level->assign.pixels[i] = n++;
|
||||
} else
|
||||
first_level->assign.pixels[i] = -1;
|
||||
|
||||
// convolution is not fully invariant to the image border:
|
||||
// blank cells outside the image are a bit disadvantageous
|
||||
if( norms_arr.pixels[i] == 0 )
|
||||
norms_arr.pixels[i] = 1-trans_inv;
|
||||
}
|
||||
patches.ty = n; // update new number of valid patches
|
||||
|
||||
//hash_image(&first_level->norms)
|
||||
//hash_image(&patches)
|
||||
|
||||
// compute the first level convolutions
|
||||
fastconv( &patches, target, psize/f, params->ngh_rad/f, extend, norm, params->n_thread, first_level );
|
||||
|
||||
free(patches.pixels);
|
||||
}
|
||||
|
||||
int_image* maxpool3_and_subsample2( float_layers* hog, int true_shape[2], int_image* offsets, float_layers* res, int nt )
|
||||
{
|
||||
assert(!res->pixels);
|
||||
if ( offsets->pixels == NULL )
|
||||
assert( hog->tx == true_shape[0] && hog->ty == true_shape[1] );
|
||||
|
||||
// set downsampled size
|
||||
true_shape[0] = (true_shape[0]+1)/2;
|
||||
true_shape[1] = (true_shape[1]+1)/2;
|
||||
assert( true_shape[0]>0 && true_shape[1]>0 );
|
||||
|
||||
if ( offsets->pixels == NULL ) {
|
||||
// joint max-pooling and subsampling
|
||||
*res = empty_layers(float, true_shape[0], true_shape[1], hog->tz);
|
||||
_max_filter_3_and_subsample_layers( hog, res, nt );
|
||||
return NULL;
|
||||
|
||||
} else {
|
||||
// with offsets
|
||||
float_layers maxpooled_hog = layers_like(float, hog);
|
||||
_max_filter_3_layers( hog, &maxpooled_hog, nt );
|
||||
//CHECK_MAPS(&maxpooled_hog);
|
||||
|
||||
// slightly bigger, so that the minimum size is always >= 2
|
||||
int width = (hog->tx+2)/2;
|
||||
int height = (hog->ty+2)/2;
|
||||
*res = empty_layers(float, width, height, hog->tz);
|
||||
_subsample2_offset( &maxpooled_hog, offsets, res, nt );
|
||||
free(maxpooled_hog.pixels);
|
||||
|
||||
// compute new offsets
|
||||
int_image* res_offsets = NEW(int_image);
|
||||
*res_offsets = image_like(int, offsets);
|
||||
for(long i=0; i<IMG_SIZE(offsets); i++)
|
||||
res_offsets->pixels[i] = (int)floor( offsets->pixels[i]/2.f );
|
||||
return res_offsets;
|
||||
}
|
||||
}
|
||||
|
||||
#define CHECK_MAPS(rmaps) assert(min_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))>=0 && \
|
||||
max_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))<=1.001)
|
||||
|
||||
/* aggregate response maps of children patches to form response maps of parent patches */
|
||||
int sparse_conv( int_cube* children, int_array* children_assign, float_image* child_norms,
|
||||
int true_patch_size, float_layers* map, int_image* offsets, int nt,
|
||||
res_scale* res )
|
||||
{
|
||||
float_layers ext_map;
|
||||
if( MIN(map->tx,map->ty) < 5 ) {
|
||||
ext_map = zeros_layers(float,MAX(5,map->tx),MAX(5,map->ty),map->tz);
|
||||
for(int l=0; l<map->tz; l++)
|
||||
for(int j=0; j<map->ty; j++)
|
||||
for(int i=0; i<map->tx; i++)
|
||||
ext_map.pixels[(l*ext_map.ty + j)*ext_map.tx + i] = map->pixels[(l*map->ty + j)*map->tx + i];
|
||||
map = &ext_map;
|
||||
res->true_shape[0] = ext_map.tx;
|
||||
res->true_shape[1] = ext_map.ty;
|
||||
}
|
||||
|
||||
int_image _children = reshape_z_xy(int, &res->children);
|
||||
|
||||
if( offsets )
|
||||
res->offsets = empty_image(int, 2, _children.ty);
|
||||
|
||||
assert(!res->res_map.pixels);
|
||||
res->res_map = empty_layers(float, map->tx, map->ty, _children.ty);
|
||||
int gap = true_patch_size / 4;
|
||||
assert(gap>0);
|
||||
float_array _norms = reshape_xy(float, &res->norms);
|
||||
float_array _child_norms = reshape_xy(float, child_norms);
|
||||
|
||||
// allocate useless assign
|
||||
res->assign = empty_array(int, res->res_map.tz);
|
||||
for(int i=0; i<res->assign.tx; i++) res->assign.pixels[i] = i;
|
||||
|
||||
int_array* _assign = NULL;
|
||||
int_array* _ch_assign = children_assign->pixels ? children_assign : NULL;
|
||||
int n = _sparse_conv( &_children, _ch_assign, gap, trans_inv, map, offsets,
|
||||
&_child_norms, &_norms, _assign, &res->res_map, &res->offsets, nt );
|
||||
//CHECK_MAPS(res);
|
||||
|
||||
if(map==&ext_map) free(ext_map.pixels);
|
||||
return n;
|
||||
}
|
||||
|
||||
res_scale new_pyramid_level(int f, int psize)
|
||||
{
|
||||
res_scale res = {0}; // initialize everything to 0/NULL
|
||||
res.f = f; // subsampling factor with respect to original image size
|
||||
res.patch_size = psize; // patch size in original image coordinates
|
||||
return res;
|
||||
}
|
||||
|
||||
// Compute the multi-scale pyramid response
|
||||
void compute_matching_pyr( float_layers* source, float_layers* target, const dm_params_t* params,
|
||||
matching_pyramid_t& res_maps )
|
||||
{
|
||||
const int src_shape[2] = {source->tx, source->ty};
|
||||
int L = 0; // current pyramid level
|
||||
const int atomic_psize = get_atomic_patch_size( params );
|
||||
int psize = atomic_psize; // will grow by a factor 2 at each level
|
||||
int f = psize/4; // initial scaling factor
|
||||
|
||||
// subsample if needed
|
||||
avgpool2( source, params );
|
||||
avgpool2( target, params );
|
||||
|
||||
//hash_layers(source)
|
||||
//hash_layers(target)
|
||||
|
||||
res_maps.clear();
|
||||
res_maps.push_back(new_pyramid_level(f,psize));
|
||||
res_scale *child, *last = &res_maps[res_maps.size()-1];
|
||||
|
||||
// compute the initial patches in source image
|
||||
if( params->verbose ) std_printf("layer %d, patch_size = %dx%d\n", L, psize, psize);
|
||||
prepare_big_cells( src_shape, psize, params->overlap<L+1, 0, NULL, NULL, 0, &last->grid, NULL, NULL );
|
||||
//hash_cube(&last->grid)
|
||||
|
||||
//hash_layers(source)
|
||||
convolve_atomic_patches( source, target, params, last );
|
||||
//hash_layers(&last->res_map)
|
||||
if( params->verbose )
|
||||
std_printf("remaining %ld big cells (actually, %d are unique)\n", IMG_SIZE(&last->grid), last->res_map.tz);
|
||||
|
||||
// non-linear correction
|
||||
if( params->nlpow>0 )
|
||||
fastipow( &last->res_map, params->nlpow, params->n_thread );
|
||||
|
||||
//hash_layers(&last->res_map)
|
||||
|
||||
const int dense_step = params->subsample_ref ? 0 : psize/(1+(params->overlap<1));
|
||||
|
||||
// aggregate patches for all subsequent levels
|
||||
while( 2*psize <= MIN(params->max_psize, MAX(src_shape[0], src_shape[1])) ) {
|
||||
L++;
|
||||
f *= 2;
|
||||
psize *= 2;
|
||||
res_maps.push_back(new_pyramid_level(f,psize));
|
||||
child = &res_maps[res_maps.size()-2]; // previous level
|
||||
last = &res_maps[res_maps.size()-1]; // current level
|
||||
if( params->verbose ) std_printf("layer %d, patch_size = %dx%d\n", L, psize, psize);
|
||||
|
||||
// max pooling + subsampling
|
||||
//CHECK_MAPS(&child->res_map);
|
||||
last->true_shape[0] = child->true_shape[0]; // will be modified in subsampled2()
|
||||
last->true_shape[1] = child->true_shape[1];
|
||||
float_layers subs_res_map = {0};
|
||||
int_image* offsets = maxpool3_and_subsample2( &child->res_map, last->true_shape, &child->offsets,
|
||||
&subs_res_map, params->n_thread );
|
||||
//CHECK_MAPS(&subs_res_map);
|
||||
|
||||
// build the set of patches at this scale
|
||||
prepare_big_cells( src_shape, psize, params->overlap<L+1, params->overlap<L,
|
||||
&child->grid, &child->norms, dense_step, &last->grid, &last->children, &last->norms );
|
||||
//DA(last->true_shape,2)
|
||||
//hash_cube(&last->grid)
|
||||
//hash_image(&last->norms)
|
||||
//hash_cube(&last->children)
|
||||
|
||||
// aggregate children response maps to form parent response maps
|
||||
sparse_conv( &last->children, &child->assign, &child->norms, psize/f, &subs_res_map, offsets,
|
||||
params->n_thread, last );
|
||||
free(subs_res_map.pixels);
|
||||
free_image(offsets);
|
||||
//CHECK_MAPS(&last->res_map);
|
||||
if( params->verbose )
|
||||
std_printf("remaining %ld big cells (actually, %d are unique)\n", IMG_SIZE(&last->grid), last->res_map.tz);
|
||||
|
||||
// non-linear correction
|
||||
if( params->nlpow>0 )
|
||||
fastipow(&last->res_map, params->nlpow, params->n_thread );
|
||||
//hash_layers(&last->res_map)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void free_matching_pyramid( matching_pyramid_t& res_maps ) {
|
||||
unsigned int i;
|
||||
for(i=0; i<res_maps.size(); i++) {
|
||||
res_scale& level = res_maps[i];
|
||||
|
||||
free(level.grid.pixels);
|
||||
free(level.norms.pixels);
|
||||
free(level.assign.pixels);
|
||||
free(level.res_map.pixels);
|
||||
free(level.max_map.pixels);
|
||||
free(level.children.pixels);
|
||||
free(level.passed.pixels);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
static int arg_sort_maxima(void* arr, const void* a, const void* b) {
|
||||
float diff = ((float*)arr)[5*(*(int*)a)+4] - ((float*)arr)[5*(*(int*)b)+4];
|
||||
return (diff<0) - (diff>0); // descending order
|
||||
}
|
||||
#else
|
||||
static int arg_sort_maxima(const void* a, const void* b, void* arr) {
|
||||
float diff = ((float*)arr)[5*(*(int*)a)+4] - ((float*)arr)[5*(*(int*)b)+4];
|
||||
return (diff<0) - (diff>0); // descending order
|
||||
}
|
||||
#endif
|
||||
|
||||
void reorder_rows( int_image* img, int_array* order )
|
||||
{
|
||||
assert(order->tx==img->ty);
|
||||
const int tx = img->tx;
|
||||
int_image res = image_like(int, img);
|
||||
|
||||
for(int i=0; i<order->tx; i++)
|
||||
memcpy(res.pixels + i*tx, img->pixels+order->pixels[i]*tx, tx*sizeof(int));
|
||||
|
||||
free(img->pixels);
|
||||
*img = res;
|
||||
}
|
||||
|
||||
// return points corresponding to patch matches
|
||||
int_image* find_optimal_matchings( matching_pyramid_t& mp, const dm_params_t* params )
|
||||
{
|
||||
const int nobordure = 0;
|
||||
int_image* maxima = NEW(int_image);
|
||||
int_array order = {0};
|
||||
|
||||
if( params->maxima_mode ) { // normal process: maxima detection
|
||||
|
||||
float th=0;
|
||||
int check_parents=false, check_children=false;
|
||||
|
||||
float_array sc_maxima = empty_array(float,int(mp.size()));
|
||||
for(unsigned int i=0; i<mp.size(); i++) sc_maxima.pixels[i]=1; // not strictly needed, but harmless
|
||||
|
||||
_extract_maxima( mp.data(), mp.size(), &sc_maxima, th, params->min_level, params->nlpow,
|
||||
check_parents, check_children, nobordure, maxima, params->n_thread );
|
||||
free(sc_maxima.pixels);
|
||||
|
||||
order = empty_array(int,maxima->ty);
|
||||
for(int i=0; i<maxima->ty; i++) order.pixels[i] = maxima->ty-1-i; // last first
|
||||
|
||||
} else { // we just analyse all cells at the top level
|
||||
const float_layers* rmap = &mp[mp.size()-1].res_map;
|
||||
const int tx = rmap->tx, txy=tx*rmap->ty;
|
||||
*maxima = empty_image(int, 5, (int)LAYERS_SIZE(rmap));
|
||||
|
||||
for(int i=0; i<maxima->ty; i++) {
|
||||
int* row = maxima->pixels + 5*i;
|
||||
row[0] = mp.size()-1; // pyramid level
|
||||
row[1] = i/txy; // layer number
|
||||
row[2] = i%tx; // x position
|
||||
row[3] = (i%txy)/tx; // y position
|
||||
((float*)row)[4] = rmap->pixels[i];
|
||||
}
|
||||
//hash_image(maxima)
|
||||
|
||||
order = empty_array(int,maxima->ty);
|
||||
for(int i=0; i<maxima->ty; i++) order.pixels[i] = i;
|
||||
#ifdef __APPLE__
|
||||
qsort_r(order.pixels, maxima->ty, sizeof(int), maxima->pixels, arg_sort_maxima);
|
||||
#else
|
||||
qsort_r(order.pixels, maxima->ty, sizeof(int), arg_sort_maxima, maxima->pixels);
|
||||
#endif
|
||||
}
|
||||
|
||||
if( params->verbose>0 )
|
||||
std_printf("found %d local matches\n",maxima->ty);
|
||||
|
||||
// reorder maxima
|
||||
reorder_rows( maxima, &order );
|
||||
free(order.pixels);
|
||||
return maxima;
|
||||
}
|
||||
|
||||
|
||||
static inline float ptdot( const float* m, float x, float y ) {
|
||||
return x*m[0] + y*m[1] + m[2];
|
||||
}
|
||||
|
||||
void apply_rot( float_cube* corres, float rot[6] ) {
|
||||
assert( corres->tz == 6 );
|
||||
const int nb = IMG_SIZE(corres);
|
||||
float* p = corres->pixels;
|
||||
|
||||
for(int i=0; i<nb; i++) {
|
||||
// only apply to coordinates of the first image
|
||||
float x = p[0], y = p[1];
|
||||
p[0] = ptdot(rot+0, x, y);
|
||||
p[1] = ptdot(rot+3, x, y);
|
||||
p += 6;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* this function gather correspondences from each local maximum in the
|
||||
response maps
|
||||
*/
|
||||
float_image* gather_correspondences( int src_shape[2], int target_shape[2],
|
||||
matching_pyramid_t& scales, int_image* maxima,
|
||||
const dm_params_t* params, full_corres_t* corres_out )
|
||||
{
|
||||
const int step = 4*scales[0].f; // bin size
|
||||
const int n_scales = (int)scales.size();
|
||||
const int tx = maxima->tx;
|
||||
const int n_maxima = maxima->ty;
|
||||
|
||||
float_cube corres0 = zeros_cube(float, (src_shape[0]+step-1)/step, (src_shape[1]+step-1)/step,6);
|
||||
float_cube corres1 = zeros_cube(float, (target_shape[0]+step-1)/step, (target_shape[1]+step-1)/step,6);
|
||||
|
||||
int i;
|
||||
// allocate temporary optimization maps
|
||||
for(i=0; i<n_scales; i++) {
|
||||
long size = LAYERS_SIZE(&scales[i].res_map);
|
||||
if( params->low_mem && size > 1000003 ) size = 1000003; // big prime
|
||||
assert( size <= 2147483647 || !"try using -mem parameter");
|
||||
scales[i].passed = zeros_array(float, (int)size);
|
||||
}
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for schedule(static,1) num_threads(params->n_thread)
|
||||
#endif
|
||||
for(i=0; i<n_maxima; i++) {
|
||||
if(params->verbose && i%100==0) std_printf("\rgathering correspondences %d%%...",100*i/n_maxima);
|
||||
int* m = maxima->pixels + tx*i;
|
||||
int level = m[0], num_map = m[1];
|
||||
int x = m[2], y = m[3];
|
||||
assert(level<n_scales);
|
||||
|
||||
if( scales[level].offsets.pixels ) {
|
||||
// add offset to form real image coordinates
|
||||
x += scales[level].offsets.pixels[2*num_map+0];
|
||||
y += scales[level].offsets.pixels[2*num_map+1];
|
||||
}
|
||||
|
||||
if( params->scoring_mode ) // new mode
|
||||
_argmax_correspondences( scales.data(), level, num_map, x, y, ((float*)m)[4],
|
||||
&corres0, step, &corres1, step, i );
|
||||
else // old iccv mode
|
||||
_argmax_correspondences_v1( scales.data(), level, num_map, x, y, m[0]*((float*)m)[4],
|
||||
&corres0, step, &corres1, step, i );
|
||||
}
|
||||
|
||||
// free optimization maps
|
||||
for(i=0; i<n_scales; i++) {
|
||||
free( scales[i].passed.pixels );
|
||||
scales[i].passed.pixels = NULL;
|
||||
}
|
||||
|
||||
if(params->verbose) std_printf("\n");
|
||||
|
||||
if( params->rot45 ) { // rectify correspondences
|
||||
assert( corres_out );
|
||||
apply_rot( &corres0, corres_out->rot );
|
||||
apply_rot( &corres1, corres_out->rot );
|
||||
}
|
||||
|
||||
// keep only reciprocal matches
|
||||
int nres;
|
||||
float* corres = _intersect_corres( &corres0, &corres1, &nres );
|
||||
float_image* res = NEW(float_image);
|
||||
*res = (float_image){corres, 6, nres};
|
||||
|
||||
if( corres_out == NULL ) {
|
||||
free(corres0.pixels);
|
||||
free(corres1.pixels);
|
||||
}
|
||||
else { // save unfiltered correspondences
|
||||
corres_out->corres0 = corres0;
|
||||
corres_out->corres1 = corres1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void eye_rot3x3( float rot[6] ) {
|
||||
memset( rot, 0, 6*sizeof(float));
|
||||
rot[0] = rot[4] = 1;
|
||||
}
|
||||
|
||||
inline float bilinear_interp(const float* img, const int tx, const int ty, float x, float y ) {
|
||||
if( x < 0 || x+1.001 >= tx ) return 0; // outside
|
||||
if( y < 0 || y+1.001 >= ty ) return 0; // outside
|
||||
int ix = int(x);
|
||||
int iy = int(y);
|
||||
img += ix + iy*tx; // move pointer
|
||||
float rx = x - ix;
|
||||
float ry = y - iy;
|
||||
return (1-ry)*((1-rx)*img[0] + rx*img[1]) +
|
||||
ry *((1-rx)*img[tx]+ rx*img[tx+1]);
|
||||
}
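// Illustrative check (not part of the original code): on a 3x2 row-major
// image {1,2,3, 4,5,6}, the query (x=0.5, y=0.5) averages the four
// surrounding pixels: 0.25*(1+2+4+5) = 3. Queries within about one pixel of
// the right or bottom border hit the early-outs above and return 0.
static void bilinear_interp_example() {
  const float img[6] = {1, 2, 3, 4, 5, 6};              // tx = 3, ty = 2
  assert( fabs(bilinear_interp(img, 3, 2, 0.5f, 0.5f) - 3.f) < 1e-6 );
  assert( bilinear_interp(img, 3, 2, 2.5f, 0.5f) == 0 ); // too close to the right edge
}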
|
||||
|
||||
void scale_rot3x3( float rot[6], float sc ) {
|
||||
for(int i=0; i<6; i++)
|
||||
rot[i] *= sc;
|
||||
}
|
||||
|
||||
void inv_rot3x3( float rot[6], float res[6] ) {
|
||||
assert( fabs((rot[0]*rot[4] - rot[1]*rot[3]) - 1) < 1e-6 );
|
||||
// because rot is unitary, the inverse equals the transpose
|
||||
res[0] = rot[0];
|
||||
res[1] = rot[3];
|
||||
res[3] = rot[1];
|
||||
res[4] = rot[4];
|
||||
res[2] = -rot[2]*rot[0] - rot[5]*rot[3];
|
||||
res[5] = -rot[2]*rot[1] - rot[5]*rot[4];
|
||||
}
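// Illustrative sketch (not from the original sources): a quick round-trip
// check of inv_rot3x3() on the 2x3 affine layout used here, where ptdot()
// applies one row of the map. The numbers are arbitrary; the linear part
// {0.8,-0.6; 0.6,0.8} is a rotation with determinant 1, as the assert above requires.
static void inv_rot3x3_example() {
  float rot[6] = {0.8f, -0.6f, 5.f, 0.6f, 0.8f, -2.f};
  float inv[6];
  inv_rot3x3( rot, inv );
  const float x = 12.5f, y = -3.f;
  float rx = ptdot(rot+0, x, y), ry = ptdot(rot+3, x, y);     // forward map
  float bx = ptdot(inv+0, rx, ry), by = ptdot(inv+3, rx, ry); // back to the start
  assert( fabs(bx - x) < 1e-4 && fabs(by - y) < 1e-4 );
}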
|
||||
|
||||
|
||||
|
||||
// rotate a descriptor HOG image by a given angle
|
||||
float_layers* rotate45( float_layers* hog, const dm_params_t* params, full_corres_t* corres_out ) {
|
||||
assert( corres_out ); // we need it to write rot !
|
||||
const int patch_size = get_atomic_patch_size( params );
|
||||
const int n_rot45 = params->rot45;
|
||||
|
||||
if( (n_rot45 % 8) == 0 ) { // nothing to do
|
||||
eye_rot3x3( corres_out->rot );
|
||||
return hog;
|
||||
}
|
||||
const int tx = hog->tx;
|
||||
const int ty = hog->ty;
|
||||
|
||||
// rotation matrix
|
||||
float angle = n_rot45 * M_PI / 4;
|
||||
float c = cos(angle), s = sin(angle);
|
||||
float rot[6] = {c, -s, 0, s, c, 0};
|
||||
// pt_in_original_image = rot * pt_in_rotated_image
|
||||
|
||||
// determine center of rotation before
|
||||
float cx_before = tx/2.0;
|
||||
float cy_before = ty/2.0;
|
||||
// determine center of rotation after
|
||||
float corners[2][4] = {{0, (float)tx, (float)tx, 0}, {0, 0, (float)ty, (float)ty}};
|
||||
for(int i=0; i<4; i++) { // rotate corners
|
||||
float x = corners[0][i], y = corners[1][i];
|
||||
corners[0][i] = ptdot(rot+0, x, y);
|
||||
corners[1][i] = ptdot(rot+3, x, y);
|
||||
}
|
||||
int rot_size[2] = {int(0.5 + max_array_f(corners[0], 4) - min_array_f(corners[0], 4)),
|
||||
int(0.5 + max_array_f(corners[1], 4) - min_array_f(corners[1], 4)) };
|
||||
get_source_shape( rot_size[0], rot_size[1], patch_size, rot_size );
|
||||
float cx_after = rot_size[0]/2.0;
|
||||
float cy_after = rot_size[1]/2.0;
|
||||
// compute the translation
|
||||
rot[2] = cx_before - ptdot(rot+0, cx_after, cy_after);
|
||||
rot[5] = cy_before - ptdot(rot+3, cx_after, cy_after);
|
||||
|
||||
// create result
|
||||
assert( hog->tz == 9 );
|
||||
float_layers* rot_hog = NEW(float_layers);
|
||||
*rot_hog = empty_layers(float, rot_size[0], rot_size[1], 9);
|
||||
|
||||
for(int c=0; c<hog->tz; c++) {
|
||||
const int src_c = (c<8) ? int((c+n_rot45+256)%8) : c; // roll channels except for last one (see hog.h)
|
||||
const float* f = hog->pixels + src_c * IMG_SIZE(hog);
|
||||
float* p = rot_hog->pixels + c * IMG_SIZE(rot_hog);
|
||||
|
||||
for(int y=0; y<rot_size[1]; y++)
|
||||
for(int x=0; x<rot_size[0]; x++) {
|
||||
float rx = ptdot( rot+0, x, y);
|
||||
float ry = ptdot( rot+3, x, y);
|
||||
|
||||
*p++ = bilinear_interp(f, tx, ty, rx, ry );
|
||||
}
|
||||
}
|
||||
|
||||
// output inverted rot
|
||||
memcpy( corres_out->rot, rot, 6*sizeof(float) );
|
||||
|
||||
return rot_hog;
|
||||
}
|
||||
|
||||
|
||||
// set default parameters
|
||||
void set_default_dm_params( dm_params_t* params )
|
||||
{
|
||||
// pixel descriptor params
|
||||
set_default_desc_params( ¶ms->desc_params );
|
||||
|
||||
// general parameters
|
||||
params->prior_img_downscale = 1; // resolution R = 1/2^downscale, default = 1/2
|
||||
params->rot45 = 0; // don't rotate the first image
|
||||
params->overlap = 999; // don't use overlapping patches
|
||||
params->subsample_ref = false; // don't subsample patches in reference image (=first image)
|
||||
params->nlpow = 1.4;
|
||||
params->ngh_rad = 0; // no limit by default
|
||||
params->maxima_mode = 0; // don't use maxima, just start from all top patches
|
||||
params->min_level = 2; // useless
|
||||
params->max_psize = 999; // maximum patch size
|
||||
params->low_mem = true; // optimize mem but then results are slightly unstable/non-reproducible
|
||||
params->verbose = 0;
|
||||
params->scoring_mode = 1; // improved scoring scheme
|
||||
params->n_thread = 1; // no multithreading by default
|
||||
}
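// Hedged usage sketch (illustrative, not part of the original API surface):
// a typical call sequence around these defaults, for two grayscale images the
// caller has already loaded. The n_thread override is only an example.
static float_image* default_matching_example( image_t* im0, image_t* im1 )
{
  dm_params_t params;
  set_default_dm_params( &params );  // start from the defaults above
  params.n_thread = 4;               // example override: use 4 threads
  // each row of the result holds 6 floats: x0 y0 x1 y1 maxima score (see deep_matching.h)
  return deep_matching( im0, im1, &params, NULL );
}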
|
||||
|
||||
|
||||
// main function
|
||||
float_image* deep_matching( image_t* img0, image_t* img1, const dm_params_t* params, full_corres_t* corres_out )
|
||||
{
|
||||
// verify parameters
|
||||
assert(between(0,params->prior_img_downscale,3));
|
||||
assert(between(0,params->overlap,999));
|
||||
assert(between(0,params->subsample_ref,1));
|
||||
assert(between(0.1,params->nlpow,10));
|
||||
assert(between(0,params->ngh_rad,1<<16));
|
||||
assert(between(0,params->maxima_mode,1));
|
||||
assert(between(0,params->min_level,4));
|
||||
assert(between(0,params->low_mem,1));
|
||||
assert(between(0,params->scoring_mode,1));
|
||||
assert(between(0,params->verbose,10));
|
||||
assert(between(1,params->n_thread,128));
|
||||
|
||||
// extract pixel descriptors
|
||||
float_layers *source, *target;
|
||||
extract_image_desc( img0, img1, params, &source, &target );
|
||||
if( corres_out ) // the first image is rotated
|
||||
source = rotate45( source, params, corres_out );
|
||||
int src_shape[2] = {source->tx, source->ty};
|
||||
assert( LAYERS_SIZE(source) > 0 );
|
||||
int target_shape[2] = {target->tx, target->ty};
|
||||
assert( LAYERS_SIZE(target) > 0 );
|
||||
|
||||
//hash_layers(source)
|
||||
//hash_layers(target)
|
||||
|
||||
// compute local matchings
|
||||
matching_pyramid_t matching_pyr;
|
||||
compute_matching_pyr( source, target, params, matching_pyr );
|
||||
free_layers(source);
|
||||
free_layers(target);
|
||||
|
||||
//hash_layers(&matching_pyr[matching_pyr.size()-1].res_map);
|
||||
|
||||
// find optimal matchings (maxima)
|
||||
int_image* maxima = find_optimal_matchings(matching_pyr, params);
|
||||
|
||||
//hash_image(maxima);
|
||||
|
||||
// select the best displacements (maxpool merge)
|
||||
float_image* corres = gather_correspondences( src_shape, target_shape, matching_pyr, maxima, params, corres_out );
|
||||
|
||||
//hash_image(corres);
|
||||
|
||||
// free everything
|
||||
free_matching_pyramid(matching_pyr);
|
||||
free_layers(maxima);
|
||||
|
||||
return corres;
|
||||
}
|
||||
|
||||
|
||||
void swap_first_second_img( float_cube* corres ) {
|
||||
assert( corres->tz == 6 );
|
||||
const int nb = IMG_SIZE(corres);
|
||||
float* p = corres->pixels;
|
||||
|
||||
for(int i = 0; i < nb; i++) {
|
||||
float a = p[0];
|
||||
float b = p[1];
|
||||
float c = p[2];
|
||||
float d = p[3];
|
||||
*p++ = c;
|
||||
*p++ = d;
|
||||
*p++ = a;
|
||||
*p++ = b;
|
||||
p += 2;
|
||||
}
|
||||
}
|
||||
|
||||
void rescale_corres( float_cube* corres, float f0, float f1, int code ) {
|
||||
assert( corres->tz == 6 );
|
||||
const int nb = IMG_SIZE(corres);
|
||||
float* p = corres->pixels;
|
||||
|
||||
for(int i = 0; i < nb; i++) {
|
||||
p[0] *= f0;
|
||||
p[1] *= f0;
|
||||
p[2] *= f1;
|
||||
p[3] *= f1;
|
||||
p[5] = code;
|
||||
p += 6;
|
||||
}
|
||||
}
|
||||
|
||||
// set default parameters
|
||||
void set_default_scalerot_params( scalerot_params_t* params ) {
|
||||
params->fast = true;
|
||||
params->min_sc0 = 0; // scale = 2^(-0/2) = 1
|
||||
params->max_sc0 = 5; // scale = 2^(-5/2) = 0.176
|
||||
params->min_sc1 = 0;
|
||||
params->max_sc1 = 5;
|
||||
params->min_rot = 0; // rot = 0*45 = 0
|
||||
params->max_rot = 8; // rot = 8*45 = 360
|
||||
}
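// Illustrative helper (an assumption, not part of the original code): how to
// decode the rot_scale_code that deep_matching_scale_rot() below stores in the
// last field of every correspondence via rescale_corres(), following
// code = 8*(sc1*5 + sc0) + rotation, with scale = 2^(-sc/2) and rotation in
// multiples of 45 degrees.
static void decode_rot_scale_code_example( int code, float* scale0, float* scale1, int* rot_deg )
{
  *rot_deg = 45 * (code % 8);          // rotation applied before matching
  const int sc = code / 8;
  *scale0 = pow(2, -0.5 * (sc % 5));   // scale factor applied to img0
  *scale1 = pow(2, -0.5 * (sc / 5));   // scale factor applied to img1
}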
|
||||
|
||||
|
||||
// main function for scale/rotation invariant version
|
||||
float_image* deep_matching_scale_rot( image_t* img0, image_t* img1, dm_params_t* params,
|
||||
const scalerot_params_t* sr_params ) {
|
||||
// verify parameters
|
||||
assert(sr_params->min_sc0 < sr_params->max_sc0);
|
||||
assert(sr_params->min_sc1 < sr_params->max_sc1);
|
||||
assert(between(0, sr_params->min_sc0, 5));
|
||||
assert(between(0, sr_params->max_sc0, 5));
|
||||
assert(between(0, sr_params->min_sc1, 5));
|
||||
assert(between(0, sr_params->max_sc1, 5));
|
||||
assert(sr_params->min_rot >= 0);
|
||||
assert(between(1,sr_params->max_rot - sr_params->min_rot, 8));
|
||||
|
||||
// init shape
|
||||
const int psize = get_atomic_patch_size(params);
|
||||
int imshape0[2];
|
||||
get_source_shape( img0->width, img0->height, psize, imshape0 );
|
||||
int imshape1[2] = {img1->width, img1->height};
|
||||
|
||||
// check dm params to ensure everything goes fine from now on
|
||||
#define mean_dim(shape) ((shape[0] + shape[1])/2)
|
||||
params->max_psize = MIN(mean_dim(imshape0), mean_dim(imshape1));
|
||||
const int verbose = params->verbose;
|
||||
params->verbose = MAX(0, verbose - 1); // decrease for inner deepmatchings
|
||||
|
||||
// prepare output
|
||||
const int step0 = psize/2;
|
||||
const int step1 = psize/2;
|
||||
float_cube all_corres0 = zeros_cube(float, (imshape0[0]+step0/2-1)/step0, (imshape0[1]+step0/2-1)/step0, 6);
|
||||
float_cube all_corres1 = zeros_cube(float, (imshape1[0]+step1/2-1)/step1, (imshape1[1]+step1/2-1)/step1, 6);
|
||||
full_corres_t out;
|
||||
|
||||
const int NS = 5;
|
||||
image_t *scaled_images1[NS] = {NULL};
|
||||
|
||||
// loop over all scale*rot combinations
|
||||
for(int sc0 = sr_params->min_sc0;
|
||||
sc0 < sr_params->max_sc0;
|
||||
sc0++) {
|
||||
const float scale0 = pow(2, -0.5*sc0 ); // scale factor for img0
|
||||
assert( scale0<=1 && sc0<5 );
|
||||
image_t* scaled_img0 = ( scale0 >= 1 ) ? img0 :
|
||||
image_resize_bilinear_scale( img0, scale0 );
|
||||
|
||||
for(int sc1 = sr_params->min_sc1;
|
||||
sc1 < sr_params->max_sc1;
|
||||
sc1++) {
|
||||
const float scale1 = pow(2, -0.5*sc1 ); // scale factor for img1
|
||||
assert( scale1<=1 && sc1<5 );
|
||||
// optimization, deactivate only if e.g. both images are blurry
|
||||
if( sr_params->fast && !(scale0==1 || scale1==1)) continue;
|
||||
|
||||
image_t* scaled_img1 = scaled_images1[sc1 - sr_params->min_sc1];
|
||||
if( scaled_img1 == NULL ) {
|
||||
scaled_img1 = ( scale1 >= 1 ) ? img1 :
|
||||
image_resize_bilinear_scale( img1, scale1 );
|
||||
// remember result
|
||||
scaled_images1[sc1 - sr_params->min_sc1] = scaled_img1;
|
||||
}
|
||||
|
||||
for(int rotation = sr_params->min_rot;
|
||||
rotation < sr_params->max_rot;
|
||||
rotation++) {
|
||||
assert( rotation >= 0 );
|
||||
const int rot_scale_code = 8*(sc1*5+sc0) + (rotation%8); // cannot be negative, because of bin count
|
||||
|
||||
if( verbose )
|
||||
std_printf( "processing scale = (x%g, x%g) + rotation = %d deg (code %d)...\n",
|
||||
scale0, scale1, 45*rotation, rot_scale_code);
|
||||
|
||||
float rot0[6], rot1[6];
|
||||
|
||||
// compute correspondences with rotated+scaled image
|
||||
#define max_dim(img) MAX(img->width, img->height)
|
||||
if( max_dim(scaled_img0) >= max_dim(scaled_img1) ) { // first image is always the largest
|
||||
params->rot45 = rotation;
|
||||
|
||||
float_image* corres = deep_matching(scaled_img0, scaled_img1, params, &out );
|
||||
free_image( corres ); // we don't care
|
||||
|
||||
inv_rot3x3(out.rot, rot0);
|
||||
eye_rot3x3(rot1);
|
||||
|
||||
} else { // scaled_img1 is larger
|
||||
params->rot45 = -rotation;
|
||||
|
||||
float_image* corres = deep_matching(scaled_img1, scaled_img0, params, &out );
|
||||
free_image( corres ); // we don't care
|
||||
|
||||
// swap first and second image coordinates
|
||||
memswap( &out.corres0, &out.corres1, sizeof(float_cube) );
|
||||
swap_first_second_img( &out.corres0 );
|
||||
swap_first_second_img( &out.corres1 );
|
||||
|
||||
inv_rot3x3(out.rot, rot1);
|
||||
eye_rot3x3(rot0);
|
||||
}
|
||||
|
||||
// change scale of correspondences
|
||||
rescale_corres( &out.corres0, 1/scale0, 1/scale1, rot_scale_code );
|
||||
rescale_corres( &out.corres1, 1/scale0, 1/scale1, rot_scale_code );
|
||||
scale_rot3x3(rot0, scale0);
|
||||
scale_rot3x3(rot1, scale1);
|
||||
|
||||
// merge correspondences in the reference frame
|
||||
merge_corres( rot0, rot1,
|
||||
psize, psize, &out.corres0, &out.corres1, 2,
|
||||
step0, step1, &all_corres0, &all_corres1 ); // finer grid for merge
|
||||
|
||||
free(out.corres0.pixels);
|
||||
free(out.corres1.pixels);
|
||||
}
|
||||
}
|
||||
|
||||
// free memory
|
||||
if( img0 != scaled_img0 )
|
||||
image_delete( scaled_img0 );
|
||||
}
|
||||
|
||||
// final intersection
|
||||
int nres;
|
||||
float* corres = _intersect_corres( &all_corres0, &all_corres1, &nres );
|
||||
float_image* res = NEW(float_image);
|
||||
*res = (float_image){corres, 6, nres};
|
||||
|
||||
// free memory
|
||||
for(int i=0; i<NS; i++)
|
||||
if( scaled_images1[i] != img1 )
|
||||
image_delete( scaled_images1[i] );
|
||||
free(all_corres0.pixels);
|
||||
free(all_corres1.pixels);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___DEEP_MATCHING_H___
|
||||
#define ___DEEP_MATCHING_H___
|
||||
#include "array_types.h"
|
||||
#include "pixel_desc.h"
|
||||
#include "image.h"
|
||||
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
|
||||
// deep matching parameters
|
||||
typedef struct {
|
||||
desc_params_t desc_params;
|
||||
|
||||
int prior_img_downscale;// downscale the image by 2^(this) prior to matching
|
||||
int rot45; // rotate second img by (45*rot45) prior to matching
|
||||
int overlap; // pyramid level at which patches starts to overlap (999 => no overlap at all)
|
||||
bool subsample_ref; // true if larger patches higher in the pyramid are not densely sampled
|
||||
float nlpow; // non-linear power rectification
|
||||
int ngh_rad; // neighborhood size in pixels => crop res_map (0 == infinite)
|
||||
int maxima_mode; // 1: standard / 0: from all top-level patches
|
||||
int min_level; // minimum pyramid level to retrieve maxima
|
||||
int max_psize; // maximum patch size
|
||||
int low_mem; // use less memory to retrieve the maxima (but approximate result)
|
||||
int scoring_mode; // 0: like ICCV paper / 1: improved scoring mode
|
||||
int verbose; // verbosity
|
||||
int n_thread; // parallelization on several cores, when possible
|
||||
|
||||
} dm_params_t;
|
||||
|
||||
// set default parameters
|
||||
void set_default_dm_params( dm_params_t* params );
|
||||
|
||||
// scale & rotation invariant version
|
||||
typedef struct {
|
||||
bool fast; // avoid comparing small scaled versions of both images
|
||||
int min_sc0, max_sc0; // scale range of image0 (expressed as scale=2^(-n/2))
|
||||
int min_sc1, max_sc1; // scale range of image1 (expressed as scale=2^(-n/2))
|
||||
int min_rot, max_rot; // rotation range (expressed as multiples of 45 degrees)
|
||||
|
||||
} scalerot_params_t;
|
||||
|
||||
// set default parameters
|
||||
void set_default_scalerot_params( scalerot_params_t* params );
|
||||
|
||||
|
||||
// response maps at a given scale
|
||||
typedef struct {
|
||||
int f; // subsampling factor with respect to original image size
|
||||
int patch_size; // patch size in original image coordinates in first image
|
||||
int_cube grid; // position (center) of each patch in first image
|
||||
float_image norms; // norm of each patch in first image
|
||||
int_array assign; // mapping between patches and their response maps
|
||||
float_layers res_map; // response map of the patches on the second image
|
||||
float_layers max_map; // max-filtered response map
|
||||
int true_shape[2]; // true res_map shape (width, height) in case of crop (if ngh_rad>0)
|
||||
int_image offsets; // res_map offsets in case of crop (if ngh_rad>0)
|
||||
int_cube children; // index of children patches in the previous level
|
||||
float_array passed; // remember the best score so far at each response when doing argmax
|
||||
|
||||
} res_scale;
|
||||
|
||||
typedef vector<res_scale> matching_pyramid_t;
|
||||
|
||||
|
||||
// output correspondences
|
||||
typedef struct {
|
||||
float x0, y0; // position in first image (reference image)
|
||||
float x1, y1; // position in second image (target image)
|
||||
float maxima; // index of the maximum from which this match was generated
|
||||
float score; // matching score
|
||||
} corres_t;
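// Hedged reading sketch (illustrative, not part of the original header): each
// row of the float_image returned by deep_matching() below has exactly this
// layout, so it can be scanned as corres_t records (assuming the usual packed
// struct layout of six consecutive floats).
static inline int best_corres_index_example( const float_image* res )
{
  int best = -1;
  float best_score = -1e30f;
  for(int i = 0; i < res->ty; i++) {
    const corres_t* c = (const corres_t*)(res->pixels + 6*i);
    if( c->score > best_score ) { best_score = c->score; best = i; }
  }
  return best;  // row index of the highest-scoring match, or -1 if res->ty == 0
}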
|
||||
|
||||
// for scale rot invariant matching
|
||||
typedef struct {
|
||||
float rot[6];
|
||||
float_cube corres0;
|
||||
float_cube corres1;
|
||||
} full_corres_t;
|
||||
|
||||
|
||||
// main function. Returns a float_image where each row is <corres_t>
|
||||
float_image* deep_matching( image_t* img0, image_t* img1, const dm_params_t* params,
|
||||
full_corres_t* corres_out ); // NULL if you don't use it
|
||||
|
||||
// main function for scale & rotation invariant matching. Output format is the same as above.
|
||||
float_image* deep_matching_scale_rot( image_t* img0, image_t* img1, dm_params_t* params,
|
||||
const scalerot_params_t* sr_params );
|
||||
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,186 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
%module(docstring="Module to compute DeepMatching") deepmatching
|
||||
|
||||
%{
|
||||
#define SWIG_FILE_WITH_INIT
|
||||
|
||||
#include <numpy/arrayobject.h>
|
||||
|
||||
|
||||
#define CHECK_NUMPY_ARRAY(a, expected_npy) \
|
||||
if(!a) { \
|
||||
fprintf(stderr,"error in %s(): NULL input\n",__PRETTY_FUNCTION__); \
|
||||
return NULL; \
|
||||
} \
|
||||
if(!PyArray_Check(a)) { \
|
||||
fprintf(stderr,"error in %s(): input not numpy array\n",__PRETTY_FUNCTION__); \
|
||||
return NULL; \
|
||||
} \
|
||||
if(!PyArray_ISCONTIGUOUS(a)) { \
|
||||
fprintf(stderr,"error in %s(): array is not C-contiguous\n",__PRETTY_FUNCTION__); \
|
||||
return NULL; \
|
||||
} \
|
||||
if(PyArray_TYPE(a)!=expected_npy) { \
|
||||
fprintf(stderr,"error in %s(): input has bad type (type id %d != " #expected_npy " %d)\n",__PRETTY_FUNCTION__, \
|
||||
PyArray_TYPE(a),expected_npy); \
|
||||
return NULL; \
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%init %{
|
||||
import_array();
|
||||
%}
|
||||
|
||||
|
||||
%{
|
||||
#include "image.h"
|
||||
#include "array_types.h"
|
||||
%}
|
||||
|
||||
%typemap(in)
|
||||
(color_image_t* cimg)
|
||||
(color_image_t cimage) {
|
||||
|
||||
PyObject* a = $input;
|
||||
if(a==Py_None) {
|
||||
$1 = NULL;
|
||||
} else {
|
||||
CHECK_NUMPY_ARRAY(a, NPY_FLOAT)
|
||||
cimage.c1 = (float*) PyArray_DATA(a);
|
||||
a = PyObject_GetAttrString($input,"shape");
|
||||
assert(PyTuple_Size(a)==3);
|
||||
assert( PyInt_AsLong(PyTuple_GetItem(a,0)) == 3);
|
||||
cimage.height = PyInt_AsLong(PyTuple_GetItem(a,1));
|
||||
cimage.width = PyInt_AsLong(PyTuple_GetItem(a,2));
|
||||
cimage.c2 = cimage.c1 + cimage.width*cimage.height;
|
||||
cimage.c3 = cimage.c2 + cimage.width*cimage.height;
|
||||
$1=&cimage;
|
||||
}
|
||||
}
|
||||
%apply (color_image_t* cimg) {(color_image_t* )};
|
||||
|
||||
%typemap(out) float_image* corres {
|
||||
PyObject *o;
|
||||
npy_intp n_elem[2] = {$1->ty, $1->tx};
|
||||
o = PyArray_SimpleNewFromData(2,n_elem,NPY_FLOAT,$1->pixels);
|
||||
PyArray_FLAGS(o) |= NPY_OWNDATA;
|
||||
|
||||
// append to current function result as a tuple
|
||||
$result = o;
|
||||
|
||||
}
|
||||
%apply (float_image* corres) {(float_image* )};
|
||||
|
||||
float_image* deepmatching_numpy( color_image_t* cim1, color_image_t* cim2, char *options);
|
||||
|
||||
void usage_python();
|
||||
|
||||
%{
|
||||
#include "deep_matching.h"
|
||||
#include "io.h"
|
||||
#include "main.h"
|
||||
#include <string.h>
|
||||
|
||||
static inline bool ispowerof2( long n ) {
|
||||
return (n & (n-1))==0;
|
||||
}
|
||||
|
||||
float_image* deepmatching_numpy( color_image_t* cim1, color_image_t* cim2, char *options){
|
||||
// convert images to gray
|
||||
image_t *im1=image_gray_from_color(cim1), *im2=image_gray_from_color(cim2);
|
||||
|
||||
// set params to default
|
||||
dm_params_t params;
|
||||
set_default_dm_params(¶ms);
|
||||
scalerot_params_t sr_params;
|
||||
set_default_scalerot_params(&sr_params);
|
||||
bool use_scalerot = false;
|
||||
float fx=1, fy=1;
|
||||
|
||||
// read options
|
||||
if( options!=NULL ){
|
||||
int argc=0;
|
||||
const char* argv[256];
|
||||
argv[argc] = strtok(options," ");
|
||||
while(argv[argc]!=NULL)
|
||||
argv[++argc] = strtok(NULL," ");
|
||||
|
||||
parse_options(¶ms, &sr_params, &use_scalerot, &fx, &fy, argc, argv, PYTHON_OPTIONS, &im1, &im2);
|
||||
}
|
||||
|
||||
|
||||
if( use_scalerot )
|
||||
assert( params.ngh_rad == 0 || !"max trans cannot be used in full scale and rotation mode");
|
||||
else
|
||||
if( params.subsample_ref && (!ispowerof2(im1->width) || !ispowerof2(im1->height)) ) {
|
||||
fprintf(stderr, "WARNING: first image has dimension which are not power-of-2\n");
|
||||
fprintf(stderr, "For improved results, you should consider resizing the images with '-resize <w> <h>'\n");
|
||||
}
|
||||
|
||||
// compute deep matching
|
||||
float_image* corres = use_scalerot ?
|
||||
deep_matching_scale_rot( im1, im2, ¶ms, &sr_params ) :
|
||||
deep_matching ( im1, im2, ¶ms, NULL ); // standard call
|
||||
|
||||
image_delete(im1); image_delete(im2);
|
||||
return corres;
|
||||
}
|
||||
|
||||
void usage_python() {
|
||||
usage(PYTHON_OPTIONS);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
|
||||
%pythoncode %{
|
||||
from numpy import float32, rollaxis, ascontiguousarray
|
||||
def deepmatching( im1=None, im2=None, options=""):
|
||||
"""
|
||||
matches = deepmatching.deepmatching(image1, image2, options='')
|
||||
Compute the 'DeepMatching' between two images.
|
||||
Images must be HxWx3 numpy arrays (converted to float32).
|
||||
Options is an optional string argument ('' by default), to set the options.
|
||||
The function returns a numpy array with 6 columns, each row being x1 y1 x2 y2 score index.
|
||||
(index refers to the local maximum from which the match was retrieved)
|
||||
Version 1.2"""
|
||||
if None in (im1,im2):
|
||||
usage_python()
|
||||
return
|
||||
|
||||
# convert images
|
||||
if im1.dtype != float32:
|
||||
im1 = im1.astype(float32)
|
||||
if im2.dtype != float32:
|
||||
im2 = im2.astype(float32)
|
||||
assert len(im1.shape)==3 and len(im2.shape)==3, "images must have 3 dimensions"
|
||||
h, w, nchannels = im1.shape
|
||||
assert nchannels==3, "images must have 3 channels"
|
||||
im1 = ascontiguousarray(rollaxis(im1,2))
|
||||
im2 = ascontiguousarray(rollaxis(im2,2))
|
||||
corres = deepmatching_numpy( im1, im2, options)
|
||||
return corres
|
||||
%}
|
||||
@ -0,0 +1,28 @@
|
||||
% mex wrapper to compute the 'DeepMatching' between two images.
|
||||
%
|
||||
% matches = deepmatching(image1, image2, options)
|
||||
%
|
||||
% Images must be HxWx3 single matrices.
|
||||
% Options is an optional string argument ('' by default).
|
||||
% Available options are listed when calling deepmatching() without args.
|
||||
%
|
||||
% The function returns a matrix with 6 columns, each row being x1 y1 x2 y2 score index.
|
||||
% (index refers to the local maximum from which the match was retrieved)
|
||||
%
|
||||
% Version 1.2.2
|
||||
%
|
||||
% Copyright (C) 2014 Jerome Revaud
|
||||
%
|
||||
% This program is free software: you can redistribute it and/or modify
|
||||
% it under the terms of the GNU General Public License as published by
|
||||
% the Free Software Foundation, either version 3 of the License, or
|
||||
% (at your option) any later version.
|
||||
%
|
||||
% This program is distributed in the hope that it will be useful,
|
||||
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
% GNU General Public License for more details.
|
||||
%
|
||||
% You should have received a copy of the GNU General Public License
|
||||
% along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
%
|
@ -0,0 +1,135 @@
|
||||
# This file was automatically generated by SWIG (http://www.swig.org).
|
||||
# Version 3.0.7
|
||||
#
|
||||
# Do not make changes to this file unless you know what you are doing--modify
|
||||
# the SWIG interface file instead.
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Module to compute DeepMatching
|
||||
"""
|
||||
|
||||
|
||||
from sys import version_info
|
||||
if version_info >= (2, 6, 0):
|
||||
def swig_import_helper():
|
||||
from os.path import dirname
|
||||
import imp
|
||||
fp = None
|
||||
try:
|
||||
fp, pathname, description = imp.find_module('_deepmatching', [dirname(__file__)])
|
||||
except ImportError:
|
||||
import _deepmatching
|
||||
return _deepmatching
|
||||
if fp is not None:
|
||||
try:
|
||||
_mod = imp.load_module('_deepmatching', fp, pathname, description)
|
||||
finally:
|
||||
fp.close()
|
||||
return _mod
|
||||
_deepmatching = swig_import_helper()
|
||||
del swig_import_helper
|
||||
else:
|
||||
import _deepmatching
|
||||
del version_info
|
||||
try:
|
||||
_swig_property = property
|
||||
except NameError:
|
||||
pass # Python < 2.2 doesn't have 'property'.
|
||||
|
||||
|
||||
def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
|
||||
if (name == "thisown"):
|
||||
return self.this.own(value)
|
||||
if (name == "this"):
|
||||
if type(value).__name__ == 'SwigPyObject':
|
||||
self.__dict__[name] = value
|
||||
return
|
||||
method = class_type.__swig_setmethods__.get(name, None)
|
||||
if method:
|
||||
return method(self, value)
|
||||
if (not static):
|
||||
if _newclass:
|
||||
object.__setattr__(self, name, value)
|
||||
else:
|
||||
self.__dict__[name] = value
|
||||
else:
|
||||
raise AttributeError("You cannot add attributes to %s" % self)
|
||||
|
||||
|
||||
def _swig_setattr(self, class_type, name, value):
|
||||
return _swig_setattr_nondynamic(self, class_type, name, value, 0)
|
||||
|
||||
|
||||
def _swig_getattr_nondynamic(self, class_type, name, static=1):
|
||||
if (name == "thisown"):
|
||||
return self.this.own()
|
||||
method = class_type.__swig_getmethods__.get(name, None)
|
||||
if method:
|
||||
return method(self)
|
||||
if (not static):
|
||||
return object.__getattr__(self, name)
|
||||
else:
|
||||
raise AttributeError(name)
|
||||
|
||||
def _swig_getattr(self, class_type, name):
|
||||
return _swig_getattr_nondynamic(self, class_type, name, 0)
|
||||
|
||||
|
||||
def _swig_repr(self):
|
||||
try:
|
||||
strthis = "proxy of " + self.this.__repr__()
|
||||
except:
|
||||
strthis = ""
|
||||
return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,)
|
||||
|
||||
try:
|
||||
_object = object
|
||||
_newclass = 1
|
||||
except AttributeError:
|
||||
class _object:
|
||||
pass
|
||||
_newclass = 0
|
||||
|
||||
|
||||
|
||||
def deepmatching_numpy(cim1, cim2, options):
|
||||
return _deepmatching.deepmatching_numpy(cim1, cim2, options)
|
||||
deepmatching_numpy = _deepmatching.deepmatching_numpy
|
||||
|
||||
def usage_python():
|
||||
return _deepmatching.usage_python()
|
||||
usage_python = _deepmatching.usage_python
|
||||
|
||||
from numpy import float32, rollaxis, ascontiguousarray
|
||||
def deepmatching( im1=None, im2=None, options=""):
|
||||
"""
|
||||
matches = deepmatching.deepmatching(image1, image2, options='')
|
||||
Compute the 'DeepMatching' between two images.
|
||||
Images must be HxWx3 numpy arrays (converted to float32).
|
||||
Options is an optional string argument ('' by default), to set the options.
|
||||
The function returns a numpy array with 6 columns, each row being x1 y1 x2 y2 score index.
|
||||
(index refers to the local maximum from which the match was retrieved)
|
||||
Version 1.2"""
|
||||
if None in (im1,im2):
|
||||
usage_python()
|
||||
return
|
||||
|
||||
# convert images
|
||||
if im1.dtype != float32:
|
||||
im1 = im1.astype(float32)
|
||||
if im2.dtype != float32:
|
||||
im2 = im2.astype(float32)
|
||||
assert len(im1.shape)==3 and len(im2.shape)==3, "images must have 3 dimensions"
|
||||
h, w, nchannels = im1.shape
|
||||
assert nchannels==3, "images must have 3 channels"
|
||||
im1 = ascontiguousarray(rollaxis(im1,2))
|
||||
im2 = ascontiguousarray(rollaxis(im2,2))
|
||||
corres = deepmatching_numpy( im1, im2, options)
|
||||
return corres
|
||||
|
||||
# This file is compatible with both classic and new-style classes.
|
||||
|
||||
|
@ -0,0 +1,165 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include <mex.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
void std_printf(const char* format, ... ) {
|
||||
va_list arglist;
|
||||
va_start( arglist, format );
|
||||
char buffer[1024];
|
||||
vsprintf( buffer, format, arglist );
|
||||
va_end(arglist);
|
||||
|
||||
mexPrintf(buffer);
|
||||
}
|
||||
|
||||
void err_printf(const char* format, ... ) {
|
||||
va_list arglist;
|
||||
va_start( arglist, format );
|
||||
char buffer[1024];
|
||||
vsprintf( buffer, format, arglist );
|
||||
va_end(arglist);
|
||||
|
||||
mexErrMsgTxt(buffer);
|
||||
}
|
||||
|
||||
|
||||
#include "image.h"
|
||||
#include "deep_matching.h"
|
||||
#include "io.h"
|
||||
#include "main.h"
|
||||
|
||||
|
||||
static inline bool ispowerof2( long n ) {
|
||||
return (n & (n-1))==0;
|
||||
}
|
||||
|
||||
color_image_t *input3darray_to_color_image(const mxArray *p){
|
||||
const int *dims = mxGetDimensions(p);
|
||||
const int h = dims[0], w = dims[1];
|
||||
assert( dims[2]==3 );
|
||||
float *in = (float*) mxGetData(p);
|
||||
color_image_t *out = color_image_new(w, h);
|
||||
for(int c=0 ; c<3 ; c++){
|
||||
float *inptr = in + c*w*h;
|
||||
float *outptr = out->c1 + c*w*h;
|
||||
for( int j=0 ; j<h ; j++){
|
||||
for( int i=0 ; i<w ; i++){
|
||||
outptr[j*w+i] = inptr[i*h+j];
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
void corres_to_output(float_image *corres, mxArray *p){
|
||||
const int h = corres->ty, w = corres->tx;
|
||||
float *data = (float*) mxGetData(p);
|
||||
for( int j=0 ; j<h ; j++) {
|
||||
for( int i=0 ; i<w ; i++) {
|
||||
data[i*h+j] = corres->pixels[j*w+i];
|
||||
}
|
||||
}
|
||||
}
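// Note (added for clarity, not in the original file): MATLAB stores matrices
// column-major, so element (row j, column i) of the h-by-w output lives at
// data[i*h + j], while the C-order corres buffer keeps it at pixels[j*w + i];
// the double loop above simply converts between the two layouts.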
|
||||
|
||||
void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) {
|
||||
|
||||
if( nr==0 ) {
|
||||
usage(MATLAB_OPTIONS);
|
||||
return;
|
||||
}
|
||||
|
||||
if ( nl != 1){
|
||||
usage(MATLAB_OPTIONS);
|
||||
mexErrMsgTxt("error: returns one output");
|
||||
}
|
||||
if( nr < 2 || nr > 3){
|
||||
usage(MATLAB_OPTIONS);
|
||||
mexErrMsgTxt("error: takes two to four inputs");
|
||||
}
|
||||
|
||||
// The code is originally written for C-order arrays.
|
||||
// We thus transpose all arrays in this mex-function, which is not efficient...
|
||||
|
||||
const int *pDims;
|
||||
if(mxGetNumberOfDimensions(pr[0]) != 3) mexErrMsgTxt("input images must have 3 dimensions");
|
||||
if(!mxIsClass(pr[0], "single")) mexErrMsgTxt("input images must be single");
|
||||
pDims = mxGetDimensions(pr[0]);
|
||||
if( pDims[2]!=3 ) mexErrMsgTxt("input images must have 3 channels");
|
||||
const int h = pDims[0], w = pDims[1];
|
||||
color_image_t *cim1 = input3darray_to_color_image( pr[0] );
|
||||
|
||||
if(mxGetNumberOfDimensions(pr[1]) != 3) mexErrMsgTxt("input images must have 3 dimensions");
|
||||
if(!mxIsClass(pr[1], "single")) mexErrMsgTxt("input images must be single");
|
||||
pDims = mxGetDimensions(pr[1]);
|
||||
if( pDims[2]!=3) mexErrMsgTxt("input images must have 3 channels");
|
||||
color_image_t *cim2 = input3darray_to_color_image( pr[1] );
|
||||
|
||||
// convert images to gray
|
||||
image_t *im1=image_gray_from_color(cim1), *im2=image_gray_from_color(cim2);
|
||||
color_image_delete(cim1);
|
||||
color_image_delete(cim2);
|
||||
|
||||
// set params to default
|
||||
dm_params_t params;
|
||||
set_default_dm_params(¶ms);
|
||||
scalerot_params_t sr_params;
|
||||
set_default_scalerot_params(&sr_params);
|
||||
bool use_scalerot = false;
|
||||
float fx=1, fy=1;
|
||||
|
||||
// read options
|
||||
if( nr == 3 ){
|
||||
char *options = mxArrayToString(pr[2]);
|
||||
if( !options ) mexErrMsgTxt("Third parameter must be a string");
|
||||
int argc=0;
|
||||
const char* argv[256];
|
||||
argv[argc] = strtok(options," ");
|
||||
while(argv[argc]!=NULL)
|
||||
argv[++argc] = strtok(NULL," ");
|
||||
|
||||
parse_options(¶ms, &sr_params, &use_scalerot, &fx, &fy, argc, argv, MATLAB_OPTIONS, &im1, &im2);
|
||||
}
|
||||
|
||||
if( use_scalerot )
|
||||
assert( params.ngh_rad == 0 || !"max trans cannot be used in full scale and rotation mode");
|
||||
else
|
||||
if( params.subsample_ref && (!ispowerof2(im1->width) || !ispowerof2(im1->height)) ) {
|
||||
std_printf("WARNING: first image has dimension which are not power-of-2\n");
|
||||
std_printf("For improved results, you should consider resizing the images with '-resize <w> <h>'\n");
|
||||
}
|
||||
|
||||
// compute deep matching
|
||||
float_image* corres = use_scalerot ?
|
||||
deep_matching_scale_rot( im1, im2, ¶ms, &sr_params ) :
|
||||
deep_matching ( im1, im2, ¶ms, NULL ); // standard call
|
||||
|
||||
// output
|
||||
pl[0] = mxCreateNumericMatrix(corres->ty, corres->tx, mxSINGLE_CLASS, mxREAL);
|
||||
corres_to_output(corres, pl[0]);
|
||||
|
||||
image_delete(im1);
|
||||
image_delete(im2);
|
||||
free_image(corres);
|
||||
return;
|
||||
}
|
[binary image added, 78 KiB]
[binary image added, 29 KiB]
@ -0,0 +1,803 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "hog.h"
|
||||
#include "std.h"
|
||||
|
||||
|
||||
/* compute horizontal gradient centered with [-1,0,1] mask
|
||||
*/
|
||||
void _diff_horiz(int tx, int ty, UBYTE* pixels, float* res) {
|
||||
int x,y,pos=0;
|
||||
float* r=res;
|
||||
for(y=0; y<ty; y++,pos+=tx) {
|
||||
*r++ = pixels[1+pos] - pixels[0+pos];
|
||||
for(x=1; x<tx-1; x++)
|
||||
*r++ = pixels[x+1+pos] - pixels[x-1+pos];
|
||||
*r++ = pixels[x+pos] - pixels[x-1+pos];
|
||||
}
|
||||
}
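// Tiny illustration (not from the original file): on a single row {10, 20, 40},
// the centered [-1,0,1] mask above gives {20-10, 40-10, 40-20} = {10, 30, 20};
// the first and last samples fall back to one-sided differences.
static void diff_horiz_example() {
  UBYTE row[3] = {10, 20, 40};
  float g[3];
  _diff_horiz(3, 1, row, g);
  assert( g[0] == 10 && g[1] == 30 && g[2] == 20 );
}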
|
||||
|
||||
/* compute vertical gradient centered with [-1,0,1] mask
|
||||
*/
|
||||
void _diff_vert(int tx, int ty, UBYTE* pixels, float* res) {
|
||||
int x,y,pos=0;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = pixels[pos+tx] - pixels[pos];
|
||||
for(y=1; y<ty-1; y++) {
|
||||
pos = y*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = pixels[pos+tx] - pixels[pos-tx];
|
||||
}
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = pixels[pos] - pixels[pos-tx];
|
||||
}
|
||||
|
||||
/* compute original, unsmoothed, gradient
|
||||
*/
|
||||
void _compute_pure_gradient( UBYTE_image* img, float_layers* grad ) {
|
||||
ASSERT_SAME_SIZE(img,grad);
|
||||
assert(grad->tz==2);
|
||||
int tx = img->tx;
|
||||
int ty = img->ty;
|
||||
|
||||
// compute horizontal gradient
|
||||
_diff_vert(tx,ty,img->pixels,grad->pixels);
|
||||
|
||||
// compute vertical gradient
|
||||
_diff_horiz(tx,ty,img->pixels,grad->pixels+tx*ty);
|
||||
}
|
||||
|
||||
/* compute horizontal smoothing with 3-sized mask
|
||||
*/
|
||||
template<typename TData>
|
||||
void _smooth_3_horiz(int tx, int ty, const int w_center, const int w_side, TData* pixels, TData* _res, int n_thread) {
|
||||
int y;
|
||||
const int sum_w = 2*w_side + w_center;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(y=0; y<ty; y++) {
|
||||
int x,pos = y*tx;
|
||||
TData* res = _res + pos;
|
||||
*res++ = ( (w_center+w_side)*pixels[0+pos] + w_side*pixels[1+pos])/sum_w;
|
||||
for(x=1; x<tx-1; x++)
|
||||
*res++ = (w_side*pixels[x+1+pos] + w_center*pixels[x+pos] + w_side*pixels[x-1+pos])/sum_w;
|
||||
*res++ = ( (w_center+w_side)*pixels[x+pos] + w_side*pixels[x-1+pos])/sum_w;
|
||||
}
|
||||
}
|
||||
void _smooth_121_horiz(int tx, int ty, UBYTE* pixels, UBYTE* res, int n_thread) {
|
||||
_smooth_3_horiz( tx, ty, 2, 1, pixels, res, n_thread );
|
||||
}
|
||||
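/* Note (added explanation): _smooth_121_horiz above is the 3-tap binomial smoother
   used by the Sobel-style gradient further below: with w_center=2 and w_side=1 it
   computes (1*left + 2*center + 1*right)/4 per pixel, replicating the border sample
   at the two row ends (the (w_center+w_side) term). A minimal standalone sketch of
   the same kernel, assuming only standard C:

     // out[i] = (in[i-1] + 2*in[i] + in[i+1]) / 4, with clamped borders
     void smooth_121_row(const unsigned char* in, unsigned char* out, int n) {
         for (int i = 0; i < n; i++) {
             int l = (i > 0)     ? in[i-1] : in[i];
             int r = (i < n-1)   ? in[i+1] : in[i];
             out[i] = (unsigned char)((l + 2*in[i] + r) / 4);
         }
     }
*/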
template<typename TData>
|
||||
void _smooth_5_horiz( int tx, int ty, const int w_center, const int w_side1, const int w_side2,
|
||||
TData* pixels, TData* _res, int n_thread) {
|
||||
int y;
|
||||
const int sum_w = 2*(w_side1 + w_side2) + w_center;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(y=0; y<ty; y++) {
|
||||
int x,pos = y*tx;
|
||||
TData* res = _res + pos;
|
||||
x=0;
|
||||
*res++ = (
|
||||
w_side2 * pixels[x +pos] +
|
||||
w_side1 * pixels[x +pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] ) / sum_w;
|
||||
x++;
|
||||
*res++ = (
|
||||
w_side2 * pixels[x-1+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] ) / sum_w;
|
||||
|
||||
for(x=2; x<tx-2; x++)
|
||||
*res++ = (
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] ) / sum_w;
|
||||
|
||||
*res++ = (
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+1+pos] ) / sum_w;
|
||||
x++;
|
||||
*res++ = (
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x +pos] +
|
||||
w_side2 * pixels[x +pos] ) / sum_w;
|
||||
}
|
||||
}
|
||||
template<typename TData>
|
||||
void _smooth_7_horiz(int tx, int ty, const int w_center, const int w_side1, const int w_side2, const int w_side3,
|
||||
TData* pixels, TData* _res, int n_thread) {
|
||||
int y;
|
||||
const int sum_w = 2*(w_side1 + w_side2 + w_side3) + w_center;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(y=0; y<ty; y++) {
|
||||
int x,pos = y*tx;
|
||||
TData* res = _res + pos;
|
||||
x=0;
|
||||
*res++ = (
|
||||
w_side3 * pixels[x +pos] +
|
||||
w_side2 * pixels[x +pos] +
|
||||
w_side1 * pixels[x +pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] +
|
||||
w_side3 * pixels[x+3+pos] ) / sum_w;
|
||||
x++;
|
||||
*res++ = (
|
||||
w_side3 * pixels[x-1+pos] +
|
||||
w_side2 * pixels[x-1+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] +
|
||||
w_side3 * pixels[x+3+pos] ) / sum_w;
|
||||
x++;
|
||||
*res++ = (
|
||||
w_side3 * pixels[x-2+pos] +
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] +
|
||||
w_side3 * pixels[x+3+pos] ) / sum_w;
|
||||
|
||||
for(x=3; x<tx-3; x++)
|
||||
*res++ = (
|
||||
w_side3 * pixels[x-3+pos] +
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] +
|
||||
w_side3 * pixels[x+3+pos] ) / sum_w;
|
||||
|
||||
*res++ = (
|
||||
w_side3 * pixels[x-3+pos] +
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+2+pos] +
|
||||
w_side3 * pixels[x+2+pos] ) / sum_w;
|
||||
x++;
|
||||
*res++ = (
|
||||
w_side3 * pixels[x-3+pos] +
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x+1+pos] +
|
||||
w_side2 * pixels[x+1+pos] +
|
||||
w_side3 * pixels[x+1+pos] ) / sum_w;
|
||||
x++;
|
||||
*res++ = (
|
||||
w_side3 * pixels[x-3+pos] +
|
||||
w_side2 * pixels[x-2+pos] +
|
||||
w_side1 * pixels[x-1+pos] +
|
||||
w_center* pixels[x +pos] +
|
||||
w_side1 * pixels[x +pos] +
|
||||
w_side2 * pixels[x +pos] +
|
||||
w_side3 * pixels[x +pos] ) / sum_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* compute vertical smoothing with 3-sized mask
|
||||
*/
|
||||
template<typename TData>
|
||||
void _smooth_3_vert(int tx, int ty, const int w_center, const int w_side, TData* pixels, TData* res, int n_thread) {
|
||||
int x,y,pos=0;
|
||||
const int sum_w = 2*w_side + w_center;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = ( (w_center+w_side)*pixels[pos] + w_side*pixels[pos+tx])/sum_w;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(y=1; y<ty-1; y++) {
|
||||
int x,pos = y*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = ( w_side*pixels[pos+tx] + w_center*pixels[pos] + w_side*pixels[pos-tx])/sum_w;
|
||||
}
|
||||
pos = (ty-1)*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = ( (w_center+w_side)*pixels[pos] + w_side*pixels[pos-tx])/sum_w;
|
||||
}
|
||||
void _smooth_121_vert(int tx, int ty, UBYTE* pixels, UBYTE* res, int n_thread) {
|
||||
_smooth_3_vert( tx, ty, 2, 1, pixels, res, n_thread );
|
||||
}
|
||||
template<typename TData>
|
||||
void _smooth_5_vert(int tx, int ty, const int w_center, const int w_side1, const int w_side2,
|
||||
TData* pixels, TData* res, int n_thread) {
|
||||
int x,y,pos=0;
|
||||
const int sum_w = 2*(w_side1 + w_side2) + w_center;
|
||||
const int tx1=tx,tx2=2*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side2 * pixels[pos] +
|
||||
w_side1 * pixels[pos] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2]
|
||||
)/sum_w;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side2 * pixels[pos-tx1] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2]
|
||||
)/sum_w;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(y=2; y<ty-2; y++) {
|
||||
int x,pos = y*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2]
|
||||
)/sum_w;
|
||||
}
|
||||
pos = (ty-2)*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx1]
|
||||
)/sum_w;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos] +
|
||||
w_side2 * pixels[pos]
|
||||
)/sum_w;
|
||||
}
|
||||
template<typename TData>
|
||||
void _smooth_7_vert(int tx, int ty, const int w_center, const int w_side1, const int w_side2, const int w_side3,
|
||||
TData* pixels, TData* res, int n_thread) {
|
||||
int x,y,pos=0;
|
||||
const int sum_w = 2*(w_side1 + w_side2 + w_side3) + w_center;
|
||||
const int tx1=tx,tx2=2*tx,tx3=3*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos] +
|
||||
w_side2 * pixels[pos] +
|
||||
w_side1 * pixels[pos] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2] +
|
||||
w_side3 * pixels[pos+tx3]
|
||||
)/sum_w;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos-tx1] +
|
||||
w_side2 * pixels[pos-tx1] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2] +
|
||||
w_side3 * pixels[pos+tx3]
|
||||
)/sum_w;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos-tx2] +
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2] +
|
||||
w_side3 * pixels[pos+tx3]
|
||||
)/sum_w;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(y=3; y<ty-3; y++) {
|
||||
int x,pos = y*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos-tx3] +
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2] +
|
||||
w_side3 * pixels[pos+tx3]
|
||||
)/sum_w;
|
||||
}
|
||||
pos = (ty-3)*tx;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos-tx3] +
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx2] +
|
||||
w_side3 * pixels[pos+tx2]
|
||||
)/sum_w;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos-tx3] +
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos+tx1] +
|
||||
w_side2 * pixels[pos+tx1] +
|
||||
w_side3 * pixels[pos+tx1]
|
||||
)/sum_w;
|
||||
for(x=0; x<tx; x++,pos++)
|
||||
res[pos] = (
|
||||
w_side3 * pixels[pos-tx3] +
|
||||
w_side2 * pixels[pos-tx2] +
|
||||
w_side1 * pixels[pos-tx1] +
|
||||
w_center* pixels[pos] +
|
||||
w_side1 * pixels[pos] +
|
||||
w_side2 * pixels[pos] +
|
||||
w_side3 * pixels[pos]
|
||||
)/sum_w;
|
||||
}
|
||||
|
||||
/* Smooth an image using a Gaussian filter.
|
||||
*/
|
||||
template<typename TData>
|
||||
void _smooth_gaussian_alltype( const int tx, const int ty, TData* img, float _sigma, TData* res, int n_thread ) {
|
||||
const float MAX_SIGMA = 1.86f;
|
||||
|
||||
TData* img2 = img;
|
||||
if(_sigma>MAX_SIGMA) { // reallocate if more than one smoothing pass is required
|
||||
img2 = NEWA(TData,tx*ty);
|
||||
memcpy(img2,img,tx*ty*sizeof(TData));
|
||||
}
|
||||
TData* tmp = NEWA(TData,tx*ty);
|
||||
TData* old_res = res;
|
||||
|
||||
float remaining = _sigma*_sigma;
|
||||
while( 1 ) {
|
||||
float sigma = MIN(MAX_SIGMA,sqrt(remaining));
|
||||
remaining -= sigma*sigma;
|
||||
|
||||
// compute gaussian filter coefficients
|
||||
const int wcenter = 1000;
|
||||
const int wside1 = int(0.5 + wcenter*exp( -pow2(1./sigma)/2 ));
|
||||
const int wside2 = int(0.5 + wcenter*exp( -pow2(2./sigma)/2 ));
|
||||
const int wside3 = int(0.5 + wcenter*exp( -pow2(3./sigma)/2 ));
|
||||
const int wside4 = int(0.5 + wcenter*exp( -pow2(4./sigma)/2 ));
|
||||
assert( wside4 < wcenter/10 || !"error: smoothing is too large" );
|
||||
|
||||
if ( wside2 < wcenter/10 ) {
|
||||
_smooth_3_horiz( tx, ty, wcenter, wside1, img2, tmp, n_thread );
|
||||
_smooth_3_vert( tx, ty, wcenter, wside1, tmp, res, n_thread );
|
||||
} else if( wside3 < wcenter/10 ) {
|
||||
_smooth_5_horiz( tx, ty, wcenter, wside1, wside2, img2, tmp, n_thread );
|
||||
_smooth_5_vert( tx, ty, wcenter, wside1, wside2, tmp, res, n_thread );
|
||||
} else {
|
||||
_smooth_7_horiz( tx, ty, wcenter, wside1, wside2, wside3, img2, tmp, n_thread );
|
||||
_smooth_7_vert( tx, ty, wcenter, wside1, wside2, wside3, tmp, res, n_thread );
|
||||
}
|
||||
|
||||
if(remaining < 0.001)
|
||||
break;
|
||||
else {
|
||||
TData* tmp3;
|
||||
tmp3 = img2;
|
||||
img2 = res;
|
||||
res = tmp3;
|
||||
}
|
||||
}
|
||||
|
||||
if(res!=old_res) { // copy to true res
|
||||
memcpy(old_res,res,tx*ty*sizeof(TData));
|
||||
img2 = res;
|
||||
}
|
||||
if(_sigma>MAX_SIGMA)
|
||||
free(img2);
|
||||
free(tmp);
|
||||
}
|
||||
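/* Note (added explanation): _smooth_gaussian_alltype approximates a Gaussian of
   standard deviation _sigma by repeated small smoothing passes. Because convolving
   two Gaussians adds their variances, each pass uses
   sigma_i = min(MAX_SIGMA, sqrt(remaining)) and subtracts sigma_i^2 from the
   remaining variance until it is (almost) exhausted. Worked example for _sigma = 3
   with MAX_SIGMA = 1.86 (1.86^2 ~= 3.46):
     pass 1: sigma = 1.86, remaining = 9.00 - 3.46 = 5.54
     pass 2: sigma = 1.86, remaining = 5.54 - 3.46 = 2.08
     pass 3: sigma = sqrt(2.08) ~= 1.44, remaining ~= 0  -> done after 3 passes.
   Within each pass, the kernel width (3, 5 or 7 taps) is picked from how quickly
   the integer weights wside2/wside3 decay relative to wcenter. */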
|
||||
void _smooth_gaussian( UBYTE_image* img, float _sigma, UBYTE_image* res, int n_thread ) {
|
||||
ASSERT_SAME_SIZE(img,res);
|
||||
_smooth_gaussian_alltype(img->tx,img->ty,img->pixels,_sigma,res->pixels,n_thread);
|
||||
}
|
||||
|
||||
|
||||
/* compute gradient smoothed with Sobel mask
|
||||
*/
|
||||
void _compute_sobel_gradient( UBYTE_image* img, float_layers* grad, int n_thread ) {
|
||||
ASSERT_SAME_SIZE(img,grad);
|
||||
assert(grad->tz==2);
|
||||
int tx = img->tx;
|
||||
int ty = img->ty;
|
||||
UBYTE* tmp = NEWA(UBYTE,tx*ty);
|
||||
|
||||
// compute horizontal gradient
|
||||
_smooth_121_horiz(tx,ty,img->pixels,tmp, n_thread);
|
||||
_diff_vert(tx,ty,tmp,grad->pixels);
|
||||
|
||||
// compute vertical gradient
|
||||
_smooth_121_vert(tx,ty,img->pixels,tmp, n_thread);
|
||||
_diff_horiz(tx,ty,tmp,grad->pixels+tx*ty);
|
||||
|
||||
// free everything
|
||||
free(tmp);
|
||||
}
|
||||
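/* Note (added explanation): _compute_sobel_gradient exploits the separability of
   the Sobel masks: each 3x3 Sobel derivative factorizes into a [1,2,1] smoothing
   along one axis and a [-1,0,1] central difference along the other, which is
   exactly the _smooth_121_* followed by _diff_* pair used above. This costs two
   cheap 1-D passes per gradient component instead of a full 3x3 convolution. */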
|
||||
/* Compute the dx,dy gradient on the image based on a [-1,0,1] mask.
|
||||
method =0 : no prior smoothing
|
||||
=1 : sobel smoothing
|
||||
*/
|
||||
void _compute_grad_101( UBYTE_image* img, int method, float_layers* grad, int n_thread ) {
|
||||
ASSERT_SAME_SIZE(img,grad);
|
||||
assert(grad->tz==2);
|
||||
|
||||
// compute gradient
|
||||
if( method == 0 )
|
||||
_compute_pure_gradient(img, grad);
|
||||
else if( method == 1 )
|
||||
_compute_sobel_gradient(img, grad, n_thread);
|
||||
else
|
||||
assert(!"error: unknown method for compute_grad_101");
|
||||
}
|
||||
|
||||
|
||||
/* Compute the Histogram of oriented gradient for each pixel.
|
||||
Number of orientations is determined by hog->tz;
|
||||
method determines orientation binning:
|
||||
=0 : atan + linear interpolation
|
||||
=1 : fast cos projection
|
||||
*/
|
||||
void _compute_hog( float_layers* grad, int method, float_layers* hog, int n_thread ) {
|
||||
ASSERT_SAME_SIZE(grad,hog);
|
||||
const int n_ori = hog->tz;
|
||||
const int npix = hog->tx*hog->ty;
|
||||
|
||||
const float* dx = grad->pixels;
|
||||
const float* dy = grad->pixels + npix;
|
||||
|
||||
if( method == 0 ) {
|
||||
// use atan
|
||||
memset(hog->pixels,0,n_ori*npix*sizeof(float));
|
||||
int i;
|
||||
for(i=0; i<npix; i++) {
|
||||
float norm = sqrt(dy[i]*dy[i] + dx[i]*dx[i]);
|
||||
float angle = atan2(dy[i],dx[i]); // angle in [-pi,pi]
|
||||
|
||||
float b_angle = (angle + M_PI)/n_ori;
|
||||
int q_angle = int(0.5 + b_angle); // first bin
|
||||
float coef = b_angle-q_angle;
|
||||
q_angle = (q_angle + 3*n_ori/2) % n_ori;
|
||||
|
||||
hog->pixels[ ((q_angle ) )*npix + i ] += (1-coef)*norm;
|
||||
hog->pixels[ ((q_angle+1)%n_ori)*npix + i ] += ( coef)*norm;
|
||||
}
|
||||
} else if (method == 1 ) {
|
||||
int l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_ori; l++) {
|
||||
float angle = -2*(l-2)*M_PI/n_ori;
|
||||
float kos = cos( angle );
|
||||
float zin = sin( angle );
|
||||
float* layer_l = hog->pixels + l*npix;
|
||||
int i;
|
||||
for(i=0; i<npix; i++) {
|
||||
float value = kos*dx[i] + zin*dy[i];
|
||||
layer_l[i] = (value > 0 ) ? value : 0;
|
||||
}
|
||||
}
|
||||
} else
|
||||
assert(!"error: unknown method for compute_hog");
|
||||
}
|
||||
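/* Note (added explanation): the two binning modes of _compute_hog above differ as
   follows. Method 0 computes the exact gradient orientation with atan2 and splits
   the gradient magnitude between the two nearest of the n_ori bins by linear
   interpolation (soft assignment). Method 1 avoids atan2 entirely: for each bin l
   it projects the gradient (dx,dy) onto a fixed direction (cos a, sin a) and keeps
   only the positive part of the projection (half-wave rectification), hence the
   name "fast cos projection". A hedged usage sketch, assuming float_layers carries
   {tx, ty, tz, pixels} as used throughout this file:

     // grad: 2 layers filled by _compute_grad_101(); hog: n_ori layers, same tx/ty
     // _compute_hog(&grad, 1, &hog, 4);   // method 1, 4 threads
*/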
|
||||
|
||||
/* compute 8 directions of gradient per pixel
|
||||
using 4 extremely simple oriented filters like [-1,1]
|
||||
*/
|
||||
void _compute_hog_8_direct( UBYTE_image* image, float_layers* hog_out, int n_thread ) {
|
||||
ASSERT_SAME_SIZE(image,hog_out);
|
||||
assert(hog_out->tz==8);
|
||||
int j,tx=image->tx, ty=image->ty;
|
||||
int npix=tx*image->ty;
|
||||
|
||||
// init output
|
||||
memset(hog_out->pixels,0,8*npix*sizeof(float));
|
||||
|
||||
// compute horizontal filter
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(j=0; j<ty; j++) {
|
||||
UBYTE* img = image->pixels + j*tx;
|
||||
UBYTE* lastimg = img + tx-1;
|
||||
float* hog0f = hog_out->pixels + 0*npix + j*tx; // first
|
||||
float* hog0l = hog0f+1; // last
|
||||
float* hog1f = hog_out->pixels + 4*npix + j*tx; // first
|
||||
float* hog1l = hog1f+1; // last
|
||||
|
||||
for(; img<lastimg; img++) {
|
||||
float diff = img[1] - img[0];
|
||||
float pos,neg;
|
||||
if( diff < 0 ) {
|
||||
pos = 0;
|
||||
neg = -diff/2.f;
|
||||
} else {
|
||||
neg = 0;
|
||||
pos = diff/2.f;
|
||||
}
|
||||
*hog0f++ += neg;
|
||||
*hog1f++ += pos;
|
||||
*hog0l++ += neg;
|
||||
*hog1l++ += pos;
|
||||
}
|
||||
}
|
||||
|
||||
// compute vertical filter
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(j=0; j<ty-1; j++) {
|
||||
UBYTE* img = image->pixels + j*tx;
|
||||
UBYTE* lastimg = img + tx;
|
||||
const int offset = tx;
|
||||
UBYTE* img2 = img + offset;
|
||||
float* hog0f = hog_out->pixels + 2*npix + j*tx; // first
|
||||
float* hog0l = hog0f + offset; // last
|
||||
float* hog1f = hog_out->pixels + 6*npix + j*tx; // first
|
||||
float* hog1l = hog1f + offset; // last
|
||||
|
||||
while(img<lastimg) {
|
||||
float diff = (*img2++) - (*img++);
|
||||
float pos,neg;
|
||||
if( diff < 0 ) {
|
||||
pos = 0;
|
||||
neg = -diff/2.f;
|
||||
} else {
|
||||
neg = 0;
|
||||
pos = diff/2.f;
|
||||
}
|
||||
*hog0f++ += neg;
|
||||
*hog1f++ += pos;
|
||||
*hog0l++ += neg;
|
||||
*hog1l++ += pos;
|
||||
}
|
||||
}
|
||||
|
||||
const float div_diag = 2*1.2666f; // learned
|
||||
|
||||
// compute diagonal filter 1
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(j=0; j<ty-1; j++) {
|
||||
UBYTE* img = image->pixels + j*tx;
|
||||
UBYTE* lastimg = img + tx-1;
|
||||
const int offset = 1+tx;
|
||||
UBYTE* img2 = img + offset;
|
||||
float* hog0f = hog_out->pixels + 1*npix + j*tx; // first
|
||||
float* hog0l = hog0f + offset; // last
|
||||
float* hog1f = hog_out->pixels + 5*npix + j*tx; // first
|
||||
float* hog1l = hog1f + offset; // last
|
||||
|
||||
while(img<lastimg) {
|
||||
float diff = (*img2++) - (*img++);
|
||||
float pos,neg;
|
||||
if( diff < 0 ) {
|
||||
pos = 0;
|
||||
neg = -diff/div_diag;
|
||||
} else {
|
||||
neg = 0;
|
||||
pos = diff/div_diag;
|
||||
}
|
||||
*hog0f++ += neg;
|
||||
*hog1f++ += pos;
|
||||
*hog0l++ += neg;
|
||||
*hog1l++ += pos;
|
||||
}
|
||||
}
|
||||
|
||||
// compute diagonal filter 2
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(j=1; j<ty; j++) {
|
||||
UBYTE* img = image->pixels + j*tx;
|
||||
UBYTE* lastimg = img + tx-1;
|
||||
const int offset = 1-tx;
|
||||
UBYTE* img2 = img + offset;
|
||||
float* hog0f = hog_out->pixels + 7*npix + j*tx; // first
|
||||
float* hog0l = hog0f + offset; // last
|
||||
float* hog1f = hog_out->pixels + 3*npix + j*tx; // first
|
||||
float* hog1l = hog1f + offset; // last
|
||||
|
||||
while(img<lastimg) {
|
||||
float diff = (*img2++) - (*img++);
|
||||
float pos,neg;
|
||||
if( diff < 0 ) {
|
||||
pos = 0;
|
||||
neg = -diff/div_diag;
|
||||
} else {
|
||||
neg = 0;
|
||||
pos = diff/div_diag;
|
||||
}
|
||||
*hog0f++ += neg;
|
||||
*hog1f++ += pos;
|
||||
*hog0l++ += neg;
|
||||
*hog1l++ += pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
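/* Note (added explanation): _compute_hog_8_direct runs four [-1,1] filters
   (horizontal, vertical and the two diagonals). For each filter, the positive part
   of the difference is accumulated into one orientation layer and the negative part
   into the opposite layer (hence 8 output layers), and every response is added to
   both pixels of the pair (the hog*f / hog*l pointer pairs), so each pixel collects
   contributions from the edges on either side of it. Diagonal responses are divided
   by div_diag (~2.53) instead of 2 to account for the longer pixel distance; the
   1.2666 factor is noted above as learned. */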
|
||||
|
||||
/* Post-processing of the HOG: cross-orientation inhibition.
|
||||
for one pixel i and orientation o: hog[i,o] = max(0, hog[i,o] - coef*hog[i,:].mean())
|
||||
*/
|
||||
void subtract_mean_ori( float_layers* hog, float coef, int n_thread ) {
|
||||
const int npix = hog->tx*hog->ty;
|
||||
int l;
|
||||
float* sum = NEWAC(float, npix);
|
||||
float* max = NEWAC(float, npix);
|
||||
|
||||
// compute mean per pixel
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<hog->tz; l++) {
|
||||
float* p = sum;
|
||||
float* m = max;
|
||||
float* hog_pix = hog->pixels + l*npix;
|
||||
int i;
|
||||
for(i=0; i<npix; i++,m++) {
|
||||
float v = *hog_pix++;
|
||||
*p++ += v;
|
||||
float max = *m;
|
||||
if(v>max) *m=v;
|
||||
}
|
||||
}
|
||||
|
||||
// subtract coef*mean
|
||||
coef /= hog->tz;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<hog->tz; l++) {
|
||||
float* p = sum;
|
||||
float* m = max;
|
||||
float* hog_pix = hog->pixels + l*npix;
|
||||
int i;
|
||||
for(i=0; i<npix; i++) {
|
||||
float Max = *m++; // max
|
||||
float mean = coef * (*p++); // == mean * coef
|
||||
if( mean >= Max )
|
||||
*hog_pix = 0;
|
||||
else {
|
||||
*hog_pix = Max*(1 - (Max - (*hog_pix))/(Max - mean + 1e-8f));
|
||||
if(*hog_pix<0) *hog_pix = 0;
|
||||
}
|
||||
hog_pix++;
|
||||
}
|
||||
}
|
||||
|
||||
free(sum);
|
||||
free(max);
|
||||
}
|
||||
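/* Note (added explanation): the body of subtract_mean_ori implements a slightly
   softer variant of the formula in its header comment. For each pixel it first
   gathers the sum (-> mean) and the max over orientations; responses are then
   linearly remapped from the range [coef*mean, max] to [0, max] and clamped at 0
   (everything at or below coef*mean is suppressed and the strongest orientation is
   preserved), rather than a literal subtraction of coef*mean. */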
|
||||
|
||||
/* Pass the gradient image through a sigmoid
|
||||
*/
|
||||
void sigmoid_array( float_array* img, float coef, float offset, int n_thread ) {
|
||||
assert(coef>0);
|
||||
const int npix=img->tx;
|
||||
// float* p = img->pixels;
|
||||
// for(i=0; i<npix; i++) {
|
||||
// float v = *p;
|
||||
// *p++ = 2.f/(1.f + exp(-coef*v + offset)) - 1.f;
|
||||
// }
|
||||
int l;
|
||||
|
||||
// optimization: precompute some values of sigmoid
|
||||
// 2/(1 + exp(-arange(0,8,0.5)+offset)) -1
|
||||
const int npc = 64;
|
||||
float precom[npc+1];
|
||||
for(l=0; l<=npc; l++) precom[l]= 1.f/(1.f + exp(-l/8.f + offset));
|
||||
for(l=1; l<=npc; l++) precom[l] = (precom[l]-precom[0]) / (1 - precom[0]); // renorm between 0 and 1
|
||||
precom[0] = 0;
|
||||
const float maxindex = npc - 0.001;
|
||||
|
||||
#define NSUB 32
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<NSUB; l++) {
|
||||
int start = (l*npix)/NSUB;
|
||||
int end = (l+1)*npix/NSUB;
|
||||
int npixsub = end-start;
|
||||
float* p = img->pixels + start;
|
||||
int i;
|
||||
for(i=0; i<npixsub; i++) {
|
||||
float v = 8*(coef*(*p));
|
||||
if(v>maxindex) v=maxindex;
|
||||
int n = int(v);
|
||||
float w = v-n;
|
||||
*p++ = (1-w)*precom[n] + w*precom[n+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
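/* Note (added explanation): sigmoid_array evaluates the sigmoid through a small
   lookup table instead of calling exp() per pixel. The table samples
   1/(1 + exp(-v + offset)) at 65 points (step 1/8) for v in [0, 8], renormalizes it
   so that the v=0 entry maps to 0, and then each pixel value coef*x is scaled by 8,
   clamped to the table range and linearly interpolated between the two surrounding
   entries. The image is split into NSUB chunks only so the outer loop can be
   parallelized with OpenMP. */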
|
||||
|
||||
/* Compute a spatially smoothed version of the HOG.
|
||||
*/
|
||||
void smooth_hog_gaussian( float_layers* hog, float smoothing, int n_thread ) {
|
||||
int l;
|
||||
const int npix = hog->tx*hog->ty;
|
||||
for(l=0; l<hog->tz; l++)
|
||||
_smooth_gaussian_alltype(hog->tx,hog->ty,hog->pixels+l*npix,smoothing,hog->pixels+l*npix, n_thread);
|
||||
}
|
||||
@ -0,0 +1,111 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___HOG_H___
|
||||
#define ___HOG_H___
|
||||
#include "array_types.h"
|
||||
|
||||
|
||||
/* * * * * * IMAGE SMOOTHING * * * * * * */
|
||||
|
||||
/* Smooth an image using a Gaussian filter.
|
||||
*/
|
||||
void _smooth_gaussian( UBYTE_image* img, float sigma, UBYTE_image* res, int n_thread );
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* * * * * * GRADIENT COMPUTATIONS * * * * * * */
|
||||
|
||||
/* Compute the dx,dy gradient on the image based on a [-1,0,1] mask.
|
||||
method
|
||||
=0 : no prior smoothing
|
||||
=1 : sobel smoothing
|
||||
*/
|
||||
void _compute_grad_101( UBYTE_image* img, int method, float_layers* grad, int n_thread );
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* * * * * * pixel-HOG COMPUTATIONS * * * * * * */
|
||||
|
||||
/* Compute the Histogram of oriented gradient for each pixel.
|
||||
Number of orientations is determined by hog->tz;
|
||||
method determines orientation binning:
|
||||
=0 : atan + linear interpolation
|
||||
=1 : fast cos projection
|
||||
*/
|
||||
void _compute_hog( float_layers* grad, int method, float_layers* hog, int n_thread );
|
||||
|
||||
/* Compute per-pixel HOG of 8 directions using a different pipeline.
|
||||
The method uses 4 extremely simple oriented filters ([-1,1])
|
||||
*/
|
||||
void _compute_hog_8_direct( UBYTE_image* image, float_layers* hog_out, int n_thread );
|
||||
|
||||
|
||||
/* Post-processing of the HOG: cross-orientation inhibition.
|
||||
for one pixel i and orientation o: hog[i,o] = max(0, hog[i,o] - coef*hog[i,:].mean())
|
||||
This is useful for HOGs computed from cosine projection.
|
||||
*/
|
||||
void subtract_mean_ori( float_layers* hog, float coef, int n_thread );
|
||||
|
||||
|
||||
/* Pass the gradient image through a sigmoid
|
||||
lambda v: 2/(1 + exp(-coef*v + offset)) - 1
|
||||
*/
|
||||
void sigmoid_array( float_array* img, float coef, float offset, int n_thread );
|
||||
|
||||
|
||||
/* Compute a spatially smoothed version of the HOG.
|
||||
*/
|
||||
void smooth_hog_gaussian( float_layers* hog, float smoothing, int n_thread );
|
||||
#endif
|
@ -0,0 +1,268 @@
|
||||
#include "image.h"
|
||||
#include "std.h"
|
||||
|
||||
|
||||
/********** Create/Delete **********/
|
||||
|
||||
/* allocate a new image of size width x height */
|
||||
image_t *image_new(int width, int height)
|
||||
{
|
||||
image_t *image = NEW(image_t);
|
||||
if(image == NULL)
|
||||
{
|
||||
err_printf( "Error: image_new() - not enough memory !\n");
|
||||
exit(1);
|
||||
}
|
||||
image->width = width;
|
||||
image->height = height;
|
||||
image->stride = ( (width+3) / 4 ) * 4;
|
||||
image->data = NEWA(float, image->stride*height*sizeof(float));
|
||||
if(image->data == NULL)
|
||||
{
|
||||
err_printf( "Error: image_new() - not enough memory !\n");
|
||||
exit(1);
|
||||
}
|
||||
return image;
|
||||
}
|
||||
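/* Note (added explanation): image_new rounds the row stride up to the next
   multiple of 4 floats, so pixel (x, y) lives at data[y*stride + x] and each row
   may carry up to 3 floats of padding; all loops in this file therefore index with
   the stride, never with the width. */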
|
||||
/* allocate a new image and copy the content from src */
|
||||
image_t *image_cpy(const image_t *src)
|
||||
{
|
||||
image_t *dst = image_new(src->width, src->height);
|
||||
memcpy(dst->data, src->data, src->stride*src->height*sizeof(float));
|
||||
return dst;
|
||||
}
|
||||
|
||||
/* set all pixels values to zeros */
|
||||
void image_erase(image_t *image)
|
||||
{
|
||||
memset(image->data, 0, image->stride*image->height*sizeof(float));
|
||||
}
|
||||
|
||||
|
||||
/* multiply an image by a scalar */
|
||||
void image_mul_scalar(image_t *image, float scalar)
|
||||
{
|
||||
int i;
|
||||
for( i=0 ; i<image->stride*image->height ; i++)
|
||||
image->data[i] *= scalar;
|
||||
}
|
||||
|
||||
/* free memory of an image */
|
||||
void image_delete(image_t *image)
|
||||
{
|
||||
if(image == NULL)
|
||||
{
|
||||
//err_printf( "Warning: Delete image --> Ignore action (image not allocated)\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
free(image->data);
|
||||
free(image);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* allocate a new color image of size width x height */
|
||||
color_image_t *color_image_new(int width, int height)
|
||||
{
|
||||
size_t stride_channel = width*height*sizeof(float);
|
||||
char *buffer = NEWA(char, sizeof(color_image_t) + 3*stride_channel);
|
||||
if(buffer == NULL)
|
||||
{
|
||||
err_printf( "Error: color_image_new() - not enough memory !\n");
|
||||
exit(1);
|
||||
}
|
||||
color_image_t *image = (color_image_t*) buffer;
|
||||
image->width = width;
|
||||
image->height = height;
|
||||
image->c1 = (float*) (buffer + sizeof(color_image_t));
|
||||
image->c2 = (float*) (buffer + sizeof(color_image_t) + stride_channel);
|
||||
image->c3 = (float*) (buffer + sizeof(color_image_t) + 2*stride_channel);
|
||||
return image;
|
||||
}
|
||||
|
||||
/* allocate a new color image and copy the content from src */
|
||||
color_image_t *color_image_cpy(const color_image_t *src)
|
||||
{
|
||||
color_image_t *dst = color_image_new(src->width, src->height);
|
||||
memcpy(dst->c1, src->c1, 3*src->width*src->height*sizeof(float));
|
||||
return dst;
|
||||
}
|
||||
|
||||
/* set all pixels values to zeros */
|
||||
void color_image_erase(color_image_t *image)
|
||||
{
|
||||
memset(image->c1, 0, 3*image->width*image->height*sizeof(float));
|
||||
}
|
||||
|
||||
/* free memory of a color image */
|
||||
void color_image_delete(color_image_t *image)
|
||||
{
|
||||
if(image)
|
||||
{
|
||||
free(image); // the image is allocated such that the data is stored just after the pointer
|
||||
}
|
||||
}
|
||||
|
||||
/* convert a color image to a gray-scale image */
|
||||
image_t* image_gray_from_color( color_image_t* img )
|
||||
{
|
||||
image_t* res = image_new(img->width, img->height);
|
||||
|
||||
int n=0;
|
||||
for(int j=0; j<img->height; j++)
|
||||
for(int i=0; i<img->width; i++,n++)
|
||||
res->data[i+j*res->stride] = (img->c1[n] + img->c2[n] + img->c3[n])/3;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* reallocate the memory of an image to fit the new width height */
|
||||
void resize_if_needed_newsize(image_t *im, int w, int h)
|
||||
{
|
||||
if(im->width != w || im->height != h)
|
||||
{
|
||||
im->width = w;
|
||||
im->height = h;
|
||||
im->stride = ((w+3)/4)*4;
|
||||
float *data = NEWA(float,im->stride*h*sizeof(float));
|
||||
if(data == NULL)
|
||||
{
|
||||
err_printf( "Error: resize_if_needed_newsize() - not enough memory !\n");
|
||||
exit(1);
|
||||
}
|
||||
free(im->data);
|
||||
im->data = data;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************ Resizing *********/
|
||||
|
||||
/* resize an image to a new size (assumes a difference only in width) */
|
||||
void image_resize_horiz(image_t *dst, const image_t *src)
|
||||
{
|
||||
int i;
|
||||
float real_scale = ((float) src->width-1) / ((float) dst->width-1);
|
||||
for(i = 0; i < dst->height; i++)
|
||||
{
|
||||
int j;
|
||||
for(j = 0; j < dst->width; j++)
|
||||
{
|
||||
float dx;
|
||||
int x;
|
||||
x = floor((float) j * real_scale);
|
||||
dx = j * real_scale - x;
|
||||
if(x >= (src->width - 1))
|
||||
{
|
||||
dst->data[i * dst->stride + j] =
|
||||
src->data[i * src->stride + src->width - 1];
|
||||
}
|
||||
else
|
||||
{
|
||||
dst->data[i * dst->stride + j] =
|
||||
(1.0f - dx) * src->data[i * src->stride + x ] +
|
||||
( dx) * src->data[i * src->stride + x + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* resize an image to a new size (assumes a difference only in height) */
|
||||
void image_resize_vert(image_t *dst, const image_t *src)
|
||||
{
|
||||
int i;
|
||||
float real_scale = ((float) src->height-1) / ((float) dst->height-1);
|
||||
for(i = 0; i < dst->width; i++)
|
||||
{
|
||||
int j;
|
||||
for(j = 0; j < dst->height; j++)
|
||||
{
|
||||
int y;
|
||||
float dy;
|
||||
y = floor((float) j * real_scale);
|
||||
dy = j * real_scale - y;
|
||||
if(y >= (src->height - 1))
|
||||
{
|
||||
dst->data[j * dst->stride + i] =
|
||||
src->data[i + (src->height - 1) * src->stride];
|
||||
}
|
||||
else
|
||||
{
|
||||
dst->data[j * dst->stride + i] =
|
||||
(1.0f - dy) * src->data[i + (y ) * src->stride] +
|
||||
( dy) * src->data[i + (y + 1) * src->stride];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* resize an image with bilinear interpolation to fit the new width, height; reallocation is done if necessary */
|
||||
void image_resize_bilinear_newsize(image_t *dst, const image_t *src, int new_width, int new_height)
|
||||
{
|
||||
resize_if_needed_newsize(dst,new_width,new_height);
|
||||
if(new_width < new_height)
|
||||
{
|
||||
image_t *tmp = image_new(new_width,src->height);
|
||||
image_resize_horiz(tmp,src);
|
||||
image_resize_vert(dst,tmp);
|
||||
image_delete(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
image_t *tmp = image_new(src->width,new_height);
|
||||
image_resize_vert(tmp,src);
|
||||
image_resize_horiz(dst,tmp);
|
||||
image_delete(tmp);
|
||||
}
|
||||
}
|
||||
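/* Note (added explanation): image_resize_bilinear_newsize performs the bilinear
   resize as two separable 1-D passes: one pass changes only the width
   (image_resize_horiz) and the other only the height (image_resize_vert), with an
   intermediate image in between; the branch on new_width < new_height only decides
   which pass runs first. Reallocation of dst is handled up front by
   resize_if_needed_newsize. */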
|
||||
/* resize an image with bilinear interpolation */
|
||||
image_t *image_resize_bilinear_scale(const image_t *src, float scale) {
|
||||
const int new_width = int(0.5 + src->width * scale);
|
||||
const int new_height = int(0.5 + src->height * scale);
|
||||
|
||||
image_t *res = image_new(new_width,src->height);
|
||||
image_resize_bilinear_newsize(res, src, new_width, new_height);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* crop an image (in-place) */
|
||||
void image_crop(image_t* img, int width, int height)
|
||||
{
|
||||
assert(width<=img->width);
|
||||
img->width = width;
|
||||
assert(height<=img->height);
|
||||
img->height = height;
|
||||
}
|
||||
@ -0,0 +1,103 @@
|
||||
#ifndef ___IMAGE_H___
|
||||
#define ___IMAGE_H___
|
||||
|
||||
/********** STRUCTURES *********/
|
||||
|
||||
/* structure for 1-channel image */
|
||||
typedef struct image_s
|
||||
{
|
||||
int width; /* Width of the image */
|
||||
int height; /* Height of the image */
|
||||
int stride; /* Width of the memory (width + padding such that it is a multiple of 4) */
|
||||
float *data; /* Image data */
|
||||
} image_t;
|
||||
|
||||
/* structure for 3-channels image stored with one layer per color, it assumes that c2 = c1+width*height and c3 = c2+width*height. */
|
||||
typedef struct color_image_s
|
||||
{
|
||||
int width; /* Width of the image */
|
||||
int height; /* Height of the image */
|
||||
float *c1; /* Color 1 */
|
||||
float *c2; /* Color 2 */
|
||||
float *c3; /* Color 3 */
|
||||
} color_image_t;
|
||||
|
||||
|
||||
/********** Create/Delete **********/
|
||||
|
||||
/* allocate a new image of size width x height */
|
||||
image_t *image_new(int width, int height);
|
||||
|
||||
/* allocate a new image and copy the content from src */
|
||||
image_t *image_cpy(const image_t *src);
|
||||
|
||||
/* set all pixels values to zeros */
|
||||
void image_erase(image_t *image);
|
||||
|
||||
/* free memory of an image */
|
||||
void image_delete(image_t *image);
|
||||
|
||||
/* multiply an image by a scalar */
|
||||
void image_mul_scalar(image_t *image, float scalar);
|
||||
|
||||
/* allocate a new color image of size width x height */
|
||||
color_image_t *color_image_new(int width, int height);
|
||||
|
||||
/* allocate a new color image and copy the content from src */
|
||||
color_image_t *color_image_cpy(const color_image_t *src);
|
||||
|
||||
/* set all pixels values to zeros */
|
||||
void color_image_erase(color_image_t *image);
|
||||
|
||||
/* free memory of a color image */
|
||||
void color_image_delete(color_image_t *image);
|
||||
|
||||
/* convert a color image to a gray-scale image */
|
||||
image_t* image_gray_from_color( color_image_t* img ) ;
|
||||
|
||||
/* reallocate the memory of an image to fit the new width height */
|
||||
void resize_if_needed_newsize(image_t *im, int w, int h);
|
||||
|
||||
|
||||
/************ Resizing *********/
|
||||
|
||||
/* resize an image with bilinear interpolation */
|
||||
image_t *image_resize_bilinear_scale(const image_t *src, float scale);
|
||||
|
||||
/* resize an image with bilinear interpolation to fit the new width, height; reallocation is done if necessary */
|
||||
void image_resize_bilinear_newsize(image_t *dst, const image_t *src, int new_width, int new_height);
|
||||
|
||||
/* resize a color image with bilinear interpolation */
|
||||
color_image_t *color_image_resize_bilinear(const color_image_t *src, float scale);
|
||||
|
||||
/* crop an image (in-place) */
|
||||
void image_crop(image_t* img, int width, int height);
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,402 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "std.h"
|
||||
#include <stdio.h>
|
||||
#include <jpeglib.h>
|
||||
#include <png.h>
|
||||
#include <setjmp.h>
|
||||
#include "io.h"
|
||||
|
||||
|
||||
void output_correspondences( const char* out_filename, const corres_t* corres, int nb, float fx, float fy )
|
||||
{
|
||||
assert(0<fx && fx<=2);
|
||||
assert(0<fy && fy<=2);
|
||||
FILE* f = out_filename ? fopen(out_filename,"w") : NULL;
|
||||
for(int i=0; i<nb; i++) {
|
||||
const corres_t* r = corres + i; // one row
|
||||
if (f)
|
||||
fprintf(f,"%g %g %g %g %g %g\n",fx*r->x0,fy*r->y0,fx*r->x1,fy*r->y1,r->maxima,r->score);
|
||||
else
|
||||
std_printf("%g %g %g %g %g %g\n",fx*r->x0,fy*r->y0,fx*r->x1,fy*r->y1,r->maxima,r->score);
|
||||
}
|
||||
if(out_filename)
|
||||
fclose(f);
|
||||
}
|
||||
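/* Note (added explanation): output_correspondences writes one correspondence per
   line, either to out_filename or to stdout when no file name is given. The fx/fy
   factors undo any prior downscaling or "-resize" of the input images, so the
   reported coordinates refer to the images at their original resolution (hence the
   asserts restricting the factors to (0, 2]). */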
|
||||
/* IMAGE */
|
||||
|
||||
// PPM
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int magic;
|
||||
int width;
|
||||
int height;
|
||||
int pixmax;
|
||||
} ppm_hdr_t;
|
||||
|
||||
static void get_magic(FILE *fp, ppm_hdr_t *ppm_hdr)
|
||||
{
|
||||
char str[1024];
|
||||
fgets(str, 1024, fp);
|
||||
if(str[0] == 'P' && (str[1] >= '1' && str[1] <= '6'))
|
||||
{
|
||||
ppm_hdr->magic = str[1] - '0';
|
||||
}
|
||||
}
|
||||
|
||||
static int skip_comment(FILE *fp)
|
||||
{
|
||||
char c;
|
||||
do
|
||||
{
|
||||
c = (char) fgetc(fp);
|
||||
}
|
||||
while (c == ' ' || c == '\t' || c == '\n');
|
||||
if(c == '#')
|
||||
{
|
||||
do
|
||||
{
|
||||
c = (char) fgetc(fp);
|
||||
|
||||
} while(c != 0x0A);
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ungetc(c, fp);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static void skip_comments(FILE *fp)
|
||||
{
|
||||
while(skip_comment(fp));
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static int get_image_size(FILE *fp, ppm_hdr_t *ppm_hdr)
|
||||
{
|
||||
skip_comments(fp);
|
||||
if(fscanf(fp, "%d %d", &ppm_hdr->width, &ppm_hdr->height) != 2)
|
||||
{
|
||||
err_printf( "Warning: PGM --> File currupted\n");
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static int get_pixmax(FILE *fp, ppm_hdr_t *ppm_hdr)
|
||||
{
|
||||
skip_comments(fp);
|
||||
ppm_hdr->pixmax = 1;
|
||||
if(ppm_hdr->magic == 2 || ppm_hdr->magic == 3 || ppm_hdr->magic == 5 || ppm_hdr->magic == 6)
|
||||
{
|
||||
if(fscanf(fp, "%d", &ppm_hdr->pixmax) != 1)
|
||||
{
|
||||
err_printf( "Warning: PGM --> pixmax not valid\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
fgetc(fp);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static int get_ppm_hdr(FILE *fp, ppm_hdr_t *ppm_hdr)
|
||||
{
|
||||
get_magic(fp, ppm_hdr);
|
||||
if(!get_image_size(fp, ppm_hdr))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
if(!get_pixmax(fp, ppm_hdr))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void raw_read_color(FILE *fp, color_image_t *image)
|
||||
{
|
||||
int i, size=image->height*image->width;
|
||||
for(i=0;i<size;i++)
|
||||
{
|
||||
image->c1[i]=(float) fgetc(fp);
|
||||
image->c2[i]=(float) fgetc(fp);
|
||||
image->c3[i]=(float) fgetc(fp);
|
||||
}
|
||||
}
|
||||
|
||||
color_image_t *color_image_pnm_load(FILE *fp)
|
||||
{
|
||||
color_image_t *image = NULL;
|
||||
ppm_hdr_t ppm_hdr;
|
||||
if(!get_ppm_hdr(fp, &ppm_hdr))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
switch(ppm_hdr.magic)
|
||||
{
|
||||
case 1: /* PBM ASCII */
|
||||
case 2: /* PGM ASCII */
|
||||
case 3: /* PPM ASCII */
|
||||
case 4: /* PBM RAW */
|
||||
case 5: /* PGM RAW */
|
||||
err_printf( "color_image_pnm_load: only PPM raw with maxval 255 supported\n");
|
||||
break;
|
||||
case 6: /* PPM RAW */
|
||||
image = color_image_new(ppm_hdr.width, ppm_hdr.height);
|
||||
raw_read_color(fp, image);
|
||||
break;
|
||||
}
|
||||
return image;
|
||||
}
|
||||
|
||||
// JPG
|
||||
|
||||
color_image_t *color_image_jpeg_load(FILE *fp)
|
||||
{
|
||||
struct jpeg_decompress_struct cinfo;
|
||||
struct jpeg_error_mgr jerr;
|
||||
JSAMPARRAY buffer;
|
||||
int row_stride;
|
||||
int index = 0;
|
||||
color_image_t *image = NULL;
|
||||
float *r_p, *g_p, *b_p;
|
||||
JSAMPROW buffer_p;
|
||||
cinfo.err = jpeg_std_error(&jerr);
|
||||
jpeg_create_decompress(&cinfo);
|
||||
jpeg_stdio_src(&cinfo, fp);
|
||||
jpeg_read_header(&cinfo, TRUE);
|
||||
cinfo.out_color_space = JCS_RGB;
|
||||
cinfo.quantize_colors = FALSE;
|
||||
image = color_image_new(cinfo.image_width, cinfo.image_height);
|
||||
if(image == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
jpeg_start_decompress(&cinfo);
|
||||
row_stride = cinfo.output_width * cinfo.output_components;
|
||||
buffer = (*cinfo.mem->alloc_sarray)
|
||||
((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
|
||||
|
||||
r_p = image->c1;
|
||||
g_p = image->c2;
|
||||
b_p = image->c3;
|
||||
|
||||
while (cinfo.output_scanline < cinfo.output_height)
|
||||
{
|
||||
jpeg_read_scanlines(&cinfo, buffer, 1);
|
||||
buffer_p = buffer[0];
|
||||
index = cinfo.output_width;
|
||||
while(index--)
|
||||
{
|
||||
*r_p++ = (float) *buffer_p++;
|
||||
*g_p++ = (float) *buffer_p++;
|
||||
*b_p++ = (float) *buffer_p++;
|
||||
}
|
||||
}
|
||||
jpeg_finish_decompress(&cinfo);
|
||||
jpeg_destroy_decompress(&cinfo);
|
||||
return image;
|
||||
}
|
||||
|
||||
color_image_t * color_image_png_load( FILE* fp, const char* file_name )
|
||||
{
|
||||
// read the header
|
||||
png_byte header[8];
|
||||
fread(header, 1, 8, fp);
|
||||
|
||||
if (png_sig_cmp(header, 0, 8))
|
||||
{
|
||||
err_printf( "error: %s is not a PNG.\n", file_name);
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
|
||||
if (!png_ptr)
|
||||
{
|
||||
err_printf( "error: png_create_read_struct returned 0.\n");
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// create png info struct
|
||||
png_infop info_ptr = png_create_info_struct(png_ptr);
|
||||
if (!info_ptr)
|
||||
{
|
||||
err_printf( "error: png_create_info_struct returned 0.\n");
|
||||
png_destroy_read_struct(&png_ptr, (png_infopp)NULL, (png_infopp)NULL);
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// create png info struct
|
||||
png_infop end_info = png_create_info_struct(png_ptr);
|
||||
if (!end_info)
|
||||
{
|
||||
err_printf( "error: png_create_info_struct returned 0.\n");
|
||||
png_destroy_read_struct(&png_ptr, &info_ptr, (png_infopp) NULL);
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// the code in this if statement gets called if libpng encounters an error
|
||||
if (setjmp(png_jmpbuf(png_ptr))) {
|
||||
err_printf( "error from libpng\n");
|
||||
png_destroy_read_struct(&png_ptr, &info_ptr, &end_info);
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// init png reading
|
||||
png_init_io(png_ptr, fp);
|
||||
|
||||
// let libpng know you already read the first 8 bytes
|
||||
png_set_sig_bytes(png_ptr, 8);
|
||||
|
||||
// read all the info up to the image data
|
||||
png_read_info(png_ptr, info_ptr);
|
||||
|
||||
// variables to pass to get info
|
||||
int bit_depth, color_type;
|
||||
png_uint_32 temp_width, temp_height;
|
||||
|
||||
// get info about png
|
||||
png_get_IHDR(png_ptr, info_ptr, &temp_width, &temp_height, &bit_depth, &color_type,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Update the png info struct.
|
||||
png_read_update_info(png_ptr, info_ptr);
|
||||
|
||||
// Row size in bytes.
|
||||
int rowbytes = png_get_rowbytes(png_ptr, info_ptr);
|
||||
|
||||
// Allocate the image_data as a big block, to be given to opengl
|
||||
png_byte * image_data;
|
||||
image_data = NEWA(png_byte, rowbytes * temp_height);
|
||||
assert(image_data!=NULL);
|
||||
|
||||
// row_pointers is for pointing to image_data for reading the png with libpng
|
||||
png_bytep * row_pointers = NEWA(png_bytep, temp_height);
|
||||
assert(row_pointers!=NULL);
|
||||
|
||||
// set the individual row_pointers to point at the correct offsets of image_data
|
||||
unsigned int i;
|
||||
for (i = 0; i <temp_height; i++)
|
||||
row_pointers[i] = image_data + i * rowbytes;
|
||||
|
||||
// read the png into image_data through row_pointers
|
||||
png_read_image(png_ptr, row_pointers);
|
||||
|
||||
// copy into color image
|
||||
color_image_t* image = color_image_new(temp_width,temp_height);
|
||||
if( color_type==0 ) {
|
||||
assert((unsigned)rowbytes == temp_width || !"error: not a proper gray png image");
|
||||
for(i=0; i<temp_width*temp_height; i++)
|
||||
image->c1[i] = image->c2[i] = image->c3[i] = image_data[i];
|
||||
|
||||
}
|
||||
else if( color_type == 2 ) {
|
||||
assert((unsigned)rowbytes == 3*temp_width || !"error: not a proper color png image");
|
||||
for(i=0; i<temp_width*temp_height; i++) {
|
||||
image->c1[i] = image_data[3*i+0];
|
||||
image->c2[i] = image_data[3*i+1];
|
||||
image->c3[i] = image_data[3*i+2];
|
||||
}
|
||||
} else
|
||||
assert(!"error: unknown PNG color type" );
|
||||
|
||||
// clean up
|
||||
png_destroy_read_struct(&png_ptr, &info_ptr, &end_info);
|
||||
free(row_pointers);
|
||||
free(image_data);
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
// GENERAL LOAD
|
||||
|
||||
/* load a color image from a file */
|
||||
color_image_t *color_image_load(const char *fname)
|
||||
{
|
||||
FILE *fp;
|
||||
char magic[2];
|
||||
unsigned short *magic_short = (unsigned short *) magic;
|
||||
color_image_t *image = NULL;
|
||||
if((fp = fopen(fname, "rb")) == NULL)
|
||||
{
|
||||
err_printf( "Warning: color_image_load() - can not open file `%s' !\n", fname);
|
||||
return NULL;
|
||||
}
|
||||
fread(magic, sizeof(char), 2, fp);
|
||||
rewind(fp);
|
||||
if(magic_short[0] == 0xd8ff)
|
||||
{
|
||||
image = color_image_jpeg_load(fp);
|
||||
}
|
||||
else if(magic[0]=='P' && (magic[1]=='6' || magic[1]=='5'))
|
||||
{ /* PPM raw */
|
||||
image = color_image_pnm_load(fp);
|
||||
}
|
||||
else if( magic[0]==-119 && magic[1]=='P' )
|
||||
{
|
||||
image = color_image_png_load( fp, fname );
|
||||
}
|
||||
else
|
||||
{
|
||||
err_printf( "Warning: color_image_load(%s) - image format not recognized\n",fname);
|
||||
}
|
||||
fclose(fp);
|
||||
return image;
|
||||
}
|
||||
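/* Note (added explanation): color_image_load dispatches on the first two bytes of
   the file: 0xFF 0xD8 (read here as the little-endian short 0xd8ff) selects the
   JPEG path, an ASCII "P5"/"P6" header selects the PNM path, and 0x89 'P' (the
   start of the PNG signature) selects the PNG path. Anything else is rejected with
   a warning and NULL is returned. */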
@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___IO_H___
|
||||
#define ___IO_H___
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "image.h"
|
||||
#include "deep_matching.h"
|
||||
|
||||
// output correspondences to a file or on the stdout
|
||||
void output_correspondences( const char* out_filename, const corres_t* corres, int nb, float fx, float fy );
|
||||
|
||||
/* load a color image from a file */
|
||||
color_image_t *color_image_load(const char *fname);
|
||||
|
||||
#endif
|
[binary image files added: 3.9 KiB and 3.7 KiB]
@ -0,0 +1,327 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "std.h"
|
||||
#include "image.h"
|
||||
#include "io.h"
|
||||
#include "deep_matching.h"
|
||||
#include "main.h"
|
||||
#include <thread>
|
||||
|
||||
void usage(const int language)
|
||||
{
|
||||
#define p(msg) std_printf(msg "\n");
|
||||
p("usage:");
|
||||
switch(language){
|
||||
case EXE_OPTIONS:
|
||||
p("./deepmatching image1 image2 [options]");
|
||||
p("Compute the 'DeepMatching' between two images and print a list of")
|
||||
p("pair-wise point correspondences:")
|
||||
p(" x1 y1 x2 y2 score index ...")
|
||||
p("(index refers to the local maximum from which the match was retrieved)")
|
||||
p("Images must be in PPM, PNG or JPG format. Version 1.2.2")
|
||||
break;
|
||||
case MATLAB_OPTIONS:
|
||||
p("matches = deepmatching(image1, image2 [, options])")
|
||||
p("Compute the 'DeepMatching' between two images.")
|
||||
p("Images must be HxWx3 single matrices.")
|
||||
p("Options is an optional string argument ('' by default).")
|
||||
p("The function returns a matrix with 6 columns, each row being x1 y1 x2 y2 score index.")
|
||||
p("(index refers to the local maximum from which the match was retrieved)")
|
||||
p("Version 1.2.2")
|
||||
break;
|
||||
case PYTHON_OPTIONS:
|
||||
p("matches = deepmatching.deepmatching(image1, image2, options='')")
|
||||
p("Compute the 'DeepMatching' between two images.")
|
||||
p("Images must be HxWx3 numpy arrays (converted to float32).")
|
||||
p("Options is an optional string argument ('' by default).")
|
||||
p("The function returns a numpy array with 6 columns, each row being x1 y1 x2 y2 score index.")
|
||||
p("(index refers to the local maximum from which the match was retrieved)")
|
||||
p("Version 1.2.2")
|
||||
break;
|
||||
}
|
||||
p("")
|
||||
p("Options:")
|
||||
p(" -h, --help print this message")
|
||||
//p(" HOG parameters (low-level pixel descriptor):")
|
||||
//p(" -png_settings (auto) recommended for uncompressed images")
|
||||
//p(" -jpg_settings (auto) recommended for compressed images")
|
||||
//p(" in more details: (for fine-tuning)")
|
||||
//p(" -hog.presm <f=1.0> prior image smoothing")
|
||||
//p(" -hog.midsm <f=1.0> intermediate HOG smoothing")
|
||||
//p(" -hog.sig <f=0.2> sigmoid strength")
|
||||
//p(" -hog.postsm <f=1.0> final HOG-smoothing")
|
||||
//p(" -hog.ninth <f=0.3> robustness to pixel noise (eg. JPEG artifacts)")
|
||||
p("")
|
||||
p(" Matching parameters:")
|
||||
//p(" -iccv_settings settings used for the ICCV paper")
|
||||
//p(" -improved_settings (default) supposedly improved settings")
|
||||
//p(" in more details: (for fine-tuning)")
|
||||
p(" -downscale/-R <n=1> downsize the input images by a factor 2^n")
|
||||
//p(" -overlap <n=999> use overlapping patches in image1 from level n")
|
||||
//p(" -subref <n=0> 0: denser sampling or 1: not of image1 patches")
|
||||
p(" -ngh_rad <n=0> if n>0: restrict matching to n pxl neighborhood")
|
||||
p(" -nlpow <f=1.4> non-linear rectification x := x^f")
|
||||
//p(" -maxima_mode <n=0> 0: from all top cells / 1: from local maxima")
|
||||
//p(" -min_level <n=2> skip maxima in levels [0, 1, ..., n-1]")
|
||||
p(" -mem <n=1> if n>0: optimize memory footprint (bit unstable)")
|
||||
//p(" -scoring_mode <n=1> type of correspondence scoring mode (0/1)")
|
||||
p("")
|
||||
p(" Fully scale & rotation invariant DeepMatching:")
|
||||
p(" if either one of these options is used, then this mode is activated:")
|
||||
p(" -max_scale <factor=5> max scaling factor")
|
||||
p(" -rot_range <from=0> <to=360> rotation range")
|
||||
p("")
|
||||
p(" Other parameters:")
|
||||
p(" -resize <width> <height> to resize input images beforehand")
|
||||
p(" -v increase verbosity")
|
||||
p(" -nt <n> multi-threading with <n> threads")
|
||||
if(language==EXE_OPTIONS) {
|
||||
p(" -out <file_name> output correspondences in a file")
|
||||
exit(1);}
|
||||
}
|
||||
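/* Note (added explanation): a typical command line matching the options printed by
   usage() above would be, for example,
     ./deepmatching img1.png img2.png -downscale 1 -nt 4 -out matches.txt
   (the image and output file names are placeholders). Each output row then contains
   the two matched point coordinates plus a score and the index of the local maximum
   the match was retrieved from, as described in the help text. */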
|
||||
bool endswith(const char *str, const char *suffix)
|
||||
{
|
||||
if(!str || !suffix) return false;
|
||||
size_t lenstr = strlen(str);
|
||||
size_t lensuffix = strlen(suffix);
|
||||
if(lensuffix > lenstr) return false;
|
||||
return strncmp(str + lenstr - lensuffix, suffix, lensuffix) == 0;
|
||||
}
|
||||
|
||||
image_t* rescale_image( image_t* im, int width, int height )
|
||||
{
|
||||
image_t* res = image_new(width,height);
|
||||
image_resize_bilinear_newsize(res, im, width, height);
|
||||
image_delete(im);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
const char *parse_options(dm_params_t *params, scalerot_params_t *sr_params, bool *use_scalerot, float *fx, float *fy, const int argc, const char **argv, const int language, image_t **im1, image_t **im2) {
|
||||
int current_arg = 0;
|
||||
const char* out_filename = NULL;
|
||||
|
||||
// parse options
|
||||
while(current_arg < argc)
|
||||
{
|
||||
const char* a = argv[current_arg++];
|
||||
#define isarg(key) !strcmp(a,key)
|
||||
|
||||
if(isarg("-h") || isarg("--help") ) usage(language);
|
||||
// HOG and patch parameters
|
||||
//else if(isarg("-hog.presm"))
|
||||
// params->desc_params.presmooth_sigma = atof(argv[current_arg++]);
|
||||
//else if(isarg("-hog.sig"))
|
||||
// params->desc_params.hog_sigmoid = atof(argv[current_arg++]);
|
||||
//else if(isarg("-hog.midsm"))
|
||||
// params->desc_params.mid_smoothing = atof(argv[current_arg++]);
|
||||
//else if(isarg("-hog.postsm"))
|
||||
// params->desc_params.post_smoothing = atof(argv[current_arg++]);
|
||||
//else if(isarg("-hog.ninth"))
|
||||
// params->desc_params.ninth_dim = atof(argv[current_arg++]);
|
||||
//else if(isarg("-hog.nrmpix"))
|
||||
// params->desc_params.norm_pixels = atof(argv[current_arg++]);
|
||||
else if(isarg("-png_settings")) {
|
||||
params->desc_params.presmooth_sigma = 0; // no image smoothing since the image is uncompressed
|
||||
params->desc_params.hog_sigmoid = 0.2;
|
||||
params->desc_params.mid_smoothing = 1.5;
|
||||
params->desc_params.post_smoothing = 1;
|
||||
params->desc_params.ninth_dim = 0.1; } // low ninth_dim since image PSNR is high
|
||||
else if(isarg("-jpg_settings")) {
|
||||
params->desc_params.presmooth_sigma = 1; // smooth the image to remove jpg artifacts
|
||||
params->desc_params.hog_sigmoid = 0.2;
|
||||
params->desc_params.mid_smoothing = 1.5;
|
||||
params->desc_params.post_smoothing = 1;
|
||||
params->desc_params.ninth_dim = 0.3; } // higher ninth_dim because of pixel noise
|
||||
// matching parameters
|
||||
else if(isarg("-R") || isarg("-downscale"))
|
||||
params->prior_img_downscale = atoi(argv[current_arg++]);
|
||||
//else if(isarg("-overlap"))
|
||||
// params->overlap = atoi(argv[current_arg++]);
|
||||
//else if(isarg("-subref"))
|
||||
// params->subsample_ref = atoi(argv[current_arg++]);
|
||||
else if(isarg("-nlpow"))
|
||||
params->nlpow = atof(argv[current_arg++]);
|
||||
else if(isarg("-ngh_rad"))
|
||||
params->ngh_rad = atoi(argv[current_arg++]);
|
||||
// maxima parameters
|
||||
//else if(isarg("-maxima_mode"))
|
||||
// params->maxima_mode = atoi(argv[current_arg++]);
|
||||
else if(isarg("-mem")) {
|
||||
params->low_mem = atoi(argv[current_arg++]); }
|
||||
//else if(isarg("-min_level"))
|
||||
// params->min_level = atoi(argv[current_arg++]);
|
||||
//else if(isarg("-scoring_mode"))
|
||||
// params->scoring_mode = atoi(argv[current_arg++]);
|
||||
//else if(isarg("-iccv_settings")) {
|
||||
// params->prior_img_downscale = 2;
|
||||
// params->overlap = 0; // overlap from level 0
|
||||
// params->subsample_ref = 1;
|
||||
// params->nlpow = 1.6;
|
||||
// params->maxima_mode = 1;
|
||||
// params->low_mem = 0;
|
||||
// params->min_level = 2;
|
||||
// params->scoring_mode = 0; }
|
||||
//else if(isarg("-improved_settings")) {
|
||||
// params->prior_img_downscale = 1; // less down-scale
|
||||
// params->overlap = 999; // no overlap
|
||||
// params->subsample_ref = 0; // dense patch sampling at every level in first image
|
||||
// params->nlpow = 1.4;
|
||||
// params->maxima_mode = 0;
|
||||
// params->low_mem = 1;
|
||||
// params->min_level = 2;
|
||||
// params->scoring_mode = 1; } // improved scoring
|
||||
//else if(isarg("-max_psize")) {
|
||||
// params->max_psize = atoi(argv[current_arg++]); }
|
||||
// scale & rot invariant version
|
||||
else if(isarg("-scale") || isarg("-max_scale")) {
|
||||
*use_scalerot = true;
|
||||
float scale = atof(argv[current_arg++]);
|
||||
sr_params->max_sc0 = sr_params->max_sc1 = int(1 + 2*log2(scale)); }
|
||||
else if(isarg("-rot") || isarg("-rot_range")) {
|
||||
*use_scalerot = true;
|
||||
int min_rot = atoi(argv[current_arg++]);
|
||||
int max_rot = atoi(argv[current_arg++]);
|
||||
while( min_rot < 0 ) {
|
||||
min_rot += 360;
|
||||
max_rot += 360;
|
||||
}
|
||||
sr_params->min_rot = int(floor(0.5 + min_rot/45.));
|
||||
sr_params->max_rot = int(floor(1.5 + max_rot/45.));
|
||||
while( sr_params->max_rot - sr_params->min_rot > 8 )
|
||||
sr_params->max_rot--;
|
||||
assert( sr_params->min_rot < sr_params->max_rot ); }
|
||||
// other parameters
|
||||
else if(isarg("-resize")) {
|
||||
assert((*im1)->width==(*im2)->width && (*im1)->height==(*im2)->height);
|
||||
int width = atoi(argv[current_arg++]);
|
||||
int height = atoi(argv[current_arg++]);
|
||||
*fx *= (*im1)->width / float(width);
|
||||
*fy *= (*im1)->height / float(height);
|
||||
*im1 = rescale_image(*im1, width, height);
|
||||
*im2 = rescale_image(*im2, width, height); }
|
||||
else if(isarg("-v"))
|
||||
params->verbose++;
|
||||
else if(isarg("-nt")) {
|
||||
params->n_thread = atoi(argv[current_arg++]);
|
||||
if (params->n_thread==0)
|
||||
params->n_thread = std::thread::hardware_concurrency(); }
|
||||
else if(language == EXE_OPTIONS && isarg("-out"))
|
||||
out_filename = argv[current_arg++];
|
||||
else {
|
||||
err_printf("error: unexpected parameter '%s'", a);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if( *use_scalerot )
|
||||
assert( params->ngh_rad == 0 || !"max trans cannot be used in full scale and rotation mode");
|
||||
else
|
||||
if( params->subsample_ref && (!ispowerof2((*im1)->width) || !ispowerof2((*im1)->height)) ) {
|
||||
err_printf("WARNING: first image has dimension which are not power-of-2\n");
|
||||
err_printf("For improved results, you should consider resizing the images with '-resize <w> <h>'\n");
|
||||
}
|
||||
|
||||
return out_filename;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char ** argv)
|
||||
{
|
||||
if( argc<=2 || !strcmp(argv[1],"-h") || !strcmp(argv[1],"--help") ) usage(EXE_OPTIONS);
|
||||
|
||||
int current_arg = 3;
|
||||
image_t *im1=NULL, *im2=NULL;
|
||||
{
|
||||
color_image_t *cim1 = color_image_load(argv[1]);
|
||||
color_image_t *cim2 = color_image_load(argv[2]);
|
||||
|
||||
// Following deactivated because quite useless/dangerous in practice
|
||||
// default behavior == always using -jpg_settings
|
||||
|
||||
//if( endswith(argv[1],"png") || endswith(argv[1],"PNG") )
|
||||
// argv[--current_arg] = "-png_settings"; // set default
|
||||
//if( endswith(argv[1],"ppm") || endswith(argv[1],"PPM") )
|
||||
// argv[--current_arg] = "-png_settings"; // set default
|
||||
//if( endswith(argv[1],"jpg") || endswith(argv[1],"JPG") )
|
||||
// argv[--current_arg] = "-jpg_settings"; // set default
|
||||
//if( endswith(argv[1],"jpeg") || endswith(argv[1],"JPEG") )
|
||||
// argv[--current_arg] = "-jpg_settings"; // set default
|
||||
|
||||
im1 = image_gray_from_color(cim1);
|
||||
im2 = image_gray_from_color(cim2);
|
||||
color_image_delete(cim1);
|
||||
color_image_delete(cim2);
|
||||
}
|
||||
|
||||
// set params to default
|
||||
dm_params_t params;
|
||||
set_default_dm_params(¶ms);
|
||||
scalerot_params_t sr_params;
|
||||
set_default_scalerot_params(&sr_params);
|
||||
bool use_scalerot = false;
|
||||
float fx=1, fy=1;
|
||||
|
||||
// parse options
|
||||
const char* out_filename = parse_options(¶ms, &sr_params, &use_scalerot, &fx, &fy, argc-current_arg,
|
||||
&argv[current_arg], EXE_OPTIONS, &im1, &im2);
|
||||
|
||||
// compute deep matching
|
||||
float_image* corres = use_scalerot ?
|
||||
deep_matching_scale_rot( im1, im2, ¶ms, &sr_params ) :
|
||||
deep_matching ( im1, im2, ¶ms, NULL ); // standard call
|
||||
|
||||
// save result
|
||||
output_correspondences( out_filename, (corres_t*)corres->pixels, corres->ty, fx, fy );
|
||||
|
||||
free_image(corres);
|
||||
image_delete(im1);
|
||||
image_delete(im2);
|
||||
return 0;
|
||||
}
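
/* Example invocation (a sketch, not part of the original sources; the image and
   output names are hypothetical, and "deepmatching" stands for however the
   compiled binary is named):

     ./deepmatching img1.jpg img2.jpg -nt 4 -downscale 2 -out matches.txt

   The correspondences stored above are (x0, y0, x1, y1, score, index) tuples,
   which is also the 6-field layout the rescoring script further below expects
   on its stdin. */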
|
||||
@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___MAIN_H___
|
||||
#define ___MAIN_H___
|
||||
|
||||
#define EXE_OPTIONS 0
|
||||
#define MATLAB_OPTIONS 1
|
||||
#define PYTHON_OPTIONS 2
|
||||
|
||||
#include "deep_matching.h"
|
||||
|
||||
void usage(const int language);
|
||||
|
||||
const char* parse_options(dm_params_t *params, scalerot_params_t *sr_params, bool *use_scalerot, float *fx, float *fy, const int argc, const char **argv, const int language, image_t **im1, image_t **im2);
|
||||
|
||||
#endif
|
@ -0,0 +1,994 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "std.h"
|
||||
#include "maxfilter.h"
|
||||
#include "omp.h"
|
||||
|
||||
void _max_filter_3_horiz( float_image* img, float_image* res, int n_thread ) {
|
||||
ASSERT_SAME_SIZE(img,res);
|
||||
int j;
|
||||
const int tx = img->tx;
|
||||
const int ty = img->ty;
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(j=0; j<ty; j++) {
|
||||
int i;
|
||||
float *p = img->pixels + j*tx;
|
||||
float *r = res->pixels + j*tx;
|
||||
|
||||
float m = MAX(p[0],p[1]);
|
||||
*r++ = m;
|
||||
|
||||
for(i=1; i<tx-1; i++) {
|
||||
float m2 = MAX(p[i],p[i+1]);
|
||||
*r++ = MAX(m,m2);
|
||||
m=m2;
|
||||
}
|
||||
|
||||
*r++ = m;
|
||||
}
|
||||
}
|
||||
|
||||
void _max_filter_3_vert( float_image* img, float_image* res ) {
|
||||
ASSERT_SAME_SIZE(img,res);
|
||||
const int tx = img->tx;
|
||||
const int ty = img->ty;
|
||||
int j;
|
||||
|
||||
for(j=0; j<ty-1; j++) {
|
||||
int i;
|
||||
float *p = img->pixels + j*tx;
|
||||
float *r = res->pixels + j*tx;
|
||||
|
||||
for(i=0; i<tx; i++) {
|
||||
*r++ = MAX(p[i],p[i+tx]);
|
||||
}
|
||||
}
|
||||
memcpy(res->pixels+(ty-1)*tx,res->pixels+(ty-2)*tx,tx*sizeof(float)); // copy last row
|
||||
|
||||
for(j=ty-2; j>0; j--) {
|
||||
int i;
|
||||
float *p = res->pixels + (j-1)*tx;
|
||||
float *r = res->pixels + j*tx;
|
||||
|
||||
for(i=0; i<tx; i++) {
|
||||
float r0 = *r;
|
||||
*r++ = MAX(r0,p[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _max_filter_3( float_image* img, float_image* res, int n_thread ) {
|
||||
_max_filter_3_vert(img,res);
|
||||
_max_filter_3_horiz(res,res, res->ty>128? n_thread : 1);
|
||||
}
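
/* Minimal usage sketch (an illustration, not original code): float_image is the
   {pixels, tx, ty} aggregate used above and NEWA comes from std.h.

     float_image in  = {NEWA(float, 64*48), 64, 48};
     float_image out = {NEWA(float, 64*48), 64, 48};
     // ... fill in.pixels ...
     _max_filter_3(&in, &out, 1);  // 3-tap vertical max, then 3-tap horizontal max

   The two passes make the filter separable: each output pixel ends up being the
   maximum of its 3x3 neighborhood (replicated at the image borders). */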
|
||||
|
||||
void _max_filter_3_layers( float_layers* img, float_layers* res, int n_thread ) {
|
||||
ASSERT_SAME_LAYERS_SIZE(img,res);
|
||||
const long npix = img->tx*img->ty;
|
||||
|
||||
int l;
|
||||
#if defined(USE_OPENMP)
|
||||
omp_set_nested(0);
|
||||
omp_set_dynamic(0);
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<img->tz; l++) {
|
||||
float_image img2 = {img->pixels + l*npix,img->tx,img->ty};
|
||||
float_image res2 = {res->pixels + l*npix,res->tx,res->ty};
|
||||
_max_filter_3( &img2, &res2, n_thread );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Subsample an array, equivalent to res = img[:,::2,::2]
|
||||
*/
|
||||
void _subsample2( float_layers* img, float_layers* res, int n_thread ) {
|
||||
const int n_layers = res->tz;
|
||||
assert( img->tz==n_layers );
|
||||
const int tx = res->tx;
|
||||
const int ty = res->ty;
|
||||
assert( (img->tx+1)/2 == tx );
|
||||
assert( (img->ty+1)/2 == ty );
|
||||
|
||||
long l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
int x,y;
|
||||
for(y=0; y<ty; y++) {
|
||||
float* i = img->pixels + (l*img->ty + (2*y))*img->tx ;
|
||||
float* r = res->pixels + (l*ty + y)*tx;
|
||||
for(x=0; x<tx; x++)
|
||||
r[x] = i[x<<1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* joint max-pooling and subsampling
|
||||
*/
|
||||
void _max_filter_3_and_subsample_layers( float_layers* img, float_layers* res, int n_thread ) {
|
||||
const int n_layers = res->tz;
|
||||
assert( img->tz==n_layers );
|
||||
const int tx = res->tx;
|
||||
const int ty = res->ty;
|
||||
assert( tx>=2 && ty>=2 );
|
||||
const int tx2 = img->tx;
|
||||
const int ty2 = img->ty;
|
||||
assert( (tx2+1)/2 == tx ); // tx2=3 => tx=2
|
||||
assert( (ty2+1)/2 == ty );
|
||||
|
||||
long l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
// reset output
|
||||
memset(res->pixels + l*tx*ty, 0, tx*ty*sizeof(float));
|
||||
|
||||
int x,y;
|
||||
for(y=0; y<ty; y++) {
|
||||
float* i = img->pixels + (l*ty2 + (2*y))*tx2 ;
|
||||
float* r = res->pixels + (l*ty + y)*tx;
|
||||
float* r2 = (y+1<ty) ? r + tx : r; // pointer to next row
|
||||
|
||||
#define maxEq(v,m) v = (m>v) ? m : v
|
||||
|
||||
// even rows of img
|
||||
for(x=0; x<tx-1; x++) {
|
||||
maxEq( r[x+0], *i ); // i[2*x+0]
|
||||
i++;
|
||||
maxEq( r[x+0], *i ); // i[2*x+1]
|
||||
maxEq( r[x+1], *i ); // i[2*x+1]
|
||||
i++;
|
||||
}
|
||||
// r[x+1] does NOT exist anymore
|
||||
maxEq( r[x+0], *i ); // i[2*x+0]
|
||||
i++;
|
||||
if(x<tx2/2) { // i[2*x+i] exists
|
||||
maxEq( r[x+0], *i ); // i[2*x+1]
|
||||
i++;
|
||||
}
|
||||
assert((i-img->pixels)%tx2 == 0);
|
||||
|
||||
// odd rows of img
|
||||
if (y<ty2/2) {
|
||||
for(x=0; x<tx-1; x++) {
|
||||
maxEq( r [x+0], *i ); // i[2*x+0]
|
||||
maxEq( r2[x+0], *i ); // i[2*x+0]
|
||||
i++;
|
||||
|
||||
maxEq( r [x+0], *i ); // i[2*x+1]
|
||||
maxEq( r [x+1], *i ); // i[2*x+1]
|
||||
maxEq( r2[x+0], *i ); // i[2*x+1]
|
||||
maxEq( r2[x+1], *i ); // i[2*x+1]
|
||||
i++;
|
||||
}
|
||||
// r[x+1] does NOT exist anymore
|
||||
maxEq( r [x+0], *i ); // i[2*x+0]
|
||||
maxEq( r2[x+0], *i ); // i[2*x+0]
|
||||
i++;
|
||||
if(x<tx2/2) { // i[2*x+i] exists
|
||||
maxEq( r [x+0], *i ); // i[2*x+1]
|
||||
maxEq( r2[x+0], *i ); // i[2*x+1]
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
assert((i-img->pixels)%tx2 == 0);
|
||||
|
||||
#undef maxEq
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Subsample an array, equivalent to res = trueimg[:,offset_y::2,offset_x::2]
|
||||
except at boundaries, where the rules are a bit more complex:
|
||||
if img->tx % 2 == 0:
|
||||
if offset_x % 2 == 0:
|
||||
trueimg[offset_x+img->tx-1] is also sampled
|
||||
else:
|
||||
trueimg[offset_x] is also sampled
|
||||
elif img->tx % 2 == 1:
|
||||
trueimg[offset_x] is also sampled
|
||||
|
||||
...and likewise for y dimension.
|
||||
*/
|
||||
void _subsample2_offset( float_layers* img, int_image* offsets, float_layers* res, int n_thread ) {
|
||||
const int n_layers = res->tz;
|
||||
assert( img->tz==n_layers );
|
||||
assert( offsets->tx==2 && offsets->ty==n_layers );
|
||||
const int tx = res->tx;
|
||||
const int ty = res->ty;
|
||||
assert( (img->tx+2)/2 == tx );
|
||||
assert( (img->ty+2)/2 == ty );
|
||||
|
||||
long l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
int x,y;
|
||||
const int ox = (offsets->pixels[2*l]+0x10000) % 2;
|
||||
const int oy = (offsets->pixels[2*l+1]+0x10000) % 2;
|
||||
assert(ox>=0 && oy>=0);
|
||||
#define get_img_2pos(x,tx,ox) MAX(0, MIN(img->tx-1, 2*x-ox))
|
||||
|
||||
for(y=0; y<ty; y++) {
|
||||
float* i = img->pixels + (l*img->ty + get_img_2pos(y,ty,oy))*img->tx;
|
||||
float* r = res->pixels + (l*ty + y)*tx;
|
||||
r[0] = i[get_img_2pos(0,tx,ox)]; // first is special case
|
||||
for(x=1; x<tx-1; x++)
|
||||
r[x] = i[2*x-ox];
|
||||
r[x] = i[get_img_2pos(x,tx,ox)]; // last is special case
|
||||
}
|
||||
|
||||
#undef get_img_2pos
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Max-pool in 2x2 px non-overlapping cells
|
||||
*/
|
||||
void _maxpool2( float_layers* img, float_layers* res, int n_thread ) {
|
||||
const int n_layers = res->tz;
|
||||
assert( img->tz==n_layers );
|
||||
const int tx = res->tx;
|
||||
const int ty = res->ty;
|
||||
assert( (img->tx)/2 == tx );
|
||||
assert( (img->ty)/2 == ty );
|
||||
|
||||
long l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
int x,y;
|
||||
for(y=0; y<ty; y++) {
|
||||
float* i = img->pixels + (l*img->ty + (2*y))*img->tx ;
|
||||
float* j = i + img->tx;
|
||||
float* r = res->pixels + (l*ty + y)*tx;
|
||||
for(x=0; x<tx; x++,i+=2,j+=2) {
|
||||
float mi = MAX(i[0],i[1]);
|
||||
float mj = MAX(j[0],j[1]);
|
||||
r[x] = MAX(mi,mj);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* average-pool in 2x2 px non-overlapping cells
|
||||
*/
|
||||
void _avgpool2( float_layers* img, float_layers* res, int n_thread ) {
|
||||
const int n_layers = res->tz;
|
||||
assert( img->tz==n_layers );
|
||||
const int tx = res->tx;
|
||||
const int ty = res->ty;
|
||||
assert( (img->tx)/2 == tx );
|
||||
assert( (img->ty)/2 == ty );
|
||||
|
||||
long l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
int x,y;
|
||||
for(y=0; y<ty; y++) {
|
||||
float* i = img->pixels + (l*img->ty + (2*y))*img->tx ;
|
||||
float* j = i + img->tx;
|
||||
float* r = res->pixels + (l*ty + y)*tx;
|
||||
for(x=0; x<tx; x++,i+=2,j+=2) {
|
||||
r[x] = 0.25*(i[0] + i[1] + j[0] + j[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
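
/* Worked example (illustration only): pooling a single 4x4 layer
     1  2  3  4
     5  6  7  8
     9 10 11 12
    13 14 15 16
   in non-overlapping 2x2 cells gives, for _maxpool2,
     6  8
    14 16
   and, for _avgpool2,
     3.5  5.5
    11.5 13.5  */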
|
||||
|
||||
|
||||
typedef struct {
|
||||
int scale;
|
||||
int layer;
|
||||
int x,y;
|
||||
float score;
|
||||
} one_max;
|
||||
|
||||
typedef struct {
|
||||
one_max* list;
|
||||
int n_elems, n_alloc;
|
||||
} maxima;
|
||||
|
||||
|
||||
#include <pthread.h>
|
||||
static pthread_mutex_t mutex0, mutex1;
|
||||
|
||||
|
||||
static inline void add_one_max( maxima* list, int scale, int layer, int x, int y, float score ) {
|
||||
pthread_mutex_lock (&mutex0);
|
||||
if( list->n_alloc <= list->n_elems ) {
|
||||
list->n_alloc = 3*(list->n_alloc+64)/2;
|
||||
list->list = (one_max*)realloc(list->list, sizeof(one_max)*list->n_alloc);
|
||||
}
|
||||
one_max* m = &list->list[list->n_elems++];
|
||||
m->scale = scale;
|
||||
m->layer = layer;
|
||||
m->x = x;
|
||||
m->y = y;
|
||||
m->score = score;
|
||||
pthread_mutex_unlock (&mutex0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void _get_list_parents( int_cube* children, int_image* res ) {
|
||||
const int np2 = children->tz;
|
||||
assert( np2 == res->tx );
|
||||
const int n_cells_at_prev_scale = res->ty;
|
||||
int* parents = res->pixels;
|
||||
memset(parents,0xFF,n_cells_at_prev_scale*np2*sizeof(int)); // =-1 by default
|
||||
int i,j,ncells=children->tx*children->ty;
|
||||
int* cur = children->pixels;
|
||||
for(i=0; i<ncells; i++)
|
||||
for(j=0; j<np2; j++) {
|
||||
int c = *cur++;
|
||||
if(c<0) continue; // this one is not a real children
|
||||
parents[np2*c + j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int* get_list_parents( int_cube* children, int n_cells_at_prev_scale ) {
|
||||
const int np2 = children->tz;
|
||||
int_image res = {NEWA(int, n_cells_at_prev_scale*np2 ), np2, n_cells_at_prev_scale};
|
||||
_get_list_parents( children, &res );
|
||||
return res.pixels;
|
||||
}
|
||||
|
||||
|
||||
/* Return a list of local maxima in the scale-space of scores
|
||||
*/
|
||||
void _extract_maxima( res_scale* scales, int n_scales, float_array* sc_factor, float th, int min_scale, float nlpow,
|
||||
int check_parents, int check_children, int nobordure, int_image* res_out, int n_thread ) {
|
||||
|
||||
assert( sc_factor->tx == n_scales );
|
||||
assert( min_scale>=0 && min_scale<n_scales );
|
||||
const float* scf = sc_factor->pixels;
|
||||
|
||||
maxima res = {NULL,0,0};
|
||||
int s;
|
||||
|
||||
// compute the maximum filter for each scale separately
|
||||
const int min_scale_max = MAX(0,min_scale);
|
||||
for(s=min_scale_max; s<n_scales; s++) {
|
||||
res_scale* sc = scales + s;
|
||||
float_layers r = sc->res_map;
|
||||
assert(sc->max_map.pixels==NULL); // not already allocated
|
||||
sc->max_map = r; // initialize tx,ty,tz
|
||||
sc->max_map.pixels = NEWA(float, r.tx*r.ty*r.tz );
|
||||
_max_filter_3_layers( &r, &sc->max_map, n_thread );
|
||||
}
|
||||
|
||||
// then localize the local maxima in the scale-space
|
||||
for(s=min_scale; s<n_scales; s++) {
|
||||
res_scale* sc = scales + s;
|
||||
const int tx = sc->res_map.tx;
|
||||
const int ty = sc->res_map.ty;
|
||||
const long npix = tx*ty;
|
||||
const int n_layers = sc->assign.tx;
|
||||
|
||||
// helpful values...
|
||||
const int f = sc->f;
|
||||
|
||||
const int upper_tx = (s+1<n_scales) ? sc[+1].res_map.tx : 0;
|
||||
const int upper_ty = (s+1<n_scales) ? sc[+1].res_map.ty : 0;
|
||||
const int upper_npix = upper_tx*upper_ty;
|
||||
const float upper_scf= (s+1<n_scales) ? scf[s]/scf[s+1] : 0;
|
||||
const int np2 = (s+1<n_scales) ? sc[+1].children.tz : 0;
|
||||
const int np = (int)sqrt(np2);
|
||||
const int upper_f = (s+1<n_scales) ? sc[+1].f : 0;
|
||||
const int upper_gap = (s+1<n_scales) ? sc[+1].patch_size/4 : 0;
|
||||
const float* upper_layers = (s+1<n_scales) ? sc[+1].max_map.pixels : NULL;
|
||||
const int* upper_assign = (s+1<n_scales) ? sc[+1].assign.pixels : NULL;
|
||||
const int* list_parents = (s+1<n_scales) && check_parents ? get_list_parents(&sc[+1].children,sc->grid.tx*sc->grid.ty) : NULL;
|
||||
|
||||
const int down_tx = (s>min_scale_max) ? sc[-1].res_map.tx : 0;
|
||||
const int down_ty = (s>min_scale_max) ? sc[-1].res_map.ty : 0;
|
||||
const int down_npix = down_tx*down_ty;
|
||||
const float down_scf= (s>min_scale_max) ? scf[s]/scf[s-1] : 0;
|
||||
const int nc2 = (s>min_scale_max) ? sc->children.tz : 0;
|
||||
const int nc = (int)sqrt(nc2);
|
||||
const int down_gap = sc->patch_size/4;
|
||||
const int down_f = (s>min_scale_max) ? sc[-1].f : 0;
|
||||
const float* down_layers = (s>min_scale_max) ? sc[-1].max_map.pixels : NULL;
|
||||
const int* down_assign = (s>min_scale_max) ? sc[-1].assign.pixels : NULL;
|
||||
|
||||
int l;
|
||||
#if defined(USE_OPENMP)
|
||||
#pragma omp parallel for num_threads(n_thread)
|
||||
#endif
|
||||
for(l=0; l<n_layers; l++) {
|
||||
// compute maxima_filter for each layer
|
||||
if(sc->assign.pixels[l]<0) continue; // no layer for this
|
||||
float* res_map = sc->res_map.pixels + sc->assign.pixels[l]*npix;
|
||||
float* max_map = sc->max_map.pixels + sc->assign.pixels[l]*npix;
|
||||
|
||||
// for each point which is a local maxima, check
|
||||
int i;
|
||||
for(i=0; i<npix; i++)
|
||||
if( res_map[i]>th && res_map[i]==max_map[i] ) {
|
||||
// ok, we have a maxima at this scale <s>
|
||||
const float val = res_map[i];
|
||||
int x = i%tx;
|
||||
int y = i/tx;
|
||||
if( nobordure && (x<1 || y<1 || x>=tx-1 || y>=ty-1) ) continue; // not interested in maxima on image bordures
|
||||
|
||||
//if(s==2 && l==344 && x==41 && y==4) getchar();
|
||||
|
||||
// now compare with lower scale
|
||||
if( check_children && s>min_scale_max ) {
|
||||
float valref = down_scf*val;
|
||||
int* children = sc->children.pixels + l*nc2;
|
||||
int u,v,ok=1;
|
||||
for(v=0; ok && v<nc; v++) {
|
||||
int uy = (f*y + (2*v/(nc-1)-1)*down_gap)/down_f;
|
||||
if( uy>=0 && uy<down_ty )
|
||||
for(u=0; u<nc; u++) {
|
||||
int ch = children[v*nc+u];
|
||||
if( ch < 0 ) continue;
|
||||
int ux = (f*x + (2*u/(nc-1)-1)*down_gap)/down_f;
|
||||
if( (ux>=0 && ux<down_tx) &&
|
||||
valref < pow(down_layers[down_assign[ch]*down_npix + uy*down_tx + ux],nlpow) ) {ok = 0; break;}
|
||||
}
|
||||
}
|
||||
if(!ok) continue; // this is not a maximum
|
||||
}
|
||||
|
||||
//if(s==2 && l==344 && x==41 && y==4) getchar();
|
||||
|
||||
// now compare with upper scale <s+1> and eliminate non-maxima
|
||||
if( check_parents && list_parents ) {
|
||||
float valref = upper_scf*val;
|
||||
const int* parents = list_parents + l*np2;
|
||||
int u,v,ok=1;
|
||||
for(v=0; ok && v<np; v++) {
|
||||
int uy = (f*y + (1-2*v/(np-1))*upper_gap)/upper_f;
|
||||
if( uy>=0 && uy<upper_ty )
|
||||
for(u=0; u<np; u++) {
|
||||
const int p = parents[v*np+u];
|
||||
if( p<0 ) continue;
|
||||
int ux = (f*x + (1-2*u/(np-1))*upper_gap)/upper_f;
|
||||
if( (ux>=0 && ux<upper_tx) &&
|
||||
valref < upper_layers[upper_assign[p]*upper_npix + uy*upper_tx + ux] ) {ok = 0; break;}
|
||||
}
|
||||
}
|
||||
if(!ok) continue; // this is not a maximum
|
||||
}
|
||||
|
||||
add_one_max( &res, s, l, x, y, res_map[i] );
|
||||
}
|
||||
}
|
||||
|
||||
free((void*)list_parents);
|
||||
}
|
||||
|
||||
// free memory
|
||||
for(s=min_scale_max; s<n_scales; s++) {
|
||||
free(scales[s].max_map.pixels);
|
||||
scales[s].max_map.pixels = NULL;
|
||||
}
|
||||
|
||||
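  // pack the maxima as rows of 5 values (scale, layer, x, y, score); the float
  // score is stored bit-wise in the last int column through the cast below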
res_out->tx = 5;
|
||||
res_out->ty = res.n_elems;
|
||||
res_out->pixels = (int*)res.list;
|
||||
}
|
||||
|
||||
|
||||
/* Return the best local child assignment in a 3x3 neighborhood
|
||||
l,u,v give the approximate position of the child in the corresponding response map[l,v,u]
|
||||
*/
|
||||
static inline float _local_argmax( long l, int u, int v, const float_layers* map, int extended, /*float reg,*/ int* x, int* y ) {
|
||||
assert(0<=l && l<map->tz);
|
||||
int umin = MAX(0, u-1);
|
||||
int vmin = MAX(0, v-1);
|
||||
const int etx = map->tx-extended; // because of extended response map
|
||||
const int ety = map->ty-extended;
|
||||
int umax = MIN(etx, u+2);
|
||||
int vmax = MIN(ety, v+2);
|
||||
|
||||
// determine best children in the neighborhood (argmax)
|
||||
const int tx = map->tx;
|
||||
int i,j,bestx=0,besty=0; float m=0.f;
|
||||
const float *r = map->pixels + l*tx*map->ty;
|
||||
for(j=vmin; j<vmax; j++)
|
||||
for(i=umin; i<umax; i++) {
|
||||
const int p = j*tx+i;
|
||||
if(r[p]>m) {m=r[p]; bestx=i; besty=j;}
|
||||
}
|
||||
*x = bestx;
|
||||
*y = besty;
|
||||
return m;
|
||||
}
|
||||
|
||||
/* Return the best assignment (= list of correspondences) for a given maxima
|
||||
from a pyramid top, this function returns
|
||||
a list of weighted correspondences (matches) between
|
||||
img0 pixels and img1 pixels
|
||||
*/
|
||||
void _argmax_correspondences_rec( res_scale* scales, int s, int l, int x, int y,
|
||||
float_cube* res0, int step0, float_cube* res1, int step1,
|
||||
int index, float score ) {
|
||||
res_scale* sc = scales + s;
|
||||
|
||||
if(s==0) {
|
||||
const int x0 = sc->grid.pixels[2*l];
|
||||
const int y0 = sc->grid.pixels[2*l+1];
|
||||
const int x1 = sc->f * x;
|
||||
const int y1 = sc->f * y;
|
||||
|
||||
const int qx0 = x0/step0;
|
||||
const int qy0 = y0/step0;
|
||||
//assert(0<=l && l<sc->res_map.tz);
|
||||
|
||||
if( qx0<res0->tx && qy0<res0->ty ) {
|
||||
assert(qx0>=0 && qy0>=0);
|
||||
float* r0 = res0->pixels + ((qy0*res0->tx + qx0))*res0->tz;
|
||||
//assert(res0->pixels<=r0 && r0+5<res0->pixels+res0->tx*res0->ty*res0->tz);
|
||||
|
||||
pthread_mutex_lock (&mutex0);
|
||||
if( score > r0[4] ) {
|
||||
// r[0:2] = pos in img0
|
||||
r0[0] = x0;
|
||||
r0[1] = y0;
|
||||
// r[2:4] = pos in img1
|
||||
r0[2] = x1;
|
||||
r0[3] = y1;
|
||||
// r[4] = score
|
||||
r0[4] = score;
|
||||
r0[5] = index;
|
||||
}
|
||||
pthread_mutex_unlock (&mutex0);
|
||||
|
||||
const int qx1 = x1/step1;
|
||||
const int qy1 = y1/step1;
|
||||
assert(qx1>=0 && qy1>=0);
|
||||
if( qx1<res1->tx && qy1<res1->ty ) {
|
||||
float* r1 = res1->pixels + ((qy1)*res1->tx + (qx1))*res1->tz;
|
||||
//assert(res1->pixels<=r1 && r1+5<res1->pixels+res1->tx*res1->ty*res1->tz);
|
||||
pthread_mutex_lock (&mutex1);
|
||||
if( score > r1[4] ) {
|
||||
// r[0:2] = pos in img0
|
||||
r1[0] = x0;
|
||||
r1[1] = y0;
|
||||
// r[2:4] = pos in img1
|
||||
r1[2] = x1;
|
||||
r1[3] = y1;
|
||||
// r[4] = score
|
||||
r1[4] = score;
|
||||
r1[5] = index;
|
||||
}
|
||||
pthread_mutex_unlock (&mutex1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// mark this maximum as already processed
|
||||
assert(0<=l && l<sc->assign.tx);
|
||||
if( sc->passed.pixels ) {
|
||||
const long truel = sc->assign.pixels[l];
|
||||
const long offset = ((truel*sc->true_shape[1] + MAX(0,y))*sc->true_shape[0] + MAX(0,x)) % sc->passed.tx;
|
||||
//pthread_mutex_lock (&mutex);
|
||||
int useless = ( sc->passed.pixels[offset] >= score );
|
||||
if(!useless) sc->passed.pixels[offset] = score;
|
||||
//pthread_mutex_unlock (&mutex);
|
||||
if(useless) return; // this maximum was already investigated with a better score
|
||||
}
|
||||
|
||||
const int f = sc->f;
|
||||
const res_scale* lower = &scales[s-1];
|
||||
const int lower_f = lower->f;
|
||||
// position in lower response map
|
||||
x *= f/lower_f;
|
||||
y *= f/lower_f;
|
||||
const int lower_gap = sc->patch_size/(4*lower_f); // gap is equal to patch_size/4 in absolute size
|
||||
const int nc2 = sc->children.tz;
|
||||
const int nc = (nc2==4) ? 2 : 3;
|
||||
const int* children = sc->children.pixels + l*nc2;
|
||||
const int* lower_ass = lower->assign.pixels;
|
||||
|
||||
// for all children
|
||||
int u,v,c=0;
|
||||
for(v=0; v<nc; v++) {
|
||||
for(u=0; u<nc; u++,c++) {
|
||||
const int ch = children[c];
|
||||
if(ch<0) continue;
|
||||
const long l = lower_ass[ch];
|
||||
if(l<0) continue;
|
||||
|
||||
// position of children in child1 = parent1 - (parent0-child0)
|
||||
int yc = y + (2*v/(nc-1)-1)*lower_gap;
|
||||
int xc = x + (2*u/(nc-1)-1)*lower_gap;
|
||||
int ex = 1; // extended response_maps
|
||||
|
||||
if( lower->offsets.pixels ) {
|
||||
// take offsets into account
|
||||
xc -= lower->offsets.pixels[2*l+0];
|
||||
yc -= lower->offsets.pixels[2*l+1];
|
||||
ex = 0; // no extension... maybe
|
||||
}
|
||||
|
||||
// position of children in child1 = parent1 - (parent0-child0)
|
||||
int xb, yb;
|
||||
float child_score = _local_argmax( lower_ass[ch], xc, yc, &lower->res_map, ex, &xb, &yb );
|
||||
|
||||
if( lower->offsets.pixels ) {
|
||||
// back to real image coordinates
|
||||
xb += lower->offsets.pixels[2*l+0];
|
||||
yb += lower->offsets.pixels[2*l+1];
|
||||
}
|
||||
|
||||
if( child_score )
|
||||
_argmax_correspondences_rec( scales, s-1, ch, xb, yb, res0, step0, res1, step1, index, score + child_score );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void _argmax_correspondences( res_scale* scales, int s, int l, int x, int y, float score,
|
||||
float_cube* res0, int step0, float_cube* res1, int step1,
|
||||
int index ) {
|
||||
assert(res0->tz==6);
|
||||
if(res1) assert(res0->tz==6);
|
||||
_argmax_correspondences_rec( scales, s, l, x, y, res0, step0, res1, step1, index, score );
|
||||
}
|
||||
|
||||
|
||||
void _argmax_correspondences_rec_v1( res_scale* scales, int s, int l, int x, int y,
|
||||
float_cube* res0, int step0, float_cube* res1, int step1,
|
||||
int index, float top_score ) {
|
||||
res_scale* sc = scales + s;
|
||||
const int f = sc->f;
|
||||
|
||||
if(s==0) {
|
||||
const int* ass = sc->assign.pixels;
|
||||
const float score = top_score * sc->res_map.pixels[(ass[l]*sc->res_map.ty + y)*sc->res_map.tx + x];
|
||||
const int x0 = sc->grid.pixels[2*l];
|
||||
const int y0 = sc->grid.pixels[2*l+1];
|
||||
const int x1 = f * x;
|
||||
const int y1 = f * y;
|
||||
|
||||
const int qx0 = x0/step0;
|
||||
const int qy0 = y0/step0;
|
||||
if( qx0<res0->tx && qy0<res0->ty ) {
|
||||
float* r0 = res0->pixels + ((qy0*res0->tx + qx0))*res0->tz;
|
||||
|
||||
pthread_mutex_lock (&mutex0);
|
||||
if( score > r0[4] ) {
|
||||
// r[0:2] = pos in img0
|
||||
r0[0] = x0;
|
||||
r0[1] = y0;
|
||||
// r[2:4] = pos in img1
|
||||
r0[2] = x1;
|
||||
r0[3] = y1;
|
||||
// r[4] = score
|
||||
r0[4] = score;
|
||||
r0[5] = index;
|
||||
}
|
||||
pthread_mutex_unlock (&mutex0);
|
||||
|
||||
if( res1 ) {
|
||||
const int qx1 = x1/step1;
|
||||
const int qy1 = y1/step1;
|
||||
// if( qx1<res1->tx && qy1<res1->ty ) { // useless check
|
||||
float* r1 = res1->pixels + ((qy1)*res1->tx + (qx1))*res1->tz;
|
||||
pthread_mutex_lock (&mutex1);
|
||||
if( score > r1[4] ) {
|
||||
// r[0:2] = pos in img0
|
||||
r1[0] = x0;
|
||||
r1[1] = y0;
|
||||
// r[2:4] = pos in img1
|
||||
r1[2] = x1;
|
||||
r1[3] = y1;
|
||||
// r[4] = score
|
||||
r1[4] = score;
|
||||
r1[5] = index;
|
||||
}
|
||||
pthread_mutex_unlock (&mutex1);
|
||||
}}
|
||||
|
||||
} else {
|
||||
const res_scale* lower = &scales[s-1];
|
||||
const int lower_f = lower->f;
|
||||
// position in lower response map
|
||||
x *= f/lower_f;
|
||||
y *= f/lower_f;
|
||||
const int lower_gap = sc->patch_size/(4*lower_f); // gap is equal to patch_size/4 in absolute size
|
||||
const int nc2 = sc->children.tz;
|
||||
const int nc = (nc2==4) ? 2 : 3;
|
||||
const int* children = sc->children.pixels + l*nc2;
|
||||
const int* lower_ass = lower->assign.pixels;
|
||||
|
||||
// remember all scores for all children
|
||||
int u,v,c=0;
|
||||
for(v=0; v<nc; v++) {
|
||||
const int yc = y + (2*v/(nc-1)-1)*lower_gap;
|
||||
for(u=0; u<nc; u++,c++) {
|
||||
int ch = children[c];
|
||||
if(ch<0) continue;
|
||||
const int xc = x + (2*u/(nc-1)-1)*lower_gap;
|
||||
|
||||
// position of children in child1 = parent1 - (parent0-child0)
|
||||
const int l = lower_ass[children[c]];
|
||||
int xb=0, yb=0;
|
||||
float child_score = _local_argmax( l, xc, yc, &lower->res_map, 1, &xb, &yb );
|
||||
|
||||
if( child_score>0 )
|
||||
_argmax_correspondences_rec_v1( scales, s-1, ch, xb, yb, res0, step0, res1, step1, index, top_score );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _argmax_correspondences_v1( res_scale* scales, int s, int l, int x, int y, float top_score,
|
||||
float_cube* res0, int step0, float_cube* res1, int step1,
|
||||
int index ) {
|
||||
assert(res0->tz==6);
|
||||
if(res1) assert(res0->tz==6);
|
||||
_argmax_correspondences_rec_v1( scales, s, l, x, y, res0, step0, res1, step1, index, top_score );
|
||||
}
|
||||
|
||||
|
||||
|
||||
static float** get_list_corres( const float_cube* map, int* nb ) {
|
||||
const int tz = map->tz;
|
||||
float* m = map->pixels;
|
||||
const long npix = map->tx*map->ty;
|
||||
float** res = NEWA(float*,npix);
|
||||
|
||||
int i,n=0;
|
||||
for(i=0; i<npix; i++,m+=tz)
|
||||
if(m[4]) { // if score non-null
|
||||
res[n++] = m; // remember pointer
|
||||
}
|
||||
|
||||
*nb = n;
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline int cmp_corres( const void* a, const void* b) {
|
||||
return memcmp(*(float**)a,*(float**)b,4*sizeof(float));
|
||||
}
|
||||
|
||||
|
||||
/* Intersect 2 mappings: erase all correspondences that are not reciprocal
|
||||
*/
|
||||
float* _intersect_corres( const float_cube* map0, const float_cube* map1, int* nres ) {
|
||||
const int tz = 6;
|
||||
assert( map0->tz==tz && map1->tz==tz );
|
||||
|
||||
// build the list of triplets
|
||||
int n0,n1;
|
||||
float** const corres0 = get_list_corres(map0,&n0);
|
||||
float** const corres1 = get_list_corres(map1,&n1);
|
||||
|
||||
// arg-sort the lists
|
||||
qsort( corres0, n0, sizeof(float*), cmp_corres );
|
||||
qsort( corres1, n1, sizeof(float*), cmp_corres );
|
||||
|
||||
// remove all correspondences from map0/map1 that are not shared
|
||||
float** c0 = corres0;
|
||||
float** c1 = corres1;
|
||||
float** const c0max = corres0 + n0;
|
||||
float** const c1max = corres1 + n1;
|
||||
float* res = NEWA(float, tz*MIN(n1,n0) );
|
||||
float* r = res;
|
||||
while(c0<c0max && c1<c1max) {
|
||||
int d = memcmp(*c0,*c1,5*sizeof(float));
|
||||
if(d<0) { // corres0 < corres1
|
||||
c0++;
|
||||
} else
|
||||
if(d>0) { // corres0 > corres1
|
||||
c1++;
|
||||
} else { // corres0 == corres1
|
||||
if( r==res || memcmp( r-tz, *c0, tz*sizeof(float) ) ) { // if not already copied
|
||||
memcpy( r, *c0, tz*sizeof(float) );
|
||||
r += tz;
|
||||
}
|
||||
c0++;
|
||||
c1++;
|
||||
}
|
||||
}
|
||||
|
||||
free(corres0);
|
||||
free(corres1);
|
||||
*nres = (r-res)/tz;
|
||||
return res;
|
||||
}
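
/* Usage sketch (illustration; res0/res1 stand for the two float_cube
   correspondence histograms filled by _argmax_correspondences, binned in img0
   and img1 respectively):

     int nres;
     float* corres = _intersect_corres(&res0, &res1, &nres);
     // corres now holds nres reciprocal matches of 6 floats each:
     // x0, y0, x1, y1, score, index
     free(corres);
*/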
|
||||
|
||||
|
||||
/* erase corres in the first array that are not in the second one
|
||||
*/
|
||||
void transfer_corres_score( const float_image* ref, float_cube* map0 ) {
|
||||
const int tz = 6;
|
||||
assert( map0->tz==tz && ref->tx==tz );
|
||||
|
||||
// build the list of triplets
|
||||
int n0,n1;
|
||||
float** const corres0 = get_list_corres(map0,&n0);
|
||||
float_cube map1 = {ref->pixels,1,ref->ty,ref->tx};
|
||||
float** const corres1 = get_list_corres(&map1,&n1);
|
||||
|
||||
// arg-sort the lists
|
||||
qsort( corres0, n0, sizeof(float*), cmp_corres );
|
||||
qsort( corres1, n1, sizeof(float*), cmp_corres );
|
||||
|
||||
// transfer scores for correspondences shared with ref, and erase the rest from map0
|
||||
float** c0 = corres0;
|
||||
float** c1 = corres1;
|
||||
float** const c0max = corres0 + n0;
|
||||
float** const c1max = corres1 + n1;
|
||||
while(c0<c0max && c1<c1max) {
|
||||
int d = memcmp(*c0,*c1,4*sizeof(float));
|
||||
if(d<0) { // corres0 < corres1
|
||||
c0++;
|
||||
} else
|
||||
if(d>0) { // corres0 > corres1
|
||||
assert(!"error: 'ref in map0' is not verified");
|
||||
c1++;
|
||||
} else { // corres0 == corres1
|
||||
(*c0)[4] = (*c1)[4]; // copy score from ref
|
||||
c0++;
|
||||
c1++;
|
||||
}
|
||||
}
|
||||
|
||||
while(c0<c0max) memset( *c0++, 0, tz*sizeof(float));
|
||||
|
||||
free(corres0);
|
||||
free(corres1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static inline float ptdot( const float* m, float x, float y ) {
|
||||
return x*m[0] + y*m[1] + m[2];
|
||||
}
|
||||
|
||||
|
||||
static void merge_one_side( const float aff[6], int step, float_cube* corres, float tol,
|
||||
int all_step, float_cube* all_corres, int offset ) {
|
||||
assert( corres->tz==6 && all_corres->tz==6 );
|
||||
const float* corres_pix = corres->pixels;
|
||||
assert(tol>=1);
|
||||
tol*=tol; // squared
|
||||
float dmax = 2*step / sqrt( aff[0]*aff[4] - aff[1]*aff[3] );
|
||||
dmax*=dmax; // squared
|
||||
|
||||
// for each bin of the final histograms, we get the nearest-neighbour bin in corres0 and corres1
|
||||
int i,j;
|
||||
for(j=0; j<all_corres->ty; j++)
|
||||
for(i=0; i<all_corres->tx; i++) {
|
||||
float* all_cor = all_corres->pixels + (j*all_corres->tx + i)*corres->tz;
|
||||
|
||||
// center of the bin in the reference frame
|
||||
float x = i*all_step + all_step/2;
|
||||
float y = j*all_step + all_step/2;
|
||||
|
||||
// center of the bin on the rescaled+rotated image
|
||||
float xr = ptdot( aff + 0, x, y );
|
||||
float yr = ptdot( aff + 3, x, y );
|
||||
|
||||
// iterate on the nearby bins
|
||||
int xb = (int)(0.5+ xr/step); // rescaled+rotated image is binned with size <step>
|
||||
int yb = (int)(0.5+ yr/step);
|
||||
int u,v;
|
||||
float best = 9e9f;
|
||||
for(v=MAX(0,yb-1); v<MIN(corres->ty,yb+2); v++)
|
||||
for(u=MAX(0,xb-1); u<MIN(corres->tx,xb+2); u++) {
|
||||
const float* cor = corres_pix + (v*corres->tx + u)*corres->tz;
|
||||
float d = pow2(cor[offset]-x) + pow2(cor[offset+1]-y);
|
||||
if( d < best && d<dmax ) best = d;
|
||||
}
|
||||
|
||||
for(v=MAX(0,yb-1); v<MIN(corres->ty,yb+2); v++)
|
||||
for(u=MAX(0,xb-1); u<MIN(corres->tx,xb+2); u++) {
|
||||
const float* cor = corres_pix + (v*corres->tx + u)*corres->tz;
|
||||
float d = pow2(cor[offset]-x) + pow2(cor[offset+1]-y);
|
||||
if( d <= tol*best ) { // spatially close
|
||||
// merge correspondence if score is better than actual
|
||||
if( cor[4] > all_cor[4] )
|
||||
memcpy( all_cor, cor, 6*sizeof(float) );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* merge correspondences from several rotated/scaled version of an image into a single common reference frame
|
||||
rot0 = 2x3 rotation matrix: (pt in rotated img0) = rot0 * (pt in ref frame)
|
||||
rot1 = 2x3 rotation matrix: (pt in rotated img1) = rot1 * (pt in ref frame)
|
||||
step0 and step1 are bin size of correspondences histograms
|
||||
tol >= 1 is the tolerance to grid rotation (default = 2)
|
||||
corres0, corres1: correspondences histograms of rotated image
|
||||
all_corres0, all_corres1: correspondences histograms of reference frame (result)
|
||||
*/
|
||||
void merge_corres( float rot0[6], float rot1[6], int step0, int step1,
|
||||
float_cube* corres0, float_cube* corres1, float tol,
|
||||
int all_step0, int all_step1, float_cube* all_corres0, float_cube* all_corres1 ) {
|
||||
|
||||
merge_one_side( rot0, step0, corres0, tol, all_step0, all_corres0, 0 );
|
||||
merge_one_side( rot1, step1, corres1, tol, all_step1, all_corres1, 2 );
|
||||
}
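
/* Note on the rot0/rot1 layout, inferred from ptdot() and merge_one_side(): the
   2x3 matrices are stored row-major as {m00, m01, tx, m10, m11, ty}, so the
   identity transform (no scaling or rotation) is

     float rot_id[6] = {1,0,0, 0,1,0};

   and a reference-frame point (x,y) maps to
   (ptdot(rot_id+0,x,y), ptdot(rot_id+3,x,y)) = (x,y). */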
|
||||
@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___MAXFILTER_H___
|
||||
#define ___MAXFILTER_H___
|
||||
#include "array_types.h"
|
||||
#include "deep_matching.h"
|
||||
|
||||
|
||||
/* compute the 3x3 maximum filter on an image and store the result in <res>
|
||||
*/
|
||||
void _max_filter_3( float_image* img, float_image* res, int n_thread );
|
||||
|
||||
|
||||
/* Same as above for float_layers* images
|
||||
*/
|
||||
void _max_filter_3_layers( float_layers* img, float_layers* res, int n_thread );
|
||||
|
||||
|
||||
/* Subsample an array, equivalent to res = img[:,1::2,1::2]
|
||||
*/
|
||||
void _subsample2( float_layers* img, float_layers* res, int n_thread );
|
||||
|
||||
/* joint max-pooling and subsampling
|
||||
*/
|
||||
void _max_filter_3_and_subsample_layers( float_layers* img, float_layers* res, int n_thread );
|
||||
|
||||
|
||||
/* Subsample an array, equivalent to res = trueimg[:,offset_y::2,offset_x::2]
|
||||
except at boundaries, where the rules are a bit more complex (see code)
|
||||
*/
|
||||
void _subsample2_offset( float_layers* img, int_image* offsets, float_layers* res, int n_thread );
|
||||
|
||||
/* Max-pool in 2x2 px non-overlapping cells
|
||||
*/
|
||||
void _maxpool2( float_layers* img, float_layers* res, int n_thread );
|
||||
|
||||
|
||||
/* average-pool in 2x2 px non-overlapping cells
|
||||
*/
|
||||
void _avgpool2( float_layers* img, float_layers* res, int n_thread );
|
||||
|
||||
|
||||
/* Return the list of parent cells of all cells of a given scale (parents are from the upper scale)
|
||||
children: list of children of the parent cells
|
||||
res: result matrix, n_cells_at_current_scale x n_max_parents
|
||||
res == -1 when there is no parent
|
||||
*/
|
||||
void _get_list_parents( int_cube* children, int_image* res );
|
||||
|
||||
|
||||
/* Return a list of local maxima in the scale-space of scores
|
||||
*/
|
||||
void _extract_maxima( res_scale* scales, int n_scales, float_array* sc_factor, float th, int min_scale, float nlpow,
|
||||
int check_parents, int check_children, int nobordure, int_image* res_out, int n_thread );
|
||||
|
||||
|
||||
/* Return the best assignment (= list of correspondences) for a given maxima
|
||||
from a pyramid top, this function returns
|
||||
a list of weighted correspondences (matches) between
|
||||
img0 pixels and img1 pixels
|
||||
index = index of the maxima (s,l,x,y), so that it can be linked to the correspondences it generated
|
||||
*/
|
||||
void _argmax_correspondences( res_scale* scales, int s, int l, int x, int y, float score,
|
||||
float_cube* res0, int step0, float_cube* res1, int step1,
|
||||
int index );
|
||||
|
||||
void _argmax_correspondences_v1( res_scale* scales, int s, int l, int x, int y, float score,
|
||||
float_cube* res0, int step0, float_cube* res1, int step1,
|
||||
int index );
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Intersect 2 mappings: erase all correspondences that are not reciprocal
|
||||
*/
|
||||
float* _intersect_corres( const float_cube* map0, const float_cube* map1, int* nres );
|
||||
|
||||
|
||||
/* erase corres in the first array that are not in the second one
|
||||
*/
|
||||
void transfer_corres_score( const float_image* ref, float_cube* map0 );
|
||||
|
||||
|
||||
|
||||
|
||||
/* merge correspondences from several rotated/scaled version of an image into a single common reference frame
|
||||
rot0 = 2x3 rotation matrix: (pt in rotated img0) = rot0 * (pt in ref frame)
|
||||
rot1 = 2x3 rotation matrix: (pt in rotated img1) = rot1 * (pt in ref frame)
|
||||
step0 and step1 are bin size of correspondences histograms
|
||||
tol >= 1 is the tolerance to grid rotation (default = 2)
|
||||
corres0, corres1: correspondences histograms of rotated image
|
||||
all_corres0, all_corres1: correspondences histograms of reference frame (result)
|
||||
*/
|
||||
void merge_corres( float rot0[6], float rot1[6], int step0, int step1,
|
||||
float_cube* corres0, float_cube* corres1, float tol,
|
||||
int all_step0, int all_step1, float_cube* all_corres0, float_cube* all_corres1 );
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,150 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#include "pixel_desc.h"
|
||||
#include "std.h"
|
||||
#include "image.h"
|
||||
#include "hog.h"
|
||||
#include "conv.h"
|
||||
|
||||
|
||||
/* convert a float image to a consecutive array
|
||||
a bit wasteful, but simple
|
||||
*/
|
||||
UBYTE_image* image_to_arraytype( image_t* img ) {
|
||||
UBYTE_image* res = NEW(UBYTE_image);
|
||||
*res = empty_image(UBYTE,img->width,img->height);
|
||||
|
||||
for(int j=0; j<img->height; j++)
|
||||
for(int i=0; i<img->width; i++)
|
||||
res->pixels[i+j*res->tx] = (UBYTE)img->data[i+j*img->stride];
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
// set default params
|
||||
void set_default_desc_params( desc_params_t* params )
|
||||
{
|
||||
// default = jpg settings,
|
||||
// better in almost all cases
|
||||
params->presmooth_sigma = 1.0;
|
||||
params->mid_smoothing = 1.0;
|
||||
params->post_smoothing = 1.0;
|
||||
params->hog_sigmoid = 0.2;
|
||||
params->ninth_dim = 0.3;
|
||||
params->norm_pixels = false;
|
||||
}
|
||||
|
||||
|
||||
/* extract pixel descriptors (pixel-wise HOG)
|
||||
*/
|
||||
float_layers* extract_desc( image_t* _img, const desc_params_t* params, int nt )
|
||||
{
|
||||
// verify parameters
|
||||
assert(between(0,params->presmooth_sigma,3));
|
||||
assert(between(0,params->mid_smoothing,3));
|
||||
assert(between(0,params->post_smoothing,3));
|
||||
assert(between(0.05,params->hog_sigmoid,0.8));
|
||||
assert(between(0,params->ninth_dim,1));
|
||||
assert(between(0,params->norm_pixels,1));
|
||||
|
||||
UBYTE_image* img = image_to_arraytype(_img); // could be optimized but well
|
||||
const int npix = img->tx*img->ty;
|
||||
//hash_image(img)D(img->tx)D(img->ty)
|
||||
|
||||
// pre-smooth image
|
||||
assert( params->presmooth_sigma>=0 );
|
||||
if( params->presmooth_sigma>0 )
|
||||
_smooth_gaussian( img, params->presmooth_sigma, img, nt );
|
||||
//hash_image(img)
|
||||
|
||||
// extract HOG
|
||||
float_layers grad = {NEWA(float,npix*2),img->tx,img->ty,2};
|
||||
_compute_grad_101( img, 0, &grad, nt );
|
||||
//hash_cube(&grad)
|
||||
float_layers* hog = NEW(float_layers);
|
||||
*hog = {NEWA(float,9*npix),img->tx,img->ty,8};
|
||||
_compute_hog( &grad, 1, hog, nt );
|
||||
free(grad.pixels);
|
||||
free_image(img);
|
||||
//hash_layers(hog)
|
||||
|
||||
// mid smoothing
|
||||
assert( params->mid_smoothing>=0 );
|
||||
if( params->mid_smoothing )
|
||||
smooth_hog_gaussian( hog, params->mid_smoothing, nt );
|
||||
//hash_layers(hog)
|
||||
|
||||
// apply non-linearity
|
||||
assert( params->hog_sigmoid>=0 );
|
||||
if( params->hog_sigmoid ) {
|
||||
float_array hog_ravel = {hog->pixels,npix*hog->tz};
|
||||
sigmoid_array( &hog_ravel, params->hog_sigmoid, 0, nt);
|
||||
}
|
||||
//hash_layers(hog)
|
||||
|
||||
// final smoothing
|
||||
assert( params->post_smoothing>=0 );
|
||||
if( params->post_smoothing )
|
||||
smooth_hog_gaussian( hog, params->post_smoothing, nt );
|
||||
//hash_layers(hog)
|
||||
|
||||
// add ninth dimension and normalize per-pixel
|
||||
float* ninth_layer = hog->pixels + hog->tz*npix;
|
||||
for(int i=0; i<npix; i++)
|
||||
ninth_layer[i] = params->ninth_dim;
|
||||
hog->tz++;
|
||||
//hash_layers(hog)
|
||||
if( params->norm_pixels )
|
||||
norm_layers( hog, 1, nt );
|
||||
//hash_layers(hog);D(0)getchar();
|
||||
|
||||
return hog;
|
||||
}
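
/* Usage sketch (illustration; the grayscale image_t would be loaded and
   converted as in main.cpp, and the thread count is arbitrary):

     desc_params_t params;
     set_default_desc_params(&params);
     float_layers* desc = extract_desc(img, &params, 4);
     // desc->tz == 9: 8 smoothed, sigmoid-rescaled oriented-gradient channels
     // plus the constant "ninth dimension" used for gradient-less areas
*/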
|
||||
@ -0,0 +1,43 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___PIXEL_DESC_H___
|
||||
#define ___PIXEL_DESC_H___
|
||||
#include "image.h"
|
||||
#include "array_types.h"
|
||||
|
||||
|
||||
// pixel descriptor params
|
||||
typedef struct {
|
||||
float presmooth_sigma; // image pre-smoothing
|
||||
float mid_smoothing; // smoothing of oriented gradients (before sigmoid)
|
||||
float post_smoothing; // smoothing of oriented gradients (after sigmoid)
|
||||
float hog_sigmoid; // sigmoid strength
|
||||
float ninth_dim; // small constant for gradient-less area
|
||||
bool norm_pixels; // 1: normalize pixels separately / 0: normalize atomic patches
|
||||
|
||||
} desc_params_t;
|
||||
|
||||
|
||||
// set default params
|
||||
void set_default_desc_params( desc_params_t* params );
|
||||
|
||||
|
||||
/* extract pixel descriptors (pixel-wise HOG)
|
||||
*/
|
||||
float_layers* extract_desc( image_t* _img, const desc_params_t* params, int nt );
|
||||
|
||||
#endif
|
@ -0,0 +1,116 @@
|
||||
import sys
from PIL import Image
|
||||
from numpy import *
|
||||
import scipy.ndimage
|
||||
|
||||
def score_from_autocorr(img0, img1, corres):
|
||||
# Code by Philippe Weinzaepfel
|
||||
# Compute autocorrelation
|
||||
# parameters
|
||||
sigma_image = 0.8 # for the gaussian filter applied to images before computing derivatives
|
||||
sigma_matrix = 3.0 # for the integration gaussian filter
|
||||
derivfilter = array([-0.5,0,0.5]) # function to compute the derivatives
|
||||
# smooth_images
|
||||
tmp = scipy.ndimage.filters.gaussian_filter1d(img0.astype(float32), sigma_image, axis=0, order=0, mode='nearest')
|
||||
img0_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_image, axis=1, order=0, mode='nearest')
|
||||
# compute the derivatives
|
||||
img0_dx = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=0, mode='nearest')
|
||||
img0_dy = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=1, mode='nearest')
|
||||
# compute the auto correlation matrix
|
||||
dx2 = sum(img0_dx*img0_dx,axis=2)
|
||||
dxy = sum(img0_dx*img0_dy,axis=2)
|
||||
dy2 = sum(img0_dy*img0_dy,axis=2)
|
||||
# integrate it
|
||||
tmp = scipy.ndimage.filters.gaussian_filter1d(dx2, sigma_matrix, axis=0, order=0, mode='nearest')
|
||||
dx2_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_matrix, axis=1, order=0, mode='nearest')
|
||||
tmp = scipy.ndimage.filters.gaussian_filter1d(dxy, sigma_matrix, axis=0, order=0, mode='nearest')
|
||||
dxy_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_matrix, axis=1, order=0, mode='nearest')
|
||||
tmp = scipy.ndimage.filters.gaussian_filter1d(dy2, sigma_matrix, axis=0, order=0, mode='nearest')
|
||||
dy2_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_matrix, axis=1, order=0, mode='nearest')
|
||||
# compute minimal eigenvalues: it is done by computing (dx2+dy2)/2 - sqrt( ((dx2+dy2)/2)^2 + (dxy)^2 - dx^2*dy^2)
|
||||
tmp = 0.5*(dx2_smooth+dy2_smooth)
|
||||
small_eigen = tmp - sqrt( maximum(0,tmp*tmp + dxy_smooth*dxy_smooth - dx2_smooth*dy2_smooth)) # the numbers can be negative in practice due to rounding errors
|
||||
large_eigen = tmp + sqrt( maximum(0,tmp*tmp + dxy_smooth*dxy_smooth - dx2_smooth*dy2_smooth))
|
||||
# Compute weight as flow score: preparing variable
|
||||
#parameters
|
||||
sigma_image = 0.8 # gaussian applied to images
|
||||
derivfilter = array([1.0,-8.0,0.0,8.0,-1.0])/12.0 # filter to compute the derivatives
|
||||
sigma_score = 50.0 # gaussian to convert dist to score
|
||||
mul_coef = 10.0 # multiplicative coefficients
|
||||
# smooth images
|
||||
tmp = scipy.ndimage.filters.gaussian_filter1d(img0.astype(float32), sigma_image, axis=0, order=0, mode='nearest')
|
||||
img0_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_image, axis=1, order=0, mode='nearest')
|
||||
tmp = scipy.ndimage.filters.gaussian_filter1d(img1.astype(float32), sigma_image, axis=0, order=0, mode='nearest')
|
||||
img1_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_image, axis=1, order=0, mode='nearest')
|
||||
# compute derivatives
|
||||
img0_dx = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=0, mode='nearest')
|
||||
img0_dy = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=1, mode='nearest')
|
||||
img1_dx = scipy.ndimage.filters.convolve1d(img1_smooth, derivfilter, axis=0, mode='nearest')
|
||||
img1_dy = scipy.ndimage.filters.convolve1d(img1_smooth, derivfilter, axis=1, mode='nearest')
|
||||
# compute it
|
||||
res = []
|
||||
for pos0, pos1, score in corres:
|
||||
p0, p1 = tuple(pos0)[::-1], tuple(pos1)[::-1] # numpy coordinates
|
||||
dist = sum( abs(img0_smooth[p0]-img1_smooth[p1]) + abs(img0_dx[p0]-img1_dx[p1]) + abs(img0_dy[p0]-img1_dy[p1]) )
|
||||
score = mul_coef * sqrt( max(0,small_eigen[p0])) / (sigma_score*sqrt(2*pi))*exp(-0.5*square(dist/sigma_score));
|
||||
res.append((pos0,pos1,score))
|
||||
return res
|
||||
|
||||
|
||||
if __name__=='__main__':
|
||||
args = sys.argv[1:]
|
||||
img0 = array(Image.open(args[0]).convert('RGB'))
|
||||
img1 = array(Image.open(args[1]).convert('RGB'))
|
||||
out = open(args[2], 'w') if len(args)>=3 else sys.stdout
|
||||
|
||||
ty0, tx0 = img0.shape[:2]
|
||||
ty1, tx1 = img1.shape[:2]
|
||||
rint = lambda s: int(0.5+float(s))
|
||||
|
||||
retained_matches = []
|
||||
for line in sys.stdin:
|
||||
line = line.split()
|
||||
if not line or len(line)!=6 or not line[0][0].isdigit(): continue
|
||||
x0, y0, x1, y1, score, index = line
|
||||
retained_matches.append(((min(tx0-1,rint(x0)),min(ty0-1,rint(y0))),
|
||||
(min(tx1-1,rint(x1)),min(ty1-1,rint(y1))),0))
|
||||
|
||||
assert retained_matches, 'error: no matches piped to this program'
|
||||
|
||||
for p0, p1, score in score_from_autocorr(img0, img1, retained_matches):
|
||||
print >>out, '%d %d %d %d %f' %(p0[0],p0[1],p1[0],p1[1],score)
|
||||
@ -0,0 +1,17 @@
|
||||
#include "std.h"
|
||||
#include <stdarg.h>
|
||||
#include "stdio.h"
|
||||
|
||||
void std_printf(const char* format, ... ) {
|
||||
va_list arglist;
|
||||
va_start( arglist, format );
|
||||
vprintf( format, arglist );
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
void err_printf(const char* format, ... ) {
|
||||
va_list arglist;
|
||||
va_start( arglist, format );
|
||||
vfprintf( stderr, format, arglist );
|
||||
va_end(arglist);
|
||||
}
|
@ -0,0 +1,132 @@
|
||||
/*
|
||||
Copyright (C) 2014 Jerome Revaud
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
#ifndef ___STD_H___
|
||||
#define ___STD_H___
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
//#include <time.h>
|
||||
|
||||
#define MIN(a,b) (((a)<(b)) ? (a) : (b))
|
||||
#define MAX(a,b) (((a)>(b)) ? (a) : (b))
|
||||
#define SWAP(a,b,type) {type _t = a; a = b; b = _t;}
|
||||
#define between(min,val,max) (min<=val && val<=max)
|
||||
|
||||
#define NEWA(type,n) (type*)malloc(sizeof(type)*long(n))
|
||||
#define NEWAC(type,n) (type*)calloc(sizeof(type),(n))
|
||||
#define NEW(type) NEWA(type,1)
|
||||
#define REALLOC(ptr,type,n) ptr = (type*)realloc(ptr, sizeof(type)*long(n))
|
||||
|
||||
/* debugging macros */
|
||||
#define P(x) printf(#x " = %g\n",(double)(x));
|
||||
#define D(x) P(x)
|
||||
#define DA(x,nb) {int _iter; printf(#x " = {"); for(_iter=0; _iter<nb; _iter++) printf("%g,",(double)((x)[_iter])); puts("}");}
|
||||
#define ASSERT(test,msg,p1) if(!(test)){fprintf(stderr," ---\n " msg "\n ---\n",p1); assert(0);}
|
||||
#define EXIT(msg,p1) ASSERT(1,msg,p1)
|
||||
|
||||
static inline void memswap( void* a, void* b, unsigned int nbytes ) {
|
||||
while(nbytes>=sizeof(double)) {
|
||||
double tmp = *(double*)a;
|
||||
*((double*&)a)++ = *(double*)b;
|
||||
*((double*&)b)++ = tmp;
|
||||
nbytes -= sizeof(double);
|
||||
}
|
||||
while(nbytes) {
|
||||
char tmp = *(char*)a;
|
||||
*((char*&)a)++ = *(char*)b;
|
||||
*((char*&)b)++ = tmp;
|
||||
nbytes--;
|
||||
}
|
||||
}
|
||||
|
||||
static inline float pow2( float f ) {
|
||||
return f*f;
|
||||
}
|
||||
static inline bool ispowerof2( long n ) {
|
||||
return (n & (n-1))==0;
|
||||
}
|
||||
|
||||
const double INF = 1.0/0.0;
|
||||
const double NaN = 0.0/0.0;
|
||||
const int INT_MIN = 0x80000000;
|
||||
const int INT_MAX = 0x7FFFFFFF;
|
||||
const float FLOAT_MIN = -1e39; // converted to -inf
|
||||
const float FLOAT_MAX = +1e39; // converted to +inf
|
||||
|
||||
|
||||
inline float min_array_f(const float* a, int n) {
|
||||
int i=n;
|
||||
float res = FLOAT_MAX;
|
||||
while(i--) if(a[i]<res) res=a[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
inline float max_array_f(const float* a, int n) {
|
||||
int i=n;
|
||||
float res = FLOAT_MIN;
|
||||
while(i--) if(a[i]>res) res=a[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
// override printf because matlab can't use it as such
|
||||
void std_printf(const char* fmt, ... );
|
||||
void err_printf(const char* fmt, ... );
|
||||
|
||||
|
||||
//#include <sys/time.h>
|
||||
//inline double now()
|
||||
//{
|
||||
// struct timeval tv;
|
||||
// gettimeofday (&tv,NULL);
|
||||
// return (tv.tv_sec*1e3 +tv.tv_usec*1e-3)/1000;
|
||||
//}
|
||||
//#define tic {double t = now();
|
||||
//#define toc t=now()-t; printf("elapsed time = %g ms\n",1000*t);}
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,117 @@
|
||||
import sys
|
||||
from PIL import Image
|
||||
from numpy import *
|
||||
from matplotlib.pyplot import *
|
||||
|
||||
|
||||
def show_correspondences( img0, img1, corr ):
|
||||
assert corr.shape[-1]==6
|
||||
corr = corr[corr[:,4]>0,:]
|
||||
|
||||
# make beautiful colors
|
||||
center = corr[:,[1,0]].mean(axis=0) # array(img0.shape[:2])/2 #
|
||||
corr[:,5] = arctan2(*(corr[:,[1,0]] - center).T)
|
||||
corr[:,5] = int32(64*corr[:,5]/pi) % 128
|
||||
|
||||
set_max = set(corr[:,5])
|
||||
colors = {m:i for i,m in enumerate(set_max)}
|
||||
colors = {m:cm.hsv(i/float(len(colors))) for m,i in colors.items()}
|
||||
|
||||
def motion_notify_callback(event):
|
||||
if event.inaxes==None: return
|
||||
numaxis = event.inaxes.numaxis
|
||||
if numaxis<0: return
|
||||
x,y = event.xdata, event.ydata
|
||||
ax1.lines = []
|
||||
ax2.lines = []
|
||||
n = sum((corr[:,2*numaxis:2*(numaxis+1)] - [x,y])**2,1).argmin() # find nearest point
|
||||
x,y = corr[n,0:2]
|
||||
ax1.plot(x,y,'+',ms=10,mew=2,color='blue',scalex=False,scaley=False)
|
||||
x,y = corr[n,2:4]
|
||||
ax2.plot(x,y,'+',ms=10,mew=2,color='red',scalex=False,scaley=False)
|
||||
# we redraw only the concerned axes
|
||||
renderer = fig.canvas.get_renderer()
|
||||
ax1.draw(renderer)
|
||||
ax2.draw(renderer)
|
||||
fig.canvas.blit(ax1.bbox)
|
||||
fig.canvas.blit(ax2.bbox)
|
||||
|
||||
def noticks():
|
||||
xticks([])
|
||||
yticks([])
|
||||
clf()
|
||||
ax1 = subplot(221)
|
||||
ax1.numaxis = 0
|
||||
imshow(img0,interpolation='nearest')
|
||||
noticks()
|
||||
ax2 = subplot(222)
|
||||
ax2.numaxis = 1
|
||||
imshow(img1,interpolation='nearest')
|
||||
noticks()
|
||||
|
||||
ax = subplot(223)
|
||||
ax.numaxis = -1
|
||||
imshow(img0,interpolation='nearest')
|
||||
for m in set_max:
|
||||
plot(corr[corr[:,5]==m,0],corr[corr[:,5]==m,1],'+',ms=10,mew=2,color=colors[m],scalex=0,scaley=0)
|
||||
noticks()
|
||||
|
||||
ax = subplot(224)
|
||||
ax.numaxis = -1
|
||||
imshow(img1,interpolation='nearest')
|
||||
for m in set_max:
|
||||
plot(corr[corr[:,5]==m,2],corr[corr[:,5]==m,3],'+',ms=10,mew=2,color=colors[m],scalex=0,scaley=0)
|
||||
noticks()
|
||||
show()
|
||||
subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.99,
|
||||
wspace=0.02, hspace=0.02)
|
||||
|
||||
fig = get_current_fig_manager().canvas.figure
|
||||
cid_move = fig.canvas.mpl_connect('motion_notify_event',motion_notify_callback)
|
||||
|
||||
show()
|
||||
fig.canvas.mpl_disconnect(cid_move)
|
||||
|
||||
|
||||
|
||||
if __name__=='__main__':
|
||||
args = sys.argv[1:]
|
||||
img0 = array(Image.open(args[0]).convert('RGB'))
|
||||
img1 = array(Image.open(args[1]).convert('RGB'))
|
||||
|
||||
retained_matches = []
|
||||
for line in sys.stdin:
|
||||
line = line.split()
|
||||
if not line or len(line)!=6 or not line[0][0].isdigit(): continue
|
||||
x0, y0, x1, y1, score, index = line
|
||||
retained_matches.append((float(x0),float(y0),float(x1),float(y1),float(score),float(index)))
|
||||
|
||||
assert retained_matches, 'error: no matches piped to this program'
|
||||
show_correspondences(img0, img1, array(retained_matches))
|
||||
@ -0,0 +1,122 @@
|
||||
# coding: utf-8
|
||||
'''
|
||||
File: matching.py
|
||||
Project: AlphaPose
|
||||
File Created: Monday, 1st October 2018 12:53:12 pm
|
||||
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
|
||||
'''
|
||||
|
||||
|
||||
import os
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
import time
|
||||
import argparse
|
||||
|
||||
def generate_fake_cor(img, out_path):
|
||||
print("Generate fake correspondence files...%s"%out_path)
|
||||
fd = open(out_path,"w")
|
||||
height, width, channels = img.shape
|
||||
|
||||
for x in range(width):
|
||||
for y in range(height):
|
||||
ret = fd.write("%d %d %d %d %f \n"%(x, y, x, y, 1.0))
|
||||
fd.close()
|
||||
|
||||
|
||||
def orb_matching(img1_path, img2_path, vidname, img1_id, img2_id):
|
||||
|
||||
out_path = "%s/%s_%s_orb.txt"%(vidname, img1_id, img2_id)
|
||||
# print(out_path)
|
||||
|
||||
img1 = cv2.cvtColor(cv2.imread(img1_path), cv2.COLOR_BGR2RGB)
|
||||
img2 = cv2.cvtColor(cv2.imread(img2_path), cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Initiate ORB detector
|
||||
orb = cv2.ORB_create(nfeatures=10000, scoreType=cv2.ORB_FAST_SCORE)
|
||||
|
||||
# find the keypoints and descriptors with ORB
|
||||
kp1, des1 = orb.detectAndCompute(img1,None)
|
||||
kp2, des2 = orb.detectAndCompute(img2,None)
|
||||
|
||||
if len(kp1)*len(kp2) < 400:
|
||||
generate_fake_cor(img1, out_path)
|
||||
return
|
||||
|
||||
# FLANN parameters
|
||||
FLANN_INDEX_LSH = 6
|
||||
index_params= dict(algorithm = FLANN_INDEX_LSH,
|
||||
table_number = 12, # 12
|
||||
key_size = 12, # 20
|
||||
multi_probe_level = 2) #2
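# an LSH index is used because ORB descriptors are binary and are compared in
# Hamming space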
|
||||
|
||||
search_params = dict(checks=100) # or pass empty dictionary
|
||||
|
||||
flann = cv2.FlannBasedMatcher(index_params,search_params)
|
||||
|
||||
matches = flann.knnMatch(des1, des2, k=2)
|
||||
|
||||
# Open file
|
||||
fd = open(out_path,"w")
|
||||
|
||||
# ratio test as per Lowe's paper
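# keep a match only when its best distance is clearly smaller (< 0.80x) than the
# second-best distance, discarding ambiguous correspondences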
|
||||
for i, m_n in enumerate(matches):
|
||||
if len(m_n) != 2:
|
||||
continue
|
||||
elif m_n[0].distance < 0.80*m_n[1].distance:
|
||||
ret = fd.write("%d %d %d %d %f \n"%(kp1[m_n[0].queryIdx].pt[0], kp1[m_n[0].queryIdx].pt[1], kp2[m_n[0].trainIdx].pt[0], kp2[m_n[0].trainIdx].pt[1], m_n[0].distance))
|
||||
|
||||
# Close opened file
|
||||
fd.close()
|
||||
|
||||
# print(os.stat(out_path).st_size)
|
||||
|
||||
if os.stat(out_path).st_size<1000:
|
||||
generate_fake_cor(img1, out_path)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
parser = argparse.ArgumentParser(description='PoseFlow Matching')
|
||||
parser.add_argument('--orb', type=int, default=0)
|
||||
args = parser.parse_args()
|
||||
|
||||
image_dir = "posetrack_data/images"
|
||||
imgnames = []
|
||||
vidnames = []
|
||||
|
||||
for a,b,c in os.walk(image_dir):
|
||||
if len(a.split("/")) == 4:
|
||||
vidnames.append(a)
|
||||
|
||||
for vidname in tqdm(sorted(vidnames)):
|
||||
for a,b,c in os.walk(vidname):
|
||||
c=[item for item in c if "jpg" in item]
|
||||
imgnames = sorted(c)
|
||||
break
|
||||
for imgname in imgnames[:-1]:
|
||||
if 'crop' in imgname:
|
||||
continue
|
||||
img1 = os.path.join(vidname,imgname)
|
||||
len_name = len(imgname.split(".")[0])
|
||||
if len_name == 5:
|
||||
img2 = os.path.join(vidname,"%05d.jpg"%(int(imgname.split(".")[0])+1))
|
||||
else:
|
||||
img2 = os.path.join(vidname,"%08d.jpg"%(int(imgname.split(".")[0])+1))
|
||||
if not os.path.exists(img2):
|
||||
continue
|
||||
img1_id = img1.split(".")[0].split("/")[-1]
|
||||
img2_id = img2.split(".")[0].split("/")[-1]
|
||||
if args.orb:
|
||||
cor_file = "%s/%s_%s_orb.txt"%(vidname,img1_id,img2_id)
|
||||
else:
|
||||
cor_file = "%s/%s_%s.txt"%(vidname,img1_id,img2_id)
|
||||
if not os.path.exists(cor_file) or os.stat(cor_file).st_size<1000:
|
||||
if args.orb:
|
||||
# calc orb matching
|
||||
orb_matching(img1,img2,vidname,img1_id,img2_id)
|
||||
else:
|
||||
# calc deep matching
|
||||
cmd = "./deepmatching/deepmatching %s %s -nt 10 -downscale 3 -out %s/%s_%s.txt > cache"%(img1,img2,vidname,img1_id,img2_id)
|
||||
os.system(cmd)
|
[binary image added: 3.9 MiB]
[binary image added: 3.1 MiB]
@ -0,0 +1,9 @@
|
||||
numpy==1.14.5
|
||||
scipy==1.1.0
|
||||
opencv_python==3.4.2.16
|
||||
opencv_contrib_python==3.4.2.16
|
||||
matplotlib==2.2.2
|
||||
tqdm==4.23.4
|
||||
Image==1.5.25
|
||||
Pillow==5.3.0
|
||||
munkres==1.0.12
|
@ -0,0 +1,278 @@
|
||||
# coding: utf-8
|
||||
|
||||
'''
|
||||
File: tracker-baseline.py
|
||||
Project: AlphaPose
|
||||
File Created: Thursday, 1st March 2018 6:12:23 pm
|
||||
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
-----
|
||||
Last Modified: Monday, 1st October 2018 12:53:12 pm
|
||||
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
-----
|
||||
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import os
|
||||
import json
|
||||
import copy
|
||||
import heapq
|
||||
from munkres import Munkres, print_matrix
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
from utils import *
|
||||
from matching import orb_matching
|
||||
import argparse
|
||||
|
||||
|
||||
# posetrack dataset path
|
||||
image_dir = "./posetrack_data"
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
parser = argparse.ArgumentParser(description='PoseFlow Tracker')
|
||||
parser.add_argument('--link', type=int, default=100)
|
||||
parser.add_argument('--drop', type=float, default=2.0)
|
||||
parser.add_argument('--num', type=int, default=7)
|
||||
parser.add_argument('--mag', type=int, default=30)
|
||||
parser.add_argument('--match', type=float, default=0.2)
|
||||
parser.add_argument('--dataset', type=str, default='val')
|
||||
parser.add_argument('--orb', type=int, default=0)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# super parameters
|
||||
# 1. look-ahead LINK_LEN frames to find tracked human bbox
|
||||
# 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
|
||||
# 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score(Non DeepMatching)
|
||||
# 4. drop low-score(<DROP) keypoints
|
||||
# 5. pick high-score(top NUM) keypoints when computing pose_IOU
|
||||
# 6. box width/height around keypoint for computing pose IoU
|
||||
# 7. match threshold in Hungarian Matching
|
||||
# 8. dataset = 'test' or 'val'
|
||||
# 9. use orb matching or not
|
||||
|
||||
link_len = args.link
|
||||
weights = [1,2,1,2,0,0]
|
||||
weights_fff = [0,1,0,1,0,0]
|
||||
drop = args.drop
|
||||
num = args.num
|
||||
mag = args.mag
|
||||
match_thres = args.match
|
||||
dataset = args.dataset
|
||||
use_orb = args.orb
|
||||
|
||||
anno_dir = "./posetrack_data/annotations/{}".format(dataset)
|
||||
notrack_json = "alpha-pose-results-{}.json".format(dataset)
|
||||
track_dir = "{}-predict".format(dataset) # results dir name
|
||||
|
||||
if not os.path.exists(track_dir):
|
||||
os.mkdir(track_dir)
|
||||
|
||||
track = {}
|
||||
cur_vname = ""
|
||||
num_persons = 0
|
||||
|
||||
# load json file without tracking information
|
||||
# Note: this loading step takes a while, so the result is cached with np.save below and can be reloaded with np.load on later runs
|
||||
with open(notrack_json,'r') as f:
|
||||
notrack = json.load(f)
|
||||
for imgpath in tqdm(sorted(notrack.keys())):
|
||||
|
||||
if 'crop' in imgpath:
|
||||
vname,fname = imgpath[:-18],imgpath[-17:]
|
||||
print(imgpath,vname,fname)
|
||||
continue
|
||||
|
||||
vname,fname = imgpath[:-13],imgpath[-12:]
|
||||
if vname != cur_vname:
|
||||
cur_vname = vname
|
||||
track[vname] = {}
|
||||
|
||||
track[vname][fname] = {'num_boxes':len(notrack[imgpath])}
|
||||
for bid in range(len(notrack[imgpath])):
|
||||
track[vname][fname][bid+1] = {}
|
||||
track[vname][fname][bid+1]['box_score'] = notrack[imgpath][bid]['score']
|
||||
track[vname][fname][bid+1]['box_pos'] = get_box(notrack[imgpath][bid]['keypoints'], os.path.join(image_dir,imgpath))
|
||||
track[vname][fname][bid+1]['box_pose_pos'] = np.array(notrack[imgpath][bid]['keypoints']).reshape(-1,3)[:,0:2]
|
||||
track[vname][fname][bid+1]['box_pose_score'] = np.array(notrack[imgpath][bid]['keypoints']).reshape(-1,3)[:,-1]
|
||||
|
||||
np.save('notrack-{}.npy'.format(dataset),track)
|
||||
track = np.load('notrack-{}.npy'.format(dataset)).item()
|
||||
|
||||
# tracking process
|
||||
for video_name in tqdm(track.keys()):
|
||||
|
||||
max_pid_id = 0
|
||||
frame_list = sorted(list(track[video_name].keys()))
|
||||
|
||||
for idx, frame_name in enumerate(frame_list[:-1]):
|
||||
frame_new_pids = []
|
||||
frame_id = frame_name.split(".")[0]
|
||||
|
||||
next_frame_name = frame_list[idx+1]
|
||||
next_frame_id = next_frame_name.split(".")[0]
|
||||
|
||||
# deal with image files whose names end with '__crop'
|
||||
if 'crop' in next_frame_name:
|
||||
track[video_name][next_frame_name] = copy.deepcopy(track[video_name][frame_name])
|
||||
continue
|
||||
|
||||
# init tracking info of the first frame in one video
|
||||
if idx == 0:
|
||||
for pid in range(1, track[video_name][frame_name]['num_boxes']+1):
|
||||
track[video_name][frame_name][pid]['new_pid'] = pid
|
||||
track[video_name][frame_name][pid]['match_score'] = 0
|
||||
|
||||
max_pid_id = max(max_pid_id, track[video_name][frame_name]['num_boxes'])
|
||||
if use_orb:
|
||||
cor_file = os.path.join(image_dir, video_name, "".join([frame_id, '_', next_frame_id, '_orb.txt']))
|
||||
else:
|
||||
cor_file = os.path.join(image_dir, video_name, "".join([frame_id, '_', next_frame_id, '.txt']))
|
||||
|
||||
# regenerate the missed pair-matching txt
|
||||
if not os.path.exists(cor_file) or os.stat(cor_file).st_size<200:
|
||||
|
||||
dm = "/home/yuliang/code/PoseTrack-CVPR2017/external/deepmatching/deepmatching"
|
||||
img1_path = os.path.join(image_dir,video_name,frame_name)
|
||||
img2_path = os.path.join(image_dir,video_name,next_frame_name)
|
||||
|
||||
if use_orb:
|
||||
orb_matching(img1_path,img2_path, os.path.join(image_dir, video_name), frame_id, next_frame_id)
|
||||
else:
|
||||
cmd = "%s %s %s -nt 20 -downscale 2 -out %s"%(dm,img1_path,img2_path,cor_file)
|
||||
os.system(cmd)
|
||||
|
||||
all_cors = np.loadtxt(cor_file)
|
||||
|
||||
# if there are no people in this frame, copy the info from the previous frame
|
||||
if track[video_name][next_frame_name]['num_boxes'] == 0:
|
||||
track[video_name][next_frame_name] = copy.deepcopy(track[video_name][frame_name])
|
||||
continue
|
||||
cur_all_pids, cur_all_pids_fff = stack_all_pids(track[video_name], frame_list[:-1], idx, max_pid_id, link_len)
|
||||
match_indexes, match_scores = best_matching_hungarian(
|
||||
all_cors, cur_all_pids, cur_all_pids_fff, track[video_name][next_frame_name], weights, weights_fff, num, mag)
|
||||
|
||||
for pid1, pid2 in match_indexes:
|
||||
if match_scores[pid1][pid2] > match_thres:
|
||||
track[video_name][next_frame_name][pid2+1]['new_pid'] = cur_all_pids[pid1]['new_pid']
|
||||
max_pid_id = max(max_pid_id, track[video_name][next_frame_name][pid2+1]['new_pid'])
|
||||
track[video_name][next_frame_name][pid2+1]['match_score'] = match_scores[pid1][pid2]
|
||||
|
||||
# add the untracked new person
|
||||
for next_pid in range(1, track[video_name][next_frame_name]['num_boxes'] + 1):
|
||||
if 'new_pid' not in track[video_name][next_frame_name][next_pid]:
|
||||
max_pid_id += 1
|
||||
track[video_name][next_frame_name][next_pid]['new_pid'] = max_pid_id
|
||||
track[video_name][next_frame_name][next_pid]['match_score'] = 0
|
||||
|
||||
# deal with non-consecutive frames in the dataset
|
||||
gap = int(next_frame_id)-int(frame_id)
|
||||
if gap>1:
|
||||
for i in range(gap):
|
||||
if i>0:
|
||||
new_frame_name = "%08d.jpg"%(int(frame_id)+i)
|
||||
track[video_name][new_frame_name] = copy.deepcopy(track[video_name][frame_name])
|
||||
|
||||
rmpe_part_ids = [0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, 8, 9]
|
||||
|
||||
for video_name in tqdm(track.keys()):
|
||||
num_persons = 0
|
||||
frame_list = sorted(list(track[video_name].keys()))
|
||||
for fid, frame_name in enumerate(frame_list):
|
||||
for pid in range(1, track[video_name][frame_name]['num_boxes']+1):
|
||||
new_score = copy.deepcopy(track[video_name][frame_name][pid]['box_pose_score'])
|
||||
new_pose = copy.deepcopy(track[video_name][frame_name][pid]['box_pose_pos'])
|
||||
track[video_name][frame_name][pid]['box_pose_score'] = new_score[rmpe_part_ids]
|
||||
track[video_name][frame_name][pid]['box_pose_pos'] = new_pose[rmpe_part_ids,:]
|
||||
num_persons = max(num_persons, track[video_name][frame_name][pid]['new_pid'])
|
||||
track[video_name]['num_persons'] = num_persons
|
||||
|
||||
np.save('track-{}.npy'.format(dataset),track)
|
||||
track = np.load('track-{}.npy'.format(dataset)).item()
|
||||
|
||||
for a,b,c in os.walk(anno_dir):
|
||||
val_jsons = [item for item in c if 'json' in item]
|
||||
break
|
||||
|
||||
# export tracking result into json files
|
||||
for video_name in tqdm(track.keys()):
|
||||
if dataset == 'val':
|
||||
name = [item for item in val_jsons if video_name.split("/")[-1] in item]
|
||||
if len(name) == 0:
|
||||
name = [item for item in val_jsons if video_name.split("/")[-1][1:] in item]
|
||||
name = name[0]
|
||||
else:
|
||||
# handle the inconsistent naming of the test-set annotation files
|
||||
name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0] == item.split("_")[0]]
|
||||
if video_name.split("/")[-1].split("_")[0] == "000044":
|
||||
if video_name.split("/")[-2]=='mpii_5sec':
|
||||
name = ["00044_mpii_step1_relpath_5sec_testsub.json"]
|
||||
elif video_name.split("/")[-2]=='bonn_5sec':
|
||||
name = ["000044_mpii_relpath_5sec_testsub.json"]
|
||||
|
||||
if video_name.split("/")[-1].split("_")[0] == "002279":
|
||||
if video_name.split("/")[-2]=='mpii_5sec':
|
||||
name = ["02279_mpii_step2_relpath_5sec_testsub.json"]
|
||||
elif video_name.split("/")[-2]=='bonn_mpii_test_v2_5sec':
|
||||
name = ["02279_mpii_relpath_5sec_testsub.json"]
|
||||
|
||||
if video_name.split("/")[-1].split("_")[0] == "019980":
|
||||
if video_name.split("/")[-2]=='bonn_5sec':
|
||||
name = ["019980_mpii_relpath_5sec_testsub.json"]
|
||||
elif video_name.split("/")[-2]=='mpii_5sec':
|
||||
name = ["19980_mpii_step1_relpath_5sec_testsub.json"]
|
||||
|
||||
if video_name.split("/")[-1].split("_")[0] == "09611":
|
||||
name = ["09611_mpii_relpath_5sec_testsub.json"]
|
||||
if video_name.split("/")[-1].split("_")[0] == "009611":
|
||||
name = ["09611_mpii_step2_relpath_5sec_testsub.json"]
|
||||
|
||||
if video_name.split("/")[-1].split("_")[0][:-1] == '00000':
|
||||
name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0][1:] == item.split("_")[0]]
|
||||
if len(name)==0:
|
||||
name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0][1:] == item.split("_")[0]]
|
||||
name = name[0]
|
||||
|
||||
final = {'annolist':[]}
|
||||
frame_list = list(track[video_name].keys())
|
||||
frame_list.remove('num_persons')
|
||||
frame_list = sorted(frame_list)
|
||||
|
||||
with open(os.path.join(anno_dir,name)) as f:
|
||||
annot = json.load(f)
|
||||
|
||||
imgs = []
|
||||
for img in annot['annolist']:
|
||||
imgs.append(img['image'][0]['name'])
|
||||
|
||||
for fid, frame_name in enumerate(frame_list):
|
||||
if os.path.join(video_name,frame_name) not in imgs:
|
||||
continue
|
||||
final['annolist'].append({"image":[{"name":os.path.join(video_name,frame_name)}],"annorect":[]})
|
||||
for pid in range(1, track[video_name][frame_name]['num_boxes']+1):
|
||||
pid_info = track[video_name][frame_name][pid]
|
||||
box_pos = pid_info['box_pos']
|
||||
box_score = pid_info['box_score']
|
||||
pose_pos = pid_info['box_pose_pos']
|
||||
pose_score = pid_info['box_pose_score']
|
||||
pose_pos = add_nose(pose_pos)
|
||||
pose_score = add_nose(pose_score)
|
||||
new_pid = pid_info['new_pid']
|
||||
|
||||
point_struct = []
|
||||
for idx,pose in enumerate(pose_pos):
|
||||
if pose_score[idx]>drop:
|
||||
point_struct.append({"id":[idx],"x":[pose[0]],"y":[pose[1]],"score":[pose_score[idx]]})
|
||||
final['annolist'][fid]['annorect'].append({"x1":[box_pos[0]],\
|
||||
"x2":[box_pos[1]],\
|
||||
"y1":[box_pos[2]],\
|
||||
"y2":[box_pos[3]],\
|
||||
"score":[box_score],\
|
||||
"track_id":[new_pid-1],\
|
||||
"annopoints":[{"point":point_struct}]})
|
||||
|
||||
for rest_name in remove_list(imgs,video_name,frame_list):
|
||||
final['annolist'].append({"image":[{"name":rest_name}],"annorect":[]})
|
||||
with open("%s/%s"%(track_dir,name),'w') as json_file:
|
||||
json_file.write(json.dumps(final))
|
@ -0,0 +1,226 @@
|
||||
# coding: utf-8
|
||||
|
||||
'''
|
||||
File: tracker-general.py
|
||||
Project: AlphaPose
|
||||
File Created: Tuesday, 18th Dec 2018 14:55:41 pm
|
||||
-----
|
||||
Last Modified: Thursday, 20th Dec 2018 23:24:47 pm
|
||||
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
-----
|
||||
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import os
|
||||
import json
|
||||
import copy
|
||||
import heapq
|
||||
from munkres import Munkres, print_matrix
|
||||
from PIL import Image
|
||||
import matplotlib.pyplot as plt
|
||||
from tqdm import tqdm
|
||||
from utils import *
|
||||
from matching import orb_matching
|
||||
import argparse
|
||||
|
||||
# visualization
|
||||
def display_pose(imgdir, visdir, tracked, cmap):
|
||||
|
||||
print("Start visualization...\n")
|
||||
for imgname in tqdm(tracked.keys()):
|
||||
img = Image.open(os.path.join(imgdir,imgname))
|
||||
width, height = img.size
|
||||
fig = plt.figure(figsize=(width/10,height/10),dpi=10)
|
||||
plt.imshow(img)
|
||||
for pid in range(len(tracked[imgname])):
|
||||
pose = np.array(tracked[imgname][pid]['keypoints']).reshape(-1,3)[:,:3]
|
||||
tracked_id = tracked[imgname][pid]['idx']
|
||||
|
||||
# keypoint scores of torch version and pytorch version are different
|
||||
if np.mean(pose[:,2]) <1 :
|
||||
alpha_ratio = 1.0
|
||||
else:
|
||||
alpha_ratio = 5.0
|
||||
|
||||
if pose.shape[0] == 16:
|
||||
mpii_part_names = ['RAnkle','RKnee','RHip','LHip','LKnee','LAnkle','Pelv','Thrx','Neck','Head','RWrist','RElbow','RShoulder','LShoulder','LElbow','LWrist']
|
||||
colors = ['m', 'b', 'b', 'r', 'r', 'b', 'b', 'r', 'r', 'm', 'm', 'm', 'r', 'r','b','b']
|
||||
pairs = [[8,9],[11,12],[11,10],[2,1],[1,0],[13,14],[14,15],[3,4],[4,5],[8,7],[7,6],[6,2],[6,3],[8,12],[8,13]]
|
||||
for idx_c, color in enumerate(colors):
|
||||
plt.plot(np.clip(pose[idx_c,0],0,width), np.clip(pose[idx_c,1],0,height), marker='o',
|
||||
color=color, ms=80/alpha_ratio*np.mean(pose[idx_c,2]), markerfacecolor=(1, 1, 0, 0.7/alpha_ratio*pose[idx_c,2]))
|
||||
for idx in range(len(pairs)):
|
||||
plt.plot(np.clip(pose[pairs[idx],0],0,width),np.clip(pose[pairs[idx],1],0,height), 'r-',
|
||||
color=cmap(tracked_id), linewidth=60/alpha_ratio*np.mean(pose[pairs[idx],2]), alpha=0.6/alpha_ratio*np.mean(pose[pairs[idx],2]))
|
||||
elif pose.shape[0] == 17:
|
||||
coco_part_names = ['Nose','LEye','REye','LEar','REar','LShoulder','RShoulder','LElbow','RElbow','LWrist','RWrist','LHip','RHip','LKnee','RKnee','LAnkle','RAnkle']
|
||||
colors = ['r', 'r', 'r', 'r', 'r', 'y', 'y', 'y', 'y', 'y', 'y', 'g', 'g', 'g','g','g','g']
|
||||
pairs = [[0,1],[0,2],[1,3],[2,4],[5,6],[5,7],[7,9],[6,8],[8,10],[11,12],[11,13],[13,15],[12,14],[14,16],[6,12],[5,11]]
|
||||
for idx_c, color in enumerate(colors):
|
||||
plt.plot(np.clip(pose[idx_c,0],0,width), np.clip(pose[idx_c,1],0,height), marker='o',
|
||||
color=color, ms=80/alpha_ratio*np.mean(pose[idx_c,2]), markerfacecolor=(1, 1, 0, 0.7/alpha_ratio*pose[idx_c,2]))
|
||||
for idx in range(len(pairs)):
|
||||
plt.plot(np.clip(pose[pairs[idx],0],0,width),np.clip(pose[pairs[idx],1],0,height),'r-',
|
||||
color=cmap(tracked_id), linewidth=60/alpha_ratio*np.mean(pose[pairs[idx],2]), alpha=0.6/alpha_ratio*np.mean(pose[pairs[idx],2]))
|
||||
plt.axis('off')
|
||||
ax = plt.gca()
|
||||
ax.set_xlim([0,width])
|
||||
ax.set_ylim([height,0])
|
||||
extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
|
||||
if not os.path.exists(visdir):
|
||||
os.mkdir(visdir)
|
||||
fig.savefig(os.path.join(visdir,imgname.split()[0]+".png"), pad_inches = 0.0, bbox_inches=extent, dpi=13)
|
||||
plt.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
parser = argparse.ArgumentParser(description='PoseFlow Tracker')
|
||||
parser.add_argument('--imgdir', type=str, required=True, help="Must input the images dir")
|
||||
parser.add_argument('--in_json', type=str, required=True, help="result json predicted by AlphaPose")
|
||||
parser.add_argument('--out_json', type=str, required=True, help="output path of tracked json")
|
||||
parser.add_argument('--visdir', type=str, default="", help="visualization output dir for tracked results of video sequences")
|
||||
|
||||
parser.add_argument('--link', type=int, default=100)
|
||||
parser.add_argument('--drop', type=float, default=2.0)
|
||||
parser.add_argument('--num', type=int, default=7)
|
||||
parser.add_argument('--mag', type=int, default=30)
|
||||
parser.add_argument('--match', type=float, default=0.2)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# super parameters
|
||||
# 1. look-ahead LINK_LEN frames to find tracked human bbox
|
||||
# 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
|
||||
# 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score(Non DeepMatching)
|
||||
# 4. drop low-score(<DROP) keypoints
|
||||
# 5. pick high-score(top NUM) keypoints when computing pose_IOU
|
||||
# 6. box width/height around keypoint for computing pose IoU
|
||||
# 7. match threshold in Hungarian Matching
|
||||
|
||||
link_len = args.link
|
||||
weights = [1,2,1,2,0,0]
|
||||
weights_fff = [0,1,0,1,0,0]
|
||||
drop = args.drop
|
||||
num = args.num
|
||||
mag = args.mag
|
||||
match_thres = args.match
|
||||
|
||||
notrack_json = args.in_json
|
||||
tracked_json = args.out_json
|
||||
image_dir = args.imgdir
|
||||
vis_dir = args.visdir
|
||||
|
||||
# if json format is different from "alphapose-forvis.json" (pytorch version)
|
||||
if "forvis" not in notrack_json:
|
||||
results_forvis = {}
|
||||
last_image_name = ' '
|
||||
|
||||
with open(notrack_json) as f:
|
||||
results = json.load(f)
|
||||
for i in range(len(results)):
|
||||
imgpath = results[i]['image_id']
|
||||
if last_image_name != imgpath:
|
||||
results_forvis[imgpath] = []
|
||||
results_forvis[imgpath].append({'keypoints':results[i]['keypoints'],'scores':results[i]['score']})
|
||||
else:
|
||||
results_forvis[imgpath].append({'keypoints':results[i]['keypoints'],'scores':results[i]['score']})
|
||||
last_image_name = imgpath
|
||||
notrack_json = os.path.join(os.path.dirname(notrack_json), "alphapose-results-forvis.json")
|
||||
with open(notrack_json,'w') as json_file:
|
||||
json_file.write(json.dumps(results_forvis))
|
||||
|
||||
notrack = {}
|
||||
track = {}
|
||||
num_persons = 0
|
||||
|
||||
# load json file without tracking information
|
||||
print("Start loading json file...\n")
|
||||
with open(notrack_json,'r') as f:
|
||||
notrack = json.load(f)
|
||||
for img_name in tqdm(sorted(notrack.keys())):
|
||||
track[img_name] = {'num_boxes':len(notrack[img_name])}
|
||||
for bid in range(len(notrack[img_name])):
|
||||
track[img_name][bid+1] = {}
|
||||
track[img_name][bid+1]['box_score'] = notrack[img_name][bid]['scores']
|
||||
track[img_name][bid+1]['box_pos'] = get_box(notrack[img_name][bid]['keypoints'], os.path.join(image_dir,img_name))
|
||||
track[img_name][bid+1]['box_pose_pos'] = np.array(notrack[img_name][bid]['keypoints']).reshape(-1,3)[:,0:2]
|
||||
track[img_name][bid+1]['box_pose_score'] = np.array(notrack[img_name][bid]['keypoints']).reshape(-1,3)[:,-1]
|
||||
|
||||
np.save('notrack-bl.npy',track)
|
||||
# track = np.load('notrack-bl.npy').item()
|
||||
|
||||
# tracking process
|
||||
max_pid_id = 0
|
||||
frame_list = sorted(list(track.keys()))
|
||||
|
||||
print("Start pose tracking...\n")
|
||||
for idx, frame_name in enumerate(tqdm(frame_list[:-1])):
|
||||
frame_new_pids = []
|
||||
frame_id = frame_name.split(".")[0]
|
||||
|
||||
next_frame_name = frame_list[idx+1]
|
||||
next_frame_id = next_frame_name.split(".")[0]
|
||||
|
||||
# init tracking info of the first frame in one video
|
||||
if idx == 0:
|
||||
for pid in range(1, track[frame_name]['num_boxes']+1):
|
||||
track[frame_name][pid]['new_pid'] = pid
|
||||
track[frame_name][pid]['match_score'] = 0
|
||||
|
||||
max_pid_id = max(max_pid_id, track[frame_name]['num_boxes'])
|
||||
cor_file = os.path.join(image_dir, "".join([frame_id, '_', next_frame_id, '_orb.txt']))
|
||||
|
||||
# regenerate the missed pair-matching txt
|
||||
if not os.path.exists(cor_file) or os.stat(cor_file).st_size<200:
|
||||
img1_path = os.path.join(image_dir, frame_name)
|
||||
img2_path = os.path.join(image_dir, next_frame_name)
|
||||
orb_matching(img1_path,img2_path, image_dir, frame_id, next_frame_id)
|
||||
|
||||
all_cors = np.loadtxt(cor_file)
|
||||
|
||||
# if there are no people in this frame, copy the info from the previous frame
|
||||
if track[next_frame_name]['num_boxes'] == 0:
|
||||
track[next_frame_name] = copy.deepcopy(track[frame_name])
|
||||
continue
|
||||
cur_all_pids, cur_all_pids_fff = stack_all_pids(track, frame_list[:-1], idx, max_pid_id, link_len)
|
||||
match_indexes, match_scores = best_matching_hungarian(
|
||||
all_cors, cur_all_pids, cur_all_pids_fff, track[next_frame_name], weights, weights_fff, num, mag)
|
||||
|
||||
for pid1, pid2 in match_indexes:
|
||||
if match_scores[pid1][pid2] > match_thres:
|
||||
track[next_frame_name][pid2+1]['new_pid'] = cur_all_pids[pid1]['new_pid']
|
||||
max_pid_id = max(max_pid_id, track[next_frame_name][pid2+1]['new_pid'])
|
||||
track[next_frame_name][pid2+1]['match_score'] = match_scores[pid1][pid2]
|
||||
|
||||
# add the untracked new person
|
||||
for next_pid in range(1, track[next_frame_name]['num_boxes'] + 1):
|
||||
if 'new_pid' not in track[next_frame_name][next_pid]:
|
||||
max_pid_id += 1
|
||||
track[next_frame_name][next_pid]['new_pid'] = max_pid_id
|
||||
track[next_frame_name][next_pid]['match_score'] = 0
|
||||
|
||||
np.save('track-bl.npy',track)
|
||||
# track = np.load('track-bl.npy').item()
|
||||
|
||||
# calculate number of people
|
||||
num_persons = 0
|
||||
for fid, frame_name in enumerate(frame_list):
|
||||
for pid in range(1, track[frame_name]['num_boxes']+1):
|
||||
num_persons = max(num_persons, track[frame_name][pid]['new_pid'])
|
||||
print("This video contains %d people."%(num_persons))
|
||||
|
||||
# export tracking result into notrack json files
|
||||
print("Export tracking results to json...\n")
|
||||
for fid, frame_name in enumerate(tqdm(frame_list)):
|
||||
for pid in range(track[frame_name]['num_boxes']):
|
||||
notrack[frame_name][pid]['idx'] = track[frame_name][pid+1]['new_pid']
|
||||
|
||||
with open(tracked_json,'w') as json_file:
|
||||
json_file.write(json.dumps(notrack))
|
||||
|
||||
if len(args.visdir)>0:
|
||||
cmap = plt.cm.get_cmap("hsv", num_persons)
|
||||
display_pose(image_dir, vis_dir, notrack, cmap)
|
@ -0,0 +1,238 @@
|
||||
# coding: utf-8
|
||||
|
||||
'''
|
||||
File: utils.py
|
||||
Project: AlphaPose
|
||||
File Created: Thursday, 1st March 2018 5:32:34 pm
|
||||
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
-----
|
||||
Last Modified: Thursday, 20th March 2018 1:18:17 am
|
||||
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
|
||||
-----
|
||||
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import cv2 as cv
|
||||
import os
|
||||
import json
|
||||
import copy
|
||||
import heapq
|
||||
from munkres import Munkres, print_matrix
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# keypoint penalty weight
|
||||
delta = 2*np.array([0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144, \
|
||||
0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081, \
|
||||
0.01291456, 0.01236173,0.01291456, 0.01236173])
|
||||
|
||||
|
||||
# get expand bbox surrounding single person's keypoints
|
||||
def get_box(pose, imgpath):
|
||||
|
||||
pose = np.array(pose).reshape(-1,3)
|
||||
xmin = np.min(pose[:,0])
|
||||
xmax = np.max(pose[:,0])
|
||||
ymin = np.min(pose[:,1])
|
||||
ymax = np.max(pose[:,1])
|
||||
|
||||
img_height, img_width, _ = cv.imread(imgpath).shape
|
||||
|
||||
return expand_bbox(xmin, xmax, ymin, ymax, img_width, img_height)
|
||||
|
||||
# expand bbox for containing more background
|
||||
def expand_bbox(left, right, top, bottom, img_width, img_height):
|
||||
|
||||
width = right - left
|
||||
height = bottom - top
|
||||
ratio = 0.1 # expand ratio
|
||||
new_left = np.clip(left - ratio * width, 0, img_width)
|
||||
new_right = np.clip(right + ratio * width, 0, img_width)
|
||||
new_top = np.clip(top - ratio * height, 0, img_height)
|
||||
new_bottom = np.clip(bottom + ratio * height, 0, img_height)
|
||||
|
||||
return [int(new_left), int(new_right), int(new_top), int(new_bottom)]
|
||||
|
||||
# calculate final matching grade
|
||||
def cal_grade(l, w):
|
||||
return sum(np.array(l)*np.array(w))
|
||||
|
||||
# calculate IoU of two boxes(thanks @ZongweiZhou1)
|
||||
def cal_bbox_iou(boxA, boxB):
|
||||
|
||||
xA = max(boxA[0], boxB[0]) #xmin
|
||||
yA = max(boxA[2], boxB[2]) #ymin
|
||||
xB = min(boxA[1], boxB[1]) #xmax
|
||||
yB = min(boxA[3], boxB[3]) #ymax
|
||||
|
||||
if xA < xB and yA < yB:
|
||||
interArea = (xB - xA + 1) * (yB - yA + 1)
|
||||
boxAArea = (boxA[1] - boxA[0] + 1) * (boxA[3] - boxA[2] + 1)
|
||||
boxBArea = (boxB[1] - boxB[0] + 1) * (boxB[3] - boxB[2] + 1)
|
||||
iou = interArea / float(boxAArea + boxBArea - interArea+0.00001)
|
||||
else:
|
||||
iou=0.0
|
||||
|
||||
return iou
|
||||
|
||||
# calculate OKS between two single poses
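# OKS = mean_i exp(-d_i^2 / (2 * delta_i^2 * scale)), where d_i is the distance
# between corresponding keypoints and scale is the area of the box enclosing both poses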
|
||||
def compute_oks(anno, predict, delta):
|
||||
|
||||
xmax = np.max(np.vstack((anno[:, 0], predict[:, 0])))
|
||||
xmin = np.min(np.vstack((anno[:, 0], predict[:, 0])))
|
||||
ymax = np.max(np.vstack((anno[:, 1], predict[:, 1])))
|
||||
ymin = np.min(np.vstack((anno[:, 1], predict[:, 1])))
|
||||
scale = (xmax - xmin) * (ymax - ymin)
|
||||
dis = np.sum((anno - predict)**2, axis=1)
|
||||
oks = np.mean(np.exp(-dis / 2 / delta**2 / scale))
|
||||
|
||||
return oks
|
||||
|
||||
# stack all already tracked people's info together(thanks @ZongweiZhou1)
|
||||
def stack_all_pids(track_vid, frame_list, idxs, max_pid_id, link_len):
|
||||
|
||||
#track_vid contains track_vid[<=idx]
|
||||
all_pids_info = []
|
||||
all_pids_fff = [] # boolean list, 'fff' means From Former Frame
|
||||
all_pids_ids = [(item+1) for item in range(max_pid_id)]
|
||||
|
||||
for idx in np.arange(idxs,max(idxs-link_len,-1),-1):
|
||||
for pid in range(1, track_vid[frame_list[idx]]['num_boxes']+1):
|
||||
if len(all_pids_ids) == 0:
|
||||
return all_pids_info, all_pids_fff
|
||||
elif track_vid[frame_list[idx]][pid]['new_pid'] in all_pids_ids:
|
||||
all_pids_ids.remove(track_vid[frame_list[idx]][pid]['new_pid'])
|
||||
all_pids_info.append(track_vid[frame_list[idx]][pid])
|
||||
if idx == idxs:
|
||||
all_pids_fff.append(True)
|
||||
else:
|
||||
all_pids_fff.append(False)
|
||||
return all_pids_info, all_pids_fff
|
||||
|
||||
# calculate DeepMatching Pose IoU given two boxes
|
||||
def find_two_pose_box_iou(pose1_box, pose2_box, all_cors):
|
||||
|
||||
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
|
||||
x_min, x_max, y_min, y_max = pose1_box
|
||||
x1_region_ids = set(np.where((x1 >= x_min) & (x1 <= x_max))[0].tolist())
|
||||
y1_region_ids = set(np.where((y1 >= y_min) & (y1 <= y_max))[0].tolist())
|
||||
region_ids1 = x1_region_ids & y1_region_ids
|
||||
x_min, x_max, y_min, y_max = pose2_box
|
||||
x2_region_ids = set(np.where((x2 >= x_min) & (x2 <= x_max))[0].tolist())
|
||||
y2_region_ids = set(np.where((y2 >= y_min) & (y2 <= y_max))[0].tolist())
|
||||
region_ids2 = x2_region_ids & y2_region_ids
|
||||
inter = region_ids1 & region_ids2
|
||||
union = region_ids1 | region_ids2
|
||||
pose_box_iou = len(inter) / (len(union) + 0.00001)
|
||||
|
||||
return pose_box_iou
|
||||
|
||||
# calculate general Pose IoU(only consider top NUM matched keypoints)
|
||||
def cal_pose_iou(pose1_box,pose2_box, num,mag):
|
||||
|
||||
pose_iou = []
|
||||
for row in range(len(pose1_box)):
|
||||
x1,y1 = pose1_box[row]
|
||||
x2,y2 = pose2_box[row]
|
||||
box1 = [x1-mag,x1+mag,y1-mag,y1+mag]
|
||||
box2 = [x2-mag,x2+mag,y2-mag,y2+mag]
|
||||
pose_iou.append(cal_bbox_iou(box1,box2))
|
||||
|
||||
return np.mean(heapq.nlargest(num, pose_iou))
|
||||
|
||||
# calculate DeepMatching based Pose IoU(only consider top NUM matched keypoints)
|
||||
def cal_pose_iou_dm(all_cors,pose1,pose2,num,mag):
|
||||
|
||||
poses_iou = []
|
||||
for ids in range(len(pose1)):
|
||||
pose1_box = [pose1[ids][0]-mag,pose1[ids][0]+mag,pose1[ids][1]-mag,pose1[ids][1]+mag]
|
||||
pose2_box = [pose2[ids][0]-mag,pose2[ids][0]+mag,pose2[ids][1]-mag,pose2[ids][1]+mag]
|
||||
poses_iou.append(find_two_pose_box_iou(pose1_box, pose2_box, all_cors))
|
||||
|
||||
return np.mean(heapq.nlargest(num, poses_iou))
|
||||
|
||||
# hungarian matching algorithm(thanks @ZongweiZhou1)
|
||||
def best_matching_hungarian(all_cors, all_pids_info, all_pids_fff, track_vid_next_fid, weights, weights_fff, num, mag):
|
||||
|
||||
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
|
||||
all_grades_details = []
|
||||
all_grades = []
|
||||
|
||||
box1_num = len(all_pids_info)
|
||||
box2_num = track_vid_next_fid['num_boxes']
|
||||
cost_matrix = np.zeros((box1_num, box2_num))
|
||||
|
||||
for pid1 in range(box1_num):
|
||||
box1_pos = all_pids_info[pid1]['box_pos']
|
||||
box1_region_ids = find_region_cors_last(box1_pos, all_cors)
|
||||
box1_score = all_pids_info[pid1]['box_score']
|
||||
box1_pose = all_pids_info[pid1]['box_pose_pos']
|
||||
box1_fff = all_pids_fff[pid1]
|
||||
|
||||
for pid2 in range(1, track_vid_next_fid['num_boxes'] + 1):
|
||||
box2_pos = track_vid_next_fid[pid2]['box_pos']
|
||||
box2_region_ids = find_region_cors_next(box2_pos, all_cors)
|
||||
box2_score = track_vid_next_fid[pid2]['box_score']
|
||||
box2_pose = track_vid_next_fid[pid2]['box_pose_pos']
|
||||
|
||||
inter = box1_region_ids & box2_region_ids
|
||||
union = box1_region_ids | box2_region_ids
|
||||
dm_iou = len(inter) / (len(union) + 0.00001)
|
||||
box_iou = cal_bbox_iou(box1_pos, box2_pos)
|
||||
pose_iou_dm = cal_pose_iou_dm(all_cors, box1_pose, box2_pose, num,mag)
|
||||
pose_iou = cal_pose_iou(box1_pose, box2_pose,num,mag)
|
||||
if box1_fff:
|
||||
grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score], weights)
|
||||
else:
|
||||
grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score], weights_fff)
|
||||
|
||||
cost_matrix[pid1, pid2 - 1] = grade
|
||||
m = Munkres()
|
||||
indexes = m.compute((-np.array(cost_matrix)).tolist())
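# Munkres solves a minimization problem, so the grade matrix is negated to obtain
# the assignment with the maximum total matching grade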
|
||||
|
||||
return indexes, cost_matrix
|
||||
|
||||
# calculate number of matching points in one box from last frame
|
||||
def find_region_cors_last(box_pos, all_cors):
|
||||
|
||||
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
|
||||
x_min, x_max, y_min, y_max = box_pos
|
||||
x1_region_ids = set(np.where((x1 >= x_min) & (x1 <= x_max))[0].tolist())
|
||||
y1_region_ids = set(np.where((y1 >= y_min) & (y1 <= y_max))[0].tolist())
|
||||
region_ids = x1_region_ids & y1_region_ids
|
||||
|
||||
return region_ids
|
||||
|
||||
# calculate number of matching points in one box from next frame
|
||||
def find_region_cors_next(box_pos, all_cors):
|
||||
|
||||
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
|
||||
x_min, x_max, y_min, y_max = box_pos
|
||||
x2_region_ids = set(np.where((x2 >= x_min) & (x2 <= x_max))[0].tolist())
|
||||
y2_region_ids = set(np.where((y2 >= y_min) & (y2 <= y_max))[0].tolist())
|
||||
region_ids = x2_region_ids & y2_region_ids
|
||||
|
||||
return region_ids
|
||||
|
||||
# fill the nose keypoint by averaging head and neck
|
||||
def add_nose(array):
|
||||
|
||||
if min(array.shape) == 2:
|
||||
head = array[-1,:]
|
||||
neck = array[-2,:]
|
||||
else:
|
||||
head = array[-1]
|
||||
neck = array[-2]
|
||||
nose = (head+neck)/2.0
|
||||
|
||||
return np.insert(array,-1,nose,axis=0)
|
||||
|
||||
# list remove operation
|
||||
def remove_list(l1,vname,l2):
|
||||
|
||||
for item in l2:
|
||||
l1.remove(os.path.join(vname,item))
|
||||
|
||||
return l1
|
@ -0,0 +1,115 @@
|
||||
|
||||
<div align="center">
|
||||
<img src="doc/logo.jpg", width="400">
|
||||
</div>
|
||||
|
||||
## Notice
|
||||
### This branch is developed on PyTorch 0.4.0. We have released a new version of AlphaPose based on PyTorch 1.1+. Please check out our [master](https://github.com/MVIG-SJTU/AlphaPose) branch for more details.
|
||||
|
||||
## News!
|
||||
- Dec 2019: [**v0.3.0** version](https://github.com/MVIG-SJTU/AlphaPose) of AlphaPose is released! Smaller model, higher accuracy!
|
||||
- Apr 2019: [**MXNet** version](https://github.com/MVIG-SJTU/AlphaPose/tree/mxnet) of AlphaPose is released! It runs at **23 fps** on COCO validation set.
|
||||
- Feb 2019: [CrowdPose](https://github.com/MVIG-SJTU/AlphaPose/blob/pytorch/doc/CrowdPose.md) is integrated into AlphaPose Now!
|
||||
- Dec 2018: [General version](https://github.com/MVIG-SJTU/AlphaPose/tree/pytorch/PoseFlow) of PoseFlow is released! 3X Faster and support pose tracking results visualization!
|
||||
- Sep 2018: [**v0.2.0** version](https://github.com/MVIG-SJTU/AlphaPose/tree/pytorch) of AlphaPose is released! It runs at **20 fps** on COCO validation set (4.6 people per image on average) and achieves 71 mAP!
|
||||
|
||||
## AlphaPose
|
||||
[Alpha Pose](http://www.mvig.org/research/alphapose.html) is an accurate multi-person pose estimator, which is the **first open-source system that achieves 70+ mAP (72.3 mAP) on COCO dataset and 80+ mAP (82.1 mAP) on MPII dataset.**
|
||||
To match poses that correspond to the same person across frames, we also provide an efficient online pose tracker called Pose Flow. It is the **first open-source online pose tracker that achieves both 60+ mAP (66.5 mAP) and 50+ MOTA (58.3 MOTA) on PoseTrack Challenge dataset.**
|
||||
|
||||
AlphaPose supports both Linux and **Windows!**
|
||||
|
||||
<div align="center">
|
||||
<img src="doc/alphapose.gif", width="400">
|
||||
</div>
|
||||
|
||||
|
||||
## Installation
|
||||
**Windows Version** please check out [doc/win_install.md](doc/win_install.md)
|
||||
|
||||
1. Get the code.
|
||||
```Shell
|
||||
git clone -b pytorch https://github.com/MVIG-SJTU/AlphaPose.git
|
||||
```
|
||||
|
||||
2. Install [pytorch 0.4.0](https://github.com/pytorch/pytorch) and other dependencies.
|
||||
```Shell
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Download the models manually: **duc_se.pth** (2018/08/30) ([Google Drive]( https://drive.google.com/open?id=1OPORTWB2cwd5YTVBX-NE8fsauZJWsrtW) | [Baidu pan](https://pan.baidu.com/s/15jbRNKuslzm5wRSgUVytrA)), **yolov3-spp.weights**([Google Drive](https://drive.google.com/open?id=1D47msNOOiJKvPOXlnpyzdKA3k6E97NTC) | [Baidu pan](https://pan.baidu.com/s/1Zb2REEIk8tcahDa8KacPNA)). Place them into `./models/sppe` and `./models/yolo` respectively.
|
||||
|
||||
|
||||
## Quick Start
|
||||
- **Input dir**: Run AlphaPose for all images in a folder with:
|
||||
```
|
||||
python3 demo.py --indir ${img_directory} --outdir examples/res
|
||||
```
|
||||
- **Video**: Run AlphaPose for a video and save the rendered video with:
|
||||
```
|
||||
python3 video_demo.py --video ${path to video} --outdir examples/res --save_video
|
||||
```
|
||||
- **Webcam**: Run AlphaPose using webcam and visualize the results with:
|
||||
```
|
||||
python3 webcam_demo.py --webcam 0 --outdir examples/res --vis
|
||||
```
|
||||
- **Input list**: Run AlphaPose for images in a list and save the rendered images with:
|
||||
```
|
||||
python3 demo.py --list examples/list-coco-demo.txt --indir ${img_directory} --outdir examples/res --save_img
|
||||
```
|
||||
- **Note**: If you run into an OOM (out of memory) problem, decrease the pose estimation batch size until the program can run on your machine:
|
||||
```
|
||||
python3 demo.py --indir ${img_directory} --outdir examples/res --posebatch 30
|
||||
```
|
||||
- **Getting more accurate**: You can enable flip testing to get more accurate results by disabling fast_inference, e.g.:
|
||||
```
|
||||
python3 demo.py --indir ${img_directory} --outdir examples/res --fast_inference False
|
||||
```
|
||||
- **Speeding up**: Check out the [speed_up.md](doc/speed_up.md) for more details.
|
||||
- **Output format**: Check out the [output.md](doc/output.md) for more details.
|
||||
- **For more**: Check out the [run.md](doc/run.md) for more options.
|
||||
|
||||
## Pose Tracking
|
||||
|
||||
<p align='center'>
|
||||
<img src="doc/posetrack.gif", width="360">
|
||||
<img src="doc/posetrack2.gif", width="344">
|
||||
</p>
|
||||
|
||||
Please read [PoseFlow/README.md](PoseFlow/) for details.
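As a rough orientation, the general-version tracker included in this repo (`PoseFlow/tracker-general.py`) is driven by the `--imgdir`, `--in_json`, `--out_json` and `--visdir` options shown in its argument parser; a minimal invocation sketch (the JSON file names below are placeholders, not documented defaults) looks like:
```
python3 PoseFlow/tracker-general.py --imgdir ${img_directory} --in_json examples/res/alphapose-results.json --out_json examples/res/alphapose-results-tracked.json --visdir examples/res/vis
```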
|
||||
|
||||
### CrowdPose
|
||||
<p align='center'>
|
||||
<img src="doc/crowdpose.gif", width="360">
|
||||
</p>
|
||||
|
||||
Please read [doc/CrowdPose.md](doc/CrowdPose.md) for details.
|
||||
|
||||
|
||||
## FAQ
|
||||
Check out [faq.md](doc/faq.md) for frequently asked questions.
|
||||
|
||||
## Contributors
|
||||
Pytorch version of AlphaPose is developed and maintained by [Jiefeng Li](http://jeff-leaf.site/), [Hao-Shu Fang](https://fang-haoshu.github.io/), [Yuliang Xiu](http://xiuyuliang.cn) and [Cewu Lu](http://www.mvig.org/).
|
||||
|
||||
## Citation
|
||||
Please cite these papers in your publications if they help your research:
|
||||
|
||||
@inproceedings{fang2017rmpe,
|
||||
title={{RMPE}: Regional Multi-person Pose Estimation},
|
||||
author={Fang, Hao-Shu and Xie, Shuqin and Tai, Yu-Wing and Lu, Cewu},
|
||||
booktitle={ICCV},
|
||||
year={2017}
|
||||
}
|
||||
|
||||
@inproceedings{xiu2018poseflow,
|
||||
author = {Xiu, Yuliang and Li, Jiefeng and Wang, Haoyu and Fang, Yinghong and Lu, Cewu},
|
||||
title = {{Pose Flow}: Efficient Online Pose Tracking},
|
||||
booktitle={BMVC},
|
||||
year = {2018}
|
||||
}
|
||||
|
||||
|
||||
|
||||
## License
|
||||
AlphaPose is freely available for free non-commercial use, and may be redistributed under these conditions. For commercial queries, please drop an e-mail to mvig.alphapose[at]gmail[dot]com and cc lucewu[at]sjtu[dot]edu[dot]cn. We will send you the detailed agreement.
|
@ -0,0 +1,2 @@
|
||||
# Auto detect text files and perform LF normalization
|
||||
* text=auto
|
@ -0,0 +1,114 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
.vscode/
|
||||
*.pkl
|
||||
exp
|
||||
exp/*
|
||||
data
|
||||
data/*
|
||||
model
|
||||
model/*
|
||||
*/images
|
||||
*/images/*
|
||||
|
||||
*.h5
|
||||
*.pth
|
||||
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Jeff-sjtu
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1 @@
|
||||
# pytorch-AlphaPose
|
@ -0,0 +1,72 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
from AlphaPose.SPPE.src.utils.img import flip, shuffleLR
|
||||
from AlphaPose.SPPE.src.utils.eval import getPrediction
|
||||
from AlphaPose.SPPE.src.models.FastPose import createModel
|
||||
|
||||
import visdom
|
||||
import time
|
||||
import sys
|
||||
|
||||
import torch._utils
|
||||
try:
|
||||
torch._utils._rebuild_tensor_v2
|
||||
except AttributeError:
|
||||
def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
|
||||
tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
|
||||
tensor.requires_grad = requires_grad
|
||||
tensor._backward_hooks = backward_hooks
|
||||
return tensor
|
||||
torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
|
||||
|
||||
|
||||
class InferenNet(nn.Module):
|
||||
def __init__(self, kernel_size, dataset):
|
||||
super(InferenNet, self).__init__()
|
||||
|
||||
model = createModel().cuda()
|
||||
print('Loading pose model from {}'.format('./models/sppe/duc_se.pth'))
|
||||
sys.stdout.flush()
|
||||
model.load_state_dict(torch.load('./models/sppe/duc_se.pth'))
|
||||
model.eval()
|
||||
self.pyranet = model
|
||||
|
||||
self.dataset = dataset
|
||||
|
||||
def forward(self, x):
|
||||
out = self.pyranet(x)
|
||||
out = out.narrow(1, 0, 17)
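# keep only the first 17 output channels, i.e. the COCO-format keypoint heatmaps
# (cf. the 17 coco_part_names used in the PoseFlow visualizer)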
|
||||
|
||||
flip_out = self.pyranet(flip(x))
|
||||
flip_out = flip_out.narrow(1, 0, 17)
|
||||
|
||||
flip_out = flip(shuffleLR(
|
||||
flip_out, self.dataset))
|
||||
|
||||
out = (flip_out + out) / 2
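# flip testing: average the heatmaps predicted from the original and the
# horizontally flipped input for a more robust estimate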
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class InferenNet_fast(nn.Module):
|
||||
def __init__(self, kernel_size, dataset):
|
||||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
super(InferenNet_fast, self).__init__()
|
||||
|
||||
model = createModel().to(device)
|
||||
print('Loading pose model from {}'.format('./models/sppe/duc_se.pth'))
|
||||
model.load_state_dict(torch.load('./models/sppe/duc_se.pth', map_location='cpu'))
|
||||
model.eval()
|
||||
self.pyranet = model
|
||||
|
||||
self.dataset = dataset
|
||||
|
||||
def forward(self, x):
|
||||
out = self.pyranet(x)
|
||||
out = out.narrow(1, 0, 17)
|
||||
|
||||
return out
|
@ -0,0 +1,35 @@
|
||||
import torch.nn as nn
|
||||
from torch.autograd import Variable
|
||||
|
||||
from .layers.SE_Resnet import SEResnet
|
||||
from .layers.DUC import DUC
|
||||
from AlphaPose.opt import opt
|
||||
|
||||
|
||||
def createModel():
|
||||
return FastPose()
|
||||
|
||||
|
||||
class FastPose(nn.Module):
|
||||
DIM = 128
|
||||
|
||||
def __init__(self):
|
||||
super(FastPose, self).__init__()
|
||||
|
||||
self.preact = SEResnet('resnet101')
|
||||
|
||||
self.suffle1 = nn.PixelShuffle(2)
|
||||
self.duc1 = DUC(512, 1024, upscale_factor=2)
|
||||
self.duc2 = DUC(256, 512, upscale_factor=2)
|
||||
|
||||
self.conv_out = nn.Conv2d(
|
||||
self.DIM, opt.nClasses, kernel_size=3, stride=1, padding=1)
|
||||
|
||||
def forward(self, x: Variable):
|
||||
out = self.preact(x)
|
||||
out = self.suffle1(out)
|
||||
out = self.duc1(out)
|
||||
out = self.duc2(out)
|
||||
|
||||
out = self.conv_out(out)
|
||||
return out
|
@ -0,0 +1 @@
|
||||
from . import *
|
@ -0,0 +1,126 @@
import torch.nn as nn
from .layers.PRM import Residual as ResidualPyramid
from .layers.Residual import Residual as Residual
from torch.autograd import Variable
from opt import opt
from collections import defaultdict


class Hourglass(nn.Module):
    def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
        super(Hourglass, self).__init__()

        # Pyramid residual modules at the outer recursion levels, plain residuals
        # near the bottom of the hourglass.
        self.ResidualUp = ResidualPyramid if n >= 2 else Residual
        self.ResidualDown = ResidualPyramid if n >= 3 else Residual

        self.depth = n
        self.nModules = nModules
        self.nFeats = nFeats
        self.net_type = net_type
        self.B = B
        self.C = C
        self.inputResH = inputResH
        self.inputResW = inputResW

        self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
        self.low1 = nn.Sequential(
            nn.MaxPool2d(2),
            self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
        )
        # Recurse until depth 1, then bottom out with a plain residual stack.
        if n > 1:
            self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
        else:
            self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)

        self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
        self.up2 = nn.UpsamplingNearest2d(scale_factor=2)

        # Upper branch keeps full resolution; lower branch pools, recurses and upsamples back.
        self.upperBranch = self.up1
        self.lowerBranch = nn.Sequential(
            self.low1,
            self.low2,
            self.low3,
            self.up2
        )

    def _make_residual(self, resBlock, useConv, inputResH, inputResW):
        layer_list = []
        for i in range(self.nModules):
            layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
                                       stride=1, net_type=self.net_type, useConv=useConv,
                                       baseWidth=self.B, cardinality=self.C))
        return nn.Sequential(*layer_list)

    def forward(self, x: Variable):
        up1 = self.upperBranch(x)
        up2 = self.lowerBranch(x)
        out = up1 + up2
        return out

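
# --- Editorial note (not part of the original file): since the residual blocks here keep
# channels and resolution (stride 1, nFeats -> nFeats), the lower branch pools by 2 and
# upsamples by 2 while the upper branch keeps resolution, so an Hourglass maps an
# (N, nFeats, H, W) tensor to a tensor of the same shape; that is what allows the two
# branches to be summed element-wise.
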

class PyraNet(nn.Module):
    def __init__(self):
        super(PyraNet, self).__init__()

        B, C = opt.baseWidth, opt.cardinality
        self.inputResH = opt.inputResH / 4
        self.inputResW = opt.inputResW / 4
        self.nStack = opt.nStack

        # Stem: a strided 7x7 conv plus pyramid residuals bring the input to 1/4 resolution.
        self.cnv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )
        self.r1 = nn.Sequential(
            ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
                            stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
            nn.MaxPool2d(2)
        )
        self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
                                  stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
                                  stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        self.preact = nn.Sequential(
            self.cnv1,
            self.r1,
            self.r4,
            self.r5
        )
        # NOTE: a plain defaultdict/list does not register sub-modules with nn.Module, so
        # the stacked layers below are invisible to parameters(), state_dict() and
        # .to(device); nn.ModuleDict / nn.ModuleList would be the idiomatic containers.
        self.stack_layers = defaultdict(list)
        for i in range(self.nStack):
            hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
            lin = nn.Sequential(
                hg,
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True),
                nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True)
            )
            tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
            self.stack_layers['lin'].append(lin)
            self.stack_layers['out'].append(tmpOut)
            if i < self.nStack - 1:
                # Remap features and heatmaps back to nFeats channels for the next stack.
                lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
                tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
                self.stack_layers['lin_'].append(lin_)
                self.stack_layers['out_'].append(tmpOut_)

    def forward(self, x: Variable):
        out = []
        inter = self.preact(x)
        for i in range(self.nStack):
            lin = self.stack_layers['lin'][i](inter)
            tmpOut = self.stack_layers['out'][i](lin)
            out.append(tmpOut)
            if i < self.nStack - 1:
                lin_ = self.stack_layers['lin_'][i](lin)
                tmpOut_ = self.stack_layers['out_'][i](tmpOut)
                inter = inter + lin_ + tmpOut_
        # One heatmap tensor per stack.
        return out


def createModel(**kw):
    model = PyraNet()
    return model
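

# --- Editorial note (not part of the original file): PyraNet.forward returns a list with
# one heatmap tensor per hourglass stack. Training typically applies the loss to every
# element (intermediate supervision); for inference only the final stack is usually kept,
# e.g. `heatmaps = model(imgs)[-1]`.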