newdevelop
Aoi 3 years ago
commit 22fcc26566

@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/

@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 DAMIÀ FUENTES ESCOTÉ

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

@ -0,0 +1,89 @@
# DJITelloPy
## [中文文档 (Chinese version of this readme)](README_CN.md)
DJI Tello drone python interface using the official [Tello SDK](https://dl-cdn.ryzerobotics.com/downloads/tello/20180910/Tello%20SDK%20Documentation%20EN_1.3.pdf) and [Tello EDU SDK](https://dl-cdn.ryzerobotics.com/downloads/Tello/Tello%20SDK%202.0%20User%20Guide.pdf). This library has the following features:
- implementation of all tello commands
- easily retrieve a video stream
- receive and parse state packets
- control a swarm of drones
- support for python >= 3.6
Feel free to contribute!
## Install using pip
```
pip install djitellopy
```
For Linux distributions with both python2 and python3 (e.g. Debian, Ubuntu, ...), you need to run
```
pip3 install djitellopy
```
## Install in developer mode
Using the commands below you can install the repository in an _editable_ way. This allows you to modify the library and use the modified version as if you had installed it regularly.
```
git clone https://github.com/damiafuentes/DJITelloPy.git
cd DJITelloPy
pip install -e .
```
## Usage
### API Reference
See [djitellopy.readthedocs.io](https://djitellopy.readthedocs.io/en/latest/) for a full reference of all classes and methods available.
### Simple example
```python
from djitellopy import Tello
tello = Tello()
tello.connect()
tello.takeoff()
tello.move_left(100)
tello.rotate_counter_clockwise(90)
tello.move_forward(100)
tello.land()
```
### More examples
The [examples](examples/) directory contains some code examples.
Comments in the examples are mostly in both English and Chinese.
- [taking a picture](examples/take-picture.py)
- [recording a video](examples/record-video.py)
- [flying a swarm (multiple Tellos at once)](examples/simple-swarm.py)
- [simple controlling using your keyboard](examples/manual-control-opencv.py)
- [mission pad detection](examples/mission-pads.py)
- [fully featured manual control using pygame](examples/manual-control-pygame.py)
### Notes
- If you are using the `streamon` command and the response is `Unknown command`, you have to update the Tello firmware. That can be done through the Tello app.
- Mission pad detection and navigation is only supported by the Tello EDU.
- A bright environment is necessary for successful use of mission pads.
- Connecting to an existing wifi network is only supported by the Tello EDU; see the sketch below.
- When connected to an existing wifi network, video streaming is not available (TODO: needs confirmation with the new SDK3 `port` commands)
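A minimal sketch of the wifi workflow mentioned above, assuming the library's `connect_to_wifi` method (Tello EDU only); the SSID, password, and IP below are placeholders:
```python
from djitellopy import Tello

tello = Tello()
tello.connect()
# make the drone reboot and join your existing wifi network
tello.connect_to_wifi("YOUR-SSID", "YOUR-PASSWORD")

# after the reboot, reach the drone via the IP your router assigned it
tello = Tello("192.168.178.42")
tello.connect()
```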
## DJITelloPy in the media and in the wild
- \>1.5 million views on YouTube: [Drone Programming With Python Course](https://youtu.be/LmEcyQnfpDA?t=1282)
- German magazine "Make": ["KI steuert Follow-Me-Drohne" (paywall)](https://www.heise.de/select/make/2021/6/2116016361503211330), [author's notes](https://www.jentsch.io/ki-artikel-im-aktuellen-make-magazin-6-21/), [github repo](https://github.com/msoftware/tello-tracking)
- Webinar on learn.droneblocks.io: ["DJITelloPy Drone Coding"](https://learn.droneblocks.io/p/djitellopy), [github repo](https://learn.droneblocks.io/p/djitellopy)
- Universities & Schools using DJITelloPy in projects or in class:
- [Ball State University in Muncie, Indiana](https://www.bsu.edu/)
- [Technical University Kaiserslautern](https://www.uni-kl.de/)
- [Sha Tin College, Hong Kong](https://shatincollege.edu.hk/)
- [add yours...](https://github.com/damiafuentes/DJITelloPy/edit/master/README.md)
## Authors
* **Damià Fuentes Escoté**
* **Jakob Löw**
* [and more](https://github.com/damiafuentes/DJITelloPy/graphs/contributors)
## License
This project is licensed under the MIT License - see the [LICENSE.txt](LICENSE.txt) file for details.

@ -0,0 +1,87 @@
# DJITelloPy
A Python interface for the DJI Tello drone,
using the official [Tello SDK](https://dl-cdn.ryzerobotics.com/downloads/tello/20180910/Tello%20SDK%20Documentation%20EN_1.3.pdf) and [Tello EDU SDK](https://dl-cdn.ryzerobotics.com/downloads/Tello/Tello%20SDK%202.0%20User%20Guide.pdf). This library has the following features:
- implementation of all tello commands
- easily retrieve a video stream
- receive and parse state packets
- control multiple drones
- support for Python 3.6 and above
Contributions are welcome!
## Install using pip
```
pip install djitellopy
```
> Translator's note: installing via pip can be slow in mainland China and may time out.\
> Consider using a domestic mirror (here, the Tsinghua mirror):
> ```
> pip install djitellopy -i https://pypi.tuna.tsinghua.edu.cn/simple/
> ```
For Linux distributions with both python2 and python3 (Ubuntu, Debian, etc.), use
```
pip3 install djitellopy
```
## Install in developer mode
You can install this project in *editable* mode using the commands below. This allows you to modify the library and use the modified version as if it were installed normally.
```
git clone https://github.com/damiafuentes/DJITelloPy.git
cd DJITelloPy
pip install -e .
```
## Usage
### API reference
See [djitellopy.readthedocs.io](https://djitellopy.readthedocs.io/en/latest/) for all available classes and methods.
### Simple example
```python
from djitellopy import Tello
tello = Tello()
tello.connect()
tello.takeoff()
tello.move_left(100)
tello.rotate_counter_clockwise(90)
tello.move_forward(100)
tello.land()
```
### More examples
There are some code examples in the [examples](examples/) directory:
- [taking a picture](examples/take-picture.py)
- [recording a video](examples/record-video.py)
- [controlling multiple drones at once](examples/simple-swarm.py)
- [simple keyboard control](examples/manual-control-opencv.py)
- [mission pad detection](examples/mission-pads.py) (translator's note: "mission pads" refers to the Tello challenge cards)
- [keyboard control using pygame](examples/manual-control-pygame.py)
### Notes
- If the `streamon` command returns `Unknown command`, you have to update the firmware through the Tello app.
- Mission pad detection and navigation is only supported by the Tello EDU.
- A bright environment is necessary for mission pad detection.
- Only the Tello EDU supports connecting to an existing wifi network.
- Video streaming is not available while connected to an existing wifi network.
## Authors
* **Damià Fuentes Escoté**
* **Jakob Löw**
* [and more](https://github.com/damiafuentes/DJITelloPy/graphs/contributors)
## Translators
* [C0derGeorge](https://github.com/C0derGeorge)
## License
This project is licensed under the MIT License - see [LICENSE.txt](LICENSE.txt) for details.

@ -0,0 +1,2 @@
from .tello import Tello, TelloException, BackgroundFrameRead
from .swarm import TelloSwarm

@ -0,0 +1,65 @@
"""
This file is based on a StackOverflow post by @301_Moved_Permanently.
See https://stackoverflow.com/a/50622643
The code was adapted to be able to wrap all methods of a class by simply
adding the decorator to the class itself.
"""
import inspect
import typing
from contextlib import suppress
from functools import wraps

def _is_unparameterized_special_typing(type_hint):
    # Check for typing.Any, typing.Union, typing.ClassVar (without parameters)
    if hasattr(typing, "_SpecialForm"):
        return isinstance(type_hint, typing._SpecialForm)
    elif hasattr(type_hint, "__origin__"):
        return type_hint.__origin__ is None
    else:
        return False


def enforce_types(target):
    """Class decorator adding type checks to all member functions
    """
    def check_types(spec, *args, **kwargs):
        parameters = dict(zip(spec.args, args))
        parameters.update(kwargs)
        for name, value in parameters.items():
            with suppress(KeyError):  # Assume un-annotated parameters can be any type
                type_hint = spec.annotations[name]
                if _is_unparameterized_special_typing(type_hint):
                    continue

                if hasattr(type_hint, "__origin__") and type_hint.__origin__ is not None:
                    actual_type = type_hint.__origin__
                elif hasattr(type_hint, "__args__") and type_hint.__args__ is not None:
                    actual_type = type_hint.__args__
                else:
                    actual_type = type_hint

                if not isinstance(value, actual_type):
                    raise TypeError("Unexpected type for '{}' (expected {} but found {})"
                                    .format(name, type_hint, type(value)))

    def decorate(func):
        spec = inspect.getfullargspec(func)

        @wraps(func)
        def wrapper(*args, **kwargs):
            check_types(spec, *args, **kwargs)
            return func(*args, **kwargs)
        return wrapper

    if inspect.isclass(target):
        members = inspect.getmembers(target, predicate=inspect.isfunction)
        for name, func in members:
            setattr(target, name, decorate(func))
        return target
    else:
        return decorate(target)
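
# A usage sketch of the decorator above (the Greeter class is hypothetical):
# applying @enforce_types to a class wraps every member function, so a call
# whose argument does not match its annotation raises a TypeError.
#
#     @enforce_types
#     class Greeter:
#         def greet(self, name: str) -> str:
#             return "Hello, " + name
#
#     Greeter().greet("Tello")  # ok
#     Greeter().greet(42)       # TypeError: Unexpected type for 'name' ...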

@ -0,0 +1,159 @@
"""Library for controlling multiple DJI Ryze Tello drones.
"""
from threading import Thread, Barrier
from queue import Queue
from typing import List, Callable
from .tello import Tello, TelloException
from .enforce_types import enforce_types
@enforce_types
class TelloSwarm:
"""Swarm library for controlling multiple Tellos simultaneously
"""
tellos: List[Tello]
barrier: Barrier
funcBarier: Barrier
funcQueues: List[Queue]
threads: List[Thread]
@staticmethod
def fromFile(path: str):
"""Create TelloSwarm from file. The file should contain one IP address per line.
Arguments:
path: path to the file
"""
with open(path, 'r') as fd:
ips = fd.readlines()
return TelloSwarm.fromIps(ips)
@staticmethod
def fromIps(ips: list):
"""Create TelloSwarm from a list of IP addresses.
Arguments:
ips: list of IP Addresses
"""
if not ips:
raise TelloException("No ips provided")
tellos = []
for ip in ips:
tellos.append(Tello(ip.strip()))
return TelloSwarm(tellos)
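
    # Hypothetical construction sketch (not part of the class): either factory
    # yields a ready TelloSwarm.
    #   swarm = TelloSwarm.fromFile("ips.txt")  # ips.txt: one IP address per line
    #   swarm = TelloSwarm.fromIps(["192.168.178.42", "192.168.178.43"])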
    def __init__(self, tellos: List[Tello]):
        """Initialize a TelloSwarm instance

        Arguments:
            tellos: list of [Tello][tello] instances
        """
        self.tellos = tellos
        self.barrier = Barrier(len(tellos))
        self.funcBarrier = Barrier(len(tellos) + 1)
        self.funcQueues = [Queue() for tello in tellos]

        def worker(i):
            queue = self.funcQueues[i]
            tello = self.tellos[i]

            while True:
                func = queue.get()
                self.funcBarrier.wait()
                func(i, tello)
                self.funcBarrier.wait()

        self.threads = []
        for i, _ in enumerate(tellos):
            thread = Thread(target=worker, daemon=True, args=(i,))
            thread.start()
            self.threads.append(thread)

    def sequential(self, func: Callable[[int, Tello], None]):
        """Call `func` for each tello sequentially. The function retrieves
        two arguments: The index `i` of the current drone and `tello` the
        current [Tello][tello] instance.

        ```python
        swarm.sequential(lambda i, tello: tello.land())
        ```
        """
        for i, tello in enumerate(self.tellos):
            func(i, tello)

    def parallel(self, func: Callable[[int, Tello], None]):
        """Call `func` for each tello in parallel. The function retrieves
        two arguments: The index `i` of the current drone and `tello` the
        current [Tello][tello] instance.

        You can use `swarm.sync()` for syncing between threads.

        ```python
        swarm.parallel(lambda i, tello: tello.move_up(50 + i * 10))
        ```
        """
        for queue in self.funcQueues:
            queue.put(func)

        self.funcBarrier.wait()
        self.funcBarrier.wait()

    def sync(self, timeout: float = None):
        """Sync parallel tello threads. The code continues when all threads
        have called `swarm.sync`.

        ```python
        def doStuff(i, tello):
            tello.move_up(50 + i * 10)
            swarm.sync()

            if i == 2:
                tello.flip_back()

            # make all other drones wait for one to complete its flip
            swarm.sync()

        swarm.parallel(doStuff)
        ```
        """
        return self.barrier.wait(timeout)

    def __getattr__(self, attr):
        """Call a standard tello function in parallel on all tellos.

        ```python
        swarm.command()
        swarm.takeoff()
        swarm.move_up(50)
        ```
        """
        def callAll(*args, **kwargs):
            self.parallel(lambda i, tello: getattr(tello, attr)(*args, **kwargs))

        return callAll

    def __iter__(self):
        """Iterate over all drones in the swarm.

        ```python
        for tello in swarm:
            print(tello.get_battery())
        ```
        """
        return iter(self.tellos)

    def __len__(self):
        """Return the number of tellos in the swarm

        ```python
        print("Tello count: {}".format(len(swarm)))
        ```
        """
        return len(self.tellos)

File diff suppressed because it is too large

@ -0,0 +1,5 @@
mkdocs>=1.1.2
mkdocs-material>=5.2.2
mkdocstrings>=0.11.2
numpy==1.15.4
opencv-python==3.4.3.18

@ -0,0 +1,28 @@
# DJITelloPy
This documentation is the API reference of the DJITelloPy Library.
For more information on the project please see the [readme on github](https://github.com/damiafuentes/DJITelloPy/blob/master/README.md).
## API
Currently the library contains the following classes (see the import sketch below):
- [Tello][tello] for controlling a single tello drone.
- [Swarm][swarm] for controlling multiple Tello EDUs in parallel.
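As a quick sketch, both classes are importable straight from the package (this matches the package `__init__.py` added in this commit):
```python
from djitellopy import Tello, TelloSwarm
```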
## Example Code
Please see the [example directory](https://github.com/damiafuentes/DJITelloPy/tree/master/examples) on github.
## Installation
```bash
pip install djitellopy
```
For Linux distributions with both python2 and python3 (e.g. Debian, Ubuntu, ...), you need to run
```bash
pip3 install djitellopy
```

@ -0,0 +1,5 @@
# Swarm
::: djitellopy.TelloSwarm
    :docstring:
    :members:

@ -0,0 +1,5 @@
# Tello
::: djitellopy.Tello
    :docstring:
    :members:

@ -0,0 +1,53 @@
# simple example demonstrating how to control a Tello using your keyboard.
# For a more fully featured example see manual-control-pygame.py
#
# Use W, A, S, D for moving, E, Q for rotating and R, F for going up and down.
# When starting the script the Tello will takeoff, pressing ESC makes it land
# and the script exit.
# 简单的演示如何用键盘控制Tello
# 欲使用全手动控制请查看 manual-control-pygame.py
#
# W, A, S, D 移动, E, Q 转向R、F上升与下降.
# 开始运行程序时Tello会自动起飞按ESC键降落
# 并且程序会退出
from djitellopy import Tello
import cv2, math, time
tello = Tello()
tello.connect()
tello.streamon()
frame_read = tello.get_frame_read()
tello.takeoff()
while True:
    # In reality you want to display frames in a separate thread. Otherwise
    # they will freeze while the drone moves.
    # 在实际开发里请在另一个线程中显示摄像头画面,否则画面会在无人机移动时静止
    img = frame_read.frame
    cv2.imshow("drone", img)

    key = cv2.waitKey(1) & 0xff
    if key == 27:  # ESC
        break
    elif key == ord('w'):
        tello.move_forward(30)
    elif key == ord('s'):
        tello.move_back(30)
    elif key == ord('a'):
        tello.move_left(30)
    elif key == ord('d'):
        tello.move_right(30)
    elif key == ord('e'):
        tello.rotate_clockwise(30)
    elif key == ord('q'):
        tello.rotate_counter_clockwise(30)
    elif key == ord('r'):
        tello.move_up(30)
    elif key == ord('f'):
        tello.move_down(30)

tello.land()

@ -0,0 +1,187 @@
from djitellopy import Tello
import cv2
import pygame
import numpy as np
import time
# Speed of the drone
# 无人机的速度
S = 60
# Frames per second of the pygame window display
# A low number also results in input lag, as input information is processed once per frame.
# pygame窗口显示的帧数
# 较低的帧数会导致输入延迟,因为一帧只会处理一次输入信息
FPS = 120
class FrontEnd(object):
    """ Maintains the Tello display and moves it through the keyboard keys.
        Press escape key to quit.
        The controls are:
            - T: Takeoff
            - L: Land
            - Arrow keys: Forward, backward, left and right.
            - A and D: Counter clockwise and clockwise rotations (yaw)
            - W and S: Up and down.

        保持Tello画面显示并用键盘移动它
        按下ESC键退出
        操作说明:
            T:起飞
            L:降落
            方向键:前后左右
            A和D:逆时针与顺时针转向
            W和S:上升与下降
    """

    def __init__(self):
        # Init pygame
        # 初始化pygame
        pygame.init()

        # Create pygame window
        # 创建pygame窗口
        pygame.display.set_caption("Tello video stream")
        self.screen = pygame.display.set_mode([960, 720])

        # Init Tello object that interacts with the Tello drone
        # 初始化与Tello交互的Tello对象
        self.tello = Tello()

        # Drone velocities between -100~100
        # 无人机各方向速度在-100~100之间
        self.for_back_velocity = 0
        self.left_right_velocity = 0
        self.up_down_velocity = 0
        self.yaw_velocity = 0
        self.speed = 10

        self.send_rc_control = False

        # create update timer
        # 创建上传定时器
        pygame.time.set_timer(pygame.USEREVENT + 1, 1000 // FPS)

    def run(self):
        self.tello.connect()
        self.tello.set_speed(self.speed)

        # In case streaming is on. This happens when we quit this program without the escape key.
        # 防止视频流已开启。这会在不使用ESC键退出的情况下发生。
        self.tello.streamoff()
        self.tello.streamon()

        frame_read = self.tello.get_frame_read()

        should_stop = False
        while not should_stop:
            for event in pygame.event.get():
                if event.type == pygame.USEREVENT + 1:
                    self.update()
                elif event.type == pygame.QUIT:
                    should_stop = True
                elif event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_ESCAPE:
                        should_stop = True
                    else:
                        self.keydown(event.key)
                elif event.type == pygame.KEYUP:
                    self.keyup(event.key)

            if frame_read.stopped:
                break

            self.screen.fill([0, 0, 0])

            frame = frame_read.frame
            # display the battery level on the frame
            text = "Battery: {}%".format(self.tello.get_battery())
            cv2.putText(frame, text, (5, 720 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = np.rot90(frame)
            frame = np.flipud(frame)

            frame = pygame.surfarray.make_surface(frame)
            self.screen.blit(frame, (0, 0))
            pygame.display.update()

            time.sleep(1 / FPS)

        # Always call this before finishing, to deallocate resources.
        # 通常在结束前调用它以释放资源
        self.tello.end()

    def keydown(self, key):
        """ Update velocities based on key pressed
        Arguments:
            key: pygame key

        基于键的按下上传各个方向的速度
        参数:
            key:pygame事件循环中的键事件
        """
        if key == pygame.K_UP:  # set forward velocity
            self.for_back_velocity = S
        elif key == pygame.K_DOWN:  # set backward velocity
            self.for_back_velocity = -S
        elif key == pygame.K_LEFT:  # set left velocity
            self.left_right_velocity = -S
        elif key == pygame.K_RIGHT:  # set right velocity
            self.left_right_velocity = S
        elif key == pygame.K_w:  # set up velocity
            self.up_down_velocity = S
        elif key == pygame.K_s:  # set down velocity
            self.up_down_velocity = -S
        elif key == pygame.K_a:  # set yaw counter clockwise velocity
            self.yaw_velocity = -S
        elif key == pygame.K_d:  # set yaw clockwise velocity
            self.yaw_velocity = S

    def keyup(self, key):
        """ Update velocities based on key released
        Arguments:
            key: pygame key

        基于键的松开上传各个方向的速度
        参数:
            key:pygame事件循环中的键事件
        """
        if key == pygame.K_UP or key == pygame.K_DOWN:  # set zero forward/backward velocity
            self.for_back_velocity = 0
        elif key == pygame.K_LEFT or key == pygame.K_RIGHT:  # set zero left/right velocity
            self.left_right_velocity = 0
        elif key == pygame.K_w or key == pygame.K_s:  # set zero up/down velocity
            self.up_down_velocity = 0
        elif key == pygame.K_a or key == pygame.K_d:  # set zero yaw velocity
            self.yaw_velocity = 0
        elif key == pygame.K_t:  # takeoff
            self.tello.takeoff()
            self.send_rc_control = True
        elif key == pygame.K_l:  # land
            self.tello.land()
            self.send_rc_control = False

    def update(self):
        """ Update routine. Send velocities to Tello.

        向Tello发送各方向速度信息
        """
        if self.send_rc_control:
            self.tello.send_rc_control(self.left_right_velocity, self.for_back_velocity,
                                       self.up_down_velocity, self.yaw_velocity)


def main():
    frontend = FrontEnd()

    # run frontend
    frontend.run()


if __name__ == '__main__':
    main()

@ -0,0 +1,34 @@
from djitellopy import Tello
# create and connect
# 创建Tello对象并连接
tello = Tello()
tello.connect()
# configure drone
# 设置无人机
tello.enable_mission_pads()
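# detection direction values per the official Tello SDK (stated here as an assumption,
# the constants are not part of this diff): 0 = downward only, 1 = forward only, 2 = both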
tello.set_mission_pad_detection_direction(1) # forward detection only 只识别前方
tello.takeoff()
pad = tello.get_mission_pad_id()
# detect and react to pads until we see pad #1
# 发现并识别挑战卡直到看见1号挑战卡
while pad != 1:
    if pad == 3:
        tello.move_back(30)
        tello.rotate_clockwise(90)

    if pad == 4:
        tello.move_up(30)
        tello.flip_forward()

    pad = tello.get_mission_pad_id()
# graceful termination
# 安全结束程序
tello.disable_mission_pads()
tello.land()
tello.end()

@ -0,0 +1,16 @@
# Simply import panoramaModule.py and you can use each of its functions by
# passing your drone (the Tello instance) as the argument.
from djitellopy import Tello
import cv2
import time
import panoramaModule
tello = Tello()
tello.connect()
print(tello.get_battery())
tello.takeoff()
tello.move_up(500)
panoramaModule.panorama_half_clockwise(tello)
tello.land()

@ -0,0 +1,88 @@
# Module with individual panorama types defined. You can just import it and use it however you like.
#
# It will save photos from the Tello inside the folder the script is run from.
# You can change this by changing the path inside every function.
from djitellopy import Tello
import cv2
import time


def panorama_full_clockwise(tello_name):
    # full 360° panorama: photo, then an 80° clockwise turn, four times,
    # followed by a final photo and a 40° turn back to the start heading
    tello = tello_name
    tello.streamoff()
    tello.streamon()
    for i in range(4):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-full-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_clockwise(80)
    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-full-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_clockwise(40)
    tello.streamoff()


def panorama_half_clockwise(tello_name):
    # half (180°) panorama: turn 90° left first, sweep clockwise in 60° steps,
    # then return to the start heading
    tello = tello_name
    tello.streamoff()
    tello.streamon()
    tello.rotate_counter_clockwise(90)
    for i in range(3):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-half-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_clockwise(60)
    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-half-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_counter_clockwise(90)
    tello.streamoff()


def panorama_full_counter_clockwise(tello_name):
    # full 360° panorama rotating counter-clockwise
    tello = tello_name
    tello.streamoff()
    tello.streamon()
    for i in range(4):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-full-counter-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_counter_clockwise(80)
    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-full-counter-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_counter_clockwise(40)
    tello.streamoff()


def panorama_half_counter_clockwise(tello_name):
    # half (180°) panorama: turn 90° right first, sweep counter-clockwise in 60° steps,
    # then return to the start heading
    tello = tello_name
    tello.streamoff()
    tello.streamon()
    tello.rotate_clockwise(90)
    for i in range(3):
        img = tello.get_frame_read().frame
        cv2.imwrite(f'Panorama-half-counter-clockwise_{time.time()}.jpg', img)
        time.sleep(1)
        tello.rotate_counter_clockwise(60)
    img = tello.get_frame_read().frame
    cv2.imwrite(f'Panorama-half-counter-clockwise_{time.time()}.jpg', img)
    time.sleep(1)
    tello.rotate_clockwise(90)
    tello.streamoff()

@ -0,0 +1,37 @@
import time, cv2
from threading import Thread
from djitellopy import Tello
tello = Tello()
tello.connect()
keepRecording = True
tello.streamon()
frame_read = tello.get_frame_read()
def videoRecorder():
    # create a VideoWriter object, recording to ./video.avi
    # 创建一个VideoWriter对象,存储画面至./video.avi
    height, width, _ = frame_read.frame.shape
    video = cv2.VideoWriter('video.avi', cv2.VideoWriter_fourcc(*'XVID'), 30, (width, height))

    while keepRecording:
        video.write(frame_read.frame)
        time.sleep(1 / 30)

    video.release()

# we need to run the recorder in a separate thread, otherwise blocking operations
# would prevent frames from getting added to the video
# 我们需要在另一个线程中记录画面视频文件,否则其他的阻塞操作会阻止画面记录
recorder = Thread(target=videoRecorder)
recorder.start()
tello.takeoff()
tello.move_up(100)
tello.rotate_counter_clockwise(360)
tello.land()
keepRecording = False
recorder.join()

@ -0,0 +1,25 @@
from djitellopy import TelloSwarm
swarm = TelloSwarm.fromIps([
"192.168.178.42",
"192.168.178.43",
"192.168.178.44"
])
swarm.connect()
swarm.takeoff()
# run in parallel on all tellos
# 同时在所有Tello上执行
swarm.move_up(100)
# run by one tello after the other
# 让Tello一个接一个执行
swarm.sequential(lambda i, tello: tello.move_forward(i * 20 + 20))
# making each tello do something unique in parallel
# 让每一架Tello单独执行不同的操作
swarm.parallel(lambda i, tello: tello.move_left(i * 100 + 20))
swarm.land()
swarm.end()

@ -0,0 +1,12 @@
from djitellopy import Tello
tello = Tello()
tello.connect()
tello.takeoff()
tello.move_left(100)
tello.rotate_clockwise(90)
tello.move_forward(100)
tello.land()

@ -0,0 +1,13 @@
import cv2
from djitellopy import Tello
tello = Tello()
tello.connect()
tello.streamon()
frame_read = tello.get_frame_read()
tello.takeoff()
cv2.imwrite("picture.png", frame_read.frame)
tello.land()

@ -0,0 +1,22 @@
site_name: DJITelloPy API Reference
site_url: "https://djitellopy.readthedocs.io/en/latest/"
repo_url: "https://github.com/damiafuentes/DJITelloPy"
repo_name: "damiafuentes/DJITelloPy"
theme:
  name: "material"

markdown_extensions:
  - admonition
  - codehilite

plugins:
  - search
  - mkdocstrings:
      default_handler: python
      handlers:
        python:
          rendering:
            show_source: true
      watch:
        - djitellopy/

@ -0,0 +1,3 @@
numpy==1.20.1
av==8.0.3
pillow==8.4.0

@ -0,0 +1,3 @@
# Inside of setup.cfg
[metadata]
description-file = README.md

@ -0,0 +1,38 @@
import setuptools
with open("README.md", "r", encoding="utf-8") as fd:
    long_description = fd.read()

# replace relative urls to example files with absolute urls to the main git repo
repo_code_url = "https://github.com/damiafuentes/DJITelloPy/tree/master"
long_description = long_description.replace("](examples/", "]({}/examples/".format(repo_code_url))

setuptools.setup(
    name='djitellopy',
    packages=['djitellopy'],
    version='2.4.0',
    license='MIT',
    description='Tello drone library including support for video streaming, swarms, state packets and more',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='Jakob Löw',
    author_email='djitellopy@m4gnus.de',
    url='https://github.com/damiafuentes/DJITelloPy',
    download_url='https://github.com/damiafuentes/DJITelloPy/archive/2.4.0.tar.gz',
    keywords=['tello', 'dji', 'drone', 'sdk', 'official sdk'],
    install_requires=[
        'numpy',
        'opencv-python',
    ],
    python_requires='>=3.6',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
    ],
)

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

@ -0,0 +1,7 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="PROJECT_PROFILE" value="Default" />
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (myTelloProject-master)" project-jdk-type="Python SDK" />
</project>

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/myTelloProject.iml" filepath="$PROJECT_DIR$/.idea/myTelloProject.iml" />
    </modules>
  </component>
</project>

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.10 (myTelloProject-master)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PySciProjectComponent">
    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
  </component>
</project>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

@ -0,0 +1,29 @@
human_detection/output
examples/results
examples/res
PoseFlow/__pycache__
PoseFlow/*.npy
PoseFlow/alpha-pose-results-test.json
PoseFlow/alpha-pose-results-val.json
PoseFlow/test-predict
PoseFlow/val-predict
train_sppe/coco-minival500_images.txt
train_sppe/person_keypoints_val2014.json
ssd/examples
images
*.npy
*.so
*.pyc
.ipynb_checkpoints
*/.ipynb_checkpoints/
*/.tensorboard/*
*/exp
*.pth
*.h5
*.zip
*.weights
coco-minival/

@ -0,0 +1,515 @@
ALPHAPOSE: MULTIPERSON KEYPOINT DETECTION
SOFTWARE LICENSE AGREEMENT
ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Shanghai Jiao Tong University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark "AlphaPose", "Shanghai Jiao Tong" or any renditions thereof without the prior written permission of Licensor.
You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
EXPORT REGULATION: Licensee agrees to comply with any and all applicable
U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
************************************************************************
THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
1. Torch, (https://github.com/torch/distro)
Copyright (c) 2016, Soumith Chintala, Ronan Collobert, Koray Kavukcuoglu, Clement Farabet All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of distro nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2. TensorFlow (https://github.com/tensorflow/tensorflow)
Copyright 2018 The TensorFlow Authors. All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2017, The TensorFlow Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
3. tf-faster-rcnn (https://github.com/endernewton/tf-faster-rcnn)
MIT License
Copyright (c) 2017 Xinlei Chen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
4. PyraNet (https://github.com/bearpaw/PyraNet)
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
5. pose-hg-demo (https://github.com/umich-vl/pose-hg-demo)
Copyright (c) 2016, University of Michigan
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********

@ -0,0 +1,141 @@
# Pose Flow
Official implementation of [Pose Flow: Efficient Online Pose Tracking](https://arxiv.org/abs/1802.00977).
<p align='center'>
<img src="posetrack1.gif", width="360">
<img src="posetrack2.gif", width="344">
</p>
Results on PoseTrack Challenge validation set:
1. Task2: Multi-Person Pose Estimation (mAP)
<center>
| Method | Head mAP | Shoulder mAP | Elbow mAP | Wrist mAP | Hip mAP | Knee mAP | Ankle mAP | Total mAP |
|:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|
| Detect-and-Track(FAIR) | **67.5** | 70.2 | 62 | 51.7 | 60.7 | 58.7 | 49.8 | 60.6 |
| **AlphaPose** | 66.7 | **73.3** | **68.3** | **61.1** | **67.5** | **67.0** | **61.3** | **66.5** |
</center>
2. Task3: Pose Tracking (MOTA)
<center>
| Method | Head MOTA | Shoulder MOTA | Elbow MOTA | Wrist MOTA | Hip MOTA | Knee MOTA | Ankle MOTA | Total MOTA | Total MOTP| Speed(FPS) |
|:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|
| Detect-and-Track(FAIR) | **61.7** | 65.5 | 57.3 | 45.7 | 54.3 | 53.1 | 45.7 | 55.2 | 61.5 |Unknown|
| **PoseFlow(DeepMatch)** | 59.8 | **67.0** | 59.8 | 51.6 | **60.0** | **58.4** | **50.5** | **58.3** | **67.8**|8|
| **PoseFlow(OrbMatch)** | 59.0 | 66.8 | **60.0** | **51.8** | 59.4 | **58.4** | 50.3 | 58.0 | 62.2|24|
</center>
## Latest Features
- Dec 2018: <strong>PoseFlow (General Version)</strong> released! Supports any dataset and visualization of pose tracking results.
- Oct 2018: Support for generating correspondence files with ORB (OpenCV): 3× faster, with no need to compile the DeepMatching library.
## Requirements
- Python 2.7.13
- OpenCV 3.4.2.16
- OpenCV-contrib 3.4.2.16
- tqdm 4.19.8
## Installation
1. Download the PoseTrack dataset from [PoseTrack](https://posetrack.net/) to `AlphaPose/PoseFlow/posetrack_data/`
2. (Optional) Use [DeepMatching](http://lear.inrialpes.fr/src/deepmatching/) to extract dense correspondences between adjacent frames in every video. Please refer to [DeepMatching Compile Error](https://github.com/MVIG-SJTU/AlphaPose/issues/97) to compile DeepMatching correctly.
```shell
pip install -r requirements.txt
cd deepmatching
make clean all
make
cd ..
```
## For Any Datasets (General Version)
1. Use [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results.
```shell
# pytorch version
python demo.py --indir ${image_dir} --outdir ${results_dir}
# torch version
./run.sh --indir ${image_dir} --outdir ${results_dir}
```
2. Run pose tracking
```shell
# pytorch version
python tracker-general.py --imgdir ${image_dir} \
    --in_json ${results_dir}/alphapose-results.json \
    --out_json ${results_dir}/alphapose-results-forvis-tracked.json \
    --visdir ${render_dir}
# torch version
python tracker-general.py --imgdir ${image_dir} \
    --in_json ${results_dir}/POSE/alpha-pose-results-forvis.json \
    --out_json ${results_dir}/POSE/alpha-pose-results-forvis-tracked.json \
    --visdir ${render_dir}
```
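To sanity-check the tracked output, here is a minimal sketch that loads the result JSON and prints per-frame person counts. It assumes the output path from the command above, and that each person entry carries `score`, `keypoints`, and a tracker-assigned id key (guessed here as `idx`); adjust the key names to whatever your version of the tracker actually writes.

```python
import json

# assumed output path from the command above; adjust to your ${results_dir}
with open("alphapose-results-forvis-tracked.json") as f:
    tracked = json.load(f)

for image_name, persons in sorted(tracked.items()):
    # "idx" is an assumed track-id key written by the tracker
    ids = [p.get("idx") for p in persons]
    print("{}: {} persons, track ids {}".format(image_name, len(persons), ids))
```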
## For PoseTrack Dataset Evaluation (Paper Baseline)
1. Use [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results on videos, in a format like `alpha-pose-results-sample.json`.
2. Use DeepMatching/ORB to generate correspondence files.
```shell
# Generate correspondences by DeepMatching
# (More Robust but Slower)
python matching.py --orb=0
# or
# Generate correspondences by Orb
# (Faster but Less Robust)
python matching.py --orb=1
```
3. Run pose tracking
```shell
# pick --dataset=val or --dataset=test, and --orb=1 (ORB) or --orb=0 (DeepMatching)
python tracker-baseline.py --dataset=val --orb=1
```
4. Evaluation
The original [poseval](https://github.com/leonid-pishchulin/poseval) repository has instructions on how to convert annotation files from MAT to JSON.
Evaluate pose tracking results on validation dataset:
```shell
git clone https://github.com/leonid-pishchulin/poseval.git --recursive
cd poseval/py && export PYTHONPATH=$PWD/../py-motmetrics:$PYTHONPATH
cd ../../
python poseval/py/evaluate.py --groundTruth=./posetrack_data/annotations/val \
--predictions=./${track_result_dir}/ \
--evalPoseTracking --evalPoseEstimation
```
## Citation
Please cite this paper in your publications if it helps your research:
    @inproceedings{xiu2018poseflow,
      author = {Xiu, Yuliang and Li, Jiefeng and Wang, Haoyu and Fang, Yinghong and Lu, Cewu},
      title = {{Pose Flow}: Efficient Online Pose Tracking},
      booktitle = {BMVC},
      year = {2018}
    }

@ -0,0 +1,169 @@
{
"images/bonn_mpii_test_5sec/24621_mpii/00000103.jpg": [
{
"score": 8.385687289228619,
"keypoints": [
606.1139178059441,
1055.7866630683084,
0.1285074118632463,
622,
1016,
1.24690842628479,
701,
785,
1.3190804719924927,
919,
798,
1.0360052585601807,
622,
1003,
0.7249196767807007,
600.5921057594508,
1043.7039471202747,
0.07363978983288405,
747,
785,
1.1118680238723755,
754,
362,
1.351969599723816,
761,
362,
1.329826831817627,
735.0299835119931,
148.28635614181508,
1.2460612274594385,
616.8225141507821,
775.3727265996391,
4.076232522035756,
648,
613,
2.3751518726348877,
651.8582324380334,
341.53551239931363,
3.683300004030267,
880,
435,
2.192237377166748,
946,
607,
4.622312943140666,
993.0232720577997,
779,
3.9823181915094947
]
},
{
"score": 10.950873801541226,
"keypoints": [
1079.745663413901,
1057.876310361107,
0.05478342392744616,
1085.7446022663407,
927.8509247239244,
3.989027662754409,
1076.4168091495721,
681.0010309293239,
3.69318636501652,
1175.751233049613,
675.4609653408796,
3.894998808909425,
1168.367434746748,
925.6795830692723,
4.041951319921906,
1246.736699044823,
1057.6171141024415,
0.35268874869071126,
1124.9376542870104,
674.6766129035676,
3.103561346457346,
1135.6961084323723,
314.70914186846545,
3.308468804589743,
1127.340462592704,
258.60926488886156,
4.206135445215616,
1104.7237517457497,
110.1842839789316,
3.7932232834089974,
1021.6023155423281,
685.7139033202061,
2.1369253795349024,
1037.0987900834948,
514.4666027032713,
1.3758957654789534,
1053.185942829918,
324.0023196992991,
3.6865770542425436,
1219.9910902145912,
313.78280708471095,
4.968025243674319,
1242.666019724613,
508.9999999999999,
5.531640558590693,
1199.6364698448594,
672.3816554867356,
5.188543576240203
]
},
{
"score": 8.796343223208792,
"keypoints": [
892,
1057,
0.0642801970243454,
879,
960,
2.7418549060821533,
873,
709,
1.4490729570388794,
976,
709,
2.326153039932251,
976,
947,
1.8430407047271729,
1018.3358121883978,
1056.6669765235497,
0.21173024539211077,
924,
709,
1.6106798648834229,
931,
342,
1.8503456115722656,
944.5304231025389,
289.8346541279278,
2.84533776915699,
911.8320538351469,
133.33264423713035,
2.8966951072816554,
847,
709,
0.2632869780063629,
1055.3791695827251,
530.2416608345495,
0.052683703823322964,
868.2297245132042,
343.1756749304577,
1.56991625917443,
1041.1065908708715,
345.8934091291283,
4.5100791598212595,
1057.7401496489065,
539.3480299297813,
4.881036537158492,
1025.9666633264292,
694.5222411502352,
4.941733110537884
]
}
],
"images/bonn_mpii_test_5sec/24621_mpii/00000104.jpg": [
{"score": ,"keypoints":[]},
...
,{"score": ,"keypoints":[]}
]
...
}
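For reference, each `keypoints` list in this sample is a flat run of (x, y, score) triples, one per joint: 48 numbers, i.e. 16 joints in the MPII convention these image paths come from. A minimal sketch of unpacking one entry with numpy, assuming that triple layout:

```python
import numpy as np

# stand-in for one person entry from the sample above
person = {"score": 8.8, "keypoints": [606.1, 1055.8, 0.13] * 16}

kps = np.asarray(person["keypoints"], dtype=np.float32).reshape(-1, 3)
xy = kps[:, :2]    # (16, 2) joint coordinates in pixels
conf = kps[:, 2]   # (16,) per-joint confidence scores
print(xy.shape, float(conf.mean()))
```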

@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.

@ -0,0 +1,42 @@
CC=g++
OS_NAME=$(shell uname -s)
ifeq ($(OS_NAME),Linux)
LAPACKLDFLAGS=/usr/local/atlas/lib/libsatlas.so # single-threaded blas
#LAPACKLDFLAGS=/usr/lib64/atlas/libtatlas.so # multi-threaded blas
#BLAS_THREADING=-D MULTITHREADED_BLAS # remove this if wrong
endif
ifeq ($(OS_NAME),Darwin) # Mac OS X
LAPACKLDFLAGS=-framework Accelerate # for OS X
endif
LAPACKCFLAGS=-Dinteger=int $(BLAS_THREADING)
STATICLAPACKLDFLAGS=-fPIC -Wall -g -fopenmp -static -static-libstdc++ /home/lear/douze/tmp/jpeg-6b/libjpeg.a /usr/lib64/libpng.a /usr/lib64/libz.a /usr/lib64/libblas.a /usr/lib/gcc/x86_64-redhat-linux/4.9.2/libgfortran.a /usr/lib/gcc/x86_64-redhat-linux/4.9.2/libquadmath.a # statically linked version
CFLAGS= -fPIC -Wall -g -std=c++11 $(LAPACKCFLAGS) -fopenmp -DUSE_OPENMP -O3
LDFLAGS=-fPIC -Wall -g -ljpeg -lpng -fopenmp
CPYTHONFLAGS=-I/usr/include/python2.7
SOURCES := $(shell find . -name '*.cpp' ! -name 'deepmatching_matlab.cpp')
OBJ := $(SOURCES:%.cpp=%.o)
HEADERS := $(shell find . -name '*.h')
all: deepmatching
%.o: %.cpp %.h
	$(CC) -o $@ $(CFLAGS) -c $<
deepmatching: $(HEADERS) $(OBJ)
$(CC) -o $@ $^ $(LDFLAGS) $(LAPACKLDFLAGS) -I/home/ibal_109/atlas/build/include
deepmatching-static: $(HEADERS) $(OBJ)
$(CC) -o $@ $^ $(STATICLAPACKLDFLAGS)
python: $(HEADERS) $(OBJ)
# swig -python $(CPYTHONFLAGS) deepmatching.i # not necessary, only do if you have swig compiler
g++ $(CFLAGS) -c deepmatching_wrap.c $(CPYTHONFLAGS)
g++ -shared $(LDFLAGS) $(LAPACKLDFLAGS) deepmatching_wrap.o $(OBJ) -o _deepmatching.so $(LIBFLAGS)
clean:
rm -f $(OBJ) deepmatching *~ *.pyc .gdb_history deepmatching_wrap.o _deepmatching.so deepmatching.mex???

@ -0,0 +1,185 @@
Implementation of the Deep Matching algorithm, published at ICCV 2013 in
"DeepFlow: Large displacement optical flow with deep matching" by Philippe
Weinzaepfel, Jerome Revaud, Zaid Harchaoui and Cordelia Schmid.
Code and idea by Jerome Revaud, INRIA. The code is only for scientific
or personal use. Please contact me/INRIA for commercial use.
Email: jerome.revaud@inria.fr
Copyright (C) 2015 Jerome Revaud
Version 1.2.2
License:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
Installation:
make clean all
This program has been built on a fedora18 x64 machine and tested on Mac OS X.
*No assistance* will be given to compile the code on other OS. However, if
you are able to successfully adapt the code for other platforms (Windows),
please notify me so that I can release these versions on the webpage:
http://lear.inrialpes.fr/src/deepmatching/
Matlab wrapper:
[Prerequisite: to have compiled the executable, see above.]
1) Launch matlab by preloading the same 'libatlas' as the one used to compile ./deepmatching:
LD_PRELOAD=/usr/lib64/atlas/libtatlas.so.3 matlab
2) Compile the MEX file:
mex deepmatching_matlab.cpp deep_matching.o conv.o hog.o image.o io.o main.o maxfilter.o pixel_desc.o -output deepmatching '-DUSEOMP' CFLAGS="-fPIC -Wall -g -std=c++11 -O3 -fopenmp" LDFLAGS="-fopenmp" -lpng -ljpeg -lm /usr/local/atlas/lib/libsatlas.so
3) Try executing the code:
>> help deepmatching
>> deepmatching() % show some help about options
>> img1 = single(imread('liberty1.png'));
>> img2 = single(imread('liberty2.png'));
>> matches = deepmatching( img1, img2, '-downscale 2 -v' );
>> matches % print matches, should be as the listing shown below
Python wrapper:
1) Compile the python module:
make python
2) Try executing the code:
>> import deepmatching as dm
>> help(dm.deepmatching)
>> dm.deepmatching() # show some help about options
>> from PIL import Image
>> import numpy as np
>> img1 = np.array(Image.open('liberty1.png'))
>> img2 = np.array(Image.open('liberty2.png'))
>> matches = dm.deepmatching( img1, img2, '-downscale 2 -v' )
>> matches # print matches, should be as the listing shown below
Example usages and explanations:
To get detailed information on parameters:
./deepmatching -h
./deepmatching --help
* Build verification:
./deepmatching liberty1.png liberty2.png -downscale 2 -v
should produce the following output:
layer 0, patch_size = 16x16
remaining 16 big cells (actually, 16 are unique)
layer 1, patch_size = 32x32
remaining 25 big cells (actually, 25 are unique)
layer 2, patch_size = 64x64
remaining 25 big cells (actually, 25 are unique)
found 625 local matches
gathering correspondences 96%...
8 8 0 12 2.6554 10
8 40 4 48 2.65679 11
8 24 8 32 2.5486 11
40 40 40 32 2.64178 0
40 56 44 52 2.58631 0
40 24 40 12 2.65065 0
56 40 56 28 2.64225 0
56 24 56 12 2.68497 0
24 40 24 32 2.62045 3
24 56 28 60 2.5849 12
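Each line above is one correspondence; the columns appear to be x1 y1 x2 y2 score index
(treat that reading, and the sketch below, as an assumption rather than the tool's
documented interface). A minimal Python sketch that runs the binary and keeps only
the lines that parse as six fields:

```python
import subprocess

# assumed invocation; adjust image names and options to your setup
out = subprocess.check_output(
    ["./deepmatching", "liberty1.png", "liberty2.png", "-downscale", "2"]
).decode()

matches = []
for line in out.splitlines():
    parts = line.split()
    if len(parts) != 6:
        continue  # skip progress/log lines
    try:
        x1, y1, x2, y2, score = map(float, parts[:5])
        idx = int(parts[5])
    except ValueError:
        continue  # skip any non-match line with six fields
    matches.append((x1, y1, x2, y2, score, idx))
print("parsed {} matches".format(len(matches)))
```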
* To visualize the output correspondences:
Use the "viz.py" python script provided.
./deepmatching climb1.png climb2.png -nt 0 | python viz.py climb1.png climb2.png
* To restrict matching to local neighborhood:
The "-ngh_rad <D>" option restricts the matching to a radius of <D> pixels.
It uses less memory and is faster. For instance, this should produce about
the same output as before while using roughly half the memory and CPU:
./deepmatching climb1.png climb2.png -nt 0 -ngh_rad 192 | python viz.py climb1.png climb2.png
* To rescore matches prior to calling deepflow / epicflow:
simply pipe the output correspondences in 'rescore.py'
./deepmatching img1 img2 [args] | python rescore.py img1 img2
* Scale and rotation invariant version: (see the --help)
./deepmatching dino1.jpg dino2.jpg -nt 0 -downscale 1 -max_scale 2 -rot_range -45 +45 -v | python viz.py dino1.jpg dino2.jpg
param -max_scale: maximum scale factor (here x2, default = x5)
param -rot_range: rotation range in degrees (default = from 0 to 360)
For details about the options, please refer to the help, the papers or the code.
Important tip:
If the program stops with "segmentation fault", then it means that your machine
does not have enough memory. In this case, you should consider increasing the
"-downscale" parameter.
Version history:
version 1.0.2:
Many thanks to Bowen Zhang from Tongji University for reporting an issue with the makefile
version 1.1:
- New mode added for "fully scale & rotation invariant DeepMatching".
- Improved visualisation (viz.py)
- Removed useless/suboptimal options (-iccv_settings)
- Fixed a bug related to memory allocation for large images
version 1.2:
- Added a new option "-ngh_rad" to restrict the matching to a local neighborhood, which allows
much reduced memory usage and computations.
- static-compiled version is now fully multi-threaded with BLAS
- a few minor bugfixes, code cleaning and updates.
version 1.2.1:
- Now performing the maxpooling and subsampling steps jointly,
which results in 2/3 of memory usage compared to before. Also, it is now a bit faster.
- Removed some useless/confusing options in the executable.
version 1.2.2:
- Now include a Matlab and a Python wrapper!

@ -0,0 +1,246 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___ARRAY_TYPES_H___
#define ___ARRAY_TYPES_H___
typedef unsigned char UBYTE;
typedef unsigned int UINT;
/************************
* 1D Array
Equivalences:
C/Python/numpy: array.shape = (tx,)
array[x] := array->pixels[x]
Matlab/Fortran: [1, tx] = size(array)
array(x, 1) := array->pixels[x-1]
*/
#define DEFINE_ARRAY(type) \
typedef struct { \
type* pixels; \
int tx; \
} type##_array;
DEFINE_ARRAY(UBYTE)
DEFINE_ARRAY(int)
DEFINE_ARRAY(UINT)
DEFINE_ARRAY(float)
#define ASSERT_ARRAY_ZEROS(arr) {int size=arr->tx; assert((arr->pixels[0]==0 && arr->pixels[size/2]==0 && arr->pixels[size-1]==0) || !"error: matrix " #arr " is supposed to be zeros");}
/************************
* 2D Image
Equivalences:
C/Python/numpy: array.shape = (ty, tx)
array[y, x] := array->pixels[x + y*tx]
Matlab/Fortran: [tx, ty] = size(array)
array(x, y) := array->pixels[(x-1) + (y-1)*tx]
*/
#define DEFINE_IMG(type) \
typedef struct { \
type* pixels;\
int tx,ty;\
} type##_image;
DEFINE_IMG(UBYTE)
DEFINE_IMG(int)
DEFINE_IMG(UINT)
DEFINE_IMG(float)
#define ASSERT_SAME_SIZE ASSERT_SAME_IMG_SIZE
#define ASSERT_IMG_SIZE ASSERT_SAME_IMG_SIZE
#define ASSERT_SAME_IMG_SIZE(im1,im2) if(im1 && im2) assert(im1->tx==im2->tx && im1->ty==im2->ty);
#define ASSERT_IMAGE_ZEROS
#define ASSERT_IMG_ZEROS(img) {int size=img->tx*img->ty; assert((img->pixels[0]==0 && img->pixels[size/2]==0 && img->pixels[size-1]==0) || !"error: matrix " #img " is supposed to be zeros");}
#define IMG_SIZE(img) (long((img)->tx)*(img)->ty)
/************************
* 3D Image = Cube (Z coordinates are contiguous)
Equivalences:
C/Python/numpy: array.shape = (ty, tx, tz)
array[y, x, z] := array->pixels[z + x*tz + y*tx*tz]
Matlab/Fortran: [tz, tx, ty] = size(array)
array(z, x, y) := array->pixels[(z-1) + (x-1)*tz + (y-1)*tx*tz]
*/
#define DEFINE_CUBE(type) \
typedef struct { \
type* pixels; \
int tx,ty,tz; \
} type##_cube;
DEFINE_CUBE(UBYTE)
DEFINE_CUBE(short)
DEFINE_CUBE(int)
DEFINE_CUBE(UINT)
DEFINE_CUBE(float)
#define ASSERT_SAME_CUBE_SIZE(im1, im2) \
if((im1) && (im2)) assert((im1)->tx==(im2)->tx && (im1)->ty==(im2)->ty && (im1)->tz==(im2)->tz);
#define ASSERT_CUBE_ZEROS(img) {int size=img->tx*img->ty*img->tz; assert((img->pixels[0]==0 && img->pixels[size/2]==0 && img->pixels[size-1]==0) || !"error: matrix " #img " is supposed to be zeros");}
#define CUBE_SIZE(cube) (long((cube)->tx)*(cube)->ty*(cube)->tz)
/************************
* 3D Image = concatenation of XY layers
Equivalences:
C/Python/numpy: array.shape = (tz, ty, tx)
array[z, y, x] := array->pixels[x + y*tx + z*tx*ty]
Matlab/Fortran: [tx, ty, tz] = size(array)
array(x, y, z) := array->pixels[(x-1) + (y-1)*tx + (z-1)*tx*ty]
*/
#define DEFINE_LAYERS(type) \
typedef struct { \
type* pixels; \
int tx,ty,tz; \
} type##_layers;
DEFINE_LAYERS(UBYTE)
DEFINE_LAYERS(int)
DEFINE_LAYERS(UINT)
DEFINE_LAYERS(float)
#define ASSERT_SAME_LAYERS_SIZE(im1,im2) ASSERT_SAME_CUBE_SIZE(im1,im2)
#define ASSERT_LAYERS_ZEROS ASSERT_CUBE_ZEROS
#define LAYERS_SIZE(layers) CUBE_SIZE(layers)
/*****************
creation, reshaping macros
*/
// Because there was a random bug happening because of uninitialized memory
// and the bug was difficult to locate, I have just transformed all malloc(...)
// into calloc(...) ( = malloc + memset(0) ), which is not really consuming more time anyways
// and seems to solve the issue. This is kind of stupid technique but it works well.
#define empty_array(type,tx) ((type##_array){NEWAC(type,long(tx)),tx})
#define empty_image(type,tx,ty) ((type##_image){NEWAC(type,long(tx)*(ty)),tx,ty})
#define empty_cube(type,tx,ty,tz) ((type##_cube ){NEWAC(type,long(tx)*(ty)*long(tz)),tx,ty,tz})
#define empty_layers(type,tx,ty,tz) ((type##_layers){NEWAC(type,long(tx)*(ty)*(tz)),tx,ty,tz})
#define zeros_array(type,tx) ((type##_array){NEWAC(type,long(tx)),tx})
#define zeros_image(type,tx,ty) ((type##_image){NEWAC(type,long(tx)*(ty)),tx,ty})
#define zeros_cube(type,tx,ty,tz) ((type##_cube ){NEWAC(type,long(tx)*(ty)*(tz)),tx,ty,tz})
#define zeros_layers(type,tx,ty,tz) ((type##_layers){NEWAC(type,long(tx)*(ty)*(tz)),tx,ty,tz})
#define array_like(type,l) ((type##_array){NEWAC(type,long((l)->tx)),(l)->tx})
#define image_like(type,l) ((type##_image){NEWAC(type,long((l)->tx)*(l)->ty),(l)->tx,(l)->ty})
#define cube_like(type,l) ((type##_cube ){NEWAC(type,long((l)->tx)*(l)->ty*(l)->tz),(l)->tx,(l)->ty,(l)->tz})
#define layers_like(type,l) ((type##_layers){NEWAC(type,long((l)->tx)*(l)->ty*(l)->tz),(l)->tx,(l)->ty,(l)->tz})
#define reshape_xy(type, arr) ((type##_array){(arr)->pixels, (arr)->tx*(arr)->ty})
#define reshape_xyz(type, arr) ((type##_array){(arr)->pixels, (arr)->tx*(arr)->ty*(arr)->tz})
#define reshape_xy_z(type, arr) ((type##_image){(arr)->pixels, (arr)->tx*(arr)->ty, (arr)->tz})
#define reshape_z_xy(type, arr) ((type##_image){(arr)->pixels, (arr)->tz, (arr)->tx*(arr)->ty})
#define reshape_x_yz(type, arr) ((type##_image){(arr)->pixels, (arr)->tx, (arr)->ty*(arr)->tz})
#define free_image(img) if(img){free(img->pixels); free(img); img=NULL;}
#define free_cube(cube) free_image(cube)
#define free_layers(cube) free_cube(cube)
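// Usage sketch (added; not part of the original header), assuming the macros
// above and NEW() from std.h. All names below are hypothetical.
//inline void example_lifecycle() {
//  float_image im = empty_image(float, 640, 480);  // calloc'ed, hence zero-filled
//  float_image* dst = NEW(float_image);
//  *dst = image_like(float, &im);                  // same shape, fresh buffer
//  float_image* pim = &im;
//  ASSERT_SAME_IMG_SIZE( pim, dst );
//  free(im.pixels);   // stack struct: free the pixel buffer only
//  free_image(dst);   // heap struct: frees pixels + struct, then NULLs the pointer
//}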
// debugging only
//#include <stdio.h>
//inline long hash_arr(char* ptr, int nb, bool show) {
// long res = 0;
// if(show) printf("hashing [");
// for(int i=0; i<nb; i++) {
// res = 1000003*res + ((UBYTE*)ptr)[i];
// if(show) printf("%d, ",((UBYTE*)ptr)[i]);
// res = (res>>17) | (res<<47);
// }
// if(show) printf("]\n");
// return res;
//}
//#define H(arr,val) printf("hash(" #arr ") = %ld\n",val);
//#define hash_array(arr) H(arr,hash_arr((char*)(arr)->pixels,(arr)->tx*sizeof(*(arr)->pixels),0))
//#define hash_image(arr) H(arr,hash_arr((char*)(arr)->pixels,(arr)->tx*(arr)->ty*sizeof(*(arr)->pixels),0))
//#define hash_cube(arr) H(arr,hash_arr((char*)(arr)->pixels,(arr)->tx*(arr)->ty*(arr)->tz*sizeof(*(arr)->pixels),0))
//#define hash_layers(arr) hash_cube(arr)
//inline void save_raw(const char* fname, int* shape, int ndim, char* ptr, int size) {
// FILE* f = fopen(fname, "w");
// fwrite( &ndim, sizeof(int), 1, f);
// fwrite( shape, sizeof(int), ndim, f);
// fwrite( ptr, sizeof(*ptr), size, f);
// fclose(f);
//}
//#define save_cube(fname,cube) {int sh[3] = {(cube)->ty, (cube)->tx, (cube)->tz}; save_raw(fname, sh, 3, (char*)(cube)->pixels, sizeof(*(cube)->pixels)*CUBE_SIZE(cube));}
//#define save_layers(fname,layers) {int sh[3] = {(layers)->tz, (layers)->ty, (layers)->tx}; save_raw(fname, sh, 3, (char*)(layers)->pixels, sizeof(*(layers)->pixels)*LAYERS_SIZE(layers));}
#endif

@ -0,0 +1,988 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "conv.h"
#include "std.h"
#include "omp.h"
#include "maxfilter.h"
extern "C" {
#include <immintrin.h>
#define integer int
#define real float
extern int saxpy_(integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy);
extern int sscal_(integer *n, real *sa, real *sx, integer *incx);
}
static inline void fast_set_val( float * __restrict__ a, long d, const float val) {
if(val) {
int j;
for(j=0; j<d; j++)
a[j] = val;
} else
memset(a,0,d*sizeof(float));
}
static inline void fast_add_val( float * __restrict__ a, long d, const float val) {
int j;
for(j=0; j<d; j++)
a[j] += val;
}
static inline void fast_set_vec( float * __restrict__ dest,
const float * __restrict__ src, int d, const float mul) {
if( mul==1)
memcpy(dest,src,d*sizeof(float));
else {
int j;
for(j=0; j<d; j++)
dest[j] = mul*src[j];
}
}
static inline void fast_add_vec( float * __restrict__ dest,
const float * __restrict__ add, int d, float mul) {
if(d<=4) {
int j;
for(j=0; j<d; j++)
dest[j] += mul*add[j];
} else {
int inc = 1;
saxpy_( &d, &mul, (float*)add, &inc, (float*)dest, &inc );
}
}
static inline void fast_div( float * __restrict__ a, long d, const float div) {
const float divi = 1/div;
// assert( ((long)a & 15) == 0 && (d & 3) == 0 );
// const float _divi4[] = {divi,divi,divi,divi};
// __v4sf *a4 = (__v4sf*)a;
// __v4sf *divi4 = (__v4sf*)_divi4;
// int e = d>>2;
// while(e--) *a4++ *= (*divi4);
int j;
for(j=0; j<d; j++)
a[j] *= divi;
}
static inline float* fast_set_trans( float * dest, const float * src, const float mul,
int dx, int dy, const int tx, const int ty, const int ex, const float def ) {
if(mul==0) {
memset(dest,0,sizeof(float)*(tx+ex)*(ty+ex));
return dest+(tx+ex)*(ty+ex);
}
if(dx>tx) dx=tx; // beyond these values, nothing happens anyway
if(dy>ty) dy=ty;
if(-dx>tx) dx=-tx;
if(-dy>ty) dy=-ty;
#define add_default(n) {fast_set_val(dest,(n),mul*def); dest+=(n);}
float* _dest = dest;
// paste -v zeros rows
if(dy<0) add_default(-dy*(tx+ex));
src += MAX(0,dx);
const int row_len = MIN(tx,tx+dx+ex) - MAX(0,dx);
int j;
for(j=MAX(0,dy); j<MIN(ty,ty+dy+ex); j++) {
// paste -u zeros cols
if(dx<0) add_default(-dx);
// paste image
fast_set_vec(dest,src+j*tx,row_len,mul);
dest += row_len;
// paste +u zeros cols
if(dx>=0) {add_default(dx)
if(ex) add_default(ex)}
}
// paste +v zeros rows
if(dy>=0){add_default(dy*(tx+ex))
if(ex) add_default(ex*(tx+ex))}
#undef add_default
assert( dest-_dest == (tx+ex)*(ty+ex) );
return dest;
}
static inline float* fast_add_trans( float * dest, const float * src, const float mul,
int dx, int dy, const int tx, const int ty, const int ex, const float def ) {
if(mul==0) return dest+(tx+ex)*(ty+ex);
if(dx>tx) dx=tx; // beyond these values, nothing happens anyway
if(dy>ty) dy=ty;
if(-dx>tx) dx=-tx;
if(-dy>ty) dy=-ty;
#define add_default(n) {fast_add_val(dest,n,def*mul); dest+=n;}
float* _dest = dest;
// paste -v zeros rows
if(dy<0) add_default(-dy*(tx+ex));
src += MAX(0,dx);
const int row_len = MIN(tx,tx+dx+ex) - MAX(0,dx);
int j;
for(j=MAX(0,dy); j<MIN(ty,ty+dy+ex); j++) {
// paste -u zeros cols
if(dx<0) add_default(-dx);
// paste image
fast_add_vec(dest,src+j*tx,row_len,mul);
dest += row_len;
// paste +u zeros cols
if(dx>=0) {add_default(dx)
if(ex) add_default(ex)}
}
// paste +v zeros rows
if(dy>=0){add_default(dy*(tx+ex))
if(ex) add_default(ex*(tx+ex))}
#undef add_default
assert( dest-_dest == (tx+ex)*(ty+ex) );
return dest;
}
static inline void norm_norm( float* norms, int nb, float mode ) {
int i;
if( mode < 0 )
assert(!"error: unknown norm mode");
else if( mode == 0.5 ) {
for(i=0; i<nb; i++)
norms[i] = sqrt(sqrt(norms[i]));
} else if( mode < 1 ) {
mode *= 0.5; // cumulate with initial 1/sqrt(.)
for(i=0; i<nb; i++)
norms[i] = pow(norms[i], mode);
} else if( mode == 1 ) {
for(i=0; i<nb; i++)
norms[i] = sqrt(norms[i]);
} else if( mode > 1 )
assert(!"error: unknown norm mode");
}
/* normalize each pixel of a multi-layer image
   norm = {0: nothing, 1: L2-normalization, 0-1: normalization by (L2-norm)**<norm> }
*/
void norm_layers( float_layers* res, float norm, int n_thread ) {
if(norm==0) return;
const int layer_size = res->tx*res->ty;
const int n_layers = res->tz;
float* norms = NEWAC(float,layer_size);
long l;
for(l=0; l<n_layers; l++) {
float* r = res->pixels + l*layer_size;
int i;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(i=0; i<layer_size; i++)
norms[i] += r[i]*r[i];
}
norm_norm( norms, layer_size, norm );
for(l=0; l<n_layers; l++) {
float* r = res->pixels + l*layer_size;
int i;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(i=0; i<layer_size; i++)
r[i] /= norms[i]+1e-8;
}
free(norms);
}
/* Return the vectorized dimension of a HOG patch
*/
int get_patch_desc_dim( float_layers* hog, int patch_size )
{
return patch_size*patch_size * hog->tz; // number of dimensions of an atomic patch descriptor
}
/* Sample a set of patches from a HOG image.
grid : array of (x,y) position of the patches
size: size of the patches, i.e. [x,x+size[ x [y,y+size[
res: result array, n_patches x desc_dim
desc_dim = n_layers * size**2
norms: result, n_patches x 1, norm of each patch
*/
void _sample_patches( float_layers* hog, float_layers* color, int_image* grid, int size, float norm,
float_image* res, float_array* norms, int n_thread ) {
const int tx = hog->tx;
const long npix = tx*hog->ty;
assert( grid->tx == 2 );
const int n_patches = grid->ty;
assert( res->ty == n_patches );
const int n_layers = hog->tz;
const int n_colors = (color? color->tz: 0);
const int color_npix = (color? color->tx*color->ty: 0);
const int desc_size = size*size*n_layers + (color? color->tz: 0);
assert(res->tx == desc_size );
int n;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(n=0; n<n_patches; n++) {
float *r = res->pixels + desc_size*n;
int *p = grid->pixels + 2*n;
// copy hog
int x=p[0],y=p[1];
assert(0<=x && x+size<=tx);
assert(0<=y && y+size<=hog->ty);
int l,j;
for(l=0; l<n_layers; l++) {
float* h = hog->pixels + l*npix + y*tx + x;
for(j=0; j<size; j++) {
memcpy(r, h, size*sizeof(float));
h += tx;
r += size;
}
}
if(!color) continue;
// copy color
float* c = color->pixels + (y+size/2)*color->tx + (x+size/2);  // row stride is tx (layout: x + y*tx)
for(l=0; l<n_colors; l++)
*r++ = c[l*color_npix];
}
if(norm) {
float* normp = norms ? norms->pixels : NEWAC(float, n_patches);
if(norms) {
assert(norms->tx==n_patches);
memset(normp,0,n_patches*sizeof(float));
}
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(n=0; n<n_patches; n++) {
float *r = res->pixels + desc_size*n;
int l;
for(l=0; l<desc_size; l++)
normp[n] += r[l]*r[l];
}
norm_norm( normp, n_patches, norm );
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(n=0; n<n_patches; n++) {
float *r = res->pixels + desc_size*n;
int l;
float nn = normp[n]+1e-8;
for(l=0; l<desc_size; l++)
r[l] /= nn;
}
if(!norms) free(normp);
}
}
static inline int retrieve_children( const int x, const int y, const int_cube* child_grid ) {
const int size0_div2 = child_grid->pixels[0];
const int step0 = child_grid->tx==1 && child_grid->ty==1 ? 1 :
MAX( child_grid->pixels[2]-child_grid->pixels[0],
child_grid->pixels[1+2*child_grid->tx]-child_grid->pixels[1] );
int i = (x-size0_div2)/step0;
int j = (y-size0_div2)/step0;
assert( x==(i*step0+size0_div2) || !"error: child_grid does not match current grid" );
assert( y==(j*step0+size0_div2) || !"error: child_grid does not match current grid" );
if( i<0 || i>=child_grid->tx ) return -1;
if( j<0 || j>=child_grid->ty ) return -1;
return i+j*child_grid->tx;
}
/* Prepare a grid of cell positions in the first image for a given scale. Big cells inherit the cell at the previous scale.
size = size of cells at current scale
offset, step = grid generator: (offset + i*step, offset + j*step)
child_grid = grid of the previous layer (or None if first layer)
child_norms = image containing the norms of the patch at the previous level
grid = result center positions of cells in current scale
children = index of cells in previous scale used to construct big cells
norms = norms of the cells of this level
*/
void _prepare_big_cells( int size, int offset, int step,
int_cube* child_grid, float_image* child_norms,
int_cube* grid, int_cube* children, float_image* norms ) {
assert(grid->tz==2);
const int ntx = grid->tx; // should be == 1+(tx-size)/step so that patches do not pass the border
const int nty = grid->ty; // should be == 1+(ty-size)/step so that patches do not pass the border
/* grid[i,j] = ( offset + i*step, offset + j*step )
connection between two scales:
x cell position in lower scale == x position of children in upper scale
child_offset + child_i*child_step = offset + i*step + (2*u/(nc-1)-1)*size/4
*/
int i,j,u,v;
int* r = grid->pixels;
if( !child_grid ) {
// this is the first scale:
// we just return a grid of step size*(1-overlap/2) in [0, tx[ x [0, ty[
for(j=0; j<nty; j++)
for(i=0; i<ntx; i++) {
*r++ = offset + i*step;
*r++ = offset + j*step;
}
} else {
assert(child_grid->tz==2);
ASSERT_SAME_SIZE( child_grid, child_norms );
assert( children );
const int nc = sqrt(children->tz); // number of children per row or col
assert( children->tz==pow2(nc) );
ASSERT_SAME_SIZE( grid, children );
ASSERT_SAME_SIZE( grid, norms );
// this is at least second scale
// we return a grid of step size*(1-overlap/2) in [0, tx[ x [0, ty[
const int quarter = size/4;
assert(4*quarter==size);
int* c = children->pixels;
float *n = norms->pixels;
memset(n,0,ntx*nty*sizeof(float));
for(j=0; j<nty; j++)
for(i=0; i<ntx; i++) {
int x = offset + i*step;
int y = offset + j*step;
*r++ = x;
*r++ = y;
// accumulate norms from 2x2 or 3x3 neighbors
for(v=0; v<nc; v++)
for(u=0; u<nc; u++,c++) {
// we want to index the children at position:
// ( center_x + (2*u/(nc-1)-1)*size/4, center_y + (2*v/(nc-1)-1)*size/4 )
*c = retrieve_children( x+(2*u/(nc-1)-1)*quarter, y+(2*v/(nc-1)-1)*quarter, child_grid );
if(*c>=0) *n += child_norms->pixels[*c];
}
n++;
}
}
}
/* Prepare image for dot-product: dot(patches, res),
   where patches is n_patches x patch_dim.
   Pixels outside of the image are set to (0,...,ninth_val).
*/
void _prepare_dotprod_convolution( float_layers* img, int patch_size, float ninth_val, int extend,
float_layers* res, int n_thread ) {
assert( img->tx+extend == res->tx );
assert( img->ty+extend == res->ty );
const int n_layers = img->tz;
const int tx = img->tx;
const int ty = img->ty;
const int npix = tx*ty;
const int npixex = (tx+extend)*(ty+extend);
assert( res->tz==patch_size*patch_size*img->tz );
long l;
const int first_half = patch_size/2; // half-size
const int second_half = patch_size - first_half;
const int layer_size = patch_size*patch_size*npixex;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
float* img_pix = img->pixels + l*npix;
float* r = res->pixels + l*layer_size;
int u,v;
// copy translated version of the image into res
for(v=-first_half; v<second_half; v++)
for(u=-first_half; u<second_half; u++)
r = fast_set_trans( r, img_pix, 1, u, v, tx, ty, extend, l+1<n_layers? 0 : ninth_val );
}
}
float_layers* prepare_dotprod_convolution( float_layers* hog, int patch_size, int extend, float norm, int nt )
{
assert(0<=extend and extend<=1);
const int nh = get_patch_desc_dim(hog,patch_size);
const int etx = hog->tx+extend; // extend a bit the image
const int ety = hog->ty+extend;
float_layers* res = NEW(float_layers);
*res = empty_layers(float,etx,ety,nh);
float ninth_val = 0;
_prepare_dotprod_convolution( hog, patch_size, ninth_val, extend, res, nt );
if( norm ) norm_layers( res, norm, nt );
return res;
}
inline float sum_array_f(const float* a, int n) {
int i=n;
double res = 0;
while(i--) res+=a[i];
return (float)res;
}
extern "C" {
int sgemm_(char *transa, char *transb, integer *m, integer *
n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *
ldb, float *beta, float *c, integer *ldc);
}
/* matrix-matrix multiplication with several SGEMM (each is single-threaded)
       res     =  dot( patches,  convolved_hog )
   (P x npix)       (P x nh)      (nh x npix)
*/
void _dotprod( float_image* patches, float_layers* convolved_hog, float_layers* res, int n_thread ) {
int nh = patches->tx;
assert( nh == convolved_hog->tz );
ASSERT_SAME_IMG_SIZE( convolved_hog, res );
int P = patches->ty;
assert( res->tz == P );
int threadP = 1 + (P-1) / n_thread; // how many patches per thread
int npix = (int)IMG_SIZE(convolved_hog);
int l;
#if (defined(USE_OPENMP) && !defined(MULTITHREADED_BLAS))
#pragma omp parallel for num_threads(n_thread)
#else
n_thread = 1; // BLAS is already multithreaded
threadP = P;
#endif
for(l=0; l<n_thread; l++) {
// we do dotprod( patches[l*threadP : (l+1)*threadP], convolved_hog )
long start = l*threadP;
long end = MIN(P,(l+1)*threadP);
int np = int(end - start);
float* p = patches->pixels + nh*start;
float* r = res->pixels + npix*start;
// blas fast matrix-matrix product
char T='n'; float alpha = 1, beta = 0;
sgemm_( &T, &T, &npix, &np, &nh, &alpha,
convolved_hog->pixels, &npix,
p, &nh, &beta, r, &npix);
}
}
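// Reference implementation (added sketch, not in the original source): the
// sgemm calls above compute, modulo BLAS's column-major convention, this
// naive product; dotprod_naive is a hypothetical name.
//static void dotprod_naive(const float_image* patches, const float_layers* convolved_hog,
//                          float_layers* res) {
//  const int nh = patches->tx, P = patches->ty;
//  const long npix = IMG_SIZE(convolved_hog);
//  for(int p=0; p<P; p++)
//    for(long i=0; i<npix; i++) {
//      double s = 0;
//      for(int h=0; h<nh; h++)  // correlate patch p with pixel i across all nh dims
//        s += patches->pixels[p*nh+h] * convolved_hog->pixels[h*npix+i];
//      res->pixels[p*npix+i] = (float)s;
//    }
//}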
inline void transpose_scalar_block(const float *A, float *B, const int lda, const int ldb,
const int block_row, const int block_col) {
for(int i=0; i<block_row; i++)
for(int j=0; j<block_col; j++)
B[j*ldb + i] = A[i*lda +j];
}
// Transpose A (N rows by M cols) into B (M by N)
void transpose_matrix(const float_image* A, float_image* B, int nt) {
const int n = A->ty, m = A->tx;
assert( n==B->tx && m==B->ty );
const int block_size = 16;
const float* pA = A->pixels;
float* pB = B->pixels;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads(nt)
#endif
for(int i=0; i<n; i+=block_size)
for(int j=0; j<m; j+=block_size)
transpose_scalar_block(&pA[i*m +j], &pB[j*n + i], m, n, MIN(block_size, n-i), MIN(block_size, m-j));
}
extern "C" {
int sgemv_(char *transa, integer *m, integer * n,
float *alpha, float *a, integer *lda,
float *b, integer * ldb, float *beta,
float *c, integer * ldc);
}
/* convolution of each patch within a local neighborhood
ngh_rad = max translation
neighborhood has size 2*ngh_rad
patch at (x,y) is compared to patches in [y-ngh_rad : y+ngh_rad,
x-ngh_rad : x+ngh_rad]
*/
void _dotprod_ngh_rad_T( int_cube* grid, float_image* patches, int ngh_rad,
float_cube* convolved_hog, float_layers* res_out,
int_image* offsets, int n_thread ) {
int nh = patches->tx;
assert( nh == convolved_hog->tz );
const int P = patches->ty;
assert( IMG_SIZE(grid)==P && grid->tz==2 );
const int tx = convolved_hog->tx;
const int ty = convolved_hog->ty;
// neighborhood size
int res_tx = MIN(tx,2*ngh_rad);
int res_ty = MIN(ty,2*ngh_rad);
assert(res_tx<tx-1 || res_ty<ty-1 || !"ngh_rad is too large and results in loss of perf. Set ngh_rad=0 instead.");
int res_npix = res_tx * res_ty;
// allocate result
*res_out = empty_layers(float, res_tx, res_ty, P);
assert(res_out->pixels || !"error: ran out of memory before sgemm");
*offsets = empty_image(int, 2, P);
char T='t'; float alpha=1, beta=0; int one=1;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(int j=0; j<res_ty; ++j) {
// By organizing loops this way,
// we exploit overlap between patches.
for(int l=0; l<P; l++) {
float* p = patches->pixels + l*nh;
float* r = res_out->pixels + l*res_npix;
int left = MAX(0, MIN(grid->pixels[2*l+0] - ngh_rad, tx-2*ngh_rad));
int top = MAX(0, MIN(grid->pixels[2*l+1] - ngh_rad, ty-2*ngh_rad));
if(j==0) {
offsets->pixels[2*l+0] = left;
offsets->pixels[2*l+1] = top;
}
float* c = convolved_hog->pixels + (left + top*tx)*nh;
// blas fast matrix-vector product
sgemv_( &T, &nh, &res_tx, &alpha, c + j*tx*nh, &nh,
p, &one, &beta, r + j*res_tx, &one);
}
}
}
/* correct the convolution on the boundaries of the image
ttx, tty: true shape of the res_map (in case of using offsets)
*/
void rectify_conv( int patch_size, int nori, float_image* patches, int_image* offsets,
const int ttx, const int tty, int extend, float_layers* res, int n_thread ) {
const int n_patches = patches->ty;
assert( n_patches == res->tz );
//const int nori = patches->tx/pow2(patch_size);
assert( patches->tx >= nori*pow2(patch_size) );
const int tx = res->tx; // real true shape because it has been extended
const int ty = res->ty;
const int first_half = patch_size/2;
const int second_half = patch_size - first_half; // in case patch_size is odd
assert( offsets || (ttx==tx && tty==ty) );
assert( !offsets || (ttx>=tx && tty>=ty) );
assert( !offsets || (offsets->ty==res->tz && offsets->tx==2) );
const long npix = IMG_SIZE(res);
int l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_patches; l++) {
// load offsets
const int offi = offsets ? offsets->pixels[2*l+0] : 0;
const int offj = offsets ? offsets->pixels[2*l+1] : 0;
float sums[8]; // temporary norm of columns or rows
assert( patch_size <= (int)(sizeof(sums)/sizeof(sums[0])) );
int o,i,j;
// horizontal boundaries
memset(sums,0,sizeof(sums));
float* p = patches->pixels + l*patches->tx;
for(o=0; o<nori; o++)
for(j=0; j<patch_size; j++)
for(i=0; i<patch_size; i++)
sums[j] += pow2(*p++);
float old_norm = sqrt(sum_array_f(sums,patch_size));
if( old_norm==0 ) continue;
// upper boundary
for(j=offj; j<first_half; j++) {
float new_norm = sqrt(sum_array_f(sums+(first_half-j),second_half+j)); // sums to patch_size
float mul = old_norm / (new_norm + 1e-8);
float* r = res->pixels + l*npix + (j-offj)*tx;
for(i=0; i<tx; i++) {
r[i] *= mul;
//assert(r[i]<1.1);
}
}
// lower boundary
for(j=tty-extend+1-second_half; j<offj+ty; j++) {
float new_norm = sqrt(sum_array_f(sums,first_half+tty-extend-j)); // sums to patch_size
float mul = old_norm / (new_norm + 1e-8);
float* r = res->pixels + l*npix + (j-offj)*tx;
for(i=0; i<tx; i++) {
r[i] *= mul;
//assert(r[i]<1.1);
}
}
// vertical boundaries
memset(sums,0,sizeof(sums));
p = patches->pixels + l*patches->tx;
for(o=0; o<nori; o++)
for(j=0; j<patch_size; j++)
for(i=0; i<patch_size; i++)
sums[i] += pow2(*p++);
// left boundary
for(i=offi; i<first_half; i++) {
float new_norm = sqrt(sum_array_f(sums+(first_half-i),second_half+i));
float mul = old_norm / (new_norm + 1e-8);
float* r = res->pixels + l*npix + (i-offi);
for(j=0; j<ty; j++) {
r[j*tx] *= mul;
//assert(r[j*tx]<1.1);
}
}
// right boundary
for(i=ttx-extend+1-second_half; i<offi+tx; i++) {
float new_norm = sqrt(sum_array_f(sums,first_half+ttx-extend-i));
float mul = old_norm / (new_norm + 1e-8);
float* r = res->pixels + l*npix + (i-offi);
for(j=0; j<ty; j++) {
r[j*tx] *= mul;
//assert(r[j*tx]<1.1);
}
}
// because we over-estimated the rectification for the corners, check that they do not overpass old_norm
float* r = res->pixels + l*npix;
for(j=offj; j<first_half; j++) {
for(i=offi; i<first_half; i++)
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
for(i=ttx-extend+1-second_half; i<offi+tx; i++)
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
}
for(j=tty-extend+1-second_half; j<offj+ty; j++) {
for(i=offi; i<first_half; i++)
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
for(i=ttx-extend+1-second_half; i<offi+tx; i++)
r[(j-offj)*tx+(i-offi)] = MIN(r[(j-offj)*tx+(i-offi)], old_norm);
}
}
}
/* Compute the correlation of all patches with the second image (hog).
In case of ngh_rad>0, the correlation is only computed in a small local neighborhood
(whose size is parameterized by ngh_rad).
if extend: width and height of output maps are extended
if norm: correlations are normalized afterwards.
*/
void fastconv( float_image* patches, float_layers* hog, int patch_size, int ngh_rad,
int extend, float norm, int nt, res_scale* res ) {
assert(0<=extend and extend<=1);
float_layers* convolved_hog = prepare_dotprod_convolution( hog, patch_size, extend, norm, nt );
assert( patches->tx==convolved_hog->tz);
res->true_shape[0] = convolved_hog->tx;
res->true_shape[1] = convolved_hog->ty;
//hash_layers(convolved_hog)
int_image* offsets = NULL;
if( ngh_rad == 0 ) { // no limit on translation
// allocate result
res->res_map = empty_layers(float, convolved_hog->tx, convolved_hog->ty, patches->ty);
assert(res->res_map.pixels || !"error: ran out of memory before sgemm");
// multi-threaded fast matrix product
_dotprod( patches, convolved_hog, &res->res_map, nt );
} else { // ngh_rad>0: cropping res_map
offsets = &res->offsets;
// transpose hog: _dotprod is much faster this way
float_cube convolved_hog_T = empty_cube(float, convolved_hog->tx, convolved_hog->ty, convolved_hog->tz);
{ float_image A = reshape_xy_z(float, convolved_hog); // cast to 2D matrix without copy
float_image B = reshape_z_xy(float, &convolved_hog_T);
transpose_matrix( &A, &B, nt);
}
//hash_cube(&convolved_hog_T)
// resized grid
int_cube fgrid = cube_like(int, &res->grid);
for(int i=0; i<CUBE_SIZE(&fgrid); i++)
fgrid.pixels[i] = res->grid.pixels[i]/res->f;
//hash_cube(&fgrid)
// multi-threaded fast matrix product
_dotprod_ngh_rad_T( &fgrid, patches, ngh_rad, &convolved_hog_T, &res->res_map, offsets, nt );
free(fgrid.pixels);
free(convolved_hog_T.pixels);
//hash_image(offsets)
}
free_layers(convolved_hog);
// correct border effects on the correlation maps
rectify_conv( patch_size, hog->tz, patches, offsets, res->true_shape[0], res->true_shape[1],
extend, &res->res_map, nt );
}
/* Compute: arr **= p
*/
void fastipow( float_layers* arr, const float p, int n_thread ) {
const int n_layers = arr->tz;
const long npix = arr->tx*arr->ty;
int l;
// optimization: precompute some values of pow(x,p)
const int npc = 64;
float precom[npc+1];
for(l=0; l<=npc; l++) precom[l]= pow(l/(float)npc,p);
const float maxindex = npc - 0.001;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
float* a = arr->pixels + l*npix;
int i;
for(i=0; i<npix; i++) {
// arr[i] = pow(arr[i],p);
float v = a[i]*npc;
assert( v>=0 && v<npc+1 );
if(v>maxindex) v=maxindex;
int n = int(v);
float w = v-n;
a[i] = (1-w)*precom[n] + w*precom[n+1];
}
}
}
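// Note (added): the loop above approximates pow(a[i],p) by linear interpolation
// in a 64-entry table, which is valid because response values are expected to
// lie in [0,1]. The direct equivalent would simply be:
//   for(i=0; i<npix; i++) a[i] = powf(a[i], p);
// trading a tiny approximation error for a large speedup.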
/* Compute: arr = max(0,(arr-p)/(1-p))
*/
void fasthinge( float_layers* arr, const float p, int n_thread ) {
const int n_layers = arr->tz;
const long npix = arr->tx*arr->ty;
int l;
const float f = 1/(1-p);
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
float* a = arr->pixels + l*npix;
int i;
for(i=0; i<npix; i++) {
float v = a[i];
a[i] = MAX(0,f*(v-p));
}
}
}
inline int max_array_i(const int* a, int n) {
int i=n;
int res = INT_MIN;
while(i--) if(a[i]>res) res=a[i];
return res;
}
/* Normalize weights in border areas of width <gap>.
There are 9 areas: top-left, top-middle, top-right, ..., bottom-right.
sum_divf indicates the current weight in those areas, i.e. values in the area
   should be divided by the weight. trans_inv allows controlling the amount of
   normalization: 0 = no normalization, 1 = normal
*/
static inline void normalize_trans(const int tx, const int ty, const int gap, float* rmap,
const float trans_inv, float sum_divf[9] ) {
if( trans_inv == 0 ) return;
int i,j;
for(i=0; i<9; i++) {
if( sum_divf[i]>0 )
sum_divf[i] = 1/pow(sum_divf[i], trans_inv); // if trans_inv==1, no effect
}
for(j=0; j<gap; j++) {
if(sum_divf[0])
for(i=0; i<gap; i++)
rmap[j*tx+i] *= sum_divf[0];
if(sum_divf[1])
for(i=gap; i<tx-gap; i++)
rmap[j*tx+i] *= sum_divf[1];
if(sum_divf[2])
for(i=tx-gap; i<tx; i++)
rmap[j*tx+i] *= sum_divf[2];
}
for(; j<ty-gap; j++) {
if(sum_divf[3])
for(i=0; i<gap; i++)
rmap[j*tx+i] *= sum_divf[3];
if(sum_divf[5])
for(i=tx-gap; i<tx; i++)
rmap[j*tx+i] *= sum_divf[5];
}
for(; j<ty; j++) {
if(sum_divf[6])
for(i=0; i<gap; i++)
rmap[j*tx+i] *= sum_divf[6];
if(sum_divf[7])
for(i=gap; i<tx-gap; i++)
rmap[j*tx+i] *= sum_divf[7];
if(sum_divf[8])
for(i=tx-gap; i<tx; i++)
rmap[j*tx+i] *= sum_divf[8];
}
}
/* Compute the (sparse) convolutions specified by <children> on <map> and put the result in <res>.
A standard order is assumed on the children:
a response map #p is built from the children[p] at positions
[(gap*dx,gap*dy) for dy in dys for dx in dxs]
where dxs = [-1,1] or [-1,0,1]
dys = [-1,1] or [-1,0,1]
child_assign denotes the assignment of the children level, while assign is for the next level
child_norms contain the norms of small patches and norms for big new cells
*/
int _sparse_conv( int_image* children, int_array* child_assign, int gap, float trans_inv,
float_layers* child_map, int_image* offsets, float_array* child_norms, float_array* norms,
int_array* assign, float_layers* res, int_image* res_offsets, int n_thread ) {
const int nconv = children->ty; // number of convolutions to perform
const int nc2 = children->tx;
const int nc = sqrt(nc2);
assert( nc*nc == nc2 );
assert( res->tz == nconv );
const int tx = child_map->tx;
const int ty = child_map->ty;
const long npix = tx*ty;
ASSERT_SAME_SIZE( child_map, res );
const int n_lower_conv = max_array_i(children->pixels,nconv*nc2)+1;
int* cass = child_assign ? child_assign->pixels : NEWA(int,n_lower_conv);
if(!child_assign) {for(int i=0; i<n_lower_conv; i++) cass[i]=i;}
assert( !offsets || (offsets->pixels && offsets->tx==2 && offsets->ty==n_lower_conv &&
res_offsets && res_offsets->tx==2 && res_offsets->ty==nconv) );
if(assign) {
assert(0); // not supposed to happen
} else {
// normal case: no redundancy to exploit in response maps
int l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<nconv; l++) {
float *rmap = res->pixels + l*npix;
int u,v,c,ncall=0; // children number
const int* const child = children->pixels + l*nc2;
float sum_divf[9];
memset(sum_divf,0,sizeof(sum_divf));
int i,j;
// first, choose an offset for the result rmap from the child offsets
int offx=0, offy=0;
if( offsets ) {
int sum_ox=0, sum_oy=0, w=0;
for(c=v=0; v<nc; v++) {
int dy = (2*v/(nc-1)-1);
for(u=0; u<nc; u++,c++) {
int dx = (2*u/(nc-1)-1);
if(child[c]<0 || cass[child[c]]<0) continue;
sum_ox += offsets->pixels[2*child[c]+0] - dx*gap;
sum_oy += offsets->pixels[2*child[c]+1] - dy*gap;
w++;
}
}
if(w==0) w++; // just in case
offx = (int)floor(0.5 + sum_ox/float(w));
offy = (int)floor(0.5 + sum_oy/float(w));
// store result for later
res_offsets->pixels[2*l+0] = offx;
res_offsets->pixels[2*l+1] = offy;
}
for(c=v=0; v<nc; v++) {
int dy = (2*v/(nc-1)-1);
for(u=0; u<nc; u++,c++) {
int dx = (2*u/(nc-1)-1);
if(child[c]<0 || cass[child[c]]<0) continue;
float divf = child_norms->pixels[child[c]]/norms->pixels[l];
// difference with rmap's offset
const int trans_x = dx*gap + (offsets? offx - offsets->pixels[2*child[c]+0] : 0);
const int trans_y = dy*gap + (offsets? offy - offsets->pixels[2*child[c]+1] : 0);
// count the sum of weights in every image area
for(i=-1; i<=1; i++)for(j=-1; j<=1; j++)
if(i*trans_x<=0 && j*trans_y<=0)
sum_divf[4+j*3+i] += divf;
// add a translated version of map[children[c]] by (ox-dx,oy-dy)
if(ncall++==0) // first call
fast_set_trans( rmap, child_map->pixels + cass[child[c]]*npix, divf, trans_x,trans_y, tx,ty, 0, 0 );
else
fast_add_trans( rmap, child_map->pixels + cass[child[c]]*npix, divf, trans_x,trans_y, tx,ty, 0, 0 );
}
}
if( ncall == 0) // default = zeros
memset(rmap, 0, npix*sizeof(float));
// now we are supposed to rectify the boundaries (to perfect convolution)
normalize_trans(tx, ty, gap, rmap, trans_inv, sum_divf );
//assert(min_array_f(rmap,npix)>=0 && max_array_f(rmap,npix)<=1.001);
}
}
if(!child_assign) free(cass);
#define CHECK_MAPS(rmaps) assert(min_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))>=0 && \
max_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))<=1.001)
//CHECK_MAPS(res);
return nconv;
}
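// Worked example (added) of the children ordering assumed above: with nc=2
// (no child overlap), dx = 2*u/(nc-1)-1 enumerates the offsets
//   (-1,-1) (1,-1) (-1,1) (1,1)
// and with nc=3 (half-overlapping children)
//   (-1,-1) (0,-1) (1,-1) (-1,0) (0,0) (1,0) (-1,1) (0,1) (1,1)
// each multiplied by <gap> pixels when translating the child response maps.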

@ -0,0 +1,144 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___CONV_H___
#define ___CONV_H___
#include "array_types.h"
#include "deep_matching.h"
/* Return the vectorized dimension of a HOG patch
*/
int get_patch_desc_dim( float_layers* hog, int patch_size );
/* Sample a set of patches from a HOG image.
pos : array of (x,y) position of the patches
size: size of the patches, i.e. [x,x+size[ x [y,y+size[
res: result array, n_patches x desc_dim
desc_dim = n_layers * size**2
norms: result, n_patches x 1, norm of each patch
*/
void _sample_patches( float_layers* hog, float_layers* color, int_image* pos, int size, float norm,
float_image* res, float_array* norms, int n_thread );
/* normalize each pixel of a multi-layer image
   norm = {0: nothing, 1: L2-normalization, 0-1: normalization by (L2-norm)**<norm> }
*/
void norm_layers( float_layers* res, float norm, int n_thread );
/* Prepare a grid of cell positions in the first image for a given scale. Big cells inherit the cell at the previous scale.
size = size of cells at current scale
offset, step = grid generator: (offset + i*step, offset + j*step)
child_grid = grid of the previous layer (or None if first layer)
child_norms = image containing the norms of the patch at the previous level
grid = result center positions of cells in current scale
children = index of cells in previous scale used to construct big cells
norms = norms of the cells of this level
*/
void _prepare_big_cells( int size, int offset, int step,
int_cube* child_grid, float_image* child_norms,
int_cube* grid, int_cube* children, float_image* norms );
/* Compute the correlation of all patches with the second image (hog).
In case of ngh_rad>0, the correlation is only computed in a small local neighborhood
(whose size is parameterized by ngh_rad).
if extend: width and height of output maps are extended
if norm: correlations are normalized afterwards.
*/
void fastconv( float_image* patches, float_layers* hog, int patch_size, int ngh_rad,
int extend, float norm, int nt, res_scale* res );
/* Compute the (sparse) convolutions specified by <children> on <map> and put the result in <res>.
A standard order is assumed on the children:
a response map #p is built from the children[p] at positions
[(gap*dx,gap*dy) for dy in dys for dx in dxs]
where dxs = [-1,1] or [-1,0,1]
dys = [-1,1] or [-1,0,1]
child_assign denotes the assignment of the children level, while assign is for the next level
child_norms contain the norms of small patches and norms for big new cells
*/
int _sparse_conv( int_image* children, int_array* child_assign, int gap, float trans_inv,
float_layers* child_map, int_image* offsets, float_array* child_norms, float_array* norms,
int_array* assign, float_layers* res, int_image* res_offsets, int n_thread );
/* Compute: arr **= p
*/
void fastipow( float_layers* arr, const float p, int n_thread );
/* Compute: arr = max(0,(arr-p)/(1-p))
*/
void fasthinge( float_layers* arr, const float p, int n_thread );
/* Compute: arr = exp(-arr)
*/
void fastnegexp( float_image* arr );
/* incorporate the color difference between patches into existing patch similarity
formula: new_response = ( color_sim*addw + old_response*(1-addw) ) * ( mulw*color_sim + 1-mulw )
if mulw=1, addw=0,   then: new_response = old_response * color_sim
if mulw=0, addw=0.5, then: new_response = (old_response + color_sim)/2
*/
void incorporate_color( int_cube* grid, int_array* assign, float_layers* lab0, float_layers* var0,
float_layers* lab1, float_layers* var1,
float_layers* res_maps, float L_std, float ab_std, int sym_dist, int n_opening,
const float addw, const float mulw, int n_thread );
#endif

@ -0,0 +1,936 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "deep_matching.h"
#include "std.h"
#include "conv.h"
#include "maxfilter.h"
// return size of atomic patches
int get_atomic_patch_size( const dm_params_t* params )
{
int upsize = (1 << params->prior_img_downscale);
return 4*upsize;
}
// crop dimensions to a multiple of patch_size
void get_source_shape( const int width, const int height, const int patch_size, int* res ) {
// crop the reference image to a multiple of patch size
res[0] = patch_size * int(width / patch_size);
res[1] = patch_size * int(height / patch_size);
}
// extract pixel descriptor for both images
void extract_image_desc( image_t* img0, image_t* img1, const dm_params_t* params,
float_layers** desc0, float_layers** desc1 )
{
// slightly reduce img0 size to fit the patch tiling
int patch_size = get_atomic_patch_size( params );
int size[2]; // = {width, height}
get_source_shape( img0->width, img0->height, patch_size, size );
image_crop(img0, size[0], size[1]);
// extract gradient-based information
*desc0 = extract_desc( img0, &params->desc_params, params->n_thread );
*desc1 = extract_desc( img1, &params->desc_params, params->n_thread );
}
void avgpool2( float_layers* hog, const dm_params_t* params )
{
int niter = params->prior_img_downscale;
while(niter--) {
float_layers res = empty_layers(float,hog->tx/2,hog->ty/2,hog->tz);
_avgpool2(hog,&res,params->n_thread);
// replace hog by res
free(hog->pixels);
*hog = res;
}
}
/* compute the grid of parent cell position, and their connection to children cells
cells can be half-overlapping if <overlap>=1
<dense_step> forces the grid spacing if >0
*/
void prepare_big_cells( const int imshape[2], int cell_size, int overlap, int child_overlap,
int_cube* child_grid, float_image* child_norms, int dense_step,
int_cube* grid, int_cube* children, float_image* norms )
{
int offset, step, gtx, gty;
if( dense_step ) {
step = dense_step;
offset = 0;
// we do not care if the patches are overlapping outside the image
#define grid_size(imsize) (1+imsize/step)
gtx = grid_size(imshape[0]);
gty = grid_size(imshape[1]);
#undef grid_size
} else {
// we want patches fully included in the image
offset = cell_size/2;
step = cell_size/(overlap+1);
#define grid_size(imsize) (1+MAX(0,imsize-2*offset)/step)
gtx = grid_size(imshape[0]);
gty = grid_size(imshape[1]);
#undef grid_size
}
assert(!grid->pixels);
*grid = empty_cube(int,gtx,gty,2);
assert(0<=overlap && overlap<=1);
int nc = pow2(2+child_overlap); // number of children per cell
if(child_grid) {
assert(!norms->pixels);
*norms = image_like(float,grid);
assert(!children->pixels);
*children = empty_cube(int,gtx,gty,nc);
}
_prepare_big_cells( cell_size, offset, step, child_grid, child_norms, grid, children, norms );
}
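// Worked example (added): for cell_size=16, overlap=1 on a 64x48 image (and
// dense_step=0), offset=8 and step=16/(1+1)=8, hence gtx = 1+MAX(0,64-16)/8 = 7
// and gty = 1+MAX(0,48-16)/8 = 5; the 16x16 patches centered on this grid stay
// fully inside the image.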
void sample_patches( float_layers* hog, int_cube* pos, int patch_size, int f, float norm, int n_thread,
float_image* patches, float_array* norms )
{
assert(norm>0);
const int npos = pos->tx*pos->ty;
int_image new_pos = empty_image(int,2,npos);
for(int i=0; i<2*npos; i++)
new_pos.pixels[i] = (pos->pixels[i]-patch_size/2)/f;
patch_size /= f;
const int nh = get_patch_desc_dim(hog,patch_size);
assert(!patches->pixels);
*patches = empty_image(float,nh,npos);
assert(norms->tx==npos);
_sample_patches( hog, NULL, &new_pos, patch_size, norm, patches, norms, n_thread );
free(new_pos.pixels);
}
const float trans_inv = 0.9f;
void convolve_atomic_patches( float_layers* source, float_layers* target,
const dm_params_t* params, res_scale* first_level )
{
const int extend = 1; // slightly spatially extend response maps
const float norm = 1; // renorm patches
const int f = first_level->f; // scale factor w.r.t. original image
const int psize = first_level->patch_size; // current patch size
// first, sample patches
float_image patches = {0};
assert(!first_level->norms.pixels);
first_level->norms = image_like(float, &first_level->grid);
float_array norms_arr = {first_level->norms.pixels, (int)IMG_SIZE(&first_level->norms)};
sample_patches( source, &first_level->grid, psize, f, norm, params->n_thread, &patches, &norms_arr );
//hash_image(&patches)
// rectify the norm to a boolean (0 or 1) (useless ?)
first_level->assign = empty_array(int,norms_arr.tx);
int n=0, tx = patches.tx;
for(int i=0; i<norms_arr.tx; i++) {
norms_arr.pixels[i] = norms_arr.pixels[i]>0;
// eliminate zero-norm patches
if( norms_arr.pixels[i] ) {
if( n < i ) // copy
memcpy( patches.pixels + n*tx, patches.pixels + i*tx, tx*sizeof(float));
first_level->assign.pixels[i] = n++;
} else
first_level->assign.pixels[i] = -1;
// convolution is not fully invariant to the image border:
// blank cells outside the image are a bit disadvantageous
if( norms_arr.pixels[i] == 0 )
norms_arr.pixels[i] = 1-trans_inv;
}
patches.ty = n; // update new number of valid patches
//hash_image(&first_level->norms)
//hash_image(&patches)
// compute the first level convolutions
fastconv( &patches, target, psize/f, params->ngh_rad/f, extend, norm, params->n_thread, first_level );
free(patches.pixels);
}
int_image* maxpool3_and_subsample2( float_layers* hog, int true_shape[2], int_image* offsets, float_layers* res, int nt )
{
assert(!res->pixels);
if ( offsets->pixels == NULL )
assert( hog->tx == true_shape[0] && hog->ty == true_shape[1] );
// set downsampled size
true_shape[0] = (true_shape[0]+1)/2;
true_shape[1] = (true_shape[1]+1)/2;
assert( true_shape[0]>0 && true_shape[1]>0 );
if ( offsets->pixels == NULL ) {
// joint max-pooling and subsampling
*res = empty_layers(float, true_shape[0], true_shape[1], hog->tz);
_max_filter_3_and_subsample_layers( hog, res, nt );
return NULL;
} else {
// with offsets
float_layers maxpooled_hog = layers_like(float, hog);
_max_filter_3_layers( hog, &maxpooled_hog, nt );
//CHECK_MAPS(&maxpooled_hog);
// slightly bigger, so that the minimum size is always >= 2
int width = (hog->tx+2)/2;
int height = (hog->ty+2)/2;
*res = empty_layers(float, width, height, hog->tz);
_subsample2_offset( &maxpooled_hog, offsets, res, nt );
free(maxpooled_hog.pixels);
// compute new offsets
int_image* res_offsets = NEW(int_image);
*res_offsets = image_like(int, offsets);
for(long i=0; i<IMG_SIZE(offsets); i++)
res_offsets->pixels[i] = (int)floor( offsets->pixels[i]/2.f );
return res_offsets;
}
}
#define CHECK_MAPS(rmaps) assert(min_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))>=0 && \
max_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))<=1.001)
/* aggregate response maps of children patches to form response maps of parent patches */
int sparse_conv( int_cube* children, int_array* children_assign, float_image* child_norms,
int true_patch_size, float_layers* map, int_image* offsets, int nt,
res_scale* res )
{
float_layers ext_map;
if( MIN(map->tx,map->ty) < 5 ) {
ext_map = zeros_layers(float,MAX(5,map->tx),MAX(5,map->ty),map->tz);
for(int l=0; l<map->tz; l++)
for(int j=0; j<map->ty; j++)
for(int i=0; i<map->tx; i++)
ext_map.pixels[(l*ext_map.ty + j)*ext_map.tx + i] = map->pixels[(l*map->ty + j)*map->tx + i];
map = &ext_map;
res->true_shape[0] = ext_map.tx;
res->true_shape[1] = ext_map.ty;
}
int_image _children = reshape_z_xy(int, &res->children);
if( offsets )
res->offsets = empty_image(int, 2, _children.ty);
assert(!res->res_map.pixels);
res->res_map = empty_layers(float, map->tx, map->ty, _children.ty);
int gap = true_patch_size / 4;
assert(gap>0);
float_array _norms = reshape_xy(float, &res->norms);
float_array _child_norms = reshape_xy(float, child_norms);
// allocate useless assign
res->assign = empty_array(int, res->res_map.tz);
for(int i=0; i<res->assign.tx; i++) res->assign.pixels[i] = i;
int_array* _assign = NULL;
int_array* _ch_assign = children_assign->pixels ? children_assign : NULL;
int n = _sparse_conv( &_children, _ch_assign, gap, trans_inv, map, offsets,
&_child_norms, &_norms, _assign, &res->res_map, &res->offsets, nt );
//CHECK_MAPS(res);
if(map==&ext_map) free(ext_map.pixels);
return n;
}
res_scale new_pyramid_level(int f, int psize)
{
res_scale res = {0}; // initialize everything to 0/NULL
res.f = f; // subsampling factor with respect to original image size
res.patch_size = psize; // patch size in original image coordinates
return res;
}
// Compute the multi-scale pyramid response
void compute_matching_pyr( float_layers* source, float_layers* target, const dm_params_t* params,
matching_pyramid_t& res_maps )
{
const int src_shape[2] = {source->tx, source->ty};
int L = 0; // current pyramid level
const int atomic_psize = get_atomic_patch_size( params );
int psize = atomic_psize; // will grow by a factor 2 at each level
int f = psize/4; // initial scaling factor
// subsample if needed
avgpool2( source, params );
avgpool2( target, params );
//hash_layers(source)
//hash_layers(target)
res_maps.clear();
res_maps.push_back(new_pyramid_level(f,psize));
res_scale *child, *last = &res_maps[res_maps.size()-1];
// compute the initial patches in source image
if( params->verbose ) std_printf("layer %d, patch_size = %dx%d\n", L, psize, psize);
prepare_big_cells( src_shape, psize, params->overlap<L+1, 0, NULL, NULL, 0, &last->grid, NULL, NULL );
//hash_cube(&last->grid)
//hash_layers(source)
convolve_atomic_patches( source, target, params, last );
//hash_layers(&last->res_map)
if( params->verbose )
std_printf("remaining %ld big cells (actually, %d are unique)\n", IMG_SIZE(&last->grid), last->res_map.tz);
// non-linear correction
if( params->nlpow>0 )
fastipow( &last->res_map, params->nlpow, params->n_thread );
//hash_layers(&last->res_map)
const int dense_step = params->subsample_ref ? 0 : psize/(1+(params->overlap<1));
// aggregate patches for all subsequent levels
while( 2*psize <= MIN(params->max_psize, MAX(src_shape[0], src_shape[1])) ) {
L++;
f *= 2;
psize *= 2;
res_maps.push_back(new_pyramid_level(f,psize));
child = &res_maps[res_maps.size()-2]; // previous level
last = &res_maps[res_maps.size()-1]; // current level
if( params->verbose ) std_printf("layer %d, patch_size = %dx%d\n", L, psize, psize);
// max pooling + subsampling
//CHECK_MAPS(&child->res_map);
last->true_shape[0] = child->true_shape[0]; // will be modified in subsampled2()
last->true_shape[1] = child->true_shape[1];
float_layers subs_res_map = {0};
int_image* offsets = maxpool3_and_subsample2( &child->res_map, last->true_shape, &child->offsets,
&subs_res_map, params->n_thread );
//CHECK_MAPS(&subs_res_map);
// build the set of patches at this scale
prepare_big_cells( src_shape, psize, params->overlap<L+1, params->overlap<L,
&child->grid, &child->norms, dense_step, &last->grid, &last->children, &last->norms );
//DA(last->true_shape,2)
//hash_cube(&last->grid)
//hash_image(&last->norms)
//hash_cube(&last->children)
// aggregate children response maps to form parent response maps
sparse_conv( &last->children, &child->assign, &child->norms, psize/f, &subs_res_map, offsets,
params->n_thread, last );
free(subs_res_map.pixels);
free_image(offsets);
//CHECK_MAPS(&last->res_map);
if( params->verbose )
std_printf("remaining %ld big cells (actually, %d are unique)\n", IMG_SIZE(&last->grid), last->res_map.tz);
// non-linear correction
if( params->nlpow>0 )
fastipow(&last->res_map, params->nlpow, params->n_thread );
//hash_layers(&last->res_map)
}
}
void free_matching_pyramid( matching_pyramid_t& res_maps ) {
unsigned int i;
for(i=0; i<res_maps.size(); i++) {
res_scale& level = res_maps[i];
free(level.grid.pixels);
free(level.norms.pixels);
free(level.assign.pixels);
free(level.res_map.pixels);
free(level.max_map.pixels);
free(level.children.pixels);
free(level.passed.pixels);
}
}
#ifdef __APPLE__
static int arg_sort_maxima(void* arr, const void* a, const void* b) {
float diff = ((float*)arr)[5*(*(int*)a)+4] - ((float*)arr)[5*(*(int*)b)+4];
return (diff<0) - (diff>0); // descending order
}
#else
static int arg_sort_maxima(const void* a, const void* b, void* arr) {
float diff = ((float*)arr)[5*(*(int*)a)+4] - ((float*)arr)[5*(*(int*)b)+4];
return (diff<0) - (diff>0); // descending order
}
#endif
void reorder_rows( int_image* img, int_array* order )
{
assert(order->tx==img->ty);
const int tx = img->tx;
int_image res = image_like(int, img);
for(int i=0; i<order->tx; i++)
memcpy(res.pixels + i*tx, img->pixels+order->pixels[i]*tx, tx*sizeof(int));
free(img->pixels);
*img = res;
}
// return points corresponding to patch matches
int_image* find_optimal_matchings( matching_pyramid_t& mp, const dm_params_t* params )
{
const int nobordure = 0;
int_image* maxima = NEW(int_image);
int_array order = {0};
if( params->maxima_mode ) { // normal process: maxima detection
float th=0;
int check_parents=false, check_children=false;
float_array sc_maxima = empty_array(float,int(mp.size()));
for(unsigned int i=0; i<mp.size(); i++) sc_maxima.pixels[i]=1; // neutral values, kept for safety
_extract_maxima( mp.data(), mp.size(), &sc_maxima, th, params->min_level, params->nlpow,
check_parents, check_children, nobordure, maxima, params->n_thread );
free(sc_maxima.pixels);
order = empty_array(int,maxima->ty);
for(int i=0; i<maxima->ty; i++) order.pixels[i] = maxima->ty-1-i; // last first
} else { // we just analyse all cells at the top level
const float_layers* rmap = &mp[mp.size()-1].res_map;
const int tx = rmap->tx, txy=tx*rmap->ty;
*maxima = empty_image(int, 5, (int)LAYERS_SIZE(rmap));
for(int i=0; i<maxima->ty; i++) {
int* row = maxima->pixels + 5*i;
row[0] = mp.size()-1; // pyramid level
row[1] = i/txy; // layer number
row[2] = i%tx; // x position
row[3] = (i%txy)/tx; // y position
((float*)row)[4] = rmap->pixels[i];
}
//hash_image(maxima)
order = empty_array(int,maxima->ty);
for(int i=0; i<maxima->ty; i++) order.pixels[i] = i;
#ifdef __APPLE__
qsort_r(order.pixels, maxima->ty, sizeof(int), maxima->pixels, arg_sort_maxima);
#else
qsort_r(order.pixels, maxima->ty, sizeof(int), arg_sort_maxima, maxima->pixels);
#endif
}
if( params->verbose>0 )
std_printf("found %d local matches\n",maxima->ty);
// reorder maxima
reorder_rows( maxima, &order );
free(order.pixels);
return maxima;
}
static inline float ptdot( const float* m, float x, float y ) {
return x*m[0] + y*m[1] + m[2];
}
void apply_rot( float_cube* corres, float rot[6] ) {
assert( corres->tz == 6 );
const int nb = IMG_SIZE(corres);
float* p = corres->pixels;
for(int i=0; i<nb; i++) {
// only apply to coordinates of the first image
float x = p[0], y = p[1];
p[0] = ptdot(rot+0, x, y);
p[1] = ptdot(rot+3, x, y);
p += 6;
}
}
/* this function gathers correspondences from each local maximum in the
   response maps
*/
float_image* gather_correspondences( int src_shape[2], int target_shape[2],
matching_pyramid_t& scales, int_image* maxima,
const dm_params_t* params, full_corres_t* corres_out )
{
const int step = 4*scales[0].f; // bin size
const int n_scales = (int)scales.size();
const int tx = maxima->tx;
const int n_maxima = maxima->ty;
float_cube corres0 = zeros_cube(float, (src_shape[0]+step-1)/step, (src_shape[1]+step-1)/step,6);
float_cube corres1 = zeros_cube(float, (target_shape[0]+step-1)/step, (target_shape[1]+step-1)/step,6);
int i;
// allocate temporary optimization maps
for(i=0; i<n_scales; i++) {
long size = LAYERS_SIZE(&scales[i].res_map);
if( params->low_mem && size > 1000003 ) size = 1000003; // big prime
assert( size <= 2147483647 || !"try using -mem parameter");
scales[i].passed = zeros_array(float, (int)size);
}
#if defined(USE_OPENMP)
#pragma omp parallel for schedule(static,1) num_threads(params->n_thread)
#endif
for(i=0; i<n_maxima; i++) {
if(params->verbose && i%100==0) std_printf("\rgathering correspondences %d%%...",100*i/n_maxima);
int* m = maxima->pixels + tx*i;
int level = m[0], num_map = m[1];
int x = m[2], y = m[3];
assert(level<n_scales);
if( scales[level].offsets.pixels ) {
// add offset to form real image coordinates
x += scales[level].offsets.pixels[2*num_map+0];
y += scales[level].offsets.pixels[2*num_map+1];
}
if( params->scoring_mode ) // new mode
_argmax_correspondences( scales.data(), level, num_map, x, y, ((float*)m)[4],
&corres0, step, &corres1, step, i );
else // old iccv mode
_argmax_correspondences_v1( scales.data(), level, num_map, x, y, m[0]*((float*)m)[4],
&corres0, step, &corres1, step, i );
}
// free optimization maps
for(i=0; i<n_scales; i++) {
free( scales[i].passed.pixels );
scales[i].passed.pixels = NULL;
}
if(params->verbose) std_printf("\n");
if( params->rot45 ) { // rectify correspondences
assert( corres_out );
apply_rot( &corres0, corres_out->rot );
apply_rot( &corres1, corres_out->rot );
}
// keep only reciprocal matches
int nres;
float* corres = _intersect_corres( &corres0, &corres1, &nres );
float_image* res = NEW(float_image);
*res = (float_image){corres, 6, nres};
if( corres_out == NULL ) {
free(corres0.pixels);
free(corres1.pixels);
}
else { // save unfiltered correspondences
corres_out->corres0 = corres0;
corres_out->corres1 = corres1;
}
return res;
}
void eye_rot3x3( float rot[6] ) {
memset( rot, 0, 6*sizeof(float));
rot[0] = rot[4] = 1;
}
inline float bilinear_interp(const float* img, const int tx, const int ty, float x, float y ) {
if( x < 0 || x+1.001 >= tx ) return 0; // outside
if( y < 0 || y+1.001 >= ty ) return 0; // outside
int ix = int(x);
int iy = int(y);
img += ix + iy*tx; // move pointer
float rx = x - ix;
float ry = y - iy;
return (1-ry)*((1-rx)*img[0] + rx*img[1]) +
ry *((1-rx)*img[tx]+ rx*img[tx+1]);
}
void scale_rot3x3( float rot[6], float sc ) {
for(int i=0; i<6; i++)
rot[i] *= sc;
}
void inv_rot3x3( float rot[6], float res[6] ) {
assert( fabs((rot[0]*rot[4] - rot[1]*rot[3]) - 1) < 1e-6 );
// because rot is unitary, invert == transpose
res[0] = rot[0];
res[1] = rot[3];
res[3] = rot[1];
res[4] = rot[4];
res[2] = -rot[2]*rot[0] - rot[5]*rot[3];
res[5] = -rot[2]*rot[1] - rot[5]*rot[4];
}
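// Derivation note (added): rot stores a 2x3 rigid transform [R|t] with R a 2x2
// rotation, so its inverse is [R^T | -R^T t]; res[0],res[1],res[3],res[4]
// transpose R, and res[2],res[5] compute -R^T t, which is exactly what the
// assignments above do.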
// rotate a descriptor HOG image by a given angle
float_layers* rotate45( float_layers* hog, const dm_params_t* params, full_corres_t* corres_out ) {
assert( corres_out ); // we need it to write rot !
const int patch_size = get_atomic_patch_size( params );
const int n_rot45 = params->rot45;
if( (n_rot45 % 8) == 0 ) { // nothing to do
eye_rot3x3( corres_out->rot );
return hog;
}
const int tx = hog->tx;
const int ty = hog->ty;
// rotation matrix
float angle = n_rot45 * M_PI / 4;
float c = cos(angle), s = sin(angle);
float rot[6] = {c, -s, 0, s, c, 0};
// pt_in_original_image = rot * pt_in_rotated_image
// determine center of rotation before
float cx_before = tx/2.0;
float cy_before = ty/2.0;
// determine center of rotation after
float corners[2][4] = {{0, (float)tx, (float)tx, 0}, {0, 0, (float)ty, (float)ty}};
for(int i=0; i<4; i++) { // rotate corners
float x = corners[0][i], y = corners[1][i];
corners[0][i] = ptdot(rot+0, x, y);
corners[1][i] = ptdot(rot+3, x, y);
}
int rot_size[2] = {int(0.5 + max_array_f(corners[0], 4) - min_array_f(corners[0], 4)),
int(0.5 + max_array_f(corners[1], 4) - min_array_f(corners[1], 4)) };
get_source_shape( rot_size[0], rot_size[1], patch_size, rot_size );
float cx_after = rot_size[0]/2.0;
float cy_after = rot_size[1]/2.0;
// compute the translation
rot[2] = cx_before - ptdot(rot+0, cx_after, cy_after);
rot[5] = cy_before - ptdot(rot+3, cx_after, cy_after);
// create result
assert( hog->tz == 9 );
float_layers* rot_hog = NEW(float_layers);
*rot_hog = empty_layers(float, rot_size[0], rot_size[1], 9);
for(int c=0; c<hog->tz; c++) {
const int src_c = (c<8) ? int((c+n_rot45+256)%8) : c; // roll channels except for last one (see hog.h)
const float* f = hog->pixels + src_c * IMG_SIZE(hog);
float* p = rot_hog->pixels + c * IMG_SIZE(rot_hog);
for(int y=0; y<rot_size[1]; y++)
for(int x=0; x<rot_size[0]; x++) {
float rx = ptdot( rot+0, x, y);
float ry = ptdot( rot+3, x, y);
*p++ = bilinear_interp(f, tx, ty, rx, ry );
}
}
// output inverted rot
memcpy( corres_out->rot, rot, 6*sizeof(float) );
return rot_hog;
}
// set default parameters
void set_default_dm_params( dm_params_t* params )
{
// pixel descriptor params
set_default_desc_params( &params->desc_params );
// general parameters
params->prior_img_downscale = 1; // resolution R = 1/2^downscale, default = 1/2
params->rot45 = 0; // don't rotate the first image
params->overlap = 999; // don't use overlapping patches
params->subsample_ref = false; // don't subsample patches in reference image (=first image)
params->nlpow = 1.4;
params->ngh_rad = 0; // no limit by default
params->maxima_mode = 0; // don't use maxima, just start from all top patches
params->min_level = 2; // useless
params->max_psize = 999; // maximum patch size
params->low_mem = true; // optimize mem but then results are slightly unstable/non-reproducible
params->verbose = 0;
params->scoring_mode = 1; // improved scoring scheme
params->n_thread = 1; // no multithreading by default
}
// main function
float_image* deep_matching( image_t* img0, image_t* img1, const dm_params_t* params, full_corres_t* corres_out )
{
// verify parameters
assert(between(0,params->prior_img_downscale,3));
assert(between(0,params->overlap,999));
assert(between(0,params->subsample_ref,1));
assert(between(0.1,params->nlpow,10));
assert(between(0,params->ngh_rad,1<<16));
assert(between(0,params->maxima_mode,1));
assert(between(0,params->min_level,4));
assert(between(0,params->low_mem,1));
assert(between(0,params->scoring_mode,1));
assert(between(0,params->verbose,10));
assert(between(1,params->n_thread,128));
// extract pixel descriptors
float_layers *source, *target;
extract_image_desc( img0, img1, params, &source, &target );
if( corres_out ) // the first image is rotated
source = rotate45( source, params, corres_out );
int src_shape[2] = {source->tx, source->ty};
assert( LAYERS_SIZE(source) > 0 );
int target_shape[2] = {target->tx, target->ty};
assert( LAYERS_SIZE(target) > 0 );
//hash_layers(source)
//hash_layers(target)
// compute local matchings
matching_pyramid_t matching_pyr;
compute_matching_pyr( source, target, params, matching_pyr );
free_layers(source);
free_layers(target);
//hash_layers(&matching_pyr[matching_pyr.size()-1].res_map);
// find optimal matchings (maxima)
int_image* maxima = find_optimal_matchings(matching_pyr, params);
//hash_image(maxima);
// select the best displacements (maxpool merge)
float_image* corres = gather_correspondences( src_shape, target_shape, matching_pyr, maxima, params, corres_out );
//hash_image(corres);
// free everything
free_matching_pyramid(matching_pyr);
free_layers(maxima);
return corres;
}
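/* A minimal usage sketch (illustrative only, not from the original sources;
   assumes img0/img1 were already loaded and converted to grayscale, e.g.
   with image_gray_from_color()):

     dm_params_t params;
     set_default_dm_params(&params);
     float_image* corres = deep_matching(img0, img1, &params, NULL);
     // each row of 'corres' is a corres_t: x0 y0 x1 y1 maxima score
     free_image(corres);
*/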
void swap_first_second_img( float_cube* corres ) {
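// each row holds 6 floats: (x0,y0) in the first image, (x1,y1) in the second,
// then a score field and the scale/rot code; swap (x0,y0) <-> (x1,y1) in place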
assert( corres->tz == 6 );
const int nb = IMG_SIZE(corres);
float* p = corres->pixels;
for(int i = 0; i < nb; i++) {
float a = p[0];
float b = p[1];
float c = p[2];
float d = p[3];
*p++ = c;
*p++ = d;
*p++ = a;
*p++ = b;
p += 2;
}
}
void rescale_corres( float_cube* corres, float f0, float f1, int code ) {
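// scale (x0,y0) by f0 and (x1,y1) by f1, and stamp each row's last field
// with the scale/rotation code of the current combination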
assert( corres->tz == 6 );
const int nb = IMG_SIZE(corres);
float* p = corres->pixels;
for(int i = 0; i < nb; i++) {
p[0] *= f0;
p[1] *= f0;
p[2] *= f1;
p[3] *= f1;
p[5] = code;
p += 6;
}
}
// set default parameters
void set_default_scalerot_params( scalerot_params_t* params ) {
params->fast = true;
params->min_sc0 = 0; // scale = 2^(-0/2) = 1
params->max_sc0 = 5; // scale = 2^(-5/2) = 0.176
params->min_sc1 = 0;
params->max_sc1 = 5;
params->min_rot = 0; // rot = 0*45 = 0
params->max_rot = 8; // rot = 8*45 = 360
}
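// Worked example of the defaults above: the scale loops run sc = 0..4 for
// each image, i.e. factors 2^(-sc/2) = 1, 0.71, 0.5, 0.35, 0.25, and the
// rotation loop runs 0..7, i.e. 0 to 315 degrees in 45-degree steps.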
// main function for scale/rotation invariant version
float_image* deep_matching_scale_rot( image_t* img0, image_t* img1, dm_params_t* params,
const scalerot_params_t* sr_params ) {
// verify parameters
assert(sr_params->min_sc0 < sr_params->max_sc0);
assert(sr_params->min_sc1 < sr_params->max_sc1);
assert(between(0, sr_params->min_sc0, 5));
assert(between(0, sr_params->max_sc0, 5));
assert(between(0, sr_params->min_sc1, 5));
assert(between(0, sr_params->max_sc1, 5));
assert(sr_params->min_rot >= 0);
assert(between(1,sr_params->max_rot - sr_params->min_rot, 8));
// init shape
const int psize = get_atomic_patch_size(params);
int imshape0[2];
get_source_shape( img0->width, img0->height, psize, imshape0 );
int imshape1[2] = {img1->width, img1->height};
// check dm params to ensure everything goes fine from now on
#define mean_dim(shape) ((shape[0] + shape[1])/2)
params->max_psize = MIN(mean_dim(imshape0), mean_dim(imshape1));
const int verbose = params->verbose;
params->verbose = MAX(0, verbose - 1); // decrease for inner deepmatchings
// prepare output
const int step0 = psize/2;
const int step1 = psize/2;
float_cube all_corres0 = zeros_cube(float, (imshape0[0]+step0/2-1)/step0, (imshape0[1]+step0/2-1)/step0, 6);
float_cube all_corres1 = zeros_cube(float, (imshape1[0]+step1/2-1)/step1, (imshape1[1]+step1/2-1)/step1, 6);
full_corres_t out;
const int NS = 5;
image_t *scaled_images1[NS] = {NULL};
// loop over all scale*rot combinations
for(int sc0 = sr_params->min_sc0;
sc0 < sr_params->max_sc0;
sc0++) {
const float scale0 = pow(2, -0.5*sc0 ); // scale factor for img0
assert( scale0<=1 && sc0<5 );
image_t* scaled_img0 = ( scale0 >= 1 ) ? img0 :
image_resize_bilinear_scale( img0, scale0 );
for(int sc1 = sr_params->min_sc1;
sc1 < sr_params->max_sc1;
sc1++) {
const float scale1 = pow(2, -0.5*sc1 ); // scale factor for img1
assert( scale1<=1 && sc1<5 );
// 'fast' optimization: skip pairs where neither image is at full scale (disable it only if e.g. both images are blurry)
if( sr_params->fast && !(scale0==1 || scale1==1)) continue;
image_t* scaled_img1 = scaled_images1[sc1 - sr_params->min_sc1];
if( scaled_img1 == NULL ) {
scaled_img1 = ( scale1 >= 1 ) ? img1 :
image_resize_bilinear_scale( img1, scale1 );
// remember result
scaled_images1[sc1 - sr_params->min_sc1] = scaled_img1;
}
for(int rotation = sr_params->min_rot;
rotation < sr_params->max_rot;
rotation++) {
assert( rotation >= 0 );
const int rot_scale_code = 8*(sc1*5+sc0) + (rotation%8); // cannot be negative, because of bin count
if( verbose )
std_printf( "processing scale = (x%g, x%g) + rotation = %d deg (code %d)...\n",
scale0, scale1, 45*rotation, rot_scale_code);
float rot0[6], rot1[6];
// compute correspondences with rotated+scaled image
#define max_dim(img) MAX(img->width, img->height)
if( max_dim(scaled_img0) >= max_dim(scaled_img1) ) { // first image is always the largest
params->rot45 = rotation;
float_image* corres = deep_matching(scaled_img0, scaled_img1, params, &out );
free_image( corres ); // we don't care
inv_rot3x3(out.rot, rot0);
eye_rot3x3(rot1);
} else { // scaled_img1 is larger
params->rot45 = -rotation;
float_image* corres = deep_matching(scaled_img1, scaled_img0, params, &out );
free_image( corres ); // we don't care
// swap first and second image coordinates
memswap( &out.corres0, &out.corres1, sizeof(float_cube) );
swap_first_second_img( &out.corres0 );
swap_first_second_img( &out.corres1 );
inv_rot3x3(out.rot, rot1);
eye_rot3x3(rot0);
}
// change scale of correspondences
rescale_corres( &out.corres0, 1/scale0, 1/scale1, rot_scale_code );
rescale_corres( &out.corres1, 1/scale0, 1/scale1, rot_scale_code );
scale_rot3x3(rot0, scale0);
scale_rot3x3(rot1, scale1);
// merge correspondences in the reference frame
merge_corres( rot0, rot1,
psize, psize, &out.corres0, &out.corres1, 2,
step0, step1, &all_corres0, &all_corres1 ); // finer grid for merge
free(out.corres0.pixels);
free(out.corres1.pixels);
}
}
// free memory
if( img0 != scaled_img0 )
image_delete( scaled_img0 );
}
// final intersection
int nres;
float* corres = _intersect_corres( &all_corres0, &all_corres1, &nres );
float_image* res = NEW(float_image);
*res = (float_image){corres, 6, nres};
// free memory
for(int i=0; i<NS; i++)
if( scaled_images1[i] != img1 )
image_delete( scaled_images1[i] );
free(all_corres0.pixels);
free(all_corres1.pixels);
return res;
}
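/* A minimal usage sketch of the scale/rotation-invariant entry point
   (illustrative only; img0/img1 assumed already loaded as image_t*):

     dm_params_t params;          set_default_dm_params(&params);
     scalerot_params_t sr_params; set_default_scalerot_params(&sr_params);
     float_image* corres = deep_matching_scale_rot(img0, img1, &params, &sr_params);
     free_image(corres);

   Note that deep_matching_scale_rot() modifies params (max_psize, rot45,
   verbose) while it runs.
*/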

@ -0,0 +1,142 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___DEEP_MATCHING_H___
#define ___DEEP_MATCHING_H___
#include "array_types.h"
#include "pixel_desc.h"
#include "image.h"
#include <vector>
using namespace std;
// deep matching parameters
typedef struct {
desc_params_t desc_params;
int prior_img_downscale;// downscale the image by 2^(this) prior to matching
int rot45; // rotate second img by (45*rot45) prior to matching
int overlap; // pyramid level at which patches start to overlap (999 => no overlap at all)
bool subsample_ref; // true if larger patches higher in the pyramid are not densely sampled
float nlpow; // non-linear power rectification
int ngh_rad; // neighborhood size in pixels => crop res_map (0 == infinite)
int maxima_mode; // 1: standard / 0: from all top-level patches
int min_level; // minimum pyramid level to retrieve maxima
int max_psize; // maximum patch size
int low_mem; // use less memory to retrieve the maxima (but approximate result)
int scoring_mode; // 0: like ICCV paper / 1: improved scoring mode
int verbose; // verbosity
int n_thread; // parallelization on several cores, when possible
} dm_params_t;
// set default parameters
void set_default_dm_params( dm_params_t* params );
// scale & rotation invariant version
typedef struct {
bool fast; // avoid comparing small scaled versions of both images
int min_sc0, max_sc0; // scale range of image0 (expressed as scale=2^(-n/2))
int min_sc1, max_sc1; // scale range of image1 (expressed as scale=2^(-n/2))
int min_rot, max_rot; // rotation range (expressed as multiples of 45 degrees)
} scalerot_params_t;
// set default parameters
void set_default_scalerot_params( scalerot_params_t* params );
// response maps at a given scale
typedef struct {
int f; // subsampling factor with respect to original image size
int patch_size; // patch size in original image coordinates in first image
int_cube grid; // position (center) of each patch in first image
float_image norms; // norm of each patch in first image
int_array assign; // mapping between patches and their response maps
float_layers res_map; // response map of the patches on the second image
float_layers max_map; // max-filtered response map
int true_shape[2]; // true res_map shape (width, height) in case of crop (if ngh_rad>0)
int_image offsets; // res_map offsets in case of crop (if ngh_rad>0)
int_cube children; // index of children patches in the previous level
float_array passed; // remember the best score so far at each response when doing argmax
} res_scale;
typedef vector<res_scale> matching_pyramid_t;
// output correspondences
typedef struct {
float x0, y0; // position in first image (reference image)
float x1, y1; // position in second image (target image)
float maxima; // index of the maximum from which it was generated
float score; // matching score
} corres_t;
// for scale rot invariant matching
typedef struct {
float rot[6];
float_cube corres0;
float_cube corres1;
} full_corres_t;
// main function. Returns a float_image where each row is <corres_t>
float_image* deep_matching( image_t* img0, image_t* img1, const dm_params_t* params,
full_corres_t* corres_out ); // NULL if you don't use it
// main function for scale & rotation invariant matching. output is the same as above.
float_image* deep_matching_scale_rot( image_t* img0, image_t* img1, dm_params_t* params,
const scalerot_params_t* sr_params );
#endif

@ -0,0 +1,186 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
%module(docstring="Module to compute DeepMatching") deepmatching
%{
#define SWIG_FILE_WITH_INIT
#include <numpy/arrayobject.h>
#define CHECK_NUMPY_ARRAY(a, expected_npy) \
if(!a) { \
fprintf(stderr,"error in %s(): NULL input\n",__PRETTY_FUNCTION__); \
return NULL; \
} \
if(!PyArray_Check(a)) { \
fprintf(stderr,"error in %s(): input not numpy array\n",__PRETTY_FUNCTION__); \
return NULL; \
} \
if(!PyArray_ISCONTIGUOUS(a)) { \
fprintf(stderr,"error in %s(): array is not C-contiguous\n",__PRETTY_FUNCTION__); \
return NULL; \
} \
if(PyArray_TYPE(a)!=expected_npy) { \
fprintf(stderr,"error in %s(): input has bad type (type id %d != " #expected_npy " %d)\n",__PRETTY_FUNCTION__, \
PyArray_TYPE(a),expected_npy); \
return NULL; \
}
%}
%init %{
import_array();
%}
%{
#include "image.h"
#include "array_types.h"
%}
%typemap(in)
(color_image_t* cimg)
(color_image_t cimage) {
PyObject* a = $input;
if(a==Py_None) {
$1 = NULL;
} else {
CHECK_NUMPY_ARRAY(a, NPY_FLOAT)
cimage.c1 = (float*) PyArray_DATA(a);
a = PyObject_GetAttrString($input,"shape");
assert(PyTuple_Size(a)==3);
assert( PyInt_AsLong(PyTuple_GetItem(a,0)) == 3);
cimage.height = PyInt_AsLong(PyTuple_GetItem(a,1));
cimage.width = PyInt_AsLong(PyTuple_GetItem(a,2));
cimage.c2 = cimage.c1 + cimage.width*cimage.height;
cimage.c3 = cimage.c2 + cimage.width*cimage.height;
$1=&cimage;
}
}
%apply (color_image_t* cimg) {(color_image_t* )};
%typemap(out) float_image* corres {
PyObject *o;
npy_intp n_elem[2] = {$1->ty, $1->tx};
o = PyArray_SimpleNewFromData(2,n_elem,NPY_FLOAT,$1->pixels);
PyArray_FLAGS(o) |= NPY_OWNDATA;
// append to current function result as a tuple
$result = o;
}
%apply (float_image* corres) {(float_image* )};
float_image* deepmatching_numpy( color_image_t* cim1, color_image_t* cim2, char *options);
void usage_python();
%{
#include "deep_matching.h"
#include "io.h"
#include "main.h"
#include <string.h>
static inline bool ispowerof2( long n ) {
return (n & (n-1))==0;
}
float_image* deepmatching_numpy( color_image_t* cim1, color_image_t* cim2, char *options){
// convert images to gray
image_t *im1=image_gray_from_color(cim1), *im2=image_gray_from_color(cim2);
// set params to default
dm_params_t params;
set_default_dm_params(&params);
scalerot_params_t sr_params;
set_default_scalerot_params(&sr_params);
bool use_scalerot = false;
float fx=1, fy=1;
// read options
if( options!=NULL ){
int argc=0;
const char* argv[256];
argv[argc] = strtok(options," ");
while(argv[argc]!=NULL)
argv[++argc] = strtok(NULL," ");
parse_options(&params, &sr_params, &use_scalerot, &fx, &fy, argc, argv, PYTHON_OPTIONS, &im1, &im2);
}
if( use_scalerot )
assert( params.ngh_rad == 0 || !"max trans cannot be used in full scale and rotation mode");
else
if( params.subsample_ref && (!ispowerof2(im1->width) || !ispowerof2(im1->height)) ) {
fprintf(stderr, "WARNING: first image has dimension which are not power-of-2\n");
fprintf(stderr, "For improved results, you should consider resizing the images with '-resize <w> <h>'\n");
}
// compute deep matching
float_image* corres = use_scalerot ?
deep_matching_scale_rot( im1, im2, &params, &sr_params ) :
deep_matching ( im1, im2, &params, NULL ); // standard call
image_delete(im1); image_delete(im2);
return corres;
}
void usage_python() {
usage(PYTHON_OPTIONS);
}
%}
%pythoncode %{
from numpy import float32, rollaxis, ascontiguousarray
def deepmatching( im1=None, im2=None, options=""):
"""
matches = deepmatching.deepmatching(image1, image2, options='')
Compute the 'DeepMatching' between two images.
Images must be HxWx3 numpy arrays (converted to float32).
Options is an optional string argument ('' by default), to set the options.
The function returns a numpy array with 6 columns, each row being x1 y1 x2 y2 score index.
(index refers to the local maximum from which the match was retrieved)
Version 1.2"""
if None in (im1,im2):
usage_python()
return
# convert images
if im1.dtype != float32:
im1 = im1.astype(float32)
if im2.dtype != float32:
im2 = im2.astype(float32)
assert len(im1.shape)==3 and len(im2.shape)==3, "images must have 3 dimensions"
h, w, nchannels = im1.shape
assert nchannels==3, "images must have 3 channels"
im1 = ascontiguousarray(rollaxis(im1,2))
im2 = ascontiguousarray(rollaxis(im2,2))
corres = deepmatching_numpy( im1, im2, options)
return corres
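# A hedged usage sketch (illustrative only; the arrays below are random
# placeholders, any HxWx3 images work):
#   import numpy as np
#   im1 = (255*np.random.rand(128, 128, 3)).astype(np.float32)
#   im2 = (255*np.random.rand(128, 128, 3)).astype(np.float32)
#   matches = deepmatching(im1, im2)   # rows: x1 y1 x2 y2 score index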
%}

@ -0,0 +1,28 @@
% mex wrapper to compute the 'DeepMatching' between two images.
%
% matches = deepmatching(image1, image2, options)
%
% Images must be HxWx3 single matrices.
% Options is an optional string argument ('' by default).
% Available options are listed when calling deepmatching() without args.
%
% The function returns a matrix with 6 columns, each row being x1 y1 x2 y2 score index.
% (index refers to the local maximum from which the match was retrieved)
%
% Version 1.2.2
%
% Copyright (C) 2014 Jerome Revaud
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>
%

@ -0,0 +1,135 @@
# This file was automatically generated by SWIG (http://www.swig.org).
# Version 3.0.7
#
# Do not make changes to this file unless you know what you are doing--modify
# the SWIG interface file instead.
"""
Module to compute DeepMatching
"""
from sys import version_info
if version_info >= (2, 6, 0):
def swig_import_helper():
from os.path import dirname
import imp
fp = None
try:
fp, pathname, description = imp.find_module('_deepmatching', [dirname(__file__)])
except ImportError:
import _deepmatching
return _deepmatching
if fp is not None:
try:
_mod = imp.load_module('_deepmatching', fp, pathname, description)
finally:
fp.close()
return _mod
_deepmatching = swig_import_helper()
del swig_import_helper
else:
import _deepmatching
del version_info
try:
_swig_property = property
except NameError:
pass # Python < 2.2 doesn't have 'property'.
def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
if (name == "thisown"):
return self.this.own(value)
if (name == "this"):
if type(value).__name__ == 'SwigPyObject':
self.__dict__[name] = value
return
method = class_type.__swig_setmethods__.get(name, None)
if method:
return method(self, value)
if (not static):
if _newclass:
object.__setattr__(self, name, value)
else:
self.__dict__[name] = value
else:
raise AttributeError("You cannot add attributes to %s" % self)
def _swig_setattr(self, class_type, name, value):
return _swig_setattr_nondynamic(self, class_type, name, value, 0)
def _swig_getattr_nondynamic(self, class_type, name, static=1):
if (name == "thisown"):
return self.this.own()
method = class_type.__swig_getmethods__.get(name, None)
if method:
return method(self)
if (not static):
return object.__getattr__(self, name)
else:
raise AttributeError(name)
def _swig_getattr(self, class_type, name):
return _swig_getattr_nondynamic(self, class_type, name, 0)
def _swig_repr(self):
try:
strthis = "proxy of " + self.this.__repr__()
except:
strthis = ""
return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,)
try:
_object = object
_newclass = 1
except AttributeError:
class _object:
pass
_newclass = 0
def deepmatching_numpy(cim1, cim2, options):
return _deepmatching.deepmatching_numpy(cim1, cim2, options)
deepmatching_numpy = _deepmatching.deepmatching_numpy
def usage_python():
return _deepmatching.usage_python()
usage_python = _deepmatching.usage_python
from numpy import float32, rollaxis, ascontiguousarray
def deepmatching( im1=None, im2=None, options=""):
"""
matches = deepmatching.deepmatching(image1, image2, options='')
Compute the 'DeepMatching' between two images.
Images must be HxWx3 numpy arrays (converted to float32).
Options is an optional string argument ('' by default), to set the options.
The function returns a numpy array with 6 columns, each row being x1 y1 x2 y2 score index.
(index refers to the local maximum from which the match was retrieved)
Version 1.2"""
if None in (im1,im2):
usage_python()
return
# convert images
if im1.dtype != float32:
im1 = im1.astype(float32)
if im2.dtype != float32:
im2 = im2.astype(float32)
assert len(im1.shape)==3 and len(im2.shape)==3, "images must have 3 dimensions"
h, w, nchannels = im1.shape
assert nchannels==3, "images must have 3 channels"
im1 = ascontiguousarray(rollaxis(im1,2))
im2 = ascontiguousarray(rollaxis(im2,2))
corres = deepmatching_numpy( im1, im2, options)
return corres
# This file is compatible with both classic and new-style classes.

@ -0,0 +1,165 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include <mex.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
void std_printf(const char* format, ... ) {
va_list arglist;
va_start( arglist, format );
char buffer[1024];
vsprintf( buffer, format, arglist );
va_end(arglist);
mexPrintf(buffer);
}
void err_printf(const char* format, ... ) {
va_list arglist;
va_start( arglist, format );
char buffer[1024];
vsprintf( buffer, format, arglist );
va_end(arglist);
mexErrMsgTxt(buffer);
}
#include "image.h"
#include "deep_matching.h"
#include "io.h"
#include "main.h"
static inline bool ispowerof2( long n ) {
return (n & (n-1))==0;
}
color_image_t *input3darray_to_color_image(const mxArray *p){
const int *dims = mxGetDimensions(p);
const int h = dims[0], w = dims[1];
assert( dims[2]==3 );
float *in = (float*) mxGetData(p);
color_image_t *out = color_image_new(w, h);
for(int c=0 ; c<3 ; c++){
float *inptr = in + c*w*h;
float *outptr = out->c1 + c*w*h;
for( int j=0 ; j<h ; j++){
for( int i=0 ; i<w ; i++){
outptr[j*w+i] = inptr[i*h+j];
}
}
}
return out;
}
void corres_to_output(float_image *corres, mxArray *p){
const int h = corres->ty, w = corres->tx;
float *data = (float*) mxGetData(p);
for( int j=0 ; j<h ; j++) {
for( int i=0 ; i<w ; i++) {
data[i*h+j] = corres->pixels[j*w+i];
}
}
}
void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) {
if( nr==0 ) {
usage(MATLAB_OPTIONS);
return;
}
if ( nl != 1){
usage(MATLAB_OPTIONS);
mexErrMsgTxt("error: returns one output");
}
if( nr < 2 || nr > 3){
usage(MATLAB_OPTIONS);
mexErrMsgTxt("error: takes two to four inputs");
}
// The code is originally written for C-order arrays.
// We thus transpose all arrays in this mex-function which is not efficient...
const int *pDims;
if(mxGetNumberOfDimensions(pr[0]) != 3) mexErrMsgTxt("input images must have 3 dimensions");
if(!mxIsClass(pr[0], "single")) mexErrMsgTxt("input images must be single");
pDims = mxGetDimensions(pr[0]);
if( pDims[2]!=3 ) mexErrMsgTxt("input images must have 3 channels");
const int h = pDims[0], w = pDims[1];
color_image_t *cim1 = input3darray_to_color_image( pr[0] );
if(mxGetNumberOfDimensions(pr[1]) != 3) mexErrMsgTxt("input images must have 3 dimensions");
if(!mxIsClass(pr[1], "single")) mexErrMsgTxt("input images must be single");
pDims = mxGetDimensions(pr[1]);
if( pDims[2]!=3) mexErrMsgTxt("input images must have 3 channels");
color_image_t *cim2 = input3darray_to_color_image( pr[1] );
// convert images to gray
image_t *im1=image_gray_from_color(cim1), *im2=image_gray_from_color(cim2);
color_image_delete(cim1);
color_image_delete(cim2);
// set params to default
dm_params_t params;
set_default_dm_params(&params);
scalerot_params_t sr_params;
set_default_scalerot_params(&sr_params);
bool use_scalerot = false;
float fx=1, fy=1;
// read options
if( nr == 3 ){
char *options = mxArrayToString(pr[2]);
if( !options ) mexErrMsgTxt("Third parameter must be a string");
int argc=0;
const char* argv[256];
argv[argc] = strtok(options," ");
while(argv[argc]!=NULL)
argv[++argc] = strtok(NULL," ");
parse_options(&params, &sr_params, &use_scalerot, &fx, &fy, argc, argv, MATLAB_OPTIONS, &im1, &im2);
}
if( use_scalerot )
assert( params.ngh_rad == 0 || !"max trans cannot be used in full scale and rotation mode");
else
if( params.subsample_ref && (!ispowerof2(im1->width) || !ispowerof2(im1->height)) ) {
std_printf("WARNING: first image has dimension which are not power-of-2\n");
std_printf("For improved results, you should consider resizing the images with '-resize <w> <h>'\n");
}
// compute deep matching
float_image* corres = use_scalerot ?
deep_matching_scale_rot( im1, im2, &params, &sr_params ) :
deep_matching ( im1, im2, &params, NULL ); // standard call
// output
pl[0] = mxCreateNumericMatrix(corres->ty, corres->tx, mxSINGLE_CLASS, mxREAL);
corres_to_output(corres, pl[0]);
image_delete(im1);
image_delete(im2);
free_image(corres);
return;
}

Binary file not shown.

Binary file not shown.

@ -0,0 +1,803 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "hog.h"
#include "std.h"
/* compute horizontal gradient centered with [-1,0,1] mask
*/
void _diff_horiz(int tx, int ty, UBYTE* pixels, float* res) {
int x,y,pos=0;
float* r=res;
for(y=0; y<ty; y++,pos+=tx) {
*r++ = pixels[1+pos] - pixels[0+pos];
for(x=1; x<tx-1; x++)
*r++ = pixels[x+1+pos] - pixels[x-1+pos];
*r++ = pixels[x+pos] - pixels[x-1+pos];
}
}
/* compute vertical gradient centered with [-1,0,1] mask
*/
void _diff_vert(int tx, int ty, UBYTE* pixels, float* res) {
int x,y,pos=0;
for(x=0; x<tx; x++,pos++)
res[pos] = pixels[pos+tx] - pixels[pos];
for(y=1; y<ty-1; y++) {
pos = y*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = pixels[pos+tx] - pixels[pos-tx];
}
for(x=0; x<tx; x++,pos++)
res[pos] = pixels[pos] - pixels[pos-tx];
}
/* compute original, unsmoothed, gradient
*/
void _compute_pure_gradient( UBYTE_image* img, float_layers* grad ) {
ASSERT_SAME_SIZE(img,grad);
assert(grad->tz==2);
int tx = img->tx;
int ty = img->ty;
// first layer: vertical differences
_diff_vert(tx,ty,img->pixels,grad->pixels);
// second layer: horizontal differences
_diff_horiz(tx,ty,img->pixels,grad->pixels+tx*ty);
}
/* compute horizontal smoothing with 3-sized mask
*/
template<typename TData>
void _smooth_3_horiz(int tx, int ty, const int w_center, const int w_side, TData* pixels, TData* _res, int n_thread) {
int y;
const int sum_w = 2*w_side + w_center;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(y=0; y<ty; y++) {
int x,pos = y*tx;
TData* res = _res + pos;
*res++ = ( (w_center+w_side)*pixels[0+pos] + w_side*pixels[1+pos])/sum_w;
for(x=1; x<tx-1; x++)
*res++ = (w_side*pixels[x+1+pos] + w_center*pixels[x+pos] + w_side*pixels[x-1+pos])/sum_w;
*res++ = ( (w_center+w_side)*pixels[x+pos] + w_side*pixels[x-1+pos])/sum_w;
}
}
void _smooth_121_horiz(int tx, int ty, UBYTE* pixels, UBYTE* res, int n_thread) {
_smooth_3_horiz( tx, ty, 2, 1, pixels, res, n_thread );
}
template<typename TData>
void _smooth_5_horiz( int tx, int ty, const int w_center, const int w_side1, const int w_side2,
TData* pixels, TData* _res, int n_thread) {
int y;
const int sum_w = 2*(w_side1 + w_side2) + w_center;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(y=0; y<ty; y++) {
int x,pos = y*tx;
TData* res = _res + pos;
x=0;
*res++ = (
w_side2 * pixels[x +pos] +
w_side1 * pixels[x +pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] ) / sum_w;
x++;
*res++ = (
w_side2 * pixels[x-1+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] ) / sum_w;
for(x=2; x<tx-2; x++)
*res++ = (
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] ) / sum_w;
*res++ = (
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+1+pos] ) / sum_w;
x++;
*res++ = (
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x +pos] +
w_side2 * pixels[x +pos] ) / sum_w;
}
}
template<typename TData>
void _smooth_7_horiz(int tx, int ty, const int w_center, const int w_side1, const int w_side2, const int w_side3,
TData* pixels, TData* _res, int n_thread) {
int y;
const int sum_w = 2*(w_side1 + w_side2 + w_side3) + w_center;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(y=0; y<ty; y++) {
int x,pos = y*tx;
TData* res = _res + pos;
x=0;
*res++ = (
w_side3 * pixels[x +pos] +
w_side2 * pixels[x +pos] +
w_side1 * pixels[x +pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] +
w_side3 * pixels[x+3+pos] ) / sum_w;
x++;
*res++ = (
w_side3 * pixels[x-1+pos] +
w_side2 * pixels[x-1+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] +
w_side3 * pixels[x+3+pos] ) / sum_w;
x++;
*res++ = (
w_side3 * pixels[x-2+pos] +
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] +
w_side3 * pixels[x+3+pos] ) / sum_w;
for(x=3; x<tx-3; x++)
*res++ = (
w_side3 * pixels[x-3+pos] +
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] +
w_side3 * pixels[x+3+pos] ) / sum_w;
*res++ = (
w_side3 * pixels[x-3+pos] +
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+2+pos] +
w_side3 * pixels[x+2+pos] ) / sum_w;
x++;
*res++ = (
w_side3 * pixels[x-3+pos] +
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x+1+pos] +
w_side2 * pixels[x+1+pos] +
w_side3 * pixels[x+1+pos] ) / sum_w;
x++;
*res++ = (
w_side3 * pixels[x-3+pos] +
w_side2 * pixels[x-2+pos] +
w_side1 * pixels[x-1+pos] +
w_center* pixels[x +pos] +
w_side1 * pixels[x +pos] +
w_side2 * pixels[x +pos] +
w_side3 * pixels[x +pos] ) / sum_w;
}
}
/* compute vertical smoothing with 3-sized mask
*/
template<typename TData>
void _smooth_3_vert(int tx, int ty, const int w_center, const int w_side, TData* pixels, TData* res, int n_thread) {
int x,y,pos=0;
const int sum_w = 2*w_side + w_center;
for(x=0; x<tx; x++,pos++)
res[pos] = ( (w_center+w_side)*pixels[pos] + w_side*pixels[pos+tx])/sum_w;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(y=1; y<ty-1; y++) {
int x,pos = y*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = ( w_side*pixels[pos+tx] + w_center*pixels[pos] + w_side*pixels[pos-tx])/sum_w;
}
pos = (ty-1)*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = ( (w_center+w_side)*pixels[pos] + w_side*pixels[pos-tx])/sum_w;
}
void _smooth_121_vert(int tx, int ty, UBYTE* pixels, UBYTE* res, int n_thread) {
_smooth_3_vert( tx, ty, 2, 1, pixels, res, n_thread );
}
template<typename TData>
void _smooth_5_vert(int tx, int ty, const int w_center, const int w_side1, const int w_side2,
TData* pixels, TData* res, int n_thread) {
int x,y,pos=0;
const int sum_w = 2*(w_side1 + w_side2) + w_center;
const int tx1=tx,tx2=2*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side2 * pixels[pos] +
w_side1 * pixels[pos] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2]
)/sum_w;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side2 * pixels[pos-tx1] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2]
)/sum_w;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(y=2; y<ty-2; y++) {
int x,pos = y*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2]
)/sum_w;
}
pos = (ty-2)*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx1]
)/sum_w;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos] +
w_side2 * pixels[pos]
)/sum_w;
}
template<typename TData>
void _smooth_7_vert(int tx, int ty, const int w_center, const int w_side1, const int w_side2, const int w_side3,
TData* pixels, TData* res, int n_thread) {
int x,y,pos=0;
const int sum_w = 2*(w_side1 + w_side2 + w_side3) + w_center;
const int tx1=tx,tx2=2*tx,tx3=3*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos] +
w_side2 * pixels[pos] +
w_side1 * pixels[pos] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2] +
w_side3 * pixels[pos+tx3]
)/sum_w;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos-tx1] +
w_side2 * pixels[pos-tx1] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2] +
w_side3 * pixels[pos+tx3]
)/sum_w;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos-tx2] +
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2] +
w_side3 * pixels[pos+tx3]
)/sum_w;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(y=3; y<ty-3; y++) {
int x,pos = y*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos-tx3] +
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2] +
w_side3 * pixels[pos+tx3]
)/sum_w;
}
pos = (ty-3)*tx;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos-tx3] +
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx2] +
w_side3 * pixels[pos+tx2]
)/sum_w;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos-tx3] +
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos+tx1] +
w_side2 * pixels[pos+tx1] +
w_side3 * pixels[pos+tx1]
)/sum_w;
for(x=0; x<tx; x++,pos++)
res[pos] = (
w_side3 * pixels[pos-tx3] +
w_side2 * pixels[pos-tx2] +
w_side1 * pixels[pos-tx1] +
w_center* pixels[pos] +
w_side1 * pixels[pos] +
w_side2 * pixels[pos] +
w_side3 * pixels[pos]
)/sum_w;
}
/* Smooth an image using a Gaussian filter.
*/
template<typename TData>
void _smooth_gaussian_alltype( const int tx, const int ty, TData* img, float _sigma, TData* res, int n_thread ) {
const float MAX_SIGMA = 1.86f;
TData* img2 = img;
if(_sigma>MAX_SIGMA) { // reallocate if more than one smoothing pass is required
img2 = NEWA(TData,tx*ty);
memcpy(img2,img,tx*ty*sizeof(TData));
}
TData* tmp = NEWA(TData,tx*ty);
TData* old_res = res;
float remaining = _sigma*_sigma;
while( 1 ) {
float sigma = MIN(MAX_SIGMA,sqrt(remaining));
remaining -= sigma*sigma;
// compute gaussian filter coefficients
const int wcenter = 1000;
const int wside1 = int(0.5 + wcenter*exp( -pow2(1./sigma)/2 ));
const int wside2 = int(0.5 + wcenter*exp( -pow2(2./sigma)/2 ));
const int wside3 = int(0.5 + wcenter*exp( -pow2(3./sigma)/2 ));
const int wside4 = int(0.5 + wcenter*exp( -pow2(4./sigma)/2 ));
assert( wside4 < wcenter/10 || !"error: smoothing is too large" );
if ( wside2 < wcenter/10 ) {
_smooth_3_horiz( tx, ty, wcenter, wside1, img2, tmp, n_thread );
_smooth_3_vert( tx, ty, wcenter, wside1, tmp, res, n_thread );
} else if( wside3 < wcenter/10 ) {
_smooth_5_horiz( tx, ty, wcenter, wside1, wside2, img2, tmp, n_thread );
_smooth_5_vert( tx, ty, wcenter, wside1, wside2, tmp, res, n_thread );
} else {
_smooth_7_horiz( tx, ty, wcenter, wside1, wside2, wside3, img2, tmp, n_thread );
_smooth_7_vert( tx, ty, wcenter, wside1, wside2, wside3, tmp, res, n_thread );
}
if(remaining < 0.001)
break;
else {
TData* tmp3;
tmp3 = img2;
img2 = res;
res = tmp3;
}
}
if(res!=old_res) { // copy to true res
memcpy(old_res,res,tx*ty*sizeof(TData));
img2 = res;
}
if(_sigma>MAX_SIGMA)
free(img2);
free(tmp);
}
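/* Worked example of the decomposition above (pure arithmetic): a request for
   sigma = 3 is realized as three passes of sigma = 1.86, 1.86 and ~1.44,
   since variances add under repeated convolution:
   1.86^2 + 1.86^2 + 1.44^2 ~= 9 = 3^2. */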
void _smooth_gaussian( UBYTE_image* img, float _sigma, UBYTE_image* res, int n_thread ) {
ASSERT_SAME_SIZE(img,res);
_smooth_gaussian_alltype(img->tx,img->ty,img->pixels,_sigma,res->pixels,n_thread);
}
/* compute gradient smoothed with Sobel mask
*/
void _compute_sobel_gradient( UBYTE_image* img, float_layers* grad, int n_thread ) {
ASSERT_SAME_SIZE(img,grad);
assert(grad->tz==2);
int tx = img->tx;
int ty = img->ty;
UBYTE* tmp = NEWA(UBYTE,tx*ty);
// first layer: smooth horizontally, then take vertical differences
_smooth_121_horiz(tx,ty,img->pixels,tmp, n_thread);
_diff_vert(tx,ty,tmp,grad->pixels);
// second layer: smooth vertically, then take horizontal differences
_smooth_121_vert(tx,ty,img->pixels,tmp, n_thread);
_diff_horiz(tx,ty,tmp,grad->pixels+tx*ty);
// free everything
free(tmp);
}
/* Compute the dx,dy gradient on the image based on a [-1,0,1] mask.
=0 : no prior smoothing
=1 : sobel smoothing
*/
void _compute_grad_101( UBYTE_image* img, int method, float_layers* grad, int n_thread ) {
ASSERT_SAME_SIZE(img,grad);
assert(grad->tz==2);
// compute gradient
if( method == 0 )
_compute_pure_gradient(img, grad);
else if( method == 1 )
_compute_sobel_gradient(img, grad, n_thread);
else
assert(!"error: unknown method for compute_grad_101");
}
/* Compute the Histogram of oriented gradient for each pixel.
Number of orientations is determined by hog->tz;
method determines orientation binning:
=0 : atan + linear interpolation
=1 : fast cos projection
*/
void _compute_hog( float_layers* grad, int method, float_layers* hog, int n_thread ) {
ASSERT_SAME_SIZE(grad,hog);
const int n_ori = hog->tz;
const int npix = hog->tx*hog->ty;
const float* dx = grad->pixels;
const float* dy = grad->pixels + npix;
if( method == 0 ) {
// use atan
memset(hog->pixels,0,n_ori*npix*sizeof(float));
int i;
for(i=0; i<npix; i++) {
float norm = sqrt(dy[i]*dy[i] + dx[i]*dx[i]);
float angle = atan2(dy[i],dx[i]); // angle in [-pi,pi]
float b_angle = (angle + M_PI)/n_ori;
int q_angle = int(0.5 + b_angle); // first bin
float coef = b_angle-q_angle;
q_angle = (q_angle + 3*n_ori/2) % n_ori;
hog->pixels[ ((q_angle ) )*npix + i ] += (1-coef)*norm;
hog->pixels[ ((q_angle+1)%n_ori)*npix + i ] += ( coef)*norm;
}
} else if (method == 1 ) {
int l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_ori; l++) {
float angle = -2*(l-2)*M_PI/n_ori;
float kos = cos( angle );
float zin = sin( angle );
float* layer_l = hog->pixels + l*npix;
int i;
for(i=0; i<npix; i++) {
float value = kos*dx[i] + zin*dy[i];
layer_l[i] = (value > 0 ) ? value : 0;
}
}
} else
assert(!"error: unknown method for compute_hog");
}
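/* Note on method 1 above: layer l stores the half-wave rectified projection
   of the gradient onto the direction -2*(l-2)*pi/n_ori, i.e. a soft
   orientation binning that avoids atan2 entirely. */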
/* compute 8 directions of gradient per pixel
using 4 extremely simple oriented filters like [-1,1]
*/
void _compute_hog_8_direct( UBYTE_image* image, float_layers* hog_out, int n_thread ) {
ASSERT_SAME_SIZE(image,hog_out);
assert(hog_out->tz==8);
int j,tx=image->tx, ty=image->ty;
int npix=tx*image->ty;
// init output
memset(hog_out->pixels,0,8*npix*sizeof(float));
// compute horizontal filter
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(j=0; j<ty; j++) {
UBYTE* img = image->pixels + j*tx;
UBYTE* lastimg = img + tx-1;
float* hog0f = hog_out->pixels + 0*npix + j*tx; // first
float* hog0l = hog0f+1; // last
float* hog1f = hog_out->pixels + 4*npix + j*tx; // first
float* hog1l = hog1f+1; // last
for(; img<lastimg; img++) {
float diff = img[1] - img[0];
float pos,neg;
if( diff < 0 ) {
pos = 0;
neg = -diff/2.f;
} else {
neg = 0;
pos = diff/2.f;
}
*hog0f++ += neg;
*hog1f++ += pos;
*hog0l++ += neg;
*hog1l++ += pos;
}
}
// compute vertical filter
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(j=0; j<ty-1; j++) {
UBYTE* img = image->pixels + j*tx;
UBYTE* lastimg = img + tx;
const int offset = tx;
UBYTE* img2 = img + offset;
float* hog0f = hog_out->pixels + 2*npix + j*tx; // first
float* hog0l = hog0f + offset; // last
float* hog1f = hog_out->pixels + 6*npix + j*tx; // first
float* hog1l = hog1f + offset; // last
while(img<lastimg) {
float diff = (*img2++) - (*img++);
float pos,neg;
if( diff < 0 ) {
pos = 0;
neg = -diff/2.f;
} else {
neg = 0;
pos = diff/2.f;
}
*hog0f++ += neg;
*hog1f++ += pos;
*hog0l++ += neg;
*hog1l++ += pos;
}
}
const float div_diag = 2*1.2666f; // learned
// compute diagonal filter 1
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(j=0; j<ty-1; j++) {
UBYTE* img = image->pixels + j*tx;
UBYTE* lastimg = img + tx-1;
const int offset = 1+tx;
UBYTE* img2 = img + offset;
float* hog0f = hog_out->pixels + 1*npix + j*tx; // first
float* hog0l = hog0f + offset; // last
float* hog1f = hog_out->pixels + 5*npix + j*tx; // first
float* hog1l = hog1f + offset; // last
while(img<lastimg) {
float diff = (*img2++) - (*img++);
float pos,neg;
if( diff < 0 ) {
pos = 0;
neg = -diff/div_diag;
} else {
neg = 0;
pos = diff/div_diag;
}
*hog0f++ += neg;
*hog1f++ += pos;
*hog0l++ += neg;
*hog1l++ += pos;
}
}
// compute diagonal filter 2
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(j=1; j<ty; j++) {
UBYTE* img = image->pixels + j*tx;
UBYTE* lastimg = img + tx-1;
const int offset = 1-tx;
UBYTE* img2 = img + offset;
float* hog0f = hog_out->pixels + 7*npix + j*tx; // first
float* hog0l = hog0f + offset; // last
float* hog1f = hog_out->pixels + 3*npix + j*tx; // first
float* hog1l = hog1f + offset; // last
while(img<lastimg) {
float diff = (*img2++) - (*img++);
float pos,neg;
if( diff < 0 ) {
pos = 0;
neg = -diff/div_diag;
} else {
neg = 0;
pos = diff/div_diag;
}
*hog0f++ += neg;
*hog1f++ += pos;
*hog0l++ += neg;
*hog1l++ += pos;
}
}
}
/* Post-processing of the HOG: cross-orientation inhibition.
For one pixel i with M = max_o hog[i,o] and m = coef*mean_o hog[i,o]:
hog[i,o] = max(0, M*(hog[i,o] - m)/(M - m))
i.e. responses below m are suppressed while the per-pixel maximum is preserved.
*/
void subtract_mean_ori( float_layers* hog, float coef, int n_thread ) {
const int npix = hog->tx*hog->ty;
int l;
float* sum = NEWAC(float, npix);
float* max = NEWAC(float, npix);
// compute mean per pixel
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<hog->tz; l++) {
float* p = sum;
float* m = max;
float* hog_pix = hog->pixels + l*npix;
int i;
for(i=0; i<npix; i++,m++) {
float v = *hog_pix++;
*p++ += v;
float max = *m;
if(v>max) *m=v;
}
}
// subtract coef*mean
coef /= hog->tz;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<hog->tz; l++) {
float* p = sum;
float* m = max;
float* hog_pix = hog->pixels + l*npix;
int i;
for(i=0; i<npix; i++) {
float Max = *m++; // max
float mean = coef * (*p++); // == mean * coef
if( mean >= Max )
*hog_pix = 0;
else {
*hog_pix = Max*(1 - (Max - (*hog_pix))/(Max - mean + 1e-8f));
if(*hog_pix<0) *hog_pix = 0;
}
hog_pix++;
}
}
free(sum);
free(max);
}
/* Pass the gradient image through a sigmoid
*/
void sigmoid_array( float_array* img, float coef, float offset, int n_thread ) {
assert(coef>0);
const int npix=img->tx;
// float* p = img->pixels;
// for(i=0; i<npix; i++) {
// float v = *p;
// *p++ = 2.f/(1.f + exp(-coef*v + offset)) - 1.f;
// }
int l;
// optimization: precompute a lookup table of the sigmoid, sampled at
// v = arange(0, 8, 1/8.) and renormalized to [0,1] below
const int npc = 64;
float precom[npc+1];
for(l=0; l<=npc; l++) precom[l]= 1.f/(1.f + exp(-l/8.f + offset));
for(l=1; l<=npc; l++) precom[l] = (precom[l]-precom[0]) / (1 - precom[0]); // renorm between 0 and 1
precom[0] = 0;
const float maxindex = npc - 0.001;
#define NSUB 32
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<NSUB; l++) {
int start = (l*npix)/NSUB;
int end = (l+1)*npix/NSUB;
int npixsub = end-start;
float* p = img->pixels + start;
int i;
for(i=0; i<npixsub; i++) {
float v = 8*(coef*(*p));
if(v>maxindex) v=maxindex;
int n = int(v);
float w = v-n;
*p++ = (1-w)*precom[n] + w*precom[n+1];
}
}
}
/* Compute a spatially smoothed version of the HOG.
*/
void smooth_hog_gaussian( float_layers* hog, float smoothing, int n_thread ) {
int l;
const int npix = hog->tx*hog->ty;
for(l=0; l<hog->tz; l++)
_smooth_gaussian_alltype(hog->tx,hog->ty,hog->pixels+l*npix,smoothing,hog->pixels+l*npix, n_thread);
}

@ -0,0 +1,111 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___HOG_H___
#define ___HOG_H___
#include "array_types.h"
/* * * * * * IMAGE SMOOTHING * * * * * * */
/* Smooth an image using a Gaussian filter.
*/
void _smooth_gaussian( UBYTE_image* img, float sigma, UBYTE_image* res, int n_thread );
/* * * * * * GRADIENT COMPUTATIONS * * * * * * */
/* Compute the dx,dy gradient on the image based on a [-1,0,1] mask.
method
=0 : no prior smoothing
=1 : sobel smoothing
*/
void _compute_grad_101( UBYTE_image* img, int method, float_layers* grad, int n_thread );
/* * * * * * pixel-HOG COMPUTATIONS * * * * * * */
/* Compute the Histogram of oriented gradient for each pixel.
Number of orientations is determined by hog->tz;
method determines orientation binning:
=0 : atan + linear interpolation
=1 : fast cos projection
*/
void _compute_hog( float_layers* grad, int method, float_layers* hog, int n_thread );
/* Compute per-pixel HOG of 8 directions using a different pipeline.
The method uses 4 extremely simple oriented filters ([-1,1])
*/
void _compute_hog_8_direct( UBYTE_image* image, float_layers* hog_out, int n_thread );
/* Post-processing of the HOG: cross-orientation inhibition.
For one pixel i with M = max_o hog[i,o] and m = coef*mean_o hog[i,o]:
hog[i,o] = max(0, M*(hog[i,o] - m)/(M - m))
This is useful for HOGs computed from cosine projection.
*/
void subtract_mean_ori( float_layers* hog, float coef, int n_thread );
/* Pass the gradient image through a sigmoid
lambda v: 2/(1 + exp(-coef*v + offset)) - 1
*/
void sigmoid_array( float_array* img, float coef, float offset, int n_thread );
/* Compute a spatially smoothed version of the HOG.
*/
void smooth_hog_gaussian( float_layers* hog, float smoothing, int n_thread );
#endif

@ -0,0 +1,268 @@
#include "image.h"
#include "std.h"
/********** Create/Delete **********/
/* allocate a new image of size width x height */
image_t *image_new(int width, int height)
{
image_t *image = NEW(image_t);
if(image == NULL)
{
err_printf( "Error: image_new() - not enough memory !\n");
exit(1);
}
image->width = width;
image->height = height;
image->stride = ( (width+3) / 4 ) * 4;
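// rows are padded to a multiple of 4 floats, e.g. width 10 -> stride 12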
image->data = NEWA(float, image->stride*height*sizeof(float));
if(image->data == NULL)
{
err_printf( "Error: image_new() - not enough memory !\n");
exit(1);
}
return image;
}
/* allocate a new image and copy the content from src */
image_t *image_cpy(const image_t *src)
{
image_t *dst = image_new(src->width, src->height);
memcpy(dst->data, src->data, src->stride*src->height*sizeof(float));
return dst;
}
/* set all pixels values to zeros */
void image_erase(image_t *image)
{
memset(image->data, 0, image->stride*image->height*sizeof(float));
}
/* multiply an image by a scalar */
void image_mul_scalar(image_t *image, float scalar)
{
int i;
for( i=0 ; i<image->stride*image->height ; i++)
image->data[i] *= scalar;
}
/* free memory of an image */
void image_delete(image_t *image)
{
if(image == NULL)
{
//err_printf( "Warning: Delete image --> Ignore action (image not allocated)\n");
}
else
{
free(image->data);
free(image);
}
}
/* allocate a new color image of size width x height */
color_image_t *color_image_new(int width, int height)
{
size_t stride_channel = width*height*sizeof(float);
char *buffer = NEWA(char, sizeof(color_image_t) + 3*stride_channel);
if(buffer == NULL)
{
err_printf( "Error: color_image_new() - not enough memory !\n");
exit(1);
}
color_image_t *image = (color_image_t*) buffer;
image->width = width;
image->height = height;
image->c1 = (float*) (buffer + sizeof(color_image_t));
image->c2 = (float*) (buffer + sizeof(color_image_t) + stride_channel);
image->c3 = (float*) (buffer + sizeof(color_image_t) + 2*stride_channel);
return image;
}
/* allocate a new color image and copy the content from src */
color_image_t *color_image_cpy(const color_image_t *src)
{
color_image_t *dst = color_image_new(src->width, src->height);
memcpy(dst->c1, src->c1, 3*src->width*src->height*sizeof(float));
return dst;
}
/* set all pixels values to zeros */
void color_image_erase(color_image_t *image)
{
memset(image->c1, 0, 3*image->width*image->height*sizeof(float));
}
/* free memory of a color image */
void color_image_delete(color_image_t *image)
{
if(image)
{
free(image); // the image is allocated such that the data is stored just after the pointer
}
}
/* convert a color image to a gray-scale image */
image_t* image_gray_from_color( color_image_t* img )
{
image_t* res = image_new(img->width, img->height);
int n=0;
for(int j=0; j<img->height; j++)
for(int i=0; i<img->width; i++,n++)
res->data[i+j*res->stride] = (img->c1[n] + img->c2[n] + img->c3[n])/3;
return res;
}
/* reallocate the memory of an image to fit the new width height */
void resize_if_needed_newsize(image_t *im, int w, int h)
{
if(im->width != w || im->height != h)
{
im->width = w;
im->height = h;
im->stride = ((w+3)/4)*4;
float *data = NEWA(float,im->stride*h*sizeof(float));
if(data == NULL)
{
err_printf( "Error: resize_if_needed_newsize() - not enough memory !\n");
exit(1);
}
free(im->data);
im->data = data;
}
}
/************ Resizing *********/
/* resize an image to a new size (assumes a difference only in width) */
void image_resize_horiz(image_t *dst, const image_t *src)
{
int i;
float real_scale = ((float) src->width-1) / ((float) dst->width-1);
for(i = 0; i < dst->height; i++)
{
int j;
for(j = 0; j < dst->width; j++)
{
float dx;
int x;
x = floor((float) j * real_scale);
dx = j * real_scale - x;
if(x >= (src->width - 1))
{
dst->data[i * dst->stride + j] =
src->data[i * src->stride + src->width - 1];
}
else
{
dst->data[i * dst->stride + j] =
(1.0f - dx) * src->data[i * src->stride + x ] +
( dx) * src->data[i * src->stride + x + 1];
}
}
}
}
/* resize an image to a new size (assumes a difference only in height) */
void image_resize_vert(image_t *dst, const image_t *src)
{
int i;
float real_scale = ((float) src->height-1) / ((float) dst->height-1);
for(i = 0; i < dst->width; i++)
{
int j;
for(j = 0; j < dst->height; j++)
{
int y;
float dy;
y = floor((float) j * real_scale);
dy = j * real_scale - y;
if(y >= (src->height - 1))
{
dst->data[j * dst->stride + i] =
src->data[i + (src->height - 1) * src->stride];
}
else
{
dst->data[j * dst->stride + i] =
(1.0f - dy) * src->data[i + (y ) * src->stride] +
( dy) * src->data[i + (y + 1) * src->stride];
}
}
}
}
/* resize an image with bilinear interpolation to fit the new width and height; reallocation is done if necessary */
void image_resize_bilinear_newsize(image_t *dst, const image_t *src, int new_width, int new_height)
{
resize_if_needed_newsize(dst,new_width,new_height);
if(new_width < new_height)
{
image_t *tmp = image_new(new_width,src->height);
image_resize_horiz(tmp,src);
image_resize_vert(dst,tmp);
image_delete(tmp);
}
else
{
image_t *tmp = image_new(src->width,new_height);
image_resize_vert(tmp,src);
image_resize_horiz(dst,tmp);
image_delete(tmp);
}
}
/* resize an image with bilinear interpolation */
image_t *image_resize_bilinear_scale(const image_t *src, float scale) {
const int new_width = int(0.5 + src->width * scale);
const int new_height = int(0.5 + src->height * scale);
image_t *res = image_new(new_width,new_height);
image_resize_bilinear_newsize(res, src, new_width, new_height);
return res;
}
/* crop an image (in-place) */
void image_crop(image_t* img, int width, int height)
{
assert(width<=img->width);
img->width = width;
assert(height<=img->height);
img->height = height;
}

@ -0,0 +1,103 @@
#ifndef ___IMAGE_H___
#define ___IMAGE_H___
/********** STRUCTURES *********/
/* structure for 1-channel image */
typedef struct image_s
{
int width; /* Width of the image */
int height; /* Height of the image */
int stride; /* Width of the memory (width + padding such that it is a multiple of 4) */
float *data; /* Image data */
} image_t;
/* structure for 3-channels image stored with one layer per color, it assumes that c2 = c1+width*height and c3 = c2+width*height. */
typedef struct color_image_s
{
int width; /* Width of the image */
int height; /* Height of the image */
float *c1; /* Color 1 */
float *c2; /* Color 2 */
float *c3; /* Color 3 */
} color_image_t;
/********** Create/Delete **********/
/* allocate a new image of size width x height */
image_t *image_new(int width, int height);
/* allocate a new image and copy the content from src */
image_t *image_cpy(const image_t *src);
/* set all pixels values to zeros */
void image_erase(image_t *image);
/* free memory of an image */
void image_delete(image_t *image);
/* multiply an image by a scalar */
void image_mul_scalar(image_t *image, float scalar);
/* allocate a new color image of size width x height */
color_image_t *color_image_new(int width, int height);
/* allocate a new color image and copy the content from src */
color_image_t *color_image_cpy(const color_image_t *src);
/* set all pixels values to zeros */
void color_image_erase(color_image_t *image);
/* free memory of a color image */
void color_image_delete(color_image_t *image);
/* convert a color image to a gray-scale image */
image_t* image_gray_from_color( color_image_t* img ) ;
/* reallocate the memory of an image to fit the new width height */
void resize_if_needed_newsize(image_t *im, int w, int h);
/************ Resizing *********/
/* resize an image with bilinear interpolation */
image_t *image_resize_bilinear_scale(const image_t *src, float scale);
/* resize an image with bilinear interpolation to fit the new width and height; reallocation is done if necessary */
void image_resize_bilinear_newsize(image_t *dst, const image_t *src, int new_width, int new_height);
/* resize a color image with bilinear interpolation */
color_image_t *color_image_resize_bilinear(const color_image_t *src, float scale);
/* crop an image (in-place) */
void image_crop(image_t* img, int width, int height);
#endif

@ -0,0 +1,402 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "std.h"
#include <stdio.h>
#include <jpeglib.h>
#include <png.h>
#include <setjmp.h>
#include "io.h"
void output_correspondences( const char* out_filename, const corres_t* corres, int nb, float fx, float fy )
{
assert(0<fx && fx<=2);
assert(0<fy && fy<=2);
FILE* f = out_filename ? fopen(out_filename,"w") : NULL;
for(int i=0; i<nb; i++) {
const corres_t* r = corres + i; // one row
if (f)
fprintf(f,"%g %g %g %g %g %g\n",fx*r->x0,fy*r->y0,fx*r->x1,fy*r->y1,r->maxima,r->score);
else
std_printf("%g %g %g %g %g %g\n",fx*r->x0,fy*r->y0,fx*r->x1,fy*r->y1,r->maxima,r->score);
}
if(out_filename)
fclose(f);
}
/* IMAGE */
// PPM
typedef struct
{
int magic;
int width;
int height;
int pixmax;
} ppm_hdr_t;
static void get_magic(FILE *fp, ppm_hdr_t *ppm_hdr)
{
char str[1024];
fgets(str, 1024, fp);
if(str[0] == 'P' && (str[1] >= '1' && str[1] <= '6'))
{
ppm_hdr->magic = str[1] - '0';
}
}
static int skip_comment(FILE *fp)
{
char c;
do
{
c = (char) fgetc(fp);
}
while (c == ' ' || c == '\t' || c == '\n');
if(c == '#')
{
do
{
c = (char) fgetc(fp);
} while(c != 0x0A);
return 1;
}
else
{
ungetc(c, fp);
}
return 0;
}
/*----------------------------------------------------------------------------*/
static void skip_comments(FILE *fp)
{
while(skip_comment(fp));
}
/*----------------------------------------------------------------------------*/
static int get_image_size(FILE *fp, ppm_hdr_t *ppm_hdr)
{
skip_comments(fp);
if(fscanf(fp, "%d %d", &ppm_hdr->width, &ppm_hdr->height) != 2)
{
err_printf( "Warning: PGM --> File currupted\n");
return 0;
}
return 1;
}
/*----------------------------------------------------------------------------*/
static int get_pixmax(FILE *fp, ppm_hdr_t *ppm_hdr)
{
skip_comments(fp);
ppm_hdr->pixmax = 1;
if(ppm_hdr->magic == 2 || ppm_hdr->magic == 3 || ppm_hdr->magic == 5 || ppm_hdr->magic == 6)
{
if(fscanf(fp, "%d", &ppm_hdr->pixmax) != 1)
{
err_printf( "Warning: PGM --> pixmax not valid\n");
return 0;
}
}
fgetc(fp);
return 1;
}
/*----------------------------------------------------------------------------*/
static int get_ppm_hdr(FILE *fp, ppm_hdr_t *ppm_hdr)
{
get_magic(fp, ppm_hdr);
if(!get_image_size(fp, ppm_hdr))
{
return 0;
}
if(!get_pixmax(fp, ppm_hdr))
{
return 0;
}
return 1;
}
static void raw_read_color(FILE *fp, color_image_t *image)
{
int i, size=image->height*image->width;
for(i=0;i<size;i++)
{
image->c1[i]=(float) fgetc(fp);
image->c2[i]=(float) fgetc(fp);
image->c3[i]=(float) fgetc(fp);
}
}
color_image_t *color_image_pnm_load(FILE *fp)
{
color_image_t *image = NULL;
ppm_hdr_t ppm_hdr;
if(!get_ppm_hdr(fp, &ppm_hdr))
{
return NULL;
}
switch(ppm_hdr.magic)
{
case 1: /* PBM ASCII */
case 2: /* PGM ASCII */
case 3: /* PPM ASCII */
case 4: /* PBM RAW */
case 5: /* PGM RAW */
err_printf( "color_image_pnm_load: only PPM raw with maxval 255 supported\n");
break;
case 6: /* PPM RAW */
image = color_image_new(ppm_hdr.width, ppm_hdr.height);
raw_read_color(fp, image);
break;
}
return image;
}
// JPG
color_image_t *color_image_jpeg_load(FILE *fp)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
JSAMPARRAY buffer;
int row_stride;
int index = 0;
color_image_t *image = NULL;
float *r_p, *g_p, *b_p;
JSAMPROW buffer_p;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_stdio_src(&cinfo, fp);
jpeg_read_header(&cinfo, TRUE);
cinfo.out_color_space = JCS_RGB;
cinfo.quantize_colors = FALSE;
image = color_image_new(cinfo.image_width, cinfo.image_height);
if(image == NULL)
{
return NULL;
}
jpeg_start_decompress(&cinfo);
row_stride = cinfo.output_width * cinfo.output_components;
buffer = (*cinfo.mem->alloc_sarray)
((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
r_p = image->c1;
g_p = image->c2;
b_p = image->c3;
while (cinfo.output_scanline < cinfo.output_height)
{
jpeg_read_scanlines(&cinfo, buffer, 1);
buffer_p = buffer[0];
index = cinfo.output_width;
while(index--)
{
*r_p++ = (float) *buffer_p++;
*g_p++ = (float) *buffer_p++;
*b_p++ = (float) *buffer_p++;
}
}
jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
return image;
}
color_image_t * color_image_png_load( FILE* fp, const char* file_name )
{
    // read the header; note that the caller (color_image_load) owns fp and
    // closes it, so the error paths below must not call fclose themselves
    png_byte header[8];
    if (fread(header, 1, 8, fp) != 8 || png_sig_cmp(header, 0, 8))
    {
        err_printf( "error: %s is not a PNG.\n", file_name);
        return 0;
    }
    png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    if (!png_ptr)
    {
        err_printf( "error: png_create_read_struct returned 0.\n");
        return 0;
    }
    // create png info struct
    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr)
    {
        err_printf( "error: png_create_info_struct returned 0.\n");
        png_destroy_read_struct(&png_ptr, (png_infopp)NULL, (png_infopp)NULL);
        return 0;
    }
    // create png end-info struct
    png_infop end_info = png_create_info_struct(png_ptr);
    if (!end_info)
    {
        err_printf( "error: png_create_info_struct returned 0.\n");
        png_destroy_read_struct(&png_ptr, &info_ptr, (png_infopp) NULL);
        return 0;
    }
    // the code in this if statement gets called if libpng encounters an error
    if (setjmp(png_jmpbuf(png_ptr))) {
        err_printf( "error from libpng\n");
        png_destroy_read_struct(&png_ptr, &info_ptr, &end_info);
        return 0;
    }
// init png reading
png_init_io(png_ptr, fp);
// let libpng know you already read the first 8 bytes
png_set_sig_bytes(png_ptr, 8);
// read all the info up to the image data
png_read_info(png_ptr, info_ptr);
// variables to pass to get info
int bit_depth, color_type;
png_uint_32 temp_width, temp_height;
// get info about png
png_get_IHDR(png_ptr, info_ptr, &temp_width, &temp_height, &bit_depth, &color_type,
NULL, NULL, NULL);
// Update the png info struct.
png_read_update_info(png_ptr, info_ptr);
// Row size in bytes.
int rowbytes = png_get_rowbytes(png_ptr, info_ptr);
// Allocate the image_data as a big block, to be given to opengl
png_byte * image_data;
image_data = NEWA(png_byte, rowbytes * temp_height);
assert(image_data!=NULL);
// row_pointers is for pointing to image_data for reading the png with libpng
png_bytep * row_pointers = NEWA(png_bytep, temp_height);
assert(row_pointers!=NULL);
// set the individual row_pointers to point at the correct offsets of image_data
unsigned int i;
for (i = 0; i <temp_height; i++)
row_pointers[i] = image_data + i * rowbytes;
// read the png into image_data through row_pointers
png_read_image(png_ptr, row_pointers);
// copy into color image
color_image_t* image = color_image_new(temp_width,temp_height);
if( color_type==0 ) {
assert((unsigned)rowbytes == temp_width || !"error: not a proper gray png image");
for(i=0; i<temp_width*temp_height; i++)
image->c1[i] = image->c2[i] = image->c3[i] = image_data[i];
}
else if( color_type == 2 ) {
assert((unsigned)rowbytes == 3*temp_width || !"error: not a proper color png image");
for(i=0; i<temp_width*temp_height; i++) {
image->c1[i] = image_data[3*i+0];
image->c2[i] = image_data[3*i+1];
image->c3[i] = image_data[3*i+2];
}
} else
assert(!"error: unknown PNG color type" );
// clean up
png_destroy_read_struct(&png_ptr, &info_ptr, &end_info);
free(row_pointers);
free(image_data);
return image;
}
// GENERAL LOAD
/* load a color image from a file */
color_image_t *color_image_load(const char *fname)
{
FILE *fp;
char magic[2];
unsigned short *magic_short = (unsigned short *) magic;
color_image_t *image = NULL;
if((fp = fopen(fname, "rb")) == NULL)
{
err_printf( "Warning: color_image_load() - can not open file `%s' !\n", fname);
return NULL;
}
if(fread(magic, sizeof(char), 2, fp) != 2)
{
    err_printf( "Warning: color_image_load() - file `%s' is too short !\n", fname);
    fclose(fp);
    return NULL;
}
rewind(fp);
if(magic_short[0] == 0xd8ff) /* JPEG SOI marker 0xFFD8, read little-endian */
{
    image = color_image_jpeg_load(fp);
}
else if(magic[0]=='P' && (magic[1]=='6' || magic[1]=='5'))
{ /* PPM/PGM raw */
    image = color_image_pnm_load(fp);
}
else if( magic[0]==(char)0x89 && magic[1]=='P' ) /* PNG signature */
{
    image = color_image_png_load( fp, fname );
}
else
{
err_printf( "Warning: color_image_load(%s) - image format not recognized\n",fname);
}
fclose(fp);
return image;
}
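/* Usage sketch (added; not part of the original sources; the file name is
   hypothetical and error handling is kept minimal):
       color_image_t *cim = color_image_load("frame.png");
       if( cim ) {
           image_t *gray = image_gray_from_color(cim); // as done in main()
           color_image_delete(cim);
           // ... use gray ...
           image_delete(gray);
       }
*/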

@ -0,0 +1,30 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___IO_H___
#define ___IO_H___
#include <stdlib.h>
#include "image.h"
#include "deep_matching.h"
// output correspondences to a file or on the stdout
void output_correspondences( const char* out_filename, const corres_t* corres, int nb, float fx, float fy );
/* load a color image from a file */
color_image_t *color_image_load(const char *fname);
#endif

Binary file not shown. (image, 3.9 KiB)

Binary file not shown. (image, 3.7 KiB)

@ -0,0 +1,327 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "std.h"
#include "image.h"
#include "io.h"
#include "deep_matching.h"
#include "main.h"
#include <thread>
void usage(const int language)
{
#define p(msg) std_printf(msg "\n");
p("usage:");
switch(language){
case EXE_OPTIONS:
p("./deepmatching image1 image2 [options]");
p("Compute the 'DeepMatching' between two images and print a list of")
p("pair-wise point correspondences:")
p(" x1 y1 x2 y2 score index ...")
p("(index refers to the local maximum from which the match was retrieved)")
p("Images must be in PPM, PNG or JPG format. Version 1.2.2")
break;
case MATLAB_OPTIONS:
p("matches = deepmatching(image1, image2 [, options])")
p("Compute the 'DeepMatching' between two images.")
p("Images must be HxWx3 single matrices.")
p("Options is an optional string argument ('' by default).")
p("The function returns a matrix with 6 columns, each row being x1 y1 x2 y2 score index.")
p("(index refers to the local maximum from which the match was retrieved)")
p("Version 1.2.2")
break;
case PYTHON_OPTIONS:
p("matches = deepmatching.deepmatching(image1, image2, options='')")
p("Compute the 'DeepMatching' between two images.")
p("Images must be HxWx3 numpy arrays (converted to float32).")
p("Options is an optional string argument ('' by default).")
p("The function returns a numpy array with 6 columns, each row being x1 y1 x2 y2 score index.")
p("(index refers to the local maximum from which the match was retrieved)")
p("Version 1.2.2")
break;
}
p("")
p("Options:")
p(" -h, --help print this message")
//p(" HOG parameters (low-level pixel descriptor):")
//p(" -png_settings (auto) recommended for uncompressed images")
//p(" -jpg_settings (auto) recommended for compressed images")
//p(" in more details: (for fine-tuning)")
//p(" -hog.presm <f=1.0> prior image smoothing")
//p(" -hog.midsm <f=1.0> intermediate HOG smoothing")
//p(" -hog.sig <f=0.2> sigmoid strength")
//p(" -hog.postsm <f=1.0> final HOG-smoothing")
//p(" -hog.ninth <f=0.3> robustness to pixel noise (eg. JPEG artifacts)")
p("")
p(" Matching parameters:")
//p(" -iccv_settings settings used for the ICCV paper")
//p(" -improved_settings (default) supposedly improved settings")
//p(" in more details: (for fine-tuning)")
p(" -downscale/-R <n=1> downsize the input images by a factor 2^n")
//p(" -overlap <n=999> use overlapping patches in image1 from level n")
//p(" -subref <n=0> 0: denser sampling or 1: not of image1 patches")
p(" -ngh_rad <n=0> if n>0: restrict matching to n pxl neighborhood")
p(" -nlpow <f=1.4> non-linear rectification x := x^f")
//p(" -maxima_mode <n=0> 0: from all top cells / 1: from local maxima")
//p(" -min_level <n=2> skip maxima in levels [0, 1, ..., n-1]")
p(" -mem <n=1> if n>0: optimize memory footprint (bit unstable)")
//p(" -scoring_mode <n=1> type of correspondence scoring mode (0/1)")
p("")
p(" Fully scale & rotation invariant DeepMatching:")
p(" if either one of these options is used, then this mode is activated:")
p(" -max_scale <factor=5> max scaling factor")
p(" -rot_range <from=0> <to=360> rotation range")
p("")
p(" Other parameters:")
p(" -resize <width> <height> to resize input images beforehand")
p(" -v increase verbosity")
p(" -nt <n> multi-threading with <n> threads")
if(language==EXE_OPTIONS) {
p(" -out <file_name> output correspondences in a file")
exit(1);}
}
bool endswith(const char *str, const char *suffix)
{
if(!str || !suffix) return false;
size_t lenstr = strlen(str);
size_t lensuffix = strlen(suffix);
if(lensuffix > lenstr) return false;
return strncmp(str + lenstr - lensuffix, suffix, lensuffix) == 0;
}
image_t* rescale_image( image_t* im, int width, int height )
{
image_t* res = image_new(width,height);
image_resize_bilinear_newsize(res, im, width, height);
image_delete(im);
return res;
}
const char *parse_options(dm_params_t *params, scalerot_params_t *sr_params, bool *use_scalerot, float *fx, float *fy, const int argc, const char **argv, const int language, image_t **im1, image_t **im2) {
int current_arg = 0;
const char* out_filename = NULL;
// parse options
while(current_arg < argc)
{
const char* a = argv[current_arg++];
#define isarg(key) !strcmp(a,key)
if(isarg("-h") || isarg("--help") ) usage(language);
// HOG and patch parameters
//else if(isarg("-hog.presm"))
// params->desc_params.presmooth_sigma = atof(argv[current_arg++]);
//else if(isarg("-hog.sig"))
// params->desc_params.hog_sigmoid = atof(argv[current_arg++]);
//else if(isarg("-hog.midsm"))
// params->desc_params.mid_smoothing = atof(argv[current_arg++]);
//else if(isarg("-hog.postsm"))
// params->desc_params.post_smoothing = atof(argv[current_arg++]);
//else if(isarg("-hog.ninth"))
// params->desc_params.ninth_dim = atof(argv[current_arg++]);
//else if(isarg("-hog.nrmpix"))
// params->desc_params.norm_pixels = atof(argv[current_arg++]);
else if(isarg("-png_settings")) {
params->desc_params.presmooth_sigma = 0; // no image smoothing since the image is uncompressed
params->desc_params.hog_sigmoid = 0.2;
params->desc_params.mid_smoothing = 1.5;
params->desc_params.post_smoothing = 1;
params->desc_params.ninth_dim = 0.1; } // low ninth_dim since image PSNR is high
else if(isarg("-jpg_settings")) {
params->desc_params.presmooth_sigma = 1; // smooth the image to remove jpg artifacts
params->desc_params.hog_sigmoid = 0.2;
params->desc_params.mid_smoothing = 1.5;
params->desc_params.post_smoothing = 1;
params->desc_params.ninth_dim = 0.3; } // higher ninth_dim because of pixel noise
// matching parameters
else if(isarg("-R") || isarg("-downscale"))
params->prior_img_downscale = atoi(argv[current_arg++]);
//else if(isarg("-overlap"))
// params->overlap = atoi(argv[current_arg++]);
//else if(isarg("-subref"))
// params->subsample_ref = atoi(argv[current_arg++]);
else if(isarg("-nlpow"))
params->nlpow = atof(argv[current_arg++]);
else if(isarg("-ngh_rad"))
params->ngh_rad = atoi(argv[current_arg++]);
// maxima parameters
//else if(isarg("-maxima_mode"))
// params->maxima_mode = atoi(argv[current_arg++]);
else if(isarg("-mem")) {
params->low_mem = atoi(argv[current_arg++]); }
//else if(isarg("-min_level"))
// params->min_level = atoi(argv[current_arg++]);
//else if(isarg("-scoring_mode"))
// params->scoring_mode = atoi(argv[current_arg++]);
//else if(isarg("-iccv_settings")) {
// params->prior_img_downscale = 2;
// params->overlap = 0; // overlap from level 0
// params->subsample_ref = 1;
// params->nlpow = 1.6;
// params->maxima_mode = 1;
// params->low_mem = 0;
// params->min_level = 2;
// params->scoring_mode = 0; }
//else if(isarg("-improved_settings")) {
// params->prior_img_downscale = 1; // less down-scale
// params->overlap = 999; // no overlap
// params->subsample_ref = 0; // dense patch sampling at every level in first image
// params->nlpow = 1.4;
// params->maxima_mode = 0;
// params->low_mem = 1;
// params->min_level = 2;
// params->scoring_mode = 1; } // improved scoring
//else if(isarg("-max_psize")) {
// params->max_psize = atoi(argv[current_arg++]); }
// scale & rot invariant version
else if(isarg("-scale") || isarg("-max_scale")) {
*use_scalerot = true;
float scale = atof(argv[current_arg++]);
sr_params->max_sc0 = sr_params->max_sc1 = int(1 + 2*log2(scale)); }
else if(isarg("-rot") || isarg("-rot_range")) {
*use_scalerot = true;
int min_rot = atoi(argv[current_arg++]);
int max_rot = atoi(argv[current_arg++]);
while( min_rot < 0 ) {
min_rot += 360;
max_rot += 360;
}
sr_params->min_rot = int(floor(0.5 + min_rot/45.));
sr_params->max_rot = int(floor(1.5 + max_rot/45.));
while( sr_params->max_rot - sr_params->min_rot > 8 )
sr_params->max_rot--;
assert( sr_params->min_rot < sr_params->max_rot ); }
// other parameters
else if(isarg("-resize")) {
assert((*im1)->width==(*im2)->width && (*im1)->height==(*im2)->height);
int width = atoi(argv[current_arg++]);
int height = atoi(argv[current_arg++]);
*fx *= (*im1)->width / float(width);
*fy *= (*im1)->height / float(height);
*im1 = rescale_image(*im1, width, height);
*im2 = rescale_image(*im2, width, height); }
else if(isarg("-v"))
params->verbose++;
else if(isarg("-nt")) {
params->n_thread = atoi(argv[current_arg++]);
if (params->n_thread==0)
params->n_thread = std::thread::hardware_concurrency(); }
else if(language == EXE_OPTIONS && isarg("-out"))
out_filename = argv[current_arg++];
else {
err_printf("error: unexpected parameter '%s'", a);
exit(-1);
}
}
if( *use_scalerot )
assert( params->ngh_rad == 0 || !"max trans cannot be used in full scale and rotation mode");
else
if( params->subsample_ref && (!ispowerof2((*im1)->width) || !ispowerof2((*im1)->height)) ) {
err_printf("WARNING: first image has dimension which are not power-of-2\n");
err_printf("For improved results, you should consider resizing the images with '-resize <w> <h>'\n");
}
return out_filename;
}
int main(int argc, const char ** argv)
{
if( argc<=2 || !strcmp(argv[1],"-h") || !strcmp(argv[1],"--help") ) usage(EXE_OPTIONS);
int current_arg = 3;
image_t *im1=NULL, *im2=NULL;
{
color_image_t *cim1 = color_image_load(argv[1]);
color_image_t *cim2 = color_image_load(argv[2]);
// The following is deactivated because it is rather useless/dangerous in practice;
// the default behavior is thus always equivalent to -jpg_settings
//if( endswith(argv[1],"png") || endswith(argv[1],"PNG") )
// argv[--current_arg] = "-png_settings"; // set default
//if( endswith(argv[1],"ppm") || endswith(argv[1],"PPM") )
// argv[--current_arg] = "-png_settings"; // set default
//if( endswith(argv[1],"jpg") || endswith(argv[1],"JPG") )
// argv[--current_arg] = "-jpg_settings"; // set default
//if( endswith(argv[1],"jpeg") || endswith(argv[1],"JPEG") )
// argv[--current_arg] = "-jpg_settings"; // set default
im1 = image_gray_from_color(cim1);
im2 = image_gray_from_color(cim2);
color_image_delete(cim1);
color_image_delete(cim2);
}
// set params to default
dm_params_t params;
set_default_dm_params(&params);
scalerot_params_t sr_params;
set_default_scalerot_params(&sr_params);
bool use_scalerot = false;
float fx=1, fy=1;
// parse options
const char* out_filename = parse_options(&params, &sr_params, &use_scalerot, &fx, &fy, argc-current_arg,
&argv[current_arg], EXE_OPTIONS, &im1, &im2);
// compute deep matching
float_image* corres = use_scalerot ?
deep_matching_scale_rot( im1, im2, &params, &sr_params ) :
deep_matching ( im1, im2, &params, NULL ); // standard call
// save result
output_correspondences( out_filename, (corres_t*)corres->pixels, corres->ty, fx, fy );
free_image(corres);
image_delete(im1);
image_delete(im2);
return 0;
}

@ -0,0 +1,30 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___MAIN_H___
#define ___MAIN_H___
#define EXE_OPTIONS 0
#define MATLAB_OPTIONS 1
#define PYTHON_OPTIONS 2
#include "deep_matching.h"
void usage(const int language);
const char* parse_options(dm_params_t *params, scalerot_params_t *sr_params, bool *use_scalerot, float *fx, float *fy, const int argc, const char **argv, const int language, image_t **im1, image_t **im2);
#endif

@ -0,0 +1,994 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "std.h"
#include "maxfilter.h"
#include "omp.h"
void _max_filter_3_horiz( float_image* img, float_image* res, int n_thread ) {
ASSERT_SAME_SIZE(img,res);
int j;
const int tx = img->tx;
const int ty = img->ty;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(j=0; j<ty; j++) {
int i;
float *p = img->pixels + j*tx;
float *r = res->pixels + j*tx;
float m = MAX(p[0],p[1]);
*r++ = m;
for(i=1; i<tx-1; i++) {
float m2 = MAX(p[i],p[i+1]);
*r++ = MAX(m,m2);
m=m2;
}
*r++ = m;
}
}
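/* worked example (added for clarity): on the row [1,5,2,4] the loop above
   produces [5,5,5,4] -- each output pixel is the max over a 3-pixel window
   clamped at the row borders; _max_filter_3_vert() below is the vertical
   counterpart, and composing the two passes gives the full 3x3 max filter. */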
void _max_filter_3_vert( float_image* img, float_image* res ) {
ASSERT_SAME_SIZE(img,res);
const int tx = img->tx;
const int ty = img->ty;
int j;
for(j=0; j<ty-1; j++) {
int i;
float *p = img->pixels + j*tx;
float *r = res->pixels + j*tx;
for(i=0; i<tx; i++) {
*r++ = MAX(p[i],p[i+tx]);
}
}
memcpy(res->pixels+(ty-1)*tx,res->pixels+(ty-2)*tx,tx*sizeof(float)); // copy last row
for(j=ty-2; j>0; j--) {
int i;
float *p = res->pixels + (j-1)*tx;
float *r = res->pixels + j*tx;
for(i=0; i<tx; i++) {
float r0 = *r;
*r++ = MAX(r0,p[i]);
}
}
}
void _max_filter_3( float_image* img, float_image* res, int n_thread ) {
_max_filter_3_vert(img,res);
_max_filter_3_horiz(res,res, res->ty>128? n_thread : 1);
}
void _max_filter_3_layers( float_layers* img, float_layers* res, int n_thread ) {
ASSERT_SAME_LAYERS_SIZE(img,res);
const long npix = img->tx*img->ty;
int l;
#if defined(USE_OPENMP)
omp_set_nested(0);
omp_set_dynamic(0);
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<img->tz; l++) {
float_image img2 = {img->pixels + l*npix,img->tx,img->ty};
float_image res2 = {res->pixels + l*npix,res->tx,res->ty};
_max_filter_3( &img2, &res2, n_thread );
}
}
/* Subsample an array, equivalent to res = img[:,::2,::2]
*/
void _subsample2( float_layers* img, float_layers* res, int n_thread ) {
const int n_layers = res->tz;
assert( img->tz==n_layers );
const int tx = res->tx;
const int ty = res->ty;
assert( (img->tx+1)/2 == tx );
assert( (img->ty+1)/2 == ty );
long l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
int x,y;
for(y=0; y<ty; y++) {
float* i = img->pixels + (l*img->ty + (2*y))*img->tx ;
float* r = res->pixels + (l*ty + y)*tx;
for(x=0; x<tx; x++)
r[x] = i[x<<1];
}
}
}
/* joint max-pooling and subsampling
*/
void _max_filter_3_and_subsample_layers( float_layers* img, float_layers* res, int n_thread ) {
const int n_layers = res->tz;
assert( img->tz==n_layers );
const int tx = res->tx;
const int ty = res->ty;
assert( tx>=2 && ty>=2 );
const int tx2 = img->tx;
const int ty2 = img->ty;
assert( (tx2+1)/2 == tx ); // tx2=3 => tx=2
assert( (ty2+1)/2 == ty );
long l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
// reset output
memset(res->pixels + l*tx*ty, 0, tx*ty*sizeof(float));
int x,y;
for(y=0; y<ty; y++) {
float* i = img->pixels + (l*ty2 + (2*y))*tx2 ;
float* r = res->pixels + (l*ty + y)*tx;
float* r2 = (y+1<ty) ? r + tx : r; // pointer to next row
#define maxEq(v,m) v = (m>v) ? m : v
// even rows of img
for(x=0; x<tx-1; x++) {
maxEq( r[x+0], *i ); // i[2*x+0]
i++;
maxEq( r[x+0], *i ); // i[2*x+1]
maxEq( r[x+1], *i ); // i[2*x+1]
i++;
}
// r[x+1] does NOT exist anymore
maxEq( r[x+0], *i ); // i[2*x+0]
i++;
if(x<tx2/2) { // i[2*x+i] exists
maxEq( r[x+0], *i ); // i[2*x+1]
i++;
}
assert((i-img->pixels)%tx2 == 0);
// odd rows of img
if (y<ty2/2) {
for(x=0; x<tx-1; x++) {
maxEq( r [x+0], *i ); // i[2*x+0]
maxEq( r2[x+0], *i ); // i[2*x+0]
i++;
maxEq( r [x+0], *i ); // i[2*x+1]
maxEq( r [x+1], *i ); // i[2*x+1]
maxEq( r2[x+0], *i ); // i[2*x+1]
maxEq( r2[x+1], *i ); // i[2*x+1]
i++;
}
// r[x+1] does NOT exist anymore
maxEq( r [x+0], *i ); // i[2*x+0]
maxEq( r2[x+0], *i ); // i[2*x+0]
i++;
if(x<tx2/2) { // i[2*x+i] exists
maxEq( r [x+0], *i ); // i[2*x+1]
maxEq( r2[x+0], *i ); // i[2*x+1]
i++;
}
}
assert((i-img->pixels)%tx2 == 0);
#undef maxEq
}
}
}
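/* note (added for clarity): this fused loop is equivalent to
   _max_filter_3_layers() followed by _subsample2(), without materializing
   the intermediate image; e.g. the row [a,b,c,d,e] maps to
   [max(a,b), max(b,c,d), max(d,e)]. */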
/* Subsample an array, equivalent to res = trueimg[:,offset_y::2,offset_x::2]
except at boundaries, where the rules are a bit more complex:
if img->tx % 2 == 0:
if offset_x % 2 == 0:
trueimg[offset_x+img->tx-1] is also sampled
else:
trueimg[offset_x] is also sampled
elif img->tx % 2 == 1:
trueimg[offset_x] is also sampled
...and likewise for y dimension.
*/
void _subsample2_offset( float_layers* img, int_image* offsets, float_layers* res, int n_thread ) {
const int n_layers = res->tz;
assert( img->tz==n_layers );
assert( offsets->tx==2 && offsets->ty==n_layers );
const int tx = res->tx;
const int ty = res->ty;
assert( (img->tx+2)/2 == tx );
assert( (img->ty+2)/2 == ty );
long l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
int x,y;
const int ox = (offsets->pixels[2*l]+0x10000) % 2;
const int oy = (offsets->pixels[2*l+1]+0x10000) % 2;
assert(ox>=0 && oy>=0);
#define get_img_2pos(x,tx,ox) MAX(0, MIN(img->tx-1, 2*x-ox))
for(y=0; y<ty; y++) {
float* i = img->pixels + (l*img->ty + get_img_2pos(y,ty,oy))*img->tx;
float* r = res->pixels + (l*ty + y)*tx;
r[0] = i[get_img_2pos(0,tx,ox)]; // first is special case
for(x=1; x<tx-1; x++)
r[x] = i[2*x-ox];
r[x] = i[get_img_2pos(x,tx,ox)]; // last is special case
}
#undef get_img_2pos
}
}
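/* worked example (added for clarity): with img->tx == 4 and an odd offset_x
   (ox == 1), get_img_2pos clamps 2*x-1 into [0, 3], so the three output
   columns sample input columns {0, 1, 3} -- the first and last columns are
   the boundary special cases described above. */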
/* Max-pool in 2x2 px non-overlapping cells
*/
void _maxpool2( float_layers* img, float_layers* res, int n_thread ) {
const int n_layers = res->tz;
assert( img->tz==n_layers );
const int tx = res->tx;
const int ty = res->ty;
assert( (img->tx)/2 == tx );
assert( (img->ty)/2 == ty );
long l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
int x,y;
for(y=0; y<ty; y++) {
float* i = img->pixels + (l*img->ty + (2*y))*img->tx ;
float* j = i + img->tx;
float* r = res->pixels + (l*ty + y)*tx;
for(x=0; x<tx; x++,i+=2,j+=2) {
float mi = MAX(i[0],i[1]);
float mj = MAX(j[0],j[1]);
r[x] = MAX(mi,mj);
}
}
}
}
/* average-pool in 2x2 px non-overlapping cells
*/
void _avgpool2( float_layers* img, float_layers* res, int n_thread ) {
const int n_layers = res->tz;
assert( img->tz==n_layers );
const int tx = res->tx;
const int ty = res->ty;
assert( (img->tx)/2 == tx );
assert( (img->ty)/2 == ty );
long l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
int x,y;
for(y=0; y<ty; y++) {
float* i = img->pixels + (l*img->ty + (2*y))*img->tx ;
float* j = i + img->tx;
float* r = res->pixels + (l*ty + y)*tx;
for(x=0; x<tx; x++,i+=2,j+=2) {
r[x] = 0.25*(i[0] + i[1] + j[0] + j[1]);
}
}
}
}
typedef struct {
int scale;
int layer;
int x,y;
float score;
} one_max;
typedef struct {
one_max* list;
int n_elems, n_alloc;
} maxima;
#include <pthread.h>
static pthread_mutex_t mutex0 = PTHREAD_MUTEX_INITIALIZER, mutex1 = PTHREAD_MUTEX_INITIALIZER;
static inline void add_one_max( maxima* list, int scale, int layer, int x, int y, float score ) {
pthread_mutex_lock (&mutex0);
if( list->n_alloc <= list->n_elems ) {
list->n_alloc = 3*(list->n_alloc+64)/2;
list->list = (one_max*)realloc(list->list, sizeof(one_max)*list->n_alloc);
}
one_max* m = &list->list[list->n_elems++];
m->scale = scale;
m->layer = layer;
m->x = x;
m->y = y;
m->score = score;
pthread_mutex_unlock (&mutex0);
}
void _get_list_parents( int_cube* children, int_image* res ) {
const int np2 = children->tz;
assert( np2 == res->tx );
const int n_cells_at_prev_scale = res->ty;
int* parents = res->pixels;
memset(parents,0xFF,n_cells_at_prev_scale*np2*sizeof(int)); // =-1 by default
int i,j,ncells=children->tx*children->ty;
int* cur = children->pixels;
for(i=0; i<ncells; i++)
for(j=0; j<np2; j++) {
int c = *cur++;
if(c<0) continue; // this one is not a real children
parents[np2*c + j] = i;
}
}
static inline int* get_list_parents( int_cube* children, int n_cells_at_prev_scale ) {
const int np2 = children->tz;
int_image res = {NEWA(int, n_cells_at_prev_scale*np2 ), np2, n_cells_at_prev_scale};
_get_list_parents( children, &res );
return res.pixels;
}
/* Return a list of local maxima in the scale-space of scores
*/
void _extract_maxima( res_scale* scales, int n_scales, float_array* sc_factor, float th, int min_scale, float nlpow,
int check_parents, int check_children, int nobordure, int_image* res_out, int n_thread ) {
assert( sc_factor->tx == n_scales );
assert( min_scale>=0 && min_scale<n_scales );
const float* scf = sc_factor->pixels;
maxima res = {NULL,0,0};
int s;
// compute the maximum filter for each scale separately
const int min_scale_max = MAX(0,min_scale);
for(s=min_scale_max; s<n_scales; s++) {
res_scale* sc = scales + s;
float_layers r = sc->res_map;
assert(sc->max_map.pixels==NULL); // not already allocated
sc->max_map = r; // initialize tx,ty,tz
sc->max_map.pixels = NEWA(float, r.tx*r.ty*r.tz );
_max_filter_3_layers( &r, &sc->max_map, n_thread );
}
// then localize the local maxima in the scale-space
for(s=min_scale; s<n_scales; s++) {
res_scale* sc = scales + s;
const int tx = sc->res_map.tx;
const int ty = sc->res_map.ty;
const long npix = tx*ty;
const int n_layers = sc->assign.tx;
// helpful values...
const int f = sc->f;
const int upper_tx = (s+1<n_scales) ? sc[+1].res_map.tx : 0;
const int upper_ty = (s+1<n_scales) ? sc[+1].res_map.ty : 0;
const int upper_npix = upper_tx*upper_ty;
const float upper_scf= (s+1<n_scales) ? scf[s]/scf[s+1] : 0;
const int np2 = (s+1<n_scales) ? sc[+1].children.tz : 0;
const int np = (int)sqrt(np2);
const int upper_f = (s+1<n_scales) ? sc[+1].f : 0;
const int upper_gap = (s+1<n_scales) ? sc[+1].patch_size/4 : 0;
const float* upper_layers = (s+1<n_scales) ? sc[+1].max_map.pixels : NULL;
const int* upper_assign = (s+1<n_scales) ? sc[+1].assign.pixels : NULL;
const int* list_parents = (s+1<n_scales) && check_parents ? get_list_parents(&sc[+1].children,sc->grid.tx*sc->grid.ty) : NULL;
const int down_tx = (s>min_scale_max) ? sc[-1].res_map.tx : 0;
const int down_ty = (s>min_scale_max) ? sc[-1].res_map.ty : 0;
const int down_npix = down_tx*down_ty;
const float down_scf= (s>min_scale_max) ? scf[s]/scf[s-1] : 0;
const int nc2 = (s>min_scale_max) ? sc->children.tz : 0;
const int nc = (int)sqrt(nc2);
const int down_gap = sc->patch_size/4;
const int down_f = (s>min_scale_max) ? sc[-1].f : 0;
const float* down_layers = (s>min_scale_max) ? sc[-1].max_map.pixels : NULL;
const int* down_assign = (s>min_scale_max) ? sc[-1].assign.pixels : NULL;
int l;
#if defined(USE_OPENMP)
#pragma omp parallel for num_threads(n_thread)
#endif
for(l=0; l<n_layers; l++) {
// compute maxima_filter for each layer
if(sc->assign.pixels[l]<0) continue; // no layer for this
float* res_map = sc->res_map.pixels + sc->assign.pixels[l]*npix;
float* max_map = sc->max_map.pixels + sc->assign.pixels[l]*npix;
// for each point which is a local maxima, check
int i;
for(i=0; i<npix; i++)
if( res_map[i]>th && res_map[i]==max_map[i] ) {
// ok, we have a maxima at this scale <s>
const float val = res_map[i];
int x = i%tx;
int y = i/tx;
if( nobordure && (x<1 || y<1 || x>=tx-1 || y>=ty-1) ) continue; // not interested in maxima on image bordures
//if(s==2 && l==344 && x==41 && y==4) getchar();
// now compare with lower scale
if( check_children && s>min_scale_max ) {
float valref = down_scf*val;
int* children = sc->children.pixels + l*nc2;
int u,v,ok=1;
for(v=0; ok && v<nc; v++) {
int uy = (f*y + (2*v/(nc-1)-1)*down_gap)/down_f;
if( uy>=0 && uy<down_ty )
for(u=0; u<nc; u++) {
int ch = children[v*nc+u];
if( ch < 0 ) continue;
int ux = (f*x + (2*u/(nc-1)-1)*down_gap)/down_f;
if( (ux>=0 && ux<down_tx) &&
valref < pow(down_layers[down_assign[ch]*down_npix + uy*down_tx + ux],nlpow) ) {ok = 0; break;}
}
}
if(!ok) continue; // this is not a maximum
}
//if(s==2 && l==344 && x==41 && y==4) getchar();
// now compare with upper scale <s+1> and eliminate non-maxima
if( check_parents && list_parents ) {
float valref = upper_scf*val;
const int* parents = list_parents + l*np2;
int u,v,ok=1;
for(v=0; ok && v<np; v++) {
int uy = (f*y + (1-2*v/(np-1))*upper_gap)/upper_f;
if( uy>=0 && uy<upper_ty )
for(u=0; u<np; u++) {
const int p = parents[v*np+u];
if( p<0 ) continue;
int ux = (f*x + (1-2*u/(np-1))*upper_gap)/upper_f;
if( (ux>=0 && ux<upper_tx) &&
valref < upper_layers[upper_assign[p]*upper_npix + uy*upper_tx + ux] ) {ok = 0; break;}
}
}
if(!ok) continue; // this is not a maximum
}
add_one_max( &res, s, l, x, y, res_map[i] );
}
}
free((void*)list_parents);
}
// free memory
for(s=min_scale_max; s<n_scales; s++) {
free(scales[s].max_map.pixels);
scales[s].max_map.pixels = NULL;
}
res_out->tx = 5;
res_out->ty = res.n_elems;
res_out->pixels = (int*)res.list;
}
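/* note (added for clarity): res_out is returned as an int_image with tx == 5
   whose rows are the one_max records above, i.e. (scale, layer, x, y, score);
   the score field is a float stored bit-for-bit in the int array. */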
/* Return the best local children assignment in a 3x3 neighborhood
   l,u,v is the approximate position of the children in the corresponding response map[l,v,u]
*/
static inline float _local_argmax( long l, int u, int v, const float_layers* map, int extended, /*float reg,*/ int* x, int* y ) {
assert(0<=l && l<map->tz);
int umin = MAX(0, u-1);
int vmin = MAX(0, v-1);
const int etx = map->tx-extended; // because of extended response map
const int ety = map->ty-extended;
int umax = MIN(etx, u+2);
int vmax = MIN(ety, v+2);
// determine best children in the neighborhood (argmax)
const int tx = map->tx;
int i,j,bestx=0,besty=0; float m=0.f;
const float *r = map->pixels + l*tx*map->ty;
for(j=vmin; j<vmax; j++)
for(i=umin; i<umax; i++) {
const int p = j*tx+i;
if(r[p]>m) {m=r[p]; bestx=i; besty=j;}
}
*x = bestx;
*y = besty;
return m;
}
/* Return the best assignment (= list of correspondences) for a given maximum.
   Starting from a pyramid top cell, this function returns
   a list of weighted correspondences (matches) between
   img0 pixels and img1 pixels
*/
void _argmax_correspondences_rec( res_scale* scales, int s, int l, int x, int y,
float_cube* res0, int step0, float_cube* res1, int step1,
int index, float score ) {
res_scale* sc = scales + s;
if(s==0) {
const int x0 = sc->grid.pixels[2*l];
const int y0 = sc->grid.pixels[2*l+1];
const int x1 = sc->f * x;
const int y1 = sc->f * y;
const int qx0 = x0/step0;
const int qy0 = y0/step0;
//assert(0<=l && l<sc->res_map.tz);
if( qx0<res0->tx && qy0<res0->ty ) {
assert(qx0>=0 && qy0>=0);
float* r0 = res0->pixels + ((qy0*res0->tx + qx0))*res0->tz;
//assert(res0->pixels<=r0 && r0+5<res0->pixels+res0->tx*res0->ty*res0->tz);
pthread_mutex_lock (&mutex0);
if( score > r0[4] ) {
// r[0:2] = pos in img0
r0[0] = x0;
r0[1] = y0;
// r[2:4] = pos in img1
r0[2] = x1;
r0[3] = y1;
// r[4] = score
r0[4] = score;
r0[5] = index;
}
pthread_mutex_unlock (&mutex0);
const int qx1 = x1/step1;
const int qy1 = y1/step1;
assert(qx1>=0 && qy1>=0);
if( qx1<res1->tx && qy1<res1->ty ) {
float* r1 = res1->pixels + ((qy1)*res1->tx + (qx1))*res1->tz;
//assert(res1->pixels<=r1 && r1+5<res1->pixels+res1->tx*res1->ty*res1->tz);
pthread_mutex_lock (&mutex1);
if( score > r1[4] ) {
// r[0:2] = pos in img0
r1[0] = x0;
r1[1] = y0;
// r[2:4] = pos in img1
r1[2] = x1;
r1[3] = y1;
// r[4] = score
r1[4] = score;
r1[5] = index;
}
pthread_mutex_unlock (&mutex1);
}
}
} else {
// mark this maximum as already processed
assert(0<=l && l<sc->assign.tx);
if( sc->passed.pixels ) {
const long truel = sc->assign.pixels[l];
const long offset = ((truel*sc->true_shape[1] + MAX(0,y))*sc->true_shape[0] + MAX(0,x)) % sc->passed.tx;
//pthread_mutex_lock (&mutex);
int useless = ( sc->passed.pixels[offset] >= score );
if(!useless) sc->passed.pixels[offset] = score;
//pthread_mutex_unlock (&mutex);
if(useless) return; // this maximum was already investigated with a better score
}
const int f = sc->f;
const res_scale* lower = &scales[s-1];
const int lower_f = lower->f;
// position in lower response map
x *= f/lower_f;
y *= f/lower_f;
const int lower_gap = sc->patch_size/(4*lower_f); // gap is equal to patch_size/4 in absolute size
const int nc2 = sc->children.tz;
const int nc = (nc2==4) ? 2 : 3;
const int* children = sc->children.pixels + l*nc2;
const int* lower_ass = lower->assign.pixels;
// for all children
int u,v,c=0;
for(v=0; v<nc; v++) {
for(u=0; u<nc; u++,c++) {
const int ch = children[c];
if(ch<0) continue;
const long l = lower_ass[ch];
if(l<0) continue;
// position of children in child1 = parent1 - (parent0-child0)
int yc = y + (2*v/(nc-1)-1)*lower_gap;
int xc = x + (2*u/(nc-1)-1)*lower_gap;
int ex = 1; // extended response_maps
if( lower->offsets.pixels ) {
// take offsets into account
xc -= lower->offsets.pixels[2*l+0];
yc -= lower->offsets.pixels[2*l+1];
ex = 0; // no extension... maybe
}
// position of children in child1 = parent1 - (parent0-child0)
int xb, yb;
float child_score = _local_argmax( lower_ass[ch], xc, yc, &lower->res_map, ex, &xb, &yb );
if( lower->offsets.pixels ) {
// back to real image coordinates
xb += lower->offsets.pixels[2*l+0];
yb += lower->offsets.pixels[2*l+1];
}
if( child_score )
_argmax_correspondences_rec( scales, s-1, ch, xb, yb, res0, step0, res1, step1, index, score + child_score );
}
}
}
}
void _argmax_correspondences( res_scale* scales, int s, int l, int x, int y, float score,
float_cube* res0, int step0, float_cube* res1, int step1,
int index ) {
assert(res0->tz==6);
if(res1) assert(res0->tz==6);
_argmax_correspondences_rec( scales, s, l, x, y, res0, step0, res1, step1, index, score );
}
void _argmax_correspondences_rec_v1( res_scale* scales, int s, int l, int x, int y,
float_cube* res0, int step0, float_cube* res1, int step1,
int index, float top_score ) {
res_scale* sc = scales + s;
const int f = sc->f;
if(s==0) {
const int* ass = sc->assign.pixels;
const float score = top_score * sc->res_map.pixels[(ass[l]*sc->res_map.ty + y)*sc->res_map.tx + x];
const int x0 = sc->grid.pixels[2*l];
const int y0 = sc->grid.pixels[2*l+1];
const int x1 = f * x;
const int y1 = f * y;
const int qx0 = x0/step0;
const int qy0 = y0/step0;
if( qx0<res0->tx && qy0<res0->ty ) {
float* r0 = res0->pixels + ((qy0*res0->tx + qx0))*res0->tz;
pthread_mutex_lock (&mutex0);
if( score > r0[4] ) {
// r[0:2] = pos in img0
r0[0] = x0;
r0[1] = y0;
// r[2:4] = pos in img1
r0[2] = x1;
r0[3] = y1;
// r[4] = score
r0[4] = score;
r0[5] = index;
}
pthread_mutex_unlock (&mutex0);
if( res1 ) {
const int qx1 = x1/step1;
const int qy1 = y1/step1;
// if( qx1<res1->tx && qy1<res1->ty ) { // useless check
float* r1 = res1->pixels + ((qy1)*res1->tx + (qx1))*res1->tz;
pthread_mutex_lock (&mutex1);
if( score > r1[4] ) {
// r[0:2] = pos in img0
r1[0] = x0;
r1[1] = y0;
// r[2:4] = pos in img1
r1[2] = x1;
r1[3] = y1;
// r[4] = score
r1[4] = score;
r1[5] = index;
}
pthread_mutex_unlock (&mutex1);
}}
} else {
const res_scale* lower = &scales[s-1];
const int lower_f = lower->f;
// position in lower response map
x *= f/lower_f;
y *= f/lower_f;
const int lower_gap = sc->patch_size/(4*lower_f); // gap is equal to patch_size/4 in absolute size
const int nc2 = sc->children.tz;
const int nc = (nc2==4) ? 2 : 3;
const int* children = sc->children.pixels + l*nc2;
const int* lower_ass = lower->assign.pixels;
// remember all scores for all children
int u,v,c=0;
for(v=0; v<nc; v++) {
const int yc = y + (2*v/(nc-1)-1)*lower_gap;
for(u=0; u<nc; u++,c++) {
int ch = children[c];
if(ch<0) continue;
const int xc = x + (2*u/(nc-1)-1)*lower_gap;
// position of children in child1 = parent1 - (parent0-child0)
const int l = lower_ass[children[c]];
int xb=0, yb=0;
float child_score = _local_argmax( l, xc, yc, &lower->res_map, 1, &xb, &yb );
if( child_score>0 )
_argmax_correspondences_rec_v1( scales, s-1, ch, xb, yb, res0, step0, res1, step1, index, top_score );
}
}
}
}
void _argmax_correspondences_v1( res_scale* scales, int s, int l, int x, int y, float top_score,
float_cube* res0, int step0, float_cube* res1, int step1,
int index ) {
assert(res0->tz==6);
if(res1) assert(res0->tz==6);
_argmax_correspondences_rec_v1( scales, s, l, x, y, res0, step0, res1, step1, index, top_score );
}
static float** get_list_corres( const float_cube* map, int* nb ) {
const int tz = map->tz;
float* m = map->pixels;
const long npix = map->tx*map->ty;
float** res = NEWA(float*,npix);
int i,n=0;
for(i=0; i<npix; i++,m+=tz)
if(m[4]) { // if score non-null
res[n++] = m; // remember pointer
}
*nb = n;
return res;
}
static inline int cmp_corres( const void* a, const void* b) {
return memcmp(*(float**)a,*(float**)b,4*sizeof(float));
}
/* Intersect 2 mappings: erase all correspondences that are not reciprocal
*/
float* _intersect_corres( const float_cube* map0, const float_cube* map1, int* nres ) {
const int tz = 6;
assert( map0->tz==tz && map1->tz==tz );
// build the list of triplets
int n0,n1;
float** const corres0 = get_list_corres(map0,&n0);
float** const corres1 = get_list_corres(map1,&n1);
// arg-sort the lists
qsort( corres0, n0, sizeof(float*), cmp_corres );
qsort( corres1, n1, sizeof(float*), cmp_corres );
// remove all correspondences from map0/map1 that are not shared
float** c0 = corres0;
float** c1 = corres1;
float** const c0max = corres0 + n0;
float** const c1max = corres1 + n1;
float* res = NEWA(float, tz*MIN(n1,n0) );
float* r = res;
while(c0<c0max && c1<c1max) {
int d = memcmp(*c0,*c1,5*sizeof(float));
if(d<0) { // corres0 < corres1
c0++;
} else
if(d>0) { // corres0 > corres1
c1++;
} else { // corres0 == corres1
if( r==res || memcmp( r-tz, *c0, tz*sizeof(float) ) ) { // if not already copied
memcpy( r, *c0, tz*sizeof(float) );
r += tz;
}
c0++;
c1++;
}
}
free(corres0);
free(corres1);
*nres = (r-res)/tz;
return res;
}
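/* note (added for clarity): map0 is binned by img0 positions and map1 by
   img1 positions, each bin keeping only its best-scoring match, so the
   sorted-merge above retains exactly the reciprocal correspondences, i.e.
   those that won their bin in both images. */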
/* erase corres in the first array that are not in the second one
*/
void transfer_corres_score( const float_image* ref, float_cube* map0 ) {
const int tz = 6;
assert( map0->tz==tz && ref->tx==tz );
// build the list of triplets
int n0,n1;
float** const corres0 = get_list_corres(map0,&n0);
float_cube map1 = {ref->pixels,1,ref->ty,ref->tx};
float** const corres1 = get_list_corres(&map1,&n1);
// arg-sort the lists
qsort( corres0, n0, sizeof(float*), cmp_corres );
qsort( corres1, n1, sizeof(float*), cmp_corres );
// remove all correspondences from map0/map1 that are not shared
float** c0 = corres0;
float** c1 = corres1;
float** const c0max = corres0 + n0;
float** const c1max = corres1 + n1;
while(c0<c0max && c1<c1max) {
int d = memcmp(*c0,*c1,4*sizeof(float));
if(d<0) { // corres0 < corres1
c0++;
} else
if(d>0) { // corres0 > corres1
assert(!"error: 'ref in map0' is not verified");
c1++;
} else { // corres0 == corres1
(*c0)[4] = (*c1)[4]; // copy score from ref
c0++;
c1++;
}
}
while(c0<c0max) memset( *c0++, 0, tz*sizeof(float));
free(corres0);
free(corres1);
}
static inline float ptdot( const float* m, float x, float y ) {
return x*m[0] + y*m[1] + m[2];
}
static void merge_one_side( const float aff[6], int step, float_cube* corres, float tol,
int all_step, float_cube* all_corres, int offset ) {
assert( corres->tz==6 && all_corres->tz==6 );
const float* corres_pix = corres->pixels;
assert(tol>=1);
tol*=tol; // squared
float dmax = 2*step / sqrt( aff[0]*aff[4] - aff[1]*aff[3] );
dmax*=dmax; // squared
// for each bin of the final histograms, we get the nearest-neighbour bin in corres0 and corres1
int i,j;
for(j=0; j<all_corres->ty; j++)
for(i=0; i<all_corres->tx; i++) {
float* all_cor = all_corres->pixels + (j*all_corres->tx + i)*corres->tz;
// center of the bin in the reference frame
float x = i*all_step + all_step/2;
float y = j*all_step + all_step/2;
// center of the bin on the rescaled+rotated image
float xr = ptdot( aff + 0, x, y );
float yr = ptdot( aff + 3, x, y );
// iterate on the nearby bins
int xb = (int)(0.5+ xr/step); // rescaled+rotated image is binned with size <step>
int yb = (int)(0.5+ yr/step);
int u,v;
float best = 9e9f;
for(v=MAX(0,yb-1); v<MIN(corres->ty,yb+2); v++)
for(u=MAX(0,xb-1); u<MIN(corres->tx,xb+2); u++) {
const float* cor = corres_pix + (v*corres->tx + u)*corres->tz;
float d = pow2(cor[offset]-x) + pow2(cor[offset+1]-y);
if( d < best && d<dmax ) best = d;
}
for(v=MAX(0,yb-1); v<MIN(corres->ty,yb+2); v++)
for(u=MAX(0,xb-1); u<MIN(corres->tx,xb+2); u++) {
const float* cor = corres_pix + (v*corres->tx + u)*corres->tz;
float d = pow2(cor[offset]-x) + pow2(cor[offset+1]-y);
if( d <= tol*best ) { // spatially close
// merge correspondence if score is better than actual
if( cor[4] > all_cor[4] )
memcpy( all_cor, cor, 6*sizeof(float) );
}
}
}
}
/* merge correspondences from several rotated/scaled version of an image into a single common reference frame
rot0 = 2x3 rotation matrix: (pt in rotated img0) = rot0 * (pt in ref frame)
rot1 = 2x3 rotation matrix: (pt in rotated img1) = rot1 * (pt in ref frame)
step0 and step1 are bin size of correspondences histograms
tol >= 1 is the tolerance to grid rotation (default = 2)
corres0, corres1: correspondences histograms of rotated image
all_corres0, all_corres1: correspondences histograms of reference frame (result)
*/
void merge_corres( float rot0[6], float rot1[6], int step0, int step1,
float_cube* corres0, float_cube* corres1, float tol,
int all_step0, int all_step1, float_cube* all_corres0, float_cube* all_corres1 ) {
merge_one_side( rot0, step0, corres0, tol, all_step0, all_corres0, 0 );
merge_one_side( rot1, step1, corres1, tol, all_step1, all_corres1, 2 );
}

@ -0,0 +1,142 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___MAXFILTER_H___
#define ___MAXFILTER_H___
#include "array_types.h"
#include "deep_matching.h"
/* compute the 3x3 maximum filter on an image and store the result in <res>
*/
void _max_filter_3( float_image* img, float_image* res, int n_thread );
/* Same as above for float_layers* images
*/
void _max_filter_3_layers( float_layers* img, float_layers* res, int n_thread );
/* Subsample an array, equivalent to res = img[:,::2,::2]
*/
void _subsample2( float_layers* img, float_layers* res, int n_thread );
/* joint max-pooling and subsampling
*/
void _max_filter_3_and_subsample_layers( float_layers* img, float_layers* res, int n_thread );
/* Subsample an array, equivalent to res = trueimg[:,offset_y::2,offset_x::2]
except at boundaries, where the rules are a bit more complex (see code)
*/
void _subsample2_offset( float_layers* img, int_image* offsets, float_layers* res, int n_thread );
/* Max-pool in 2x2 px non-overlapping cells
*/
void _maxpool2( float_layers* img, float_layers* res, int n_thread );
/* average-pool in 2x2 px non-overlapping cells
*/
void _avgpool2( float_layers* img, float_layers* res, int n_thread );
/* Return the list of parent cells of all cells of a given scale (parents are from the upper scale)
children: list of children of the parent cells
res: result matrix, n_cells_at_current_scale x n_max_parents
res == -1 when there is no parent
*/
void _get_list_parents( int_cube* children, int_image* res );
/* Return a list of local maxima in the scale-space of scores
*/
void _extract_maxima( res_scale* scales, int n_scales, float_array* sc_factor, float th, int min_scale, float nlpow,
int check_parents, int check_children, int nobordure, int_image* res_out, int n_thread );
/* Return the best assignment (= list of correspondences) for a given maximum.
   Starting from a pyramid top cell, this function returns
   a list of weighted correspondences (matches) between
   img0 pixels and img1 pixels
   index = index of the maximum (s,l,x,y), so that it can be linked to the correspondences it generated
*/
void _argmax_correspondences( res_scale* scales, int s, int l, int x, int y, float score,
float_cube* res0, int step0, float_cube* res1, int step1,
int index );
void _argmax_correspondences_v1( res_scale* scales, int s, int l, int x, int y, float score,
float_cube* res0, int step0, float_cube* res1, int step1,
int index );
/* Intersect 2 mappings: erase all correspondences that are not reciprocal
*/
float* _intersect_corres( const float_cube* map0, const float_cube* map1, int* nres );
/* erase corres in the first array that are not in the second one
*/
void transfer_corres_score( const float_image* ref, float_cube* map0 );
/* merge correspondences from several rotated/scaled version of an image into a single common reference frame
rot0 = 2x3 rotation matrix: (pt in rotated img0) = rot0 * (pt in ref frame)
rot1 = 2x3 rotation matrix: (pt in rotated img1) = rot1 * (pt in ref frame)
step0 and step1 are bin size of correspondences histograms
tol >= 1 is the tolerance to grid rotation (default = 2)
corres0, corres1: correspondences histograms of rotated image
all_corres0, all_corres1: correspondences histograms of reference frame (result)
*/
void merge_corres( float rot0[6], float rot1[6], int step0, int step1,
float_cube* corres0, float_cube* corres1, float tol,
int all_step0, int all_step1, float_cube* all_corres0, float_cube* all_corres1 );
#endif

@ -0,0 +1,150 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#include "pixel_desc.h"
#include "std.h"
#include "image.h"
#include "hog.h"
#include "conv.h"
/* convert a float image to a consecutive array
a bit stupid but well
*/
UBYTE_image* image_to_arraytype( image_t* img ) {
UBYTE_image* res = NEW(UBYTE_image);
*res = empty_image(UBYTE,img->width,img->height);
for(int j=0; j<img->height; j++)
for(int i=0; i<img->width; i++)
res->pixels[i+j*res->tx] = (UBYTE)img->data[i+j*img->stride];
return res;
}
// set default params
void set_default_desc_params( desc_params_t* params )
{
// default = jpg settings,
// better in almost all cases
params->presmooth_sigma = 1.0;
params->mid_smoothing = 1.0;
params->post_smoothing = 1.0;
params->hog_sigmoid = 0.2;
params->ninth_dim = 0.3;
params->norm_pixels = false;
}
/* extract pixel descriptors (pixel-wise HOG)
*/
float_layers* extract_desc( image_t* _img, const desc_params_t* params, int nt )
{
// verify parameters
assert(between(0,params->presmooth_sigma,3));
assert(between(0,params->mid_smoothing,3));
assert(between(0,params->post_smoothing,3));
assert(between(0.05,params->hog_sigmoid,0.8));
assert(between(0,params->ninth_dim,1));
assert(between(0,params->norm_pixels,1));
UBYTE_image* img = image_to_arraytype(_img); // could be optimized but well
const int npix = img->tx*img->ty;
//hash_image(img)D(img->tx)D(img->ty)
// pre-smooth image
assert( params->presmooth_sigma>=0 );
if( params->presmooth_sigma>0 )
_smooth_gaussian( img, params->presmooth_sigma, img, nt );
//hash_image(img)
// extract HOG
float_layers grad = {NEWA(float,npix*2),img->tx,img->ty,2};
_compute_grad_101( img, 0, &grad, nt );
//hash_cube(&grad)
float_layers* hog = NEW(float_layers);
*hog = {NEWA(float,9*npix),img->tx,img->ty,8};
_compute_hog( &grad, 1, hog, nt );
free(grad.pixels);
free_image(img);
//hash_layers(hog)
// mid smoothing
assert( params->mid_smoothing>=0 );
if( params->mid_smoothing )
smooth_hog_gaussian( hog, params->mid_smoothing, nt );
//hash_layers(hog)
// apply non-linearity
assert( params->hog_sigmoid>=0 );
if( params->hog_sigmoid ) {
float_array hog_ravel = {hog->pixels,npix*hog->tz};
sigmoid_array( &hog_ravel, params->hog_sigmoid, 0, nt);
}
//hash_layers(hog)
// final smoothing
assert( params->post_smoothing>=0 );
if( params->post_smoothing )
smooth_hog_gaussian( hog, params->post_smoothing, nt );
//hash_layers(hog)
// add ninth dimension and normalize per-pixel
float* ninth_layer = hog->pixels + hog->tz*npix;
for(int i=0; i<npix; i++)
ninth_layer[i] = params->ninth_dim;
hog->tz++;
//hash_layers(hog)
if( params->norm_pixels )
norm_layers( hog, 1, nt );
//hash_layers(hog);D(0)getchar();
return hog;
}
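/* Usage sketch (added; not part of the original sources; gray_img is a
   hypothetical image_t* holding a grayscale image):
       desc_params_t params;
       set_default_desc_params(&params);
       float_layers* desc = extract_desc(gray_img, &params, 4); // 4 threads
       // desc->tz == 9: eight smoothed, sigmoid-rectified HOG orientation
       // layers plus one constant layer equal to params.ninth_dim
       free(desc->pixels);
       free(desc);
*/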

@ -0,0 +1,43 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___PIXEL_DESC_H___
#define ___PIXEL_DESC_H___
#include "image.h"
#include "array_types.h"
// pixel descriptor params
typedef struct {
float presmooth_sigma; // image pre-smoothing
float mid_smoothing; // smoothing of oriented gradients (before sigmoid)
float post_smoothing; // smoothing of oriented gradients (after sigmoid)
float hog_sigmoid; // sigmoid strength
float ninth_dim; // small constant for gradient-less area
bool norm_pixels; // 1: normalize pixels separately / 0: normalize atomic patches
} desc_params_t;
// set default params
void set_default_desc_params( desc_params_t* params );
/* extract pixel descriptors (pixel-wise HOG)
*/
float_layers* extract_desc( image_t* _img, const desc_params_t* params, int nt );
#endif

@ -0,0 +1,116 @@
import sys
from PIL import Image
from numpy import *
import scipy.ndimage
def score_from_autocorr(img0, img1, corres):
# Code by Philippe Weinzaepfel
# Compute autocorrelation
# parameters
sigma_image = 0.8 # for the gaussian filter applied to images before computing derivatives
sigma_matrix = 3.0 # for the integration gaussian filter
derivfilter = array([-0.5,0,0.5]) # filter used to compute the derivatives
# smooth_images
tmp = scipy.ndimage.filters.gaussian_filter1d(img0.astype(float32), sigma_image, axis=0, order=0, mode='nearest')
img0_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_image, axis=1, order=0, mode='nearest')
# compute the derivatives
img0_dx = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=0, mode='nearest')
img0_dy = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=1, mode='nearest')
# compute the auto correlation matrix
dx2 = sum(img0_dx*img0_dx,axis=2)
dxy = sum(img0_dx*img0_dy,axis=2)
dy2 = sum(img0_dy*img0_dy,axis=2)
# integrate it
tmp = scipy.ndimage.filters.gaussian_filter1d(dx2, sigma_matrix, axis=0, order=0, mode='nearest')
dx2_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_matrix, axis=1, order=0, mode='nearest')
tmp = scipy.ndimage.filters.gaussian_filter1d(dxy, sigma_matrix, axis=0, order=0, mode='nearest')
dxy_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_matrix, axis=1, order=0, mode='nearest')
tmp = scipy.ndimage.filters.gaussian_filter1d(dy2, sigma_matrix, axis=0, order=0, mode='nearest')
dy2_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_matrix, axis=1, order=0, mode='nearest')
# compute minimal eigenvalues: it is done by computing (dx2+dy2)/2 - sqrt( ((dx2+dy2)/2)^2 + (dxy)^2 - dx^2*dy^2)
tmp = 0.5*(dx2_smooth+dy2_smooth)
small_eigen = tmp - sqrt( maximum(0,tmp*tmp + dxy_smooth*dxy_smooth - dx2_smooth*dy2_smooth)) # the numbers can be negative in practice due to rounding errors
large_eigen = tmp + sqrt( maximum(0,tmp*tmp + dxy_smooth*dxy_smooth - dx2_smooth*dy2_smooth))
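    # note (added for clarity): this matches the closed-form eigenvalues of the
    # 2x2 structure tensor [[dx2, dxy], [dxy, dy2]]: since tmp**2 - dx2*dy2 ==
    # ((dx2_smooth - dy2_smooth)/2)**2, the term under the sqrt equals
    # ((a-b)/2)**2 + c**2, which is non-negative up to rounding error.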
# Compute weight as flow score: preparing variable
#parameters
sigma_image = 0.8 # gaussian applied to images
derivfilter = array([1.0,-8.0,0.0,8.0,-1.0])/12.0 # filter to compute the derivatives
sigma_score = 50.0 # gaussian to convert dist to score
mul_coef = 10.0 # multiplicative coefficient
# smooth images
tmp = scipy.ndimage.filters.gaussian_filter1d(img0.astype(float32), sigma_image, axis=0, order=0, mode='nearest')
img0_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_image, axis=1, order=0, mode='nearest')
tmp = scipy.ndimage.filters.gaussian_filter1d(img1.astype(float32), sigma_image, axis=0, order=0, mode='nearest')
img1_smooth = scipy.ndimage.filters.gaussian_filter1d(tmp, sigma_image, axis=1, order=0, mode='nearest')
# compute derivatives
img0_dx = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=0, mode='nearest')
img0_dy = scipy.ndimage.filters.convolve1d(img0_smooth, derivfilter, axis=1, mode='nearest')
img1_dx = scipy.ndimage.filters.convolve1d(img1_smooth, derivfilter, axis=0, mode='nearest')
img1_dy = scipy.ndimage.filters.convolve1d(img1_smooth, derivfilter, axis=1, mode='nearest')
# compute it
res = []
for pos0, pos1, score in corres:
p0, p1 = tuple(pos0)[::-1], tuple(pos1)[::-1] # numpy coordinates
dist = sum( abs(img0_smooth[p0]-img1_smooth[p1]) + abs(img0_dx[p0]-img1_dx[p1]) + abs(img0_dy[p0]-img1_dy[p1]) )
score = mul_coef * sqrt( max(0,small_eigen[p0])) / (sigma_score*sqrt(2*pi))*exp(-0.5*square(dist/sigma_score))
res.append((pos0,pos1,score))
return res
if __name__=='__main__':
args = sys.argv[1:]
img0 = array(Image.open(args[0]).convert('RGB'))
img1 = array(Image.open(args[1]).convert('RGB'))
out = open(args[2],'w') if len(args)>=3 else sys.stdout
ty0, tx0 = img0.shape[:2]
ty1, tx1 = img1.shape[:2]
rint = lambda s: int(0.5+float(s))
retained_matches = []
for line in sys.stdin:
line = line.split()
if not line or len(line)!=6 or not line[0][0].isdigit(): continue
x0, y0, x1, y1, score, index = line
retained_matches.append(((min(tx0-1,rint(x0)),min(ty0-1,rint(y0))),
(min(tx1-1,rint(x1)),min(ty1-1,rint(y1))),0))
assert retained_matches, 'error: no matches piped to this program'
for p0, p1, score in score_from_autocorr(img0, img1, retained_matches):
print('%d %d %d %d %f' % (p0[0],p0[1],p1[0],p1[1],score), file=out)
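# typical usage of this script (the script name below is hypothetical), fed with
# deepmatching output lines of the form "x0 y0 x1 y1 score index":
#   ./deepmatching img0.png img1.png | python rescore.py img0.png img1.png out.txt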

@ -0,0 +1,17 @@
#include "std.h"
#include <stdarg.h>
#include "stdio.h"
void std_printf(const char* format, ... ) {
va_list arglist;
va_start( arglist, format );
vprintf( format, arglist );
va_end(arglist);
}
void err_printf(const char* format, ... ) {
va_list arglist;
va_start( arglist, format );
vfprintf( stderr, format, arglist );
va_end(arglist);
}

@ -0,0 +1,132 @@
/*
Copyright (C) 2014 Jerome Revaud
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
#ifndef ___STD_H___
#define ___STD_H___
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
//#include <time.h>
#define MIN(a,b) (((a)<(b)) ? (a) : (b))
#define MAX(a,b) (((a)>(b)) ? (a) : (b))
#define SWAP(a,b,type) {type _t = a; a = b; b = _t;}
#define between(min,val,max) ((min)<=(val) && (val)<=(max))
#define NEWA(type,n) (type*)malloc(sizeof(type)*long(n))
#define NEWAC(type,n) (type*)calloc(sizeof(type),(n))
#define NEW(type) NEWA(type,1)
#define REALLOC(ptr,type,n) ptr = (type*)realloc(ptr, sizeof(type)*long(n))
/* debugging macros */
#define P(x) printf(#x " = %g\n",(double)(x));
#define D(x) P(x)
#define DA(x,nb) {int _iter; printf(#x " = {"); for(_iter=0; _iter<nb; _iter++) printf("%g,",(double)((x)[_iter])); puts("}");}
#define ASSERT(test,msg,p1) if(!(test)){fprintf(stderr," ---\n " msg "\n ---\n",p1); assert(0);}
#define EXIT(msg,p1) ASSERT(0,msg,p1)
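// swap two memory regions of nbytes bytes, one double-sized word at a time,
// then the remaining tail byte by byte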
static inline void memswap( void* a, void* b, unsigned int nbytes ) {
while(nbytes>=sizeof(double)) {
double tmp = *(double*)a;
*((double*&)a)++ = *(double*)b;
*((double*&)b)++ = tmp;
nbytes -= sizeof(double);
}
while(nbytes) {
char tmp = *(char*)a;
*((char*&)a)++ = *(char*)b;
*((char*&)b)++ = tmp;
nbytes--;
}
}
static inline float pow2( float f ) {
return f*f;
}
static inline bool ispowerof2( long n ) {
return (n & (n-1))==0;
}
const double INF = 1.0/0.0;
const double NaN = 0.0/0.0;
const int INT_MIN = 0x80000000;
const int INT_MAX = 0x7FFFFFFF;
const float FLOAT_MIN = -1e39; // converted to -inf
const float FLOAT_MAX = +1e39; // converted to +inf
inline float min_array_f(const float* a, int n) {
int i=n;
float res = FLOAT_MAX;
while(i--) if(a[i]<res) res=a[i];
return res;
}
inline float max_array_f(const float* a, int n) {
int i=n;
float res = FLOAT_MIN;
while(i--) if(a[i]>res) res=a[i];
return res;
}
// wrappers around printf, since Matlab cannot call it directly
void std_printf(const char* fmt, ... );
void err_printf(const char* fmt, ... );
//#include <sys/time.h>
//inline double now()
//{
// struct timeval tv;
// gettimeofday (&tv,NULL);
// return (tv.tv_sec*1e3 +tv.tv_usec*1e-3)/1000;
//}
//#define tic {double t = now();
//#define toc t=now()-t; printf("elapsed time = %g ms\n",1000*t);}
#endif

@ -0,0 +1,117 @@
import sys
from PIL import Image
from numpy import *
from matplotlib.pyplot import *
def show_correspondences( img0, img1, corr ):
assert corr.shape[-1]==6
corr = corr[corr[:,4]>0,:]
# make beautiful colors
center = corr[:,[1,0]].mean(axis=0) # center of the match cloud (alternative: image center, array(img0.shape[:2])/2)
corr[:,5] = arctan2(*(corr[:,[1,0]] - center).T)
corr[:,5] = int32(64*corr[:,5]/pi) % 128
set_max = set(corr[:,5])
colors = {m:i for i,m in enumerate(set_max)}
colors = {m:cm.hsv(i/float(len(colors))) for m,i in colors.items()}
def motion_notify_callback(event):
if event.inaxes is None: return
numaxis = event.inaxes.numaxis
if numaxis<0: return
x,y = event.xdata, event.ydata
ax1.lines = []
ax2.lines = []
n = sum((corr[:,2*numaxis:2*(numaxis+1)] - [x,y])**2,1).argmin() # find nearest point
x,y = corr[n,0:2]
ax1.plot(x,y,'+',ms=10,mew=2,color='blue',scalex=False,scaley=False)
x,y = corr[n,2:4]
ax2.plot(x,y,'+',ms=10,mew=2,color='red',scalex=False,scaley=False)
# we redraw only the concerned axes
renderer = fig.canvas.get_renderer()
ax1.draw(renderer)
ax2.draw(renderer)
fig.canvas.blit(ax1.bbox)
fig.canvas.blit(ax2.bbox)
def noticks():
xticks([])
yticks([])
clf()
ax1 = subplot(221)
ax1.numaxis = 0
imshow(img0,interpolation='nearest')
noticks()
ax2 = subplot(222)
ax2.numaxis = 1
imshow(img1,interpolation='nearest')
noticks()
ax = subplot(223)
ax.numaxis = -1
imshow(img0,interpolation='nearest')
for m in set_max:
plot(corr[corr[:,5]==m,0],corr[corr[:,5]==m,1],'+',ms=10,mew=2,color=colors[m],scalex=0,scaley=0)
noticks()
ax = subplot(224)
ax.numaxis = -1
imshow(img1,interpolation='nearest')
for m in set_max:
plot(corr[corr[:,5]==m,2],corr[corr[:,5]==m,3],'+',ms=10,mew=2,color=colors[m],scalex=0,scaley=0)
noticks()
subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.99,
wspace=0.02, hspace=0.02)
fig = get_current_fig_manager().canvas.figure
cid_move = fig.canvas.mpl_connect('motion_notify_event',motion_notify_callback)
show()
fig.canvas.mpl_disconnect(cid_move)
if __name__=='__main__':
args = sys.argv[1:]
img0 = array(Image.open(args[0]).convert('RGB'))
img1 = array(Image.open(args[1]).convert('RGB'))
retained_matches = []
for line in sys.stdin:
line = line.split()
if not line or len(line)!=6 or not line[0][0].isdigit(): continue
x0, y0, x1, y1, score, index = line
retained_matches.append((float(x0),float(y0),float(x1),float(y1),float(score),float(index)))
assert retained_matches, 'error: no matches piped to this program'
show_correspondences(img0, img1, array(retained_matches))

@ -0,0 +1,122 @@
# coding: utf-8
'''
File: matching.py
Project: AlphaPose
File Created: Monday, 1st October 2018 12:53:12 pm
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
'''
import os
import cv2
from tqdm import tqdm
import numpy as np
import time
import argparse
def generate_fake_cor(img, out_path):
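# when ORB finds too few keypoints, fall back to an identity flow:
# every pixel maps to itself with confidence 1.0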
print("Generate fake correspondence files...%s"%out_path)
fd = open(out_path,"w")
height, width, channels = img.shape
for x in range(width):
for y in range(height):
ret = fd.write("%d %d %d %d %f \n"%(x, y, x, y, 1.0))
fd.close()
def orb_matching(img1_path, img2_path, vidname, img1_id, img2_id):
out_path = "%s/%s_%s_orb.txt"%(vidname, img1_id, img2_id)
# print(out_path)
img1 = cv2.cvtColor(cv2.imread(img1_path), cv2.COLOR_BGR2RGB)
img2 = cv2.cvtColor(cv2.imread(img2_path), cv2.COLOR_BGR2RGB)
# Initiate ORB detector
orb = cv2.ORB_create(nfeatures=10000, scoreType=cv2.ORB_FAST_SCORE)
# find the keypoints and descriptors with ORB
kp1, des1 = orb.detectAndCompute(img1,None)
kp2, des2 = orb.detectAndCompute(img2,None)
if len(kp1)*len(kp2) < 400:
generate_fake_cor(img1, out_path)
return
# FLANN parameters
FLANN_INDEX_LSH = 6
index_params= dict(algorithm = FLANN_INDEX_LSH,
table_number = 12, # 12
key_size = 12, # 20
multi_probe_level = 2) #2
search_params = dict(checks=100) # or pass empty dictionary
flann = cv2.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1, des2, k=2)
# Open file
fd = open(out_path,"w")
# ratio test as per Lowe's paper
for i, m_n in enumerate(matches):
if len(m_n) != 2:
continue
elif m_n[0].distance < 0.80*m_n[1].distance:
ret = fd.write("%d %d %d %d %f \n"%(kp1[m_n[0].queryIdx].pt[0], kp1[m_n[0].queryIdx].pt[1], kp2[m_n[0].trainIdx].pt[0], kp2[m_n[0].trainIdx].pt[1], m_n[0].distance))
# Close opened file
fd.close()
# print(os.stat(out_path).st_size)
if os.stat(out_path).st_size<1000:
generate_fake_cor(img1, out_path)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='PoseFlow Matching')
parser.add_argument('--orb', type=int, default=0)
args = parser.parse_args()
image_dir = "posetrack_data/images"
imgnames = []
vidnames = []
for a,b,c in os.walk(image_dir):
if len(a.split("/")) == 4:
vidnames.append(a)
for vidname in tqdm(sorted(vidnames)):
for a,b,c in os.walk(vidname):
c=[item for item in c if "jpg" in item]
imgnames = sorted(c)
break
for imgname in imgnames[:-1]:
if 'crop' in imgname:
continue
img1 = os.path.join(vidname,imgname)
len_name = len(imgname.split(".")[0])
if len_name == 5:
img2 = os.path.join(vidname,"%05d.jpg"%(int(imgname.split(".")[0])+1))
else:
img2 = os.path.join(vidname,"%08d.jpg"%(int(imgname.split(".")[0])+1))
if not os.path.exists(img2):
continue
img1_id = img1.split(".")[0].split("/")[-1]
img2_id = img2.split(".")[0].split("/")[-1]
if args.orb:
cor_file = "%s/%s_%s_orb.txt"%(vidname,img1_id,img2_id)
else:
cor_file = "%s/%s_%s.txt"%(vidname,img1_id,img2_id)
if not os.path.exists(cor_file) or os.stat(cor_file).st_size<1000:
if args.orb:
# calc orb matching
orb_matching(img1,img2,vidname,img1_id,img2_id)
else:
# calc deep matching
cmd = "./deepmatching/deepmatching %s %s -nt 10 -downscale 3 -out %s/%s_%s.txt > cache"%(img1,img2,vidname,img1_id,img2_id)
os.system(cmd)

Binary file not shown.


Binary file not shown.


@ -0,0 +1,9 @@
numpy==1.14.5
scipy==1.1.0
opencv_python==3.4.2.16
opencv_contrib_python==3.4.2.16
matplotlib==2.2.2
tqdm==4.23.4
Image==1.5.25
Pillow==5.3.0
munkres==1.0.12

@ -0,0 +1,278 @@
# coding: utf-8
'''
File: tracker-baseline.py
Project: AlphaPose
File Created: Thursday, 1st March 2018 6:12:23 pm
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Last Modified: Monday, 1st October 2018 12:53:12 pm
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
'''
import numpy as np
import os
import json
import copy
import heapq
from munkres import Munkres, print_matrix
from PIL import Image
from tqdm import tqdm
from utils import *
from matching import orb_matching
import argparse
# posetrack dataset path
image_dir = "./posetrack_data"
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='PoseFlow Tracker')
parser.add_argument('--link', type=int, default=100)
parser.add_argument('--drop', type=float, default=2.0)
parser.add_argument('--num', type=int, default=7)
parser.add_argument('--mag', type=int, default=30)
parser.add_argument('--match', type=float, default=0.2)
parser.add_argument('--dataset', type=str, default='val')
parser.add_argument('--orb', type=int, default=0)
args = parser.parse_args()
# super parameters
# 1. look-ahead LINK_LEN frames to find tracked human bbox
# 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
# 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score(Non DeepMatching)
# 4. drop low-score(<DROP) keypoints
# 5. pick high-score(top NUM) keypoints when computing pose_IOU
# 6. box width/height around keypoint for computing pose IoU
# 7. match threshold in Hungarian Matching
# 8. dataset = 'test' or 'val'
# 9. use orb matching or not
link_len = args.link
weights = [1,2,1,2,0,0]
weights_fff = [0,1,0,1,0,0]
drop = args.drop
num = args.num
mag = args.mag
match_thres = args.match
dataset = args.dataset
use_orb = args.orb
anno_dir = "./posetrack_data/annotations/{}".format(dataset)
notrack_json = "alpha-pose-results-{}.json".format(dataset)
track_dir = "{}-predict".format(dataset) # results dir name
if not os.path.exists(track_dir):
os.mkdir(track_dir)
track = {}
cur_vname = ""
num_persons = 0
# load json file without tracking information
# Note: parsing takes a while; the result is cached to a .npy below so that later runs can comment out the parsing and load it directly
with open(notrack_json,'r') as f:
notrack = json.load(f)
for imgpath in tqdm(sorted(notrack.keys())):
if 'crop' in imgpath:
vname,fname = imgpath[:-18],imgpath[-17:]
print(imgpath,vname,fname)
continue
vname,fname = imgpath[:-13],imgpath[-12:]
if vname != cur_vname:
cur_vname = vname
track[vname] = {}
track[vname][fname] = {'num_boxes':len(notrack[imgpath])}
for bid in range(len(notrack[imgpath])):
track[vname][fname][bid+1] = {}
track[vname][fname][bid+1]['box_score'] = notrack[imgpath][bid]['score']
track[vname][fname][bid+1]['box_pos'] = get_box(notrack[imgpath][bid]['keypoints'], os.path.join(image_dir,imgpath))
track[vname][fname][bid+1]['box_pose_pos'] = np.array(notrack[imgpath][bid]['keypoints']).reshape(-1,3)[:,0:2]
track[vname][fname][bid+1]['box_pose_score'] = np.array(notrack[imgpath][bid]['keypoints']).reshape(-1,3)[:,-1]
np.save('notrack-{}.npy'.format(dataset),track)
track = np.load('notrack-{}.npy'.format(dataset)).item()
# tracking process
for video_name in tqdm(track.keys()):
max_pid_id = 0
frame_list = sorted(list(track[video_name].keys()))
for idx, frame_name in enumerate(frame_list[:-1]):
frame_new_pids = []
frame_id = frame_name.split(".")[0]
next_frame_name = frame_list[idx+1]
next_frame_id = next_frame_name.split(".")[0]
# deal with image files whose names end with '__crop'
if 'crop' in next_frame_name:
track[video_name][next_frame_name] = copy.deepcopy(track[video_name][frame_name])
continue
# init tracking info of the first frame in one video
if idx == 0:
for pid in range(1, track[video_name][frame_name]['num_boxes']+1):
track[video_name][frame_name][pid]['new_pid'] = pid
track[video_name][frame_name][pid]['match_score'] = 0
max_pid_id = max(max_pid_id, track[video_name][frame_name]['num_boxes'])
if use_orb:
cor_file = os.path.join(image_dir, video_name, "".join([frame_id, '_', next_frame_id, '_orb.txt']))
else:
cor_file = os.path.join(image_dir, video_name, "".join([frame_id, '_', next_frame_id, '.txt']))
# regenerate the missed pair-matching txt
if not os.path.exists(cor_file) or os.stat(cor_file).st_size<200:
dm = "/home/yuliang/code/PoseTrack-CVPR2017/external/deepmatching/deepmatching"
img1_path = os.path.join(image_dir,video_name,frame_name)
img2_path = os.path.join(image_dir,video_name,next_frame_name)
if use_orb:
orb_matching(img1_path,img2_path, os.path.join(image_dir, video_name), frame_id, next_frame_id)
else:
cmd = "%s %s %s -nt 20 -downscale 2 -out %s"%(dm,img1_path,img2_path,cor_file)
os.system(cmd)
all_cors = np.loadtxt(cor_file)
# if there is no people in this frame, then copy the info from former frame
if track[video_name][next_frame_name]['num_boxes'] == 0:
track[video_name][next_frame_name] = copy.deepcopy(track[video_name][frame_name])
continue
cur_all_pids, cur_all_pids_fff = stack_all_pids(track[video_name], frame_list[:-1], idx, max_pid_id, link_len)
match_indexes, match_scores = best_matching_hungarian(
all_cors, cur_all_pids, cur_all_pids_fff, track[video_name][next_frame_name], weights, weights_fff, num, mag)
for pid1, pid2 in match_indexes:
if match_scores[pid1][pid2] > match_thres:
track[video_name][next_frame_name][pid2+1]['new_pid'] = cur_all_pids[pid1]['new_pid']
max_pid_id = max(max_pid_id, track[video_name][next_frame_name][pid2+1]['new_pid'])
track[video_name][next_frame_name][pid2+1]['match_score'] = match_scores[pid1][pid2]
# add the untracked new person
for next_pid in range(1, track[video_name][next_frame_name]['num_boxes'] + 1):
if 'new_pid' not in track[video_name][next_frame_name][next_pid]:
max_pid_id += 1
track[video_name][next_frame_name][next_pid]['new_pid'] = max_pid_id
track[video_name][next_frame_name][next_pid]['match_score'] = 0
# deal with non-consecutive frames in the dataset
gap = int(next_frame_id)-int(frame_id)
if gap>1:
for i in range(gap):
if i>0:
new_frame_name = "%08d.jpg"%(int(frame_id)+i)
track[video_name][new_frame_name] = copy.deepcopy(track[video_name][frame_name])
rmpe_part_ids = [0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, 8, 9]
for video_name in tqdm(track.keys()):
num_persons = 0
frame_list = sorted(list(track[video_name].keys()))
for fid, frame_name in enumerate(frame_list):
for pid in range(1, track[video_name][frame_name]['num_boxes']+1):
new_score = copy.deepcopy(track[video_name][frame_name][pid]['box_pose_score'])
new_pose = copy.deepcopy(track[video_name][frame_name][pid]['box_pose_pos'])
track[video_name][frame_name][pid]['box_pose_score'] = new_score[rmpe_part_ids]
track[video_name][frame_name][pid]['box_pose_pos'] = new_pose[rmpe_part_ids,:]
num_persons = max(num_persons, track[video_name][frame_name][pid]['new_pid'])
track[video_name]['num_persons'] = num_persons
np.save('track-{}.npy'.format(dataset),track)
track = np.load('track-{}.npy'.format(dataset)).item()
for a,b,c in os.walk(anno_dir):
val_jsons = [item for item in c if 'json' in item]
break
# export tracking result into json files
for video_name in tqdm(track.keys()):
if dataset == 'val':
name = [item for item in val_jsons if video_name.split("/")[-1] in item]
if len(name) == 0:
name = [item for item in val_jsons if video_name.split("/")[-1][1:] in item]
name = name[0]
else:
# handle the inconsistent file naming in the PoseTrack dataset
name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0] == item.split("_")[0]]
if video_name.split("/")[-1].split("_")[0] == "000044":
if video_name.split("/")[-2]=='mpii_5sec':
name = ["00044_mpii_step1_relpath_5sec_testsub.json"]
elif video_name.split("/")[-2]=='bonn_5sec':
name = ["000044_mpii_relpath_5sec_testsub.json"]
if video_name.split("/")[-1].split("_")[0] == "002279":
if video_name.split("/")[-2]=='mpii_5sec':
name = ["02279_mpii_step2_relpath_5sec_testsub.json"]
elif video_name.split("/")[-2]=='bonn_mpii_test_v2_5sec':
name = ["02279_mpii_relpath_5sec_testsub.json"]
if video_name.split("/")[-1].split("_")[0] == "019980":
if video_name.split("/")[-2]=='bonn_5sec':
name = ["019980_mpii_relpath_5sec_testsub.json"]
elif video_name.split("/")[-2]=='mpii_5sec':
name = ["19980_mpii_step1_relpath_5sec_testsub.json"]
if video_name.split("/")[-1].split("_")[0] == "09611":
name = ["09611_mpii_relpath_5sec_testsub.json"]
if video_name.split("/")[-1].split("_")[0] == "009611":
name = ["09611_mpii_step2_relpath_5sec_testsub.json"]
if video_name.split("/")[-1].split("_")[0][:-1] == '00000':
name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0][1:] == item.split("_")[0]]
if len(name)==0:
name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0][1:] == item.split("_")[0]]
name = name[0]
final = {'annolist':[]}
frame_list = list(track[video_name].keys())
frame_list.remove('num_persons')
frame_list = sorted(frame_list)
with open(os.path.join(anno_dir,name)) as f:
annot = json.load(f)
imgs = []
for img in annot['annolist']:
imgs.append(img['image'][0]['name'])
for fid, frame_name in enumerate(frame_list):
if os.path.join(video_name,frame_name) not in imgs:
continue
final['annolist'].append({"image":[{"name":os.path.join(video_name,frame_name)}],"annorect":[]})
for pid in range(1, track[video_name][frame_name]['num_boxes']+1):
pid_info = track[video_name][frame_name][pid]
box_pos = pid_info['box_pos']
box_score = pid_info['box_score']
pose_pos = pid_info['box_pose_pos']
pose_score = pid_info['box_pose_score']
pose_pos = add_nose(pose_pos)
pose_score = add_nose(pose_score)
new_pid = pid_info['new_pid']
point_struct = []
for idx,pose in enumerate(pose_pos):
if pose_score[idx]>drop:
point_struct.append({"id":[idx],"x":[pose[0]],"y":[pose[1]],"score":[pose_score[idx]]})
final['annolist'][fid]['annorect'].append({"x1":[box_pos[0]],\
"x2":[box_pos[1]],\
"y1":[box_pos[2]],\
"y2":[box_pos[3]],\
"score":[box_score],\
"track_id":[new_pid-1],\
"annopoints":[{"point":point_struct}]})
for rest_name in remove_list(imgs,video_name,frame_list):
final['annolist'].append({"image":[{"name":rest_name}],"annorect":[]})
with open("%s/%s"%(track_dir,name),'w') as json_file:
json_file.write(json.dumps(final))

@ -0,0 +1,226 @@
# coding: utf-8
'''
File: tracker-general.py
Project: AlphaPose
File Created: Tuesday, 18th Dec 2018 14:55:41 pm
-----
Last Modified: Thursday, 20th Dec 2018 23:24:47 pm
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
'''
import numpy as np
import os
import json
import copy
import heapq
from munkres import Munkres, print_matrix
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from utils import *
from matching import orb_matching
import argparse
# visualization
def display_pose(imgdir, visdir, tracked, cmap):
print("Start visualization...\n")
for imgname in tqdm(tracked.keys()):
img = Image.open(os.path.join(imgdir,imgname))
width, height = img.size
fig = plt.figure(figsize=(width/10,height/10),dpi=10)
plt.imshow(img)
for pid in range(len(tracked[imgname])):
pose = np.array(tracked[imgname][pid]['keypoints']).reshape(-1,3)[:,:3]
tracked_id = tracked[imgname][pid]['idx']
# keypoint scores of torch version and pytorch version are different
if np.mean(pose[:,2]) <1 :
alpha_ratio = 1.0
else:
alpha_ratio = 5.0
if pose.shape[0] == 16:
mpii_part_names = ['RAnkle','RKnee','RHip','LHip','LKnee','LAnkle','Pelv','Thrx','Neck','Head','RWrist','RElbow','RShoulder','LShoulder','LElbow','LWrist']
colors = ['m', 'b', 'b', 'r', 'r', 'b', 'b', 'r', 'r', 'm', 'm', 'm', 'r', 'r','b','b']
pairs = [[8,9],[11,12],[11,10],[2,1],[1,0],[13,14],[14,15],[3,4],[4,5],[8,7],[7,6],[6,2],[6,3],[8,12],[8,13]]
for idx_c, color in enumerate(colors):
plt.plot(np.clip(pose[idx_c,0],0,width), np.clip(pose[idx_c,1],0,height), marker='o',
color=color, ms=80/alpha_ratio*np.mean(pose[idx_c,2]), markerfacecolor=(1, 1, 0, 0.7/alpha_ratio*pose[idx_c,2]))
for idx in range(len(pairs)):
plt.plot(np.clip(pose[pairs[idx],0],0,width),np.clip(pose[pairs[idx],1],0,height), 'r-',
color=cmap(tracked_id), linewidth=60/alpha_ratio*np.mean(pose[pairs[idx],2]), alpha=0.6/alpha_ratio*np.mean(pose[pairs[idx],2]))
elif pose.shape[0] == 17:
coco_part_names = ['Nose','LEye','REye','LEar','REar','LShoulder','RShoulder','LElbow','RElbow','LWrist','RWrist','LHip','RHip','LKnee','RKnee','LAnkle','RAnkle']
colors = ['r', 'r', 'r', 'r', 'r', 'y', 'y', 'y', 'y', 'y', 'y', 'g', 'g', 'g','g','g','g']
pairs = [[0,1],[0,2],[1,3],[2,4],[5,6],[5,7],[7,9],[6,8],[8,10],[11,12],[11,13],[13,15],[12,14],[14,16],[6,12],[5,11]]
for idx_c, color in enumerate(colors):
plt.plot(np.clip(pose[idx_c,0],0,width), np.clip(pose[idx_c,1],0,height), marker='o',
color=color, ms=80/alpha_ratio*np.mean(pose[idx_c,2]), markerfacecolor=(1, 1, 0, 0.7/alpha_ratio*pose[idx_c,2]))
for idx in range(len(pairs)):
plt.plot(np.clip(pose[pairs[idx],0],0,width),np.clip(pose[pairs[idx],1],0,height),'r-',
color=cmap(tracked_id), linewidth=60/alpha_ratio*np.mean(pose[pairs[idx],2]), alpha=0.6/alpha_ratio*np.mean(pose[pairs[idx],2]))
plt.axis('off')
ax = plt.gca()
ax.set_xlim([0,width])
ax.set_ylim([height,0])
extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
if not os.path.exists(visdir):
os.mkdir(visdir)
fig.savefig(os.path.join(visdir,imgname.split()[0]+".png"), pad_inches = 0.0, bbox_inches=extent, dpi=13)
plt.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='PoseFlow Tracker')
parser.add_argument('--imgdir', type=str, required=True, help="Must input the images dir")
parser.add_argument('--in_json', type=str, required=True, help="result json predicted by AlphaPose")
parser.add_argument('--out_json', type=str, required=True, help="output path of tracked json")
parser.add_argument('--visdir', type=str, default="", help="visualization dir for tracked results of video sequences")
parser.add_argument('--link', type=int, default=100)
parser.add_argument('--drop', type=float, default=2.0)
parser.add_argument('--num', type=int, default=7)
parser.add_argument('--mag', type=int, default=30)
parser.add_argument('--match', type=float, default=0.2)
args = parser.parse_args()
# super parameters
# 1. look-ahead LINK_LEN frames to find tracked human bbox
# 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
# 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score(Non DeepMatching)
# 4. drop low-score(<DROP) keypoints
# 5. pick high-score(top NUM) keypoints when computing pose_IOU
# 6. box width/height around keypoint for computing pose IoU
# 7. match threshold in Hungarian Matching
link_len = args.link
weights = [1,2,1,2,0,0]
weights_fff = [0,1,0,1,0,0]
drop = args.drop
num = args.num
mag = args.mag
match_thres = args.match
notrack_json = args.in_json
tracked_json = args.out_json
image_dir = args.imgdir
vis_dir = args.visdir
# if the json format is different from "alphapose-forvis.json" (pytorch version)
if "forvis" not in notrack_json:
results_forvis = {}
last_image_name = ' '
with open(notrack_json) as f:
results = json.load(f)
for i in range(len(results)):
imgpath = results[i]['image_id']
if last_image_name != imgpath:
results_forvis[imgpath] = []
results_forvis[imgpath].append({'keypoints':results[i]['keypoints'],'scores':results[i]['score']})
else:
results_forvis[imgpath].append({'keypoints':results[i]['keypoints'],'scores':results[i]['score']})
last_image_name = imgpath
notrack_json = os.path.join(os.path.dirname(notrack_json), "alphapose-results-forvis.json")
with open(notrack_json,'w') as json_file:
json_file.write(json.dumps(results_forvis))
notrack = {}
track = {}
num_persons = 0
# load json file without tracking information
print("Start loading json file...\n")
with open(notrack_json,'r') as f:
notrack = json.load(f)
for img_name in tqdm(sorted(notrack.keys())):
track[img_name] = {'num_boxes':len(notrack[img_name])}
for bid in range(len(notrack[img_name])):
track[img_name][bid+1] = {}
track[img_name][bid+1]['box_score'] = notrack[img_name][bid]['scores']
track[img_name][bid+1]['box_pos'] = get_box(notrack[img_name][bid]['keypoints'], os.path.join(image_dir,img_name))
track[img_name][bid+1]['box_pose_pos'] = np.array(notrack[img_name][bid]['keypoints']).reshape(-1,3)[:,0:2]
track[img_name][bid+1]['box_pose_score'] = np.array(notrack[img_name][bid]['keypoints']).reshape(-1,3)[:,-1]
np.save('notrack-bl.npy',track)
# track = np.load('notrack-bl.npy').item()
# tracking process
max_pid_id = 0
frame_list = sorted(list(track.keys()))
print("Start pose tracking...\n")
for idx, frame_name in enumerate(tqdm(frame_list[:-1])):
frame_new_pids = []
frame_id = frame_name.split(".")[0]
next_frame_name = frame_list[idx+1]
next_frame_id = next_frame_name.split(".")[0]
# init tracking info of the first frame in one video
if idx == 0:
for pid in range(1, track[frame_name]['num_boxes']+1):
track[frame_name][pid]['new_pid'] = pid
track[frame_name][pid]['match_score'] = 0
max_pid_id = max(max_pid_id, track[frame_name]['num_boxes'])
cor_file = os.path.join(image_dir, "".join([frame_id, '_', next_frame_id, '_orb.txt']))
# regenerate the missed pair-matching txt
if not os.path.exists(cor_file) or os.stat(cor_file).st_size<200:
img1_path = os.path.join(image_dir, frame_name)
img2_path = os.path.join(image_dir, next_frame_name)
orb_matching(img1_path,img2_path, image_dir, frame_id, next_frame_id)
all_cors = np.loadtxt(cor_file)
# if there is no people in this frame, then copy the info from former frame
if track[next_frame_name]['num_boxes'] == 0:
track[next_frame_name] = copy.deepcopy(track[frame_name])
continue
cur_all_pids, cur_all_pids_fff = stack_all_pids(track, frame_list[:-1], idx, max_pid_id, link_len)
match_indexes, match_scores = best_matching_hungarian(
all_cors, cur_all_pids, cur_all_pids_fff, track[next_frame_name], weights, weights_fff, num, mag)
for pid1, pid2 in match_indexes:
if match_scores[pid1][pid2] > match_thres:
track[next_frame_name][pid2+1]['new_pid'] = cur_all_pids[pid1]['new_pid']
max_pid_id = max(max_pid_id, track[next_frame_name][pid2+1]['new_pid'])
track[next_frame_name][pid2+1]['match_score'] = match_scores[pid1][pid2]
# add the untracked new person
for next_pid in range(1, track[next_frame_name]['num_boxes'] + 1):
if 'new_pid' not in track[next_frame_name][next_pid]:
max_pid_id += 1
track[next_frame_name][next_pid]['new_pid'] = max_pid_id
track[next_frame_name][next_pid]['match_score'] = 0
np.save('track-bl.npy',track)
# track = np.load('track-bl.npy').item()
# calculate number of people
num_persons = 0
for fid, frame_name in enumerate(frame_list):
for pid in range(1, track[frame_name]['num_boxes']+1):
num_persons = max(num_persons, track[frame_name][pid]['new_pid'])
print("This video contains %d people."%(num_persons))
# export tracking result into notrack json files
print("Export tracking results to json...\n")
for fid, frame_name in enumerate(tqdm(frame_list)):
for pid in range(track[frame_name]['num_boxes']):
notrack[frame_name][pid]['idx'] = track[frame_name][pid+1]['new_pid']
with open(tracked_json,'w') as json_file:
json_file.write(json.dumps(notrack))
if len(args.visdir)>0:
cmap = plt.cm.get_cmap("hsv", num_persons)
display_pose(image_dir, vis_dir, notrack, cmap)

@ -0,0 +1,238 @@
# coding: utf-8
'''
File: utils.py
Project: AlphaPose
File Created: Thursday, 1st March 2018 5:32:34 pm
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Last Modified: Thursday, 20th March 2018 1:18:17 am
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
'''
import numpy as np
import cv2 as cv
import os
import json
import copy
import heapq
from munkres import Munkres, print_matrix
from PIL import Image
from tqdm import tqdm
# keypoint penalty weight
delta = 2*np.array([0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144, \
0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081, \
0.01291456, 0.01236173,0.01291456, 0.01236173])
# get expand bbox surrounding single person's keypoints
def get_box(pose, imgpath):
pose = np.array(pose).reshape(-1,3)
xmin = np.min(pose[:,0])
xmax = np.max(pose[:,0])
ymin = np.min(pose[:,1])
ymax = np.max(pose[:,1])
img_height, img_width, _ = cv.imread(imgpath).shape
return expand_bbox(xmin, xmax, ymin, ymax, img_width, img_height)
# expand bbox for containing more background
def expand_bbox(left, right, top, bottom, img_width, img_height):
width = right - left
height = bottom - top
ratio = 0.1 # expand ratio
new_left = np.clip(left - ratio * width, 0, img_width)
new_right = np.clip(right + ratio * width, 0, img_width)
new_top = np.clip(top - ratio * height, 0, img_height)
new_bottom = np.clip(bottom + ratio * height, 0, img_height)
return [int(new_left), int(new_right), int(new_top), int(new_bottom)]
# calculate final matching grade
def cal_grade(l, w):
return sum(np.array(l)*np.array(w))
# calculate IoU of two boxes(thanks @ZongweiZhou1)
def cal_bbox_iou(boxA, boxB):
xA = max(boxA[0], boxB[0]) #xmin
yA = max(boxA[2], boxB[2]) #ymin
xB = min(boxA[1], boxB[1]) #xmax
yB = min(boxA[3], boxB[3]) #ymax
if xA < xB and yA < yB:
interArea = (xB - xA + 1) * (yB - yA + 1)
boxAArea = (boxA[1] - boxA[0] + 1) * (boxA[3] - boxA[2] + 1)
boxBArea = (boxB[1] - boxB[0] + 1) * (boxB[3] - boxB[2] + 1)
iou = interArea / float(boxAArea + boxBArea - interArea+0.00001)
else:
iou=0.0
return iou
# calculate OKS between two single poses
def compute_oks(anno, predict, delta):
xmax = np.max(np.vstack((anno[:, 0], predict[:, 0])))
xmin = np.min(np.vstack((anno[:, 0], predict[:, 0])))
ymax = np.max(np.vstack((anno[:, 1], predict[:, 1])))
ymin = np.min(np.vstack((anno[:, 1], predict[:, 1])))
scale = (xmax - xmin) * (ymax - ymin)
dis = np.sum((anno - predict)**2, axis=1)
oks = np.mean(np.exp(-dis / 2 / delta**2 / scale))
return oks
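# i.e. OKS = mean_k exp( -dis_k / (2 * delta_k^2 * scale) ): dis_k is the squared
# distance between corresponding keypoints, scale the area of their common
# bounding box, and delta the per-keypoint tolerance defined at the top of this file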
# stack all already tracked people's info together(thanks @ZongweiZhou1)
def stack_all_pids(track_vid, frame_list, idxs, max_pid_id, link_len):
#track_vid contains track_vid[<=idx]
all_pids_info = []
all_pids_fff = [] # boolean list, 'fff' means From Former Frame
all_pids_ids = [(item+1) for item in range(max_pid_id)]
for idx in np.arange(idxs,max(idxs-link_len,-1),-1):
for pid in range(1, track_vid[frame_list[idx]]['num_boxes']+1):
if len(all_pids_ids) == 0:
return all_pids_info, all_pids_fff
elif track_vid[frame_list[idx]][pid]['new_pid'] in all_pids_ids:
all_pids_ids.remove(track_vid[frame_list[idx]][pid]['new_pid'])
all_pids_info.append(track_vid[frame_list[idx]][pid])
if idx == idxs:
all_pids_fff.append(True)
else:
all_pids_fff.append(False)
return all_pids_info, all_pids_fff
# calculate DeepMatching Pose IoU given two boxes
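# (the IoU here is over sets of DeepMatching correspondences: a match counts for a
#  box when its endpoint in that frame falls inside the box, and the score is
#  |matches inside both boxes| / |matches inside either box|)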
def find_two_pose_box_iou(pose1_box, pose2_box, all_cors):
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
x_min, x_max, y_min, y_max = pose1_box
x1_region_ids = set(np.where((x1 >= x_min) & (x1 <= x_max))[0].tolist())
y1_region_ids = set(np.where((y1 >= y_min) & (y1 <= y_max))[0].tolist())
region_ids1 = x1_region_ids & y1_region_ids
x_min, x_max, y_min, y_max = pose2_box
x2_region_ids = set(np.where((x2 >= x_min) & (x2 <= x_max))[0].tolist())
y2_region_ids = set(np.where((y2 >= y_min) & (y2 <= y_max))[0].tolist())
region_ids2 = x2_region_ids & y2_region_ids
inter = region_ids1 & region_ids2
union = region_ids1 | region_ids2
pose_box_iou = len(inter) / (len(union) + 0.00001)
return pose_box_iou
# calculate general Pose IoU(only consider top NUM matched keypoints)
def cal_pose_iou(pose1_box,pose2_box, num,mag):
pose_iou = []
for row in range(len(pose1_box)):
x1,y1 = pose1_box[row]
x2,y2 = pose2_box[row]
box1 = [x1-mag,x1+mag,y1-mag,y1+mag]
box2 = [x2-mag,x2+mag,y2-mag,y2+mag]
pose_iou.append(cal_bbox_iou(box1,box2))
return np.mean(heapq.nlargest(num, pose_iou))
# calculate DeepMatching based Pose IoU(only consider top NUM matched keypoints)
def cal_pose_iou_dm(all_cors,pose1,pose2,num,mag):
poses_iou = []
for ids in range(len(pose1)):
pose1_box = [pose1[ids][0]-mag,pose1[ids][0]+mag,pose1[ids][1]-mag,pose1[ids][1]+mag]
pose2_box = [pose2[ids][0]-mag,pose2[ids][0]+mag,pose2[ids][1]-mag,pose2[ids][1]+mag]
poses_iou.append(find_two_pose_box_iou(pose1_box, pose2_box, all_cors))
return np.mean(heapq.nlargest(num, poses_iou))
# hungarian matching algorithm(thanks @ZongweiZhou1)
def best_matching_hungarian(all_cors, all_pids_info, all_pids_fff, track_vid_next_fid, weights, weights_fff, num, mag):
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
all_grades_details = []
all_grades = []
box1_num = len(all_pids_info)
box2_num = track_vid_next_fid['num_boxes']
cost_matrix = np.zeros((box1_num, box2_num))
for pid1 in range(box1_num):
box1_pos = all_pids_info[pid1]['box_pos']
box1_region_ids = find_region_cors_last(box1_pos, all_cors)
box1_score = all_pids_info[pid1]['box_score']
box1_pose = all_pids_info[pid1]['box_pose_pos']
box1_fff = all_pids_fff[pid1]
for pid2 in range(1, track_vid_next_fid['num_boxes'] + 1):
box2_pos = track_vid_next_fid[pid2]['box_pos']
box2_region_ids = find_region_cors_next(box2_pos, all_cors)
box2_score = track_vid_next_fid[pid2]['box_score']
box2_pose = track_vid_next_fid[pid2]['box_pose_pos']
inter = box1_region_ids & box2_region_ids
union = box1_region_ids | box2_region_ids
dm_iou = len(inter) / (len(union) + 0.00001)
box_iou = cal_bbox_iou(box1_pos, box2_pos)
pose_iou_dm = cal_pose_iou_dm(all_cors, box1_pose, box2_pose, num,mag)
pose_iou = cal_pose_iou(box1_pose, box2_pose,num,mag)
if box1_fff:
grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score], weights)
else:
grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score], weights_fff)
cost_matrix[pid1, pid2 - 1] = grade
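# Munkres solves a minimization problem, so the similarity grades are negated
# below to obtain a maximum-weight assignment between tracked pids and next-frame boxes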
m = Munkres()
indexes = m.compute((-np.array(cost_matrix)).tolist())
return indexes, cost_matrix
# calculate number of matching points in one box from last frame
def find_region_cors_last(box_pos, all_cors):
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
x_min, x_max, y_min, y_max = box_pos
x1_region_ids = set(np.where((x1 >= x_min) & (x1 <= x_max))[0].tolist())
y1_region_ids = set(np.where((y1 >= y_min) & (y1 <= y_max))[0].tolist())
region_ids = x1_region_ids & y1_region_ids
return region_ids
# calculate number of matching points in one box from next frame
def find_region_cors_next(box_pos, all_cors):
x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)]
x_min, x_max, y_min, y_max = box_pos
x2_region_ids = set(np.where((x2 >= x_min) & (x2 <= x_max))[0].tolist())
y2_region_ids = set(np.where((y2 >= y_min) & (y2 <= y_max))[0].tolist())
region_ids = x2_region_ids & y2_region_ids
return region_ids
# fill the nose keypoint by averaging head and neck
def add_nose(array):
if min(array.shape) == 2:
head = array[-1,:]
neck = array[-2,:]
else:
head = array[-1]
neck = array[-2]
nose = (head+neck)/2.0
return np.insert(array,-1,nose,axis=0)
# list remove operation
def remove_list(l1,vname,l2):
for item in l2:
l1.remove(os.path.join(vname,item))
return l1

@ -0,0 +1,115 @@
<div align="center">
<img src="doc/logo.jpg", width="400">
</div>
## Notice
### This branch is developed on PyTorch 0.4.0. We have released a new version of AlphaPose based on PyTorch 1.1+. Please check out our [master](https://github.com/MVIG-SJTU/AlphaPose) branch for more details.
## News!
- Dec 2019: [**v0.3.0** version](https://github.com/MVIG-SJTU/AlphaPose) of AlphaPose is released! Smaller model, higher accuracy!
- Apr 2019: [**MXNet** version](https://github.com/MVIG-SJTU/AlphaPose/tree/mxnet) of AlphaPose is released! It runs at **23 fps** on COCO validation set.
- Feb 2019: [CrowdPose](https://github.com/MVIG-SJTU/AlphaPose/blob/pytorch/doc/CrowdPose.md) is integrated into AlphaPose Now!
- Dec 2018: [General version](https://github.com/MVIG-SJTU/AlphaPose/tree/pytorch/PoseFlow) of PoseFlow is released! 3X faster, with support for visualizing pose tracking results!
- Sep 2018: [**v0.2.0** version](https://github.com/MVIG-SJTU/AlphaPose/tree/pytorch) of AlphaPose is released! It runs at **20 fps** on COCO validation set (4.6 people per image on average) and achieves 71 mAP!
## AlphaPose
[Alpha Pose](http://www.mvig.org/research/alphapose.html) is an accurate multi-person pose estimator, which is the **first open-source system that achieves 70+ mAP (72.3 mAP) on COCO dataset and 80+ mAP (82.1 mAP) on MPII dataset.**
To match poses that correspond to the same person across frames, we also provide an efficient online pose tracker called Pose Flow. It is the **first open-source online pose tracker that achieves both 60+ mAP (66.5 mAP) and 50+ MOTA (58.3 MOTA) on PoseTrack Challenge dataset.**
AlphaPose supports both Linux and **Windows!**
<div align="center">
<img src="doc/alphapose.gif", width="400">
</div>
## Installation
**Windows Version** please check out [doc/win_install.md](doc/win_install.md)
1. Get the code.
```Shell
git clone -b pytorch https://github.com/MVIG-SJTU/AlphaPose.git
```
2. Install [pytorch 0.4.0](https://github.com/pytorch/pytorch) and other dependencies.
```Shell
pip install -r requirements.txt
```
3. Download the models manually: **duc_se.pth** (2018/08/30) ([Google Drive]( https://drive.google.com/open?id=1OPORTWB2cwd5YTVBX-NE8fsauZJWsrtW) | [Baidu pan](https://pan.baidu.com/s/15jbRNKuslzm5wRSgUVytrA)), **yolov3-spp.weights**([Google Drive](https://drive.google.com/open?id=1D47msNOOiJKvPOXlnpyzdKA3k6E97NTC) | [Baidu pan](https://pan.baidu.com/s/1Zb2REEIk8tcahDa8KacPNA)). Place them into `./models/sppe` and `./models/yolo` respectively.
## Quick Start
- **Input dir**: Run AlphaPose for all images in a folder with:
```
python3 demo.py --indir ${img_directory} --outdir examples/res
```
- **Video**: Run AlphaPose for a video and save the rendered video with:
```
python3 video_demo.py --video ${path to video} --outdir examples/res --save_video
```
- **Webcam**: Run AlphaPose using webcam and visualize the results with:
```
python3 webcam_demo.py --webcam 0 --outdir examples/res --vis
```
- **Input list**: Run AlphaPose for images in a list and save the rendered images with:
```
python3 demo.py --list examples/list-coco-demo.txt --indir ${img_directory} --outdir examples/res --save_img
```
- **Note**: If you meet an OOM (out of memory) problem, decrease the pose estimation batch size until the program can run on your computer:
```
python3 demo.py --indir ${img_directory} --outdir examples/res --posebatch 30
```
- **Getting more accurate**: You can enable flip testing to get more accurate results by disabling fast_inference, e.g.:
```
python3 demo.py --indir ${img_directory} --outdir examples/res --fast_inference False
```
- **Speeding up**: Check out [speed_up.md](doc/speed_up.md) for more details.
- **Output format**: Check out [output.md](doc/output.md) for more details.
- **For more**: Check out [run.md](doc/run.md) for more options.
## Pose Tracking
<p align='center'>
<img src="doc/posetrack.gif", width="360">
<img src="doc/posetrack2.gif", width="344">
</p>
Please read [PoseFlow/README.md](PoseFlow/) for details.
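A minimal sketch of running the general tracker shipped in this diff (the json paths under `examples/res` are assumptions based on the demo commands above; point them at wherever AlphaPose wrote its results):

```
python PoseFlow/tracker-general.py --imgdir ${img_directory} \
                                   --in_json examples/res/alphapose-results.json \
                                   --out_json examples/res/alphapose-results-forvis-tracked.json \
                                   --visdir examples/res/vis
```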
### CrowdPose
<p align='center'>
<img src="doc/crowdpose.gif", width="360">
</p>
Please read [doc/CrowdPose.md](doc/CrowdPose.md) for details.
## FAQ
Check out [faq.md](doc/faq.md) for frequently asked questions.
## Contributors
Pytorch version of AlphaPose is developed and maintained by [Jiefeng Li](http://jeff-leaf.site/), [Hao-Shu Fang](https://fang-haoshu.github.io/), [Yuliang Xiu](http://xiuyuliang.cn) and [Cewu Lu](http://www.mvig.org/).
## Citation
Please cite these papers in your publications if they help your research:
@inproceedings{fang2017rmpe,
title={{RMPE}: Regional Multi-person Pose Estimation},
author={Fang, Hao-Shu and Xie, Shuqin and Tai, Yu-Wing and Lu, Cewu},
booktitle={ICCV},
year={2017}
}
@inproceedings{xiu2018poseflow,
author = {Xiu, Yuliang and Li, Jiefeng and Wang, Haoyu and Fang, Yinghong and Lu, Cewu},
title = {{Pose Flow}: Efficient Online Pose Tracking},
booktitle={BMVC},
year = {2018}
}
## License
AlphaPose is freely available for non-commercial use, and may be redistributed under these conditions. For commercial queries, please drop an e-mail to mvig.alphapose[at]gmail[dot]com and cc lucewu[at]sjtu[dot]edu[dot]cn. We will send the detailed agreement to you.

@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto

@ -0,0 +1,114 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.vscode/
*.pkl
exp
exp/*
data
data/*
model
model/*
*/images
*/images/*
*.h5
*.pth

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Jeff-sjtu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,72 @@
import torch
import torch.nn as nn
import torch.utils.data
import torch.utils.data.distributed
import torch.nn.functional as F
import numpy as np
from AlphaPose.SPPE.src.utils.img import flip, shuffleLR
from AlphaPose.SPPE.src.utils.eval import getPrediction
from AlphaPose.SPPE.src.models.FastPose import createModel
import visdom
import time
import sys
import torch._utils
try:
torch._utils._rebuild_tensor_v2
except AttributeError:
def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
tensor.requires_grad = requires_grad
tensor._backward_hooks = backward_hooks
return tensor
torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
class InferenNet(nn.Module):
def __init__(self, kernel_size, dataset):
super(InferenNet, self).__init__()
model = createModel().cuda()
print('Loading pose model from {}'.format('./models/sppe/duc_se.pth'))
sys.stdout.flush()
model.load_state_dict(torch.load('./models/sppe/duc_se.pth'))
model.eval()
self.pyranet = model
self.dataset = dataset
def forward(self, x):
out = self.pyranet(x)
out = out.narrow(1, 0, 17)
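# flip testing: also run the horizontally flipped image through the network,
# swap the left/right joint channels back, un-flip the heatmaps, and average
# with the direct prediction for a more stable output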
flip_out = self.pyranet(flip(x))
flip_out = flip_out.narrow(1, 0, 17)
flip_out = flip(shuffleLR(
flip_out, self.dataset))
out = (flip_out + out) / 2
return out
class InferenNet_fast(nn.Module):
def __init__(self, kernel_size, dataset):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
super(InferenNet_fast, self).__init__()
model = createModel().to(device)
print('Loading pose model from {}'.format('./models/sppe/duc_se.pth'))
model.load_state_dict(torch.load('./models/sppe/duc_se.pth', map_location='cpu'))
model.eval()
self.pyranet = model
self.dataset = dataset
def forward(self, x):
out = self.pyranet(x)
out = out.narrow(1, 0, 17)
return out

@ -0,0 +1,35 @@
import torch.nn as nn
from torch.autograd import Variable
from .layers.SE_Resnet import SEResnet
from .layers.DUC import DUC
from AlphaPose.opt import opt
def createModel():
return FastPose()
class FastPose(nn.Module):
DIM = 128
def __init__(self):
super(FastPose, self).__init__()
self.preact = SEResnet('resnet101')
self.suffle1 = nn.PixelShuffle(2)
self.duc1 = DUC(512, 1024, upscale_factor=2)
self.duc2 = DUC(256, 512, upscale_factor=2)
self.conv_out = nn.Conv2d(
self.DIM, opt.nClasses, kernel_size=3, stride=1, padding=1)
def forward(self, x: Variable):
out = self.preact(x)
out = self.suffle1(out)
out = self.duc1(out)
out = self.duc2(out)
out = self.conv_out(out)
return out

@ -0,0 +1,126 @@
import torch.nn as nn
from .layers.PRM import Residual as ResidualPyramid
from .layers.Residual import Residual as Residual
from torch.autograd import Variable
from opt import opt
from collections import defaultdict
class Hourglass(nn.Module):
def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
super(Hourglass, self).__init__()
self.ResidualUp = ResidualPyramid if n >= 2 else Residual
self.ResidualDown = ResidualPyramid if n >= 3 else Residual
self.depth = n
self.nModules = nModules
self.nFeats = nFeats
self.net_type = net_type
self.B = B
self.C = C
self.inputResH = inputResH
self.inputResW = inputResW
self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
self.low1 = nn.Sequential(
nn.MaxPool2d(2),
self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
)
if n > 1:
self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
else:
self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
self.up2 = nn.UpsamplingNearest2d(scale_factor=2)
self.upperBranch = self.up1
self.lowerBranch = nn.Sequential(
self.low1,
self.low2,
self.low3,
self.up2
)
def _make_residual(self, resBlock, useConv, inputResH, inputResW):
layer_list = []
for i in range(self.nModules):
layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
stride=1, net_type=self.net_type, useConv=useConv,
baseWidth=self.B, cardinality=self.C))
return nn.Sequential(*layer_list)
def forward(self, x: Variable):
up1 = self.upperBranch(x)
up2 = self.lowerBranch(x)
out = up1 + up2
return out
class PyraNet(nn.Module):
def __init__(self):
super(PyraNet, self).__init__()
B, C = opt.baseWidth, opt.cardinality
self.inputResH = opt.inputResH / 4
self.inputResW = opt.inputResW / 4
self.nStack = opt.nStack
self.cnv1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
nn.BatchNorm2d(64),
nn.ReLU(True)
)
self.r1 = nn.Sequential(
ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
nn.MaxPool2d(2)
)
self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
self.preact = nn.Sequential(
self.cnv1,
self.r1,
self.r4,
self.r5
)
self.stack_layers = defaultdict(list)
for i in range(self.nStack):
hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
lin = nn.Sequential(
hg,
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True),
nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0),
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True)
)
tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
self.stack_layers['lin'].append(lin)
self.stack_layers['out'].append(tmpOut)
if i < self.nStack - 1:
lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
self.stack_layers['lin_'].append(lin_)
self.stack_layers['out_'].append(tmpOut_)
# register the collected layers as real submodules; modules kept only in a
# plain dict/list would be invisible to .parameters() and state_dict()
for name, layers in self.stack_layers.items():
self.add_module('stack_' + name, nn.ModuleList(layers))
def forward(self, x: Variable):
out = []
inter = self.preact(x)
for i in range(self.nStack):
lin = self.stack_layers['lin'][i](inter)
tmpOut = self.stack_layers['out'][i](lin)
out.append(tmpOut)
if i < self.nStack - 1:
lin_ = self.stack_layers['lin_'][i](lin)
tmpOut_ = self.stack_layers['out_'][i](tmpOut)
inter = inter + lin_ + tmpOut_
return out
def createModel(**kw):
model = PyraNet()
return model
