You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
exercise_2/3rdparty/colmap-dev/lib/SiftGPU/PyramidGL.cpp

2806 lines
69 KiB

////////////////////////////////////////////////////////////////////////////
// File: PyramidGL.cpp
// Author: Changchang Wu
// Description : implementation of PyramidGL/PyramidNaive/PyramidPackdc .
//
//
// Copyright (c) 2007 University of North Carolina at Chapel Hill
// All Rights Reserved
//
// Permission to use, copy, modify and distribute this software and its
// documentation for educational, research and non-profit purposes, without
// fee, and without a written agreement is hereby granted, provided that the
// above copyright notice and the following paragraph appear in all copies.
//
// The University of North Carolina at Chapel Hill make no representations
// about the suitability of this software for any purpose. It is provided
// 'as is' without express or implied warranty.
//
// Please send BUG REPORTS to ccwu@cs.unc.edu
//
////////////////////////////////////////////////////////////////////////////
#include "GL/glew.h"
#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <fstream>
#include <math.h>
#include <string.h>
using namespace std;
#include "GlobalUtil.h"
#include "GLTexImage.h"
#include "SiftGPU.h"
#include "ShaderMan.h"
#include "SiftPyramid.h"
#include "ProgramGLSL.h"
#include "PyramidGL.h"
#include "FrameBufferObject.h"
#if defined(__SSE__) || _MSC_VER > 1200
#define USE_SSE_FOR_SIFTGPU
#include <xmmintrin.h>
#else
//#pragma message( "SSE optimization off!\n" )
#endif
#define USE_TIMING() double t, t0, tt;
#define OCTAVE_START() if(GlobalUtil::_timingO){ t = t0 = CLOCK(); cout<<"#"<<i+_down_sample_factor<<"\t"; }
#define LEVEL_FINISH() if(GlobalUtil::_timingL){ glFinish(); tt = CLOCK();cout<<(tt-t)<<"\t"; t = CLOCK();}
#define OCTAVE_FINISH() if(GlobalUtil::_timingO)cout<<"|\t"<<(CLOCK()-t0)<<endl;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
PyramidNaive::PyramidNaive(SiftParam& sp): PyramidGL(sp)
{
_texPyramid = NULL;
_auxPyramid = NULL;
}
PyramidNaive::~PyramidNaive()
{
DestroyPyramidData();
}
//align must be 2^i
void PyramidGL:: GetAlignedStorageSize(int num, int align, int &fw, int &fh)
{
if(num <=0)
{
fw = fh = 0;
}else if(num < align*align)
{
fw = align;
fh = (int)ceil(double(num) / fw);
}else if(GlobalUtil::_NarrowFeatureTex)
{
double dn = double(num);
int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/align);
fw = align * nb;
fh = (int)ceil(dn /fw);/**/
}else
{
double dn = double(num);
int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/align);
fh = align * nb;
if(nb <=1)
{
fw = (int)ceil(dn / fh);
//align this dimension to blocksize
fw = ((int) ceil(double(fw) /align))*align;
}else
{
fw = GlobalUtil::_texMaxDim;
}
}
}
void PyramidGL::GetTextureStorageSize(int num, int &fw, int& fh)
{
if(num <=0)
{
fw = fh = 0;
}else if(num <= GlobalUtil::_FeatureTexBlock)
{
fw = num;
fh = 1;
}else if(GlobalUtil::_NarrowFeatureTex)
{
double dn = double(num);
int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/GlobalUtil::_FeatureTexBlock);
fw = GlobalUtil::_FeatureTexBlock * nb;
fh = (int)ceil(dn /fw);/**/
}else
{
double dn = double(num);
int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/GlobalUtil::_FeatureTexBlock);
fh = GlobalUtil::_FeatureTexBlock * nb;
if(nb <=1)
{
fw = (int)ceil(dn / fh);
//align this dimension to blocksize
//
if( fw < fh)
{
int temp = fh;
fh = fw;
fw = temp;
}
}else
{
fw = GlobalUtil::_texMaxDim;
}
}
}
void PyramidNaive::DestroyPyramidData()
{
if(_texPyramid)
{
delete [] _texPyramid;
_texPyramid = NULL;
}
if(_auxPyramid)
{
delete [] _auxPyramid;
_auxPyramid = NULL;
}
}
PyramidGL::PyramidGL(SiftParam &sp):SiftPyramid(sp)
{
_featureTex = NULL;
_orientationTex = NULL;
_descriptorTex = NULL;
_histoPyramidTex = NULL;
//////////////////////////
InitializeContext();
}
PyramidGL::~PyramidGL()
{
DestroyPerLevelData();
DestroySharedData();
ShaderMan::DestroyShaders();
}
void PyramidGL::InitializeContext()
{
GlobalUtil::InitGLParam(0);
if(!GlobalUtil::_GoodOpenGL) return;
//////////////////////////////////////////////
ShaderMan::InitShaderMan(param);
}
void PyramidGL::DestroyPerLevelData()
{
//integers vector to store the feature numbers.
if(_levelFeatureNum)
{
delete [] _levelFeatureNum;
_levelFeatureNum = NULL;
}
//texture used to store features
if( _featureTex)
{
delete [] _featureTex;
_featureTex = NULL;
}
//texture used for multi-orientation
if(_orientationTex)
{
delete [] _orientationTex;
_orientationTex = NULL;
}
int no = _octave_num* param._dog_level_num;
//two sets of vbos used to display the features
if(_featureDisplayVBO)
{
glDeleteBuffers(no, _featureDisplayVBO);
delete [] _featureDisplayVBO;
_featureDisplayVBO = NULL;
}
if( _featurePointVBO)
{
glDeleteBuffers(no, _featurePointVBO);
delete [] _featurePointVBO;
_featurePointVBO = NULL;
}
}
void PyramidGL::DestroySharedData()
{
//histogram reduction
if(_histoPyramidTex)
{
delete[] _histoPyramidTex;
_hpLevelNum = 0;
_histoPyramidTex = NULL;
}
//descriptor storage shared by all levels
if(_descriptorTex)
{
delete [] _descriptorTex;
_descriptorTex = NULL;
}
//cpu reduction buffer.
if(_histo_buffer)
{
delete[] _histo_buffer;
_histo_buffer = 0;
}
}
void PyramidNaive::FitHistogramPyramid()
{
GLTexImage * tex, *htex;
int hist_level_num = _hpLevelNum - _pyramid_octave_first;
tex = GetBaseLevel(_octave_min , DATA_KEYPOINT) + 2;
htex = _histoPyramidTex + hist_level_num - 1;
int w = tex->GetImgWidth() >> 1;
int h = tex->GetImgHeight() >> 1;
for(int k = 0; k <hist_level_num -1; k++, htex--)
{
if(htex->GetImgHeight()!= h || htex->GetImgWidth() != w)
{
htex->SetImageSize(w, h);
htex->ZeroHistoMargin();
}
w = (w + 1)>>1; h = (h + 1) >> 1;
}
}
void PyramidNaive::FitPyramid(int w, int h)
{
//(w, h) <= (_pyramid_width, _pyramid_height);
_pyramid_octave_first = 0;
//
_octave_num = GlobalUtil::_octave_num_default;
int _octave_num_max = GetRequiredOctaveNum(min(w, h));
if(_octave_num < 1 || _octave_num > _octave_num_max)
{
_octave_num = _octave_num_max;
}
int pw = _pyramid_width>>1, ph = _pyramid_height>>1;
while(_pyramid_octave_first + _octave_num < _pyramid_octave_num &&
pw >= w && ph >= h)
{
_pyramid_octave_first++;
pw >>= 1;
ph >>= 1;
}
for(int i = 0; i < _octave_num; i++)
{
GLTexImage * tex = GetBaseLevel(i + _octave_min);
GLTexImage * aux = GetBaseLevel(i + _octave_min, DATA_KEYPOINT);
for(int j = param._level_min; j <= param._level_max; j++, tex++, aux++)
{
tex->SetImageSize(w, h);
aux->SetImageSize(w, h);
}
w>>=1;
h>>=1;
}
}
void PyramidNaive::InitPyramid(int w, int h, int ds)
{
int wp, hp, toobig = 0;
if(ds == 0)
{
_down_sample_factor = 0;
if(GlobalUtil::_octave_min_default>=0)
{
wp = w >> GlobalUtil::_octave_min_default;
hp = h >> GlobalUtil::_octave_min_default;
}else
{
wp = w << (-GlobalUtil::_octave_min_default);
hp = h << (-GlobalUtil::_octave_min_default);
}
_octave_min = _octave_min_default;
}else
{
//must use 0 as _octave_min;
_octave_min = 0;
_down_sample_factor = ds;
w >>= ds;
h >>= ds;
wp = w;
hp = h;
}
while(wp > GlobalUtil::_texMaxDim || hp > GlobalUtil::_texMaxDim)
{
_octave_min ++;
wp >>= 1;
hp >>= 1;
toobig = 1;
}
while(GlobalUtil::_MemCapGPU > 0 && GlobalUtil::_FitMemoryCap && (wp >_pyramid_width || hp > _pyramid_height) &&
max(max(wp, hp), max(_pyramid_width, _pyramid_height)) > 1024 * sqrt(GlobalUtil::_MemCapGPU / 140.0))
{
_octave_min ++;
wp >>= 1;
hp >>= 1;
toobig = 2;
}
if(toobig && GlobalUtil::_verbose)
{
std::cout<<(toobig == 2 ? "[**SKIP OCTAVES**]:\tExceeding Memory Cap (-nomc)\n" :
"[**SKIP OCTAVES**]:\tReaching the dimension limit (-maxd)!\n");
}
if( wp == _pyramid_width && hp == _pyramid_height && _allocated )
{
FitPyramid(wp, hp);
}else if(GlobalUtil::_ForceTightPyramid || _allocated ==0)
{
ResizePyramid(wp, hp);
}
else if( wp > _pyramid_width || hp > _pyramid_height )
{
ResizePyramid(max(wp, _pyramid_width), max(hp, _pyramid_height));
if(wp < _pyramid_width || hp < _pyramid_height) FitPyramid(wp, hp);
}
else
{
//try use the pyramid allocated for large image on small input images
FitPyramid(wp, hp);
}
//select the initial smoothing filter according to the new _octave_min
ShaderMan::SelectInitialSmoothingFilter(_octave_min + _down_sample_factor, param);
}
void PyramidNaive::ResizePyramid( int w, int h)
{
//
unsigned int totalkb = 0;
int _octave_num_new, input_sz;
int i, j;
GLTexImage * tex, *aux;
//
if(_pyramid_width == w && _pyramid_height == h && _allocated) return;
if(w > GlobalUtil::_texMaxDim || h > GlobalUtil::_texMaxDim) return ;
if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<w<<"x"<<h<<endl;
//first octave does not change
_pyramid_octave_first = 0;
//compute # of octaves
input_sz = min(w,h) ;
_pyramid_width = w;
_pyramid_height = h;
//reset to preset parameters
_octave_num_new = GlobalUtil::_octave_num_default;
if(_octave_num_new < 1) _octave_num_new = GetRequiredOctaveNum(input_sz) ;
if(_pyramid_octave_num != _octave_num_new)
{
//destroy the original pyramid if the # of octave changes
if(_octave_num >0)
{
DestroyPerLevelData();
DestroyPyramidData();
}
_pyramid_octave_num = _octave_num_new;
}
_octave_num = _pyramid_octave_num;
int noct = _octave_num;
int nlev = param._level_num;
// //initialize the pyramid
if(_texPyramid==NULL) _texPyramid = new GLTexImage[ noct* nlev ];
if(_auxPyramid==NULL) _auxPyramid = new GLTexImage[ noct* nlev ];
tex = GetBaseLevel(_octave_min, DATA_GAUSSIAN);
aux = GetBaseLevel(_octave_min, DATA_KEYPOINT);
for(i = 0; i< noct; i++)
{
totalkb += (nlev * w * h * 16 / 1024);
for( j = 0; j< nlev; j++, tex++)
{
tex->InitTexture(w, h);
//tex->AttachToFBO(0);
}
//several auxilary textures are not actually required
totalkb += ((nlev - 3) * w * h * 16 /1024);
for( j = 0; j< nlev ; j++, aux++)
{
if(j < 2) continue;
if(j >= nlev - 1) continue;
aux->InitTexture(w, h, 0);
//aux->AttachToFBO(0);
}
w>>=1;
h>>=1;
}
totalkb += ResizeFeatureStorage();
//
_allocated = 1;
if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<(totalkb/1024)<<"MB\n";
}
int PyramidGL::ResizeFeatureStorage()
{
int totalkb = 0;
if(_levelFeatureNum==NULL) _levelFeatureNum = new int[_octave_num * param._dog_level_num];
std::fill(_levelFeatureNum, _levelFeatureNum+_octave_num * param._dog_level_num, 0);
int wmax = GetBaseLevel(_octave_min)->GetDrawWidth();
int hmax = GetBaseLevel(_octave_min)->GetDrawHeight();
int w ,h, i;
//use a fbo to initialize textures..
FrameBufferObject fbo;
//
if(_histo_buffer == NULL) _histo_buffer = new float[((size_t)1) << (2 + 2 * GlobalUtil::_ListGenSkipGPU)];
//histogram for feature detection
int num = (int)ceil(log(double(max(wmax, hmax)))/log(2.0));
if( _hpLevelNum != num)
{
_hpLevelNum = num;
if(GlobalUtil::_ListGenGPU)
{
if(_histoPyramidTex ) delete [] _histoPyramidTex;
_histoPyramidTex = new GLTexImage[_hpLevelNum];
w = h = 1 ;
for(i = 0; i < _hpLevelNum; i++)
{
_histoPyramidTex[i].InitTexture(w, h, 0);
_histoPyramidTex[i].AttachToFBO(0);
w<<=1;
h<<=1;
}
}
}
// (4 ^ (_hpLevelNum) -1 / 3) pixels
if(GlobalUtil::_ListGenGPU) totalkb += (((1 << (2 * _hpLevelNum)) -1) / 3 * 16 / 1024);
//initialize the feature texture
int idx = 0, n = _octave_num * param._dog_level_num;
if(_featureTex==NULL) _featureTex = new GLTexImage[n];
if(GlobalUtil::_MaxOrientation >1 && GlobalUtil::_OrientationPack2==0)
{
if(_orientationTex== NULL) _orientationTex = new GLTexImage[n];
}
for(i = 0; i < _octave_num; i++)
{
GLTexImage * tex = GetBaseLevel(i+_octave_min);
int fmax = int(tex->GetImgWidth()*tex->GetImgHeight()*GlobalUtil::_MaxFeaturePercent);
int fw, fh;
//
if(fmax > GlobalUtil::_MaxLevelFeatureNum) fmax = GlobalUtil::_MaxLevelFeatureNum;
else if(fmax < 32) fmax = 32; //give it at least a space of 32 feature
GetTextureStorageSize(fmax, fw, fh);
for(int j = 0; j < param._dog_level_num; j++, idx++)
{
_featureTex[idx].InitTexture(fw, fh, 0);
_featureTex[idx].AttachToFBO(0);
//
if(_orientationTex)
{
_orientationTex[idx].InitTexture(fw, fh, 0);
_orientationTex[idx].AttachToFBO(0);
}
}
totalkb += fw * fh * 16 * param._dog_level_num * (_orientationTex? 2 : 1) /1024;
}
//this just need be initialized once
if(_descriptorTex==NULL)
{
//initialize feature texture pyramid
wmax = _featureTex->GetImgWidth();
hmax = _featureTex->GetImgHeight();
int nf, ns;
if(GlobalUtil::_DescriptorPPT)
{
//32*4 = 128.
nf = 32 / GlobalUtil::_DescriptorPPT; // how many textures we need
ns = max(4, GlobalUtil::_DescriptorPPT); // how many point in one texture for one descriptor
}else
{
//at least one, resue for visualization and other work
nf = 1; ns = 4;
}
//
_alignment = ns;
//
_descriptorTex = new GLTexImage[nf];
int fw, fh;
GetAlignedStorageSize(hmax*wmax* max(ns, 10), _alignment, fw, fh);
if(fh < hmax ) fh = hmax;
if(fw < wmax ) fw = wmax;
totalkb += ( fw * fh * nf * 16 /1024);
for(i =0; i < nf; i++)
{
_descriptorTex[i].InitTexture(fw, fh);
}
}else
{
int nf = GlobalUtil::_DescriptorPPT? 32 / GlobalUtil::_DescriptorPPT: 1;
totalkb += nf * _descriptorTex[0].GetTexWidth() * _descriptorTex[0].GetTexHeight() * 16 /1024;
}
return totalkb;
}
void PyramidNaive::BuildPyramid(GLTexInput *input)
{
USE_TIMING();
GLTexPacked * tex;
FilterProgram** filter;
FrameBufferObject fbo;
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
input->FitTexViewPort();
for (int i = _octave_min; i < _octave_min + _octave_num; i++)
{
tex = (GLTexPacked*)GetBaseLevel(i);
filter = ShaderMan::s_bag->f_gaussian_step;
OCTAVE_START();
if( i == _octave_min )
{
if(i < 0) TextureUpSample(tex, input, 1<<(-i) );
else TextureDownSample(tex, input, 1<<i);
ShaderMan::FilterInitialImage(tex, NULL);
}else
{
TextureDownSample(tex, GetLevelTexture(i-1, param._level_ds));
ShaderMan::FilterSampledImage(tex, NULL);
}
LEVEL_FINISH();
for(int j = param._level_min + 1; j <= param._level_max ; j++, tex++, filter++)
{
// filtering
ShaderMan::FilterImage(*filter, tex+1, tex, NULL);
LEVEL_FINISH();
}
OCTAVE_FINISH();
}
if(GlobalUtil::_timingS) glFinish();
UnloadProgram();
}
GLTexImage* PyramidNaive::GetLevelTexture(int octave, int level, int dataName)
{
if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
switch(dataName)
{
case DATA_GAUSSIAN:
case DATA_DOG:
case DATA_GRAD:
case DATA_ROT:
return _texPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num + (level - param._level_min);
case DATA_KEYPOINT:
return _auxPyramid + (_pyramid_octave_first + octave - _octave_min) * param._level_num + (level - param._level_min);
default:
return NULL;
}
}
GLTexImage* PyramidNaive::GetLevelTexture(int octave, int level)
{
return _texPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num
+ (level - param._level_min);
}
//in the packed implementation
// DATA_GAUSSIAN, DATA_DOG, DATA_GAD will be stored in different textures.
GLTexImage* PyramidNaive::GetBaseLevel(int octave, int dataName)
{
if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
switch(dataName)
{
case DATA_GAUSSIAN:
case DATA_DOG:
case DATA_GRAD:
case DATA_ROT:
return _texPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num;
case DATA_KEYPOINT:
return _auxPyramid + (_pyramid_octave_first + octave - _octave_min) * param._level_num;
default:
return NULL;
}
}
void PyramidNaive::ComputeGradient()
{
int i, j;
double ts, t1;
GLTexImage * tex;
FrameBufferObject fbo;
if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
for ( i = _octave_min; i < _octave_min + _octave_num; i++)
{
for( j = param._level_min + 1 ; j < param._level_max ; j++)
{
tex = GetLevelTexture(i, j);
tex->FitTexViewPort();
tex->AttachToFBO(0);
tex->BindTex();
ShaderMan::UseShaderGradientPass();
tex->DrawQuadMT4();
}
}
if(GlobalUtil::_timingS && GlobalUtil::_verbose)
{
glFinish();
t1 = CLOCK();
std::cout<<"<Compute Gradient>\t"<<(t1-ts)<<"\n";
}
UnloadProgram();
GLTexImage::UnbindMultiTex(3);
fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
}
//keypoint detection with subpixel localization
void PyramidNaive::DetectKeypointsEX()
{
int i, j;
double t0, t, ts, t1, t2;
GLTexImage * tex, *aux;
FrameBufferObject fbo;
if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
//extra gradient data required for visualization
int gradient_only_levels[2] = {param._level_min +1, param._level_max};
int n_gradient_only_level = GlobalUtil::_UseSiftGPUEX ? 2 : 1;
for ( i = _octave_min; i < _octave_min + _octave_num; i++)
{
for( j =0; j < n_gradient_only_level ; j++)
{
tex = GetLevelTexture(i, gradient_only_levels[j]);
tex->FitTexViewPort();
tex->AttachToFBO(0);
tex->BindTex();
ShaderMan::UseShaderGradientPass();
tex->DrawQuadMT4();
}
}
if(GlobalUtil::_timingS && GlobalUtil::_verbose)
{
glFinish();
t1 = CLOCK();
}
GLenum buffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
glDrawBuffers(2, buffers);
for ( i = _octave_min; i < _octave_min + _octave_num; i++)
{
if(GlobalUtil::_timingO)
{
t0 = CLOCK();
std::cout<<"#"<<(i + _down_sample_factor)<<"\t";
}
tex = GetBaseLevel(i) + 2;
aux = GetBaseLevel(i, DATA_KEYPOINT) +2;
aux->FitTexViewPort();
for( j = param._level_min + 2; j < param._level_max ; j++, aux++, tex++)
{
if(GlobalUtil::_timingL)t = CLOCK();
tex->AttachToFBO(0);
aux->AttachToFBO(1);
glActiveTexture(GL_TEXTURE0);
tex->BindTex();
glActiveTexture(GL_TEXTURE1);
(tex+1)->BindTex();
glActiveTexture(GL_TEXTURE2);
(tex-1)->BindTex();
ShaderMan::UseShaderKeypoint((tex+1)->GetTexID(), (tex-1)->GetTexID());
aux->DrawQuadMT8();
if(GlobalUtil::_timingL)
{
glFinish();
std::cout<<(CLOCK()-t)<<"\t";
}
tex->DetachFBO(0);
aux->DetachFBO(1);
}
if(GlobalUtil::_timingO)
{
std::cout<<"|\t"<<(CLOCK()-t0)<<"\n";
}
}
if(GlobalUtil::_timingS)
{
glFinish();
t2 = CLOCK();
if(GlobalUtil::_verbose)
std::cout <<"<Get Keypoints .. >\t"<<(t2-t1)<<"\n"
<<"<Extra Gradient.. >\t"<<(t1-ts)<<"\n";
}
UnloadProgram();
GLTexImage::UnbindMultiTex(3);
fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
}
void PyramidNaive::GenerateFeatureList(int i, int j)
{
int hist_level_num = _hpLevelNum - _pyramid_octave_first;
int hist_skip_gpu = GlobalUtil::_ListGenSkipGPU;
int idx = i * param._dog_level_num + j;
GLTexImage* htex, *ftex, *tex;
tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + 2 + j;
ftex = _featureTex + idx;
htex = _histoPyramidTex + hist_level_num - 1 - i;
///
glActiveTexture(GL_TEXTURE0);
tex->BindTex();
htex->AttachToFBO(0);
int tight = ((htex->GetImgWidth() * 2 == tex->GetImgWidth() -1 || tex->GetTexWidth() == tex->GetImgWidth()) &&
(htex->GetImgHeight() *2 == tex->GetImgHeight()-1 || tex->GetTexHeight() == tex->GetImgHeight()));
ShaderMan::UseShaderGenListInit(tex->GetImgWidth(), tex->GetImgHeight(), tight);
htex->FitTexViewPort();
//this uses the fact that no feature is on the edge.
htex->DrawQuadReduction();
//reduction..
htex--;
//this part might have problems on several GPUS
//because the output of one pass is the input of the next pass
//need to call glFinish to make it right
//but too much glFinish makes it slow
for(int k = 0; k <hist_level_num - i - 1 - hist_skip_gpu; k++, htex--)
{
htex->AttachToFBO(0);
htex->FitTexViewPort();
(htex+1)->BindTex();
ShaderMan::UseShaderGenListHisto();
htex->DrawQuadReduction();
}
//
if(hist_skip_gpu == 0)
{
//read back one pixel
float fn[4], fcount;
glReadPixels(0, 0, 1, 1, GL_RGBA , GL_FLOAT, fn);
fcount = (fn[0] + fn[1] + fn[2] + fn[3]);
if(fcount < 1) fcount = 0;
_levelFeatureNum[ idx] = (int)(fcount);
SetLevelFeatureNum(idx, (int)fcount);
_featureNum += int(fcount);
//
if(fcount < 1.0) return;
///generate the feature texture
htex= _histoPyramidTex;
htex->BindTex();
//first pass
ftex->AttachToFBO(0);
if(GlobalUtil::_MaxOrientation>1)
{
//this is very important...
ftex->FitRealTexViewPort();
glClear(GL_COLOR_BUFFER_BIT);
glFinish();
}else
{
ftex->FitTexViewPort();
//glFinish();
}
ShaderMan::UseShaderGenListStart((float)ftex->GetImgWidth(), htex->GetTexID());
ftex->DrawQuad();
//make sure it finishes before the next step
ftex->DetachFBO(0);
//pass on each pyramid level
htex++;
}else
{
int tw = htex[1].GetDrawWidth(), th = htex[1].GetDrawHeight();
int fc = 0;
glReadPixels(0, 0, tw, th, GL_RGBA , GL_FLOAT, _histo_buffer);
_keypoint_buffer.resize(0);
for(int y = 0, pos = 0; y < th; y++)
{
for(int x= 0; x < tw; x++)
{
for(int c = 0; c < 4; c++, pos++)
{
int ss = (int) _histo_buffer[pos];
if(ss == 0) continue;
float ft[4] = {2 * x + (c%2? 1.5f: 0.5f), 2 * y + (c>=2? 1.5f: 0.5f), 0, 1 };
for(int t = 0; t < ss; t++)
{
ft[2] = (float) t;
_keypoint_buffer.insert(_keypoint_buffer.end(), ft, ft+4);
}
fc += (int)ss;
}
}
}
_levelFeatureNum[ idx] = fc;
SetLevelFeatureNum(idx, fc);
if(fc == 0) return;
_featureNum += fc;
/////////////////////
ftex->AttachToFBO(0);
if(GlobalUtil::_MaxOrientation>1)
{
ftex->FitRealTexViewPort();
glClear(GL_COLOR_BUFFER_BIT);
glFlush();
}else
{
ftex->FitTexViewPort();
glFlush();
}
_keypoint_buffer.resize(ftex->GetDrawWidth() * ftex->GetDrawHeight()*4, 0);
///////////
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, ftex->GetDrawWidth(),
ftex->GetDrawHeight(), GL_RGBA, GL_FLOAT, &_keypoint_buffer[0]);
htex += 2;
}
for(int lev = 1 + hist_skip_gpu; lev < hist_level_num - i; lev++, htex++)
{
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
ftex->AttachToFBO(0);
glActiveTexture(GL_TEXTURE1);
htex->BindTex();
ShaderMan::UseShaderGenListStep(ftex->GetTexID(), htex->GetTexID());
ftex->DrawQuad();
ftex->DetachFBO(0);
}
GLTexImage::UnbindMultiTex(2);
}
//generate feature list on GPU
void PyramidNaive::GenerateFeatureList()
{
//generate the histogram0pyramid
FrameBufferObject fbo;
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
double t1, t2;
int ocount, reverse = (GlobalUtil::_TruncateMethod == 1);
_featureNum = 0;
FitHistogramPyramid();
//for(int i = 0, idx = 0; i < _octave_num; i++)
FOR_EACH_OCTAVE(i, reverse)
{
//output
if(GlobalUtil::_timingO)
{
t1= CLOCK();
ocount = 0;
std::cout<<"#"<<i+_octave_min + _down_sample_factor<<":\t";
}
//for(int j = 0; j < param._dog_level_num; j++, idx++)
FOR_EACH_LEVEL(j, reverse)
{
if(GlobalUtil::_TruncateMethod && GlobalUtil::_FeatureCountThreshold > 0
&& _featureNum > GlobalUtil::_FeatureCountThreshold)
{
_levelFeatureNum[i * param._dog_level_num + j] = 0;
continue;
}else
{
GenerateFeatureList(i, j);
if(GlobalUtil::_timingO)
{
int idx = i * param._dog_level_num + j;
std::cout<< _levelFeatureNum[idx] <<"\t";
ocount += _levelFeatureNum[idx];
}
}
}
if(GlobalUtil::_timingO)
{
t2 = CLOCK();
std::cout << "| \t" << int(ocount) << " :\t(" << (t2 - t1) << ")\n";
}
}
if(GlobalUtil::_timingS)glFinish();
if(GlobalUtil::_verbose)
{
std::cout<<"#Features:\t"<<_featureNum<<"\n";
}
}
void PyramidGL::GenerateFeatureDisplayVBO()
{
//use a big VBO to save all the SIFT box vertices
int w, h, esize; GLint bsize;
int nvbo = _octave_num * param._dog_level_num;
//initialize the vbos
if(_featureDisplayVBO==NULL)
{
_featureDisplayVBO = new GLuint[nvbo];
glGenBuffers( nvbo, _featureDisplayVBO );
}
if(_featurePointVBO == NULL)
{
_featurePointVBO = new GLuint[nvbo];
glGenBuffers(nvbo, _featurePointVBO);
}
FrameBufferObject fbo;
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
glActiveTexture(GL_TEXTURE0);
//
GLTexImage & tempTex = *_descriptorTex;
//
for(int i = 0, idx = 0; i < _octave_num; i++)
{
for(int j = 0; j < param._dog_level_num; j ++, idx++)
{
GLTexImage * ftex = _featureTex + idx;
if(_levelFeatureNum[idx]<=0)continue;
//copy the texture into vbo
fbo.BindFBO();
tempTex.AttachToFBO(0);
ftex->BindTex();
ftex->FitTexViewPort();
ShaderMan::UseShaderCopyKeypoint();
ftex->DrawQuad();
glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, _featurePointVBO[ idx]);
glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
esize = ftex->GetImgHeight() * ftex->GetImgWidth()*sizeof(float) *4;
//increase size when necessary
if(bsize < esize)
{
glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2 , NULL, GL_STATIC_DRAW_ARB);
glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
}
//read back if we have enough buffer
if(bsize >= esize) glReadPixels(0, 0, ftex->GetImgWidth(), ftex->GetImgHeight(), GL_RGBA, GL_FLOAT, 0);
else glBufferData(GL_PIXEL_PACK_BUFFER_ARB, 0, NULL, GL_STATIC_DRAW_ARB);
glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
//box display vbo
int count = _levelFeatureNum[idx]* 10;
GetAlignedStorageSize(count, _alignment, w, h);
w = (int)ceil(double(count)/ h);
//input
fbo.BindFBO();
ftex->BindTex();
//output
tempTex.AttachToFBO(0);
GlobalUtil::FitViewPort(w, h);
//shader
ShaderMan::UseShaderGenVBO( (float)ftex->GetImgWidth(), (float) w,
param.GetLevelSigma(j + param._level_min + 1));
GLTexImage::DrawQuad(0, (float)w, 0, (float)h);
//
glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, _featureDisplayVBO[ idx]);
glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
esize = w*h * sizeof(float)*4;
//increase size when necessary
if(bsize < esize)
{
glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2, NULL, GL_STATIC_DRAW_ARB);
glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
}
//read back if we have enough buffer
if(bsize >= esize) glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, 0);
else glBufferData(GL_PIXEL_PACK_BUFFER_ARB, 0, NULL, GL_STATIC_DRAW_ARB);
glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
}
}
glReadBuffer(GL_NONE);
glFinish();
}
void PyramidNaive::GetFeatureOrientations()
{
GLTexImage * gtex;
GLTexImage * stex = NULL;
GLTexImage * ftex = _featureTex;
GLTexImage * otex = _orientationTex;
int sid = 0;
int * count = _levelFeatureNum;
float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
FrameBufferObject fbo;
if(_orientationTex)
{
GLenum buffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
glDrawBuffers(2, buffers);
}else
{
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
}
for(int i = 0; i < _octave_num; i++)
{
gtex = GetLevelTexture(i+_octave_min, param._level_min + 1);
if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
stex = GetBaseLevel(i+_octave_min, DATA_KEYPOINT) + 2;
for(int j = 0; j < param._dog_level_num; j++, ftex++, otex++, count++, gtex++, stex++)
{
if(*count<=0)continue;
sigma = param.GetLevelSigma(j+param._level_min+1);
//
ftex->FitTexViewPort();
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glActiveTexture(GL_TEXTURE1);
gtex->BindTex();
//
ftex->AttachToFBO(0);
if(_orientationTex) otex->AttachToFBO(1);
if(!_existing_keypoints && (GlobalUtil::_SubpixelLocalization|| GlobalUtil::_KeepExtremumSign))
{
glActiveTexture(GL_TEXTURE2);
stex->BindTex();
sid = * stex;
}
ShaderMan::UseShaderOrientation(gtex->GetTexID(),
gtex->GetImgWidth(), gtex->GetImgHeight(),
sigma, sid, sigma_step, _existing_keypoints);
ftex->DrawQuad();
// glFinish();
}
}
GLTexImage::UnbindMultiTex(3);
if(GlobalUtil::_timingS)glFinish();
if(_orientationTex) fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
}
//to compare with GPU feature list generation
void PyramidNaive::GenerateFeatureListCPU()
{
FrameBufferObject fbo;
_featureNum = 0;
GLTexImage * tex = GetBaseLevel(_octave_min);
float * mem = new float [tex->GetTexWidth()*tex->GetTexHeight()];
vector<float> list;
int idx = 0;
for(int i = 0; i < _octave_num; i++)
{
for(int j = 0; j < param._dog_level_num; j++, idx++)
{
tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + j + 2;
tex->BindTex();
glGetTexImage(GlobalUtil::_texTarget, 0, GL_RED, GL_FLOAT, mem);
//tex->AttachToFBO(0);
//tex->FitTexViewPort();
//glReadPixels(0, 0, tex->GetTexWidth(), tex->GetTexHeight(), GL_RED, GL_FLOAT, mem);
//
//make a list of
list.resize(0);
float * p = mem;
int fcount = 0 ;
for(int k = 0; k < tex->GetTexHeight(); k++)
{
for( int m = 0; m < tex->GetTexWidth(); m ++, p++)
{
if(*p==0)continue;
if(m ==0 || k ==0 || k >= tex->GetImgHeight() -1 || m >= tex->GetImgWidth() -1 ) continue;
list.push_back(m+0.5f);
list.push_back(k+0.5f);
list.push_back(0);
list.push_back(1);
fcount ++;
}
}
if(fcount==0)continue;
GLTexImage * ftex = _featureTex+idx;
_levelFeatureNum[idx] = (fcount);
SetLevelFeatureNum(idx, fcount);
_featureNum += (fcount);
int fw = ftex->GetImgWidth();
int fh = ftex->GetImgHeight();
list.resize(4*fh*fw);
ftex->BindTex();
ftex->AttachToFBO(0);
// glTexImage2D(GlobalUtil::_texTarget, 0, GlobalUtil::_iTexFormat, fw, fh, 0, GL_BGRA, GL_FLOAT, &list[0]);
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
//
}
}
GLTexImage::UnbindTex();
delete[] mem;
if(GlobalUtil::_verbose)
{
std::cout<<"#Features:\t"<<_featureNum<<"\n";
}
}
#define FEATURELIST_USE_PBO
void PyramidGL::ReshapeFeatureListCPU()
{
//make a compact feature list, each with only one orientation
//download orientations and the featue list
//reshape it and upload it
FrameBufferObject fbo;
int i, szmax =0, sz;
int n = param._dog_level_num*_octave_num;
for( i = 0; i < n; i++)
{
sz = _featureTex[i].GetImgWidth() * _featureTex[i].GetImgHeight();
if(sz > szmax ) szmax = sz;
}
float * buffer = new float[szmax*24];
float * buffer1 = buffer;
float * buffer2 = buffer + szmax*4;
float * buffer3 = buffer + szmax*8;
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
#ifdef FEATURELIST_USE_PBO
GLuint ListUploadPBO;
glGenBuffers(1, &ListUploadPBO);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, ListUploadPBO);
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, szmax * 8 * sizeof(float), NULL, GL_STREAM_DRAW);
#endif
_featureNum = 0;
#ifdef NO_DUPLICATE_DOWNLOAD
const double twopi = 2.0*3.14159265358979323846;
_keypoint_buffer.resize(0);
float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
if(_down_sample_factor>0) os *= float(1<<_down_sample_factor);
float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
#endif
for(i = 0; i < n; i++)
{
if(_levelFeatureNum[i]==0)continue;
_featureTex[i].AttachToFBO(0);
_featureTex[i].FitTexViewPort();
glReadPixels(0, 0, _featureTex[i].GetImgWidth(), _featureTex[i].GetImgHeight(),GL_RGBA, GL_FLOAT, buffer1);
int fcount =0, ocount;
float * src = buffer1;
float * orientation = buffer2;
float * des = buffer3;
if(GlobalUtil::_OrientationPack2 == 0)
{
//read back orientations from another texture
_orientationTex[i].AttachToFBO(0);
glReadPixels(0, 0, _orientationTex[i].GetImgWidth(), _orientationTex[i].GetImgHeight(),GL_RGBA, GL_FLOAT, buffer2);
//make the feature list
for(int j = 0; j < _levelFeatureNum[i]; j++, src+=4, orientation+=4)
{
if(_existing_keypoints)
{
des[0] = src[0];
des[1] = src[1];
des[2] = orientation[0];
des[3] = src[3];
fcount++;
des += 4;
}else
{
ocount = (int)src[2];
for(int k = 0 ; k < ocount; k++, des+=4)
{
des[0] = src[0];
des[1] = src[1];
des[2] = orientation[k];
des[3] = src[3];
fcount++;
}
}
}
}else
{
_featureTex[i].DetachFBO(0);
const static double factor = 2.0*3.14159265358979323846/65535.0;
for(int j = 0; j < _levelFeatureNum[i]; j++, src+=4)
{
unsigned short * orientations = (unsigned short*) (&src[2]);
if(_existing_keypoints)
{
des[0] = src[0];
des[1] = src[1];
des[2] = float( factor* orientations[0]);
des[3] = src[3];
fcount++;
des += 4;
}else
{
if(orientations[0] != 65535)
{
des[0] = src[0];
des[1] = src[1];
des[2] = float( factor* orientations[0]);
des[3] = src[3];
fcount++;
des += 4;
if(orientations[1] != 65535)
{
des[0] = src[0];
des[1] = src[1];
des[2] = float(factor* orientations[1]);
des[3] = src[3];
fcount++;
des += 4;
}
}
}
}
}
if (fcount == 0){ _levelFeatureNum[i] = 0; continue; }
//texture size --------------
SetLevelFeatureNum(i, fcount);
int nfw = _featureTex[i].GetImgWidth();
int nfh = _featureTex[i].GetImgHeight();
int sz = nfh * nfw;
if(sz > fcount) memset(des, 0, sizeof(float) * (sz - fcount) * 4);
#ifndef FEATURELIST_USE_PBO
_featureTex[i].BindTex();
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, nfw, nfh, GL_RGBA, GL_FLOAT, buffer3);
_featureTex[i].UnbindTex();
#else
float* mem = (float*) glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
memcpy(mem, buffer3, sz * 4 * sizeof(float) );
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
_featureTex[i].BindTex();
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, nfw, nfh, GL_RGBA, GL_FLOAT, 0);
_featureTex[i].UnbindTex();
#endif
#ifdef NO_DUPLICATE_DOWNLOAD
if(fcount > 0)
{
float oss = os * (1 << (i / param._dog_level_num));
_keypoint_buffer.resize((_featureNum + fcount) * 4);
float* ds = &_keypoint_buffer[_featureNum * 4];
float* fs = buffer3;
for(int k = 0; k < fcount; k++, ds+=4, fs+=4)
{
ds[0] = oss*(fs[0]-0.5f) + offset; //x
ds[1] = oss*(fs[1]-0.5f) + offset; //y
ds[3] = (float)fmod(twopi-fs[2], twopi); //orientation, mirrored
ds[2] = oss*fs[3]; //scale
}
}
#endif
_levelFeatureNum[i] = fcount;
_featureNum += fcount;
}
delete[] buffer;
if(GlobalUtil::_verbose)
{
std::cout<<"#Features MO:\t"<<_featureNum<<endl;
}
///////////////////////////////////
#ifdef FEATURELIST_USE_PBO
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
glDeleteBuffers(1, &ListUploadPBO);
#endif
}
inline void PyramidGL::SetLevelFeatureNum(int idx, int fcount)
{
int fw, fh;
GLTexImage * ftex = _featureTex + idx;
//set feature texture size. normally fh will be one
GetTextureStorageSize(fcount, fw, fh);
if(fcount > ftex->GetTexWidth()*ftex->GetTexHeight())
{
ftex->InitTexture(fw, fh, 0);
if(_orientationTex) _orientationTex[idx].InitTexture(fw, fh, 0);
}
if(GlobalUtil::_NarrowFeatureTex)
fh = fcount ==0? 0:(int)ceil(double(fcount)/fw);
else
fw = fcount ==0? 0:(int)ceil(double(fcount)/fh);
ftex->SetImageSize(fw, fh);
if(_orientationTex) _orientationTex[idx].SetImageSize(fw, fh);
}
void PyramidGL::CleanUpAfterSIFT()
{
GLTexImage::UnbindMultiTex(3);
ShaderMan::UnloadProgram();
FrameBufferObject::DeleteGlobalFBO();
GlobalUtil::CleanupOpenGL();
}
void PyramidNaive::GetSimplifiedOrientation()
{
//
int idx = 0;
// int n = _octave_num * param._dog_level_num;
float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
GLTexImage * ftex = _featureTex;
FrameBufferObject fbo;
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
for(int i = 0; i < _octave_num; i++)
{
GLTexImage *gtex = GetLevelTexture(i+_octave_min, 2+param._level_min);
for(int j = 0; j < param._dog_level_num; j++, ftex++, gtex++, idx ++)
{
if(_levelFeatureNum[idx]<=0)continue;
sigma = param.GetLevelSigma(j+param._level_min+1);
//
ftex->AttachToFBO(0);
ftex->FitTexViewPort();
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glActiveTexture(GL_TEXTURE1);
gtex->BindTex();
ShaderMan::UseShaderSimpleOrientation(gtex->GetTexID(), sigma, sigma_step);
ftex->DrawQuad();
}
}
GLTexImage::UnbindMultiTex(2);
}
#ifdef USE_SSE_FOR_SIFTGPU
static inline float dotproduct_128d(float * p)
{
float z = 0.0f;
__m128 sse =_mm_load_ss(&z);
float* pf = (float*) (&sse);
for( int i = 0; i < 32; i++, p+=4)
{
__m128 ps = _mm_loadu_ps(p);
sse = _mm_add_ps(sse, _mm_mul_ps(ps, ps));
}
return pf[0] + pf[1] + pf[2] + pf[3];
}
static inline void multiply_and_truncate_128d(float* p, float m)
{
float z = 0.2f;
__m128 t = _mm_load_ps1(&z);
__m128 r = _mm_load_ps1(&m);
for(int i = 0; i < 32; i++, p+=4)
{
__m128 ps = _mm_loadu_ps(p);
_mm_storeu_ps(p, _mm_min_ps(_mm_mul_ps(ps, r), t));
}
}
static inline void multiply_128d(float* p, float m)
{
__m128 r = _mm_load_ps1(&m);
for(int i = 0; i < 32; i++, p+=4)
{
__m128 ps = _mm_loadu_ps(p);
_mm_storeu_ps(p, _mm_mul_ps(ps, r));
}
}
#endif
inline void PyramidGL::NormalizeDescriptor(int num, float*pd)
{
#ifdef USE_SSE_FOR_SIFTGPU
for(int k = 0; k < num; k++, pd +=128)
{
float sq;
//normalize and truncate to .2
sq = dotproduct_128d(pd); sq = 1.0f / sqrtf(sq);
multiply_and_truncate_128d(pd, sq);
//renormalize
sq = dotproduct_128d(pd); sq = 1.0f / sqrtf(sq);
multiply_128d(pd, sq);
}
#else
//descriptor normalization runs on cpu for OpenGL implemenations
for(int k = 0; k < num; k++, pd +=128)
{
int v;
float* ppd, sq = 0;
//int v;
//normalize
ppd = pd;
for(v = 0 ; v < 128; v++, ppd++) sq += (*ppd)*(*ppd);
sq = 1.0f / sqrtf(sq);
//truncate to .2
ppd = pd;
for(v = 0; v < 128; v ++, ppd++) *ppd = min(*ppd*sq, 0.2f);
//renormalize
ppd = pd; sq = 0;
for(v = 0; v < 128; v++, ppd++) sq += (*ppd)*(*ppd);
sq = 1.0f / sqrtf(sq);
ppd = pd;
for(v = 0; v < 128; v ++, ppd++) *ppd = *ppd*sq;
}
#endif
}
inline void PyramidGL::InterlaceDescriptorF2(int w, int h, float* buf, float* pd, int step)
{
/*
if(GlobalUtil::_DescriptorPPR == 8)
{
const int dstep = w * 128;
float* pp1 = buf;
float* pp2 = buf + step;
for(int u = 0; u < h ; u++, pd+=dstep)
{
int v;
float* ppd = pd;
for(v= 0; v < w; v++)
{
for(int t = 0; t < 8; t++)
{
*ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;
*ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;
}
ppd += 64;
}
ppd = pd + 64;
for(v= 0; v < w; v++)
{
for(int t = 0; t < 8; t++)
{
*ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;
*ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;
}
ppd += 64;
}
}
}else */
if(GlobalUtil::_DescriptorPPR == 8)
{
//interlace
for(int k = 0; k < 2; k++)
{
float* pp = buf + k * step;
float* ppd = pd + k * 4;
for(int u = 0; u < h ; u++)
{
int v;
for(v= 0; v < w; v++)
{
for(int t = 0; t < 8; t++)
{
ppd[0] = pp[0];
ppd[1] = pp[1];
ppd[2] = pp[2];
ppd[3] = pp[3];
ppd += 8;
pp+= 4;
}
ppd += 64;
}
ppd += ( 64 - 128 * w );
for(v= 0; v < w; v++)
{
for(int t = 0; t < 8; t++)
{
ppd[0] = pp[0];
ppd[1] = pp[1];
ppd[2] = pp[2];
ppd[3] = pp[3];
ppd += 8;
pp+= 4;
}
ppd += 64;
}
ppd -=64;
}
}
}else if(GlobalUtil::_DescriptorPPR == 4)
{
}
}
void PyramidGL::GetFeatureDescriptors()
{
//descriptors...
float sigma;
int idx, i, j, k, w, h;
int ndf = 32 / GlobalUtil::_DescriptorPPT; //number of textures
int block_width = GlobalUtil::_DescriptorPPR;
int block_height = GlobalUtil::_DescriptorPPT/GlobalUtil::_DescriptorPPR;
float* pd = &_descriptor_buffer[0], * pbuf = NULL;
vector<float>read_buffer, descriptor_buffer2;
//use another buffer, if we need to re-order the descriptors
if(_keypoint_index.size() > 0)
{
descriptor_buffer2.resize(_descriptor_buffer.size());
pd = &descriptor_buffer2[0];
}
FrameBufferObject fbo;
GLTexImage * gtex, *otex, * ftex;
GLenum buffers[8] = {
GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT ,
GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT ,
GL_COLOR_ATTACHMENT4_EXT, GL_COLOR_ATTACHMENT5_EXT ,
GL_COLOR_ATTACHMENT6_EXT, GL_COLOR_ATTACHMENT7_EXT ,
};
glDrawBuffers(ndf, buffers);
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
for( i = 0, idx = 0, ftex = _featureTex; i < _octave_num; i++)
{
gtex = GetBaseLevel(i + _octave_min, DATA_GRAD) + 1;
otex = GetBaseLevel(i + _octave_min, DATA_ROT) + 1;
for( j = 0; j < param._dog_level_num; j++, ftex++, idx++, gtex++, otex++)
{
if(_levelFeatureNum[idx]==0)continue;
sigma = IsUsingRectDescription()? 0 : param.GetLevelSigma(j+param._level_min+1);
int count = _levelFeatureNum[idx] * block_width;
GetAlignedStorageSize(count, block_width, w, h);
h = ((int)ceil(double(count) / w)) * block_height;
//not enought space for holding the descriptor data
if(w > _descriptorTex[0].GetTexWidth() || h > _descriptorTex[0].GetTexHeight())
{
for(k = 0; k < ndf; k++)_descriptorTex[k].InitTexture(w, h);
}
for(k = 0; k < ndf; k++) _descriptorTex[k].AttachToFBO(k);
GlobalUtil::FitViewPort(w, h);
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glActiveTexture(GL_TEXTURE1);
gtex->BindTex();
if(otex!=gtex)
{
glActiveTexture(GL_TEXTURE2);
otex->BindTex();
}
ShaderMan::UseShaderDescriptor(gtex->GetTexID(), otex->GetTexID(),
w, ftex->GetImgWidth(), gtex->GetImgWidth(), gtex->GetImgHeight(), sigma);
GLTexImage::DrawQuad(0, (float)w, 0, (float)h);
//read back float format descriptors and do normalization on CPU
int step = w*h*4;
if((unsigned int)step*ndf > read_buffer.size())
{
read_buffer.resize(ndf*step);
}
pbuf = &read_buffer[0];
//read back
for(k = 0; k < ndf; k++, pbuf+=step)
{
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT + k);
if(GlobalUtil::_IsNvidia || w * h <= 16384) //were
{
glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, pbuf);
}else
{
int hstep = 16384 / w;
for(int kk = 0; kk < h; kk += hstep)
glReadPixels(0, kk, w, min(hstep, h - kk), GL_RGBA, GL_FLOAT, pbuf + w * kk * 4);
}
}
//the following two steps run on cpu, so better cpu better speed
//and release version can be a lot faster than debug version
//interlace data on the two texture to get the descriptor
InterlaceDescriptorF2(w / block_width, h / block_height, &read_buffer[0], pd, step);
//need to do normalization
//the new version uses SSE to speed up this part
if(GlobalUtil::_NormalizedSIFT) NormalizeDescriptor(_levelFeatureNum[idx], pd);
pd += 128*_levelFeatureNum[idx];
glReadBuffer(GL_NONE);
}
}
//finally, put the descriptor back to their original order for existing keypoint list.
if(_keypoint_index.size() > 0)
{
for(i = 0; i < _featureNum; ++i)
{
int index = _keypoint_index[i];
memcpy(&_descriptor_buffer[index*128], &descriptor_buffer2[i*128], 128 * sizeof(float));
}
}
////////////////////////
GLTexImage::UnbindMultiTex(3);
glDrawBuffer(GL_NONE);
ShaderMan::UnloadProgram();
if(GlobalUtil::_timingS)glFinish();
for(i = 0; i < ndf; i++) fbo.UnattachTex(GL_COLOR_ATTACHMENT0_EXT +i);
}
void PyramidGL::DownloadKeypoints()
{
const double twopi = 2.0*3.14159265358979323846;
int idx = 0;
float * buffer = &_keypoint_buffer[0];
vector<float> keypoint_buffer2;
//use a different keypoint buffer when processing with an exisint features list
//without orientation information.
if(_keypoint_index.size() > 0)
{
keypoint_buffer2.resize(_keypoint_buffer.size());
buffer = &keypoint_buffer2[0];
}
float * p = buffer, *ps, sigma;
GLTexImage * ftex = _featureTex;
FrameBufferObject fbo;
ftex->FitRealTexViewPort();
/////////////////////
float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
if(_down_sample_factor>0) os *= float(1<<_down_sample_factor);
float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
/////////////////////
for(int i = 0; i < _octave_num; i++, os *= 2.0f)
{
for(int j = 0; j < param._dog_level_num; j++, idx++, ftex++)
{
if(_levelFeatureNum[idx]>0)
{
ftex->AttachToFBO(0);
glReadPixels(0, 0, ftex->GetImgWidth(), ftex->GetImgHeight(),GL_RGBA, GL_FLOAT, p);
ps = p;
for(int k = 0; k < _levelFeatureNum[idx]; k++, ps+=4)
{
ps[0] = os*(ps[0]-0.5f) + offset; //x
ps[1] = os*(ps[1]-0.5f) + offset; //y
sigma = os*ps[3];
ps[3] = (float)fmod(twopi-ps[2], twopi); //orientation, mirrored
ps[2] = sigma; //scale
}
p+= 4* _levelFeatureNum[idx];
}
}
}
//put the feature into their original order
if(_keypoint_index.size() > 0)
{
for(int i = 0; i < _featureNum; ++i)
{
int index = _keypoint_index[i];
memcpy(&_keypoint_buffer[index*4], &keypoint_buffer2[i*4], 4 * sizeof(float));
}
}
}
void PyramidGL::GenerateFeatureListTex()
{
//generate feature list texture from existing keypoints
//do feature sorting in the same time?
FrameBufferObject fbo;
vector<float> list;
int idx = 0;
const double twopi = 2.0*3.14159265358979323846;
float sigma_half_step = powf(2.0f, 0.5f / param._dog_level_num);
float octave_sigma = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
if(_down_sample_factor>0) octave_sigma *= float(1<<_down_sample_factor);
std::fill(_levelFeatureNum, _levelFeatureNum + _octave_num * param._dog_level_num, 0);
_keypoint_index.resize(0); // should already be 0
for(int i = 0; i < _octave_num; i++, octave_sigma*= 2.0f)
{
for(int j = 0; j < param._dog_level_num; j++, idx++)
{
list.resize(0);
float level_sigma = param.GetLevelSigma(j + param._level_min + 1) * octave_sigma;
float sigma_min = level_sigma / sigma_half_step;
float sigma_max = level_sigma * sigma_half_step;
int fcount = 0 ;
for(int k = 0; k < _featureNum; k++)
{
float * key = &_keypoint_buffer[k*4];
float sigmak = key[2];
//////////////////////////////////////
if(IsUsingRectDescription()) sigmak = min(key[2], key[3]) / 12.0f;
if( (sigmak >= sigma_min && sigmak < sigma_max)
||(sigmak < sigma_min && i ==0 && j == 0)
||(sigmak > sigma_max && j == param._dog_level_num - 1&&
(i == _octave_num -1 || GlobalUtil::_KeyPointListForceLevel0)))
{
//add this keypoint to the list
list.push_back((key[0] - offset) / octave_sigma + 0.5f);
list.push_back((key[1] - offset) / octave_sigma + 0.5f);
if(IsUsingRectDescription())
{
list.push_back(key[2] / octave_sigma);
list.push_back(key[3] / octave_sigma);
}else
{
list.push_back((float)fmod(twopi-key[3], twopi));
list.push_back(key[2] / octave_sigma);
}
fcount ++;
//save the index of keypoints
_keypoint_index.push_back(k);
}
}
_levelFeatureNum[idx] = fcount;
if(fcount==0)continue;
GLTexImage * ftex = _featureTex+idx;
SetLevelFeatureNum(idx, fcount);
int fw = ftex->GetImgWidth();
int fh = ftex->GetImgHeight();
list.resize(4*fh*fw);
ftex->BindTex();
ftex->AttachToFBO(0);
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
if( fcount == _featureNum) _keypoint_index.resize(0);
}
if( GlobalUtil::_KeyPointListForceLevel0 ) break;
}
GLTexImage::UnbindTex();
if(GlobalUtil::_verbose)
{
std::cout<<"#Features:\t"<<_featureNum<<"\n";
}
}
PyramidPacked::PyramidPacked(SiftParam& sp): PyramidGL(sp)
{
_allPyramid = NULL;
}
PyramidPacked::~PyramidPacked()
{
DestroyPyramidData();
}
//build the gaussian pyrmaid
void PyramidPacked::BuildPyramid(GLTexInput * input)
{
//
USE_TIMING();
GLTexImage * tex, *tmp;
FilterProgram ** filter;
FrameBufferObject fbo;
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
input->FitTexViewPort();
for (int i = _octave_min; i < _octave_min + _octave_num; i++)
{
tex = GetBaseLevel(i);
tmp = GetBaseLevel(i, DATA_DOG) + 2; //use this as a temperory texture
filter = ShaderMan::s_bag->f_gaussian_step;
OCTAVE_START();
if( i == _octave_min )
{
if(i < 0) TextureUpSample(tex, input, 1<<(-i-1));
else TextureDownSample(tex, input, 1<<(i+1));
ShaderMan::FilterInitialImage(tex, tmp);
}else
{
TextureDownSample(tex, GetLevelTexture(i-1, param._level_ds));
ShaderMan::FilterSampledImage(tex, tmp);
}
LEVEL_FINISH();
for(int j = param._level_min + 1; j <= param._level_max ; j++, tex++, filter++)
{
// filtering
ShaderMan::FilterImage(*filter, tex+1, tex, tmp);
LEVEL_FINISH();
}
OCTAVE_FINISH();
}
if(GlobalUtil::_timingS) glFinish();
UnloadProgram();
}
void PyramidPacked::ComputeGradient()
{
//first pass, compute dog, gradient, orientation
GLenum buffers[4] = {
GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT ,
GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT
};
int i, j;
double ts, t1;
FrameBufferObject fbo;
if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
for(i = _octave_min; i < _octave_min + _octave_num; i++)
{
GLTexImage * gus = GetBaseLevel(i) + 1;
GLTexImage * dog = GetBaseLevel(i, DATA_DOG) + 1;
GLTexImage * grd = GetBaseLevel(i, DATA_GRAD) + 1;
GLTexImage * rot = GetBaseLevel(i, DATA_ROT) + 1;
glDrawBuffers(3, buffers);
gus->FitTexViewPort();
//compute the gradient
for(j = 0; j < param._dog_level_num ; j++, gus++, dog++, grd++, rot++)
{
//gradient, dog, orientation
glActiveTexture(GL_TEXTURE0);
gus->BindTex();
glActiveTexture(GL_TEXTURE1);
(gus-1)->BindTex();
//output
dog->AttachToFBO(0);
grd->AttachToFBO(1);
rot->AttachToFBO(2);
ShaderMan::UseShaderGradientPass((gus-1)->GetTexID());
//compute
dog->DrawQuadMT4();
}
}
if(GlobalUtil::_timingS)
{
glFinish();
if(GlobalUtil::_verbose)
{
t1 = CLOCK();
std::cout <<"<Gradient, DOG >\t"<<(t1-ts)<<"\n";
}
}
GLTexImage::DetachFBO(1);
GLTexImage::DetachFBO(2);
UnloadProgram();
GLTexImage::UnbindMultiTex(3);
fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
}
void PyramidPacked::DetectKeypointsEX()
{
//first pass, compute dog, gradient, orientation
GLenum buffers[4] = {
GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT ,
GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT
};
int i, j;
double t0, t, ts, t1, t2;
FrameBufferObject fbo;
if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
for(i = _octave_min; i < _octave_min + _octave_num; i++)
{
GLTexImage * gus = GetBaseLevel(i) + 1;
GLTexImage * dog = GetBaseLevel(i, DATA_DOG) + 1;
GLTexImage * grd = GetBaseLevel(i, DATA_GRAD) + 1;
GLTexImage * rot = GetBaseLevel(i, DATA_ROT) + 1;
glDrawBuffers(3, buffers);
gus->FitTexViewPort();
//compute the gradient
for(j = param._level_min +1; j <= param._level_max ; j++, gus++, dog++, grd++, rot++)
{
//gradient, dog, orientation
glActiveTexture(GL_TEXTURE0);
gus->BindTex();
glActiveTexture(GL_TEXTURE1);
(gus-1)->BindTex();
//output
dog->AttachToFBO(0);
grd->AttachToFBO(1);
rot->AttachToFBO(2);
ShaderMan::UseShaderGradientPass((gus-1)->GetTexID());
//compute
dog->DrawQuadMT4();
}
}
if(GlobalUtil::_timingS && GlobalUtil::_verbose)
{
glFinish();
t1 = CLOCK();
}
GLTexImage::DetachFBO(1);
GLTexImage::DetachFBO(2);
//glDrawBuffers(1, buffers);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
GlobalUtil::CheckErrorsGL();
for ( i = _octave_min; i < _octave_min + _octave_num; i++)
{
if(GlobalUtil::_timingO)
{
t0 = CLOCK();
std::cout<<"#"<<(i + _down_sample_factor)<<"\t";
}
GLTexImage * dog = GetBaseLevel(i, DATA_DOG) + 2;
GLTexImage * key = GetBaseLevel(i, DATA_KEYPOINT) +2;
GLTexImage * gus = GetBaseLevel(i) + 2;
key->FitTexViewPort();
for( j = param._level_min +2; j < param._level_max ; j++, dog++, key++, gus++)
{
if(GlobalUtil::_timingL)t = CLOCK();
key->AttachToFBO(0);
glActiveTexture(GL_TEXTURE0);
dog->BindTex();
glActiveTexture(GL_TEXTURE1);
(dog+1)->BindTex();
glActiveTexture(GL_TEXTURE2);
(dog-1)->BindTex();
if(GlobalUtil::_DarknessAdaption)
{
glActiveTexture(GL_TEXTURE3);
gus->BindTex();
}
ShaderMan::UseShaderKeypoint((dog+1)->GetTexID(), (dog-1)->GetTexID());
key->DrawQuadMT8();
if(GlobalUtil::_timingL)
{
glFinish();
std::cout<<(CLOCK()-t)<<"\t";
}
}
if(GlobalUtil::_timingO)
{
glFinish();
std::cout<<"|\t"<<(CLOCK()-t0)<<"\n";
}
}
if(GlobalUtil::_timingS)
{
glFinish();
if(GlobalUtil::_verbose)
{
t2 = CLOCK();
std::cout <<"<Gradient, DOG >\t"<<(t1-ts)<<"\n"
<<"<Get Keypoints >\t"<<(t2-t1)<<"\n";
}
}
UnloadProgram();
GLTexImage::UnbindMultiTex(3);
fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
}
void PyramidPacked::GenerateFeatureList(int i, int j)
{
float fcount = 0.0f;
int hist_skip_gpu = GlobalUtil::_ListGenSkipGPU;
int idx = i * param._dog_level_num + j;
int hist_level_num = _hpLevelNum - _pyramid_octave_first;
GLTexImage * htex, * ftex, * tex;
htex = _histoPyramidTex + hist_level_num - 1 - i;
ftex = _featureTex + idx;
tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + 2 + j;
//fill zero to an extra row/col if the height/width is odd
glActiveTexture(GL_TEXTURE0);
tex->BindTex();
htex->AttachToFBO(0);
int tight = (htex->GetImgWidth() * 4 == tex->GetImgWidth() -1 && htex->GetImgHeight() *4 == tex->GetImgHeight()-1 );
ShaderMan::UseShaderGenListInit(tex->GetImgWidth(), tex->GetImgHeight(), tight);
htex->FitTexViewPort();
//this uses the fact that no feature is on the edge.
htex->DrawQuadReduction();
//reduction..
htex--;
//this part might have problems on several GPUS
//because the output of one pass is the input of the next pass
//may require glFinish to make it right, but too much glFinish makes it slow
for(int k = 0; k <hist_level_num - i-1 - hist_skip_gpu; k++, htex--)
{
htex->AttachToFBO(0);
htex->FitTexViewPort();
(htex+1)->BindTex();
ShaderMan::UseShaderGenListHisto();
htex->DrawQuadReduction();
}
if(hist_skip_gpu == 0)
{
//read back one pixel
float fn[4];
glReadPixels(0, 0, 1, 1, GL_RGBA , GL_FLOAT, fn);
fcount = (fn[0] + fn[1] + fn[2] + fn[3]);
if(fcount < 1) fcount = 0;
_levelFeatureNum[ idx] = (int)(fcount);
SetLevelFeatureNum(idx, (int)fcount);
//save number of features
_featureNum += int(fcount);
//
if(fcount < 1.0) return;;
///generate the feature texture
htex= _histoPyramidTex;
htex->BindTex();
//first pass
ftex->AttachToFBO(0);
if(GlobalUtil::_MaxOrientation>1)
{
//this is very important...
ftex->FitRealTexViewPort();
glClear(GL_COLOR_BUFFER_BIT);
glFinish();
}else
{
ftex->FitTexViewPort();
//glFinish();
}
ShaderMan::UseShaderGenListStart((float)ftex->GetImgWidth(), htex->GetTexID());
ftex->DrawQuad();
//make sure it finishes before the next step
ftex->DetachFBO(0);
//pass on each pyramid level
htex++;
}else
{
int tw = htex[1].GetDrawWidth(), th = htex[1].GetDrawHeight();
int fc = 0;
glReadPixels(0, 0, tw, th, GL_RGBA , GL_FLOAT, _histo_buffer);
_keypoint_buffer.resize(0);
for(int y = 0, pos = 0; y < th; y++)
{
for(int x= 0; x < tw; x++)
{
for(int c = 0; c < 4; c++, pos++)
{
int ss = (int) _histo_buffer[pos];
if(ss == 0) continue;
float ft[4] = {2 * x + (c%2? 1.5f: 0.5f), 2 * y + (c>=2? 1.5f: 0.5f), 0, 1 };
for(int t = 0; t < ss; t++)
{
ft[2] = (float) t;
_keypoint_buffer.insert(_keypoint_buffer.end(), ft, ft+4);
}
fc += (int)ss;
}
}
}
_levelFeatureNum[ idx] = fc;
SetLevelFeatureNum(idx, fc);
if(fc == 0) return;
fcount = (float) fc;
_featureNum += fc;
/////////////////////
ftex->AttachToFBO(0);
if(GlobalUtil::_MaxOrientation>1)
{
ftex->FitRealTexViewPort();
glClear(GL_COLOR_BUFFER_BIT);
glFlush();
}else
{
ftex->FitTexViewPort();
glFlush();
}
_keypoint_buffer.resize(ftex->GetDrawWidth() * ftex->GetDrawHeight()*4, 0);
///////////
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, ftex->GetDrawWidth(),
ftex->GetDrawHeight(), GL_RGBA, GL_FLOAT, &_keypoint_buffer[0]);
htex += 2;
}
for(int lev = 1 + hist_skip_gpu; lev < hist_level_num - i; lev++, htex++)
{
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
ftex->AttachToFBO(0);
glActiveTexture(GL_TEXTURE1);
htex->BindTex();
ShaderMan::UseShaderGenListStep(ftex->GetTexID(), htex->GetTexID());
ftex->DrawQuad();
ftex->DetachFBO(0);
}
ftex->AttachToFBO(0);
glActiveTexture(GL_TEXTURE1);
tex->BindTex();
ShaderMan::UseShaderGenListEnd(tex->GetTexID());
ftex->DrawQuad();
GLTexImage::UnbindMultiTex(2);
}
void PyramidPacked::GenerateFeatureList()
{
//generate the histogram pyramid
FrameBufferObject fbo;
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
double t1, t2;
int ocount= 0, reverse = (GlobalUtil::_TruncateMethod == 1);
_featureNum = 0;
FitHistogramPyramid();
//for(int i = 0, idx = 0; i < _octave_num; i++)
FOR_EACH_OCTAVE(i, reverse)
{
if(GlobalUtil::_timingO)
{
t1= CLOCK();
ocount = 0;
std::cout<<"#"<<i+_octave_min + _down_sample_factor<<":\t";
}
//for(int j = 0; j < param._dog_level_num; j++, idx++)
FOR_EACH_LEVEL(j, reverse)
{
if(GlobalUtil::_TruncateMethod && GlobalUtil::_FeatureCountThreshold > 0
&& _featureNum > GlobalUtil::_FeatureCountThreshold)
{
_levelFeatureNum[i * param._dog_level_num + j] = 0;
continue;
}
GenerateFeatureList(i, j);
if(GlobalUtil::_timingO)
{
int idx = i * param._dog_level_num + j;
ocount += _levelFeatureNum[idx];
std::cout<< _levelFeatureNum[idx] <<"\t";
}
}
if(GlobalUtil::_timingO)
{
t2 = CLOCK();
std::cout << "| \t" << int(ocount) << " :\t(" << (t2 - t1) << ")\n";
}
}
if(GlobalUtil::_timingS)glFinish();
if(GlobalUtil::_verbose)
{
std::cout<<"#Features:\t"<<_featureNum<<"\n";
}
}
void PyramidPacked::GenerateFeatureListCPU()
{
FrameBufferObject fbo;
_featureNum = 0;
GLTexImage * tex = GetBaseLevel(_octave_min);
float * mem = new float [tex->GetTexWidth()*tex->GetTexHeight()*4];
vector<float> list;
int idx = 0;
for(int i = 0; i < _octave_num; i++)
{
for(int j = 0; j < param._dog_level_num; j++, idx++)
{
tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + j + 2;
tex->BindTex();
glGetTexImage(GlobalUtil::_texTarget, 0, GL_RGBA, GL_FLOAT, mem);
//tex->AttachToFBO(0);
//tex->FitTexViewPort();
//glReadPixels(0, 0, tex->GetTexWidth(), tex->GetTexHeight(), GL_RED, GL_FLOAT, mem);
//
//make a list of
list.resize(0);
float *pl = mem;
int fcount = 0 ;
for(int k = 0; k < tex->GetDrawHeight(); k++)
{
float * p = pl;
pl += tex->GetTexWidth() * 4;
for( int m = 0; m < tex->GetDrawWidth(); m ++, p+=4)
{
// if(m ==0 || k ==0 || k == tex->GetDrawHeight() -1 || m == tex->GetDrawWidth() -1) continue;
// if(*p == 0) continue;
int t = ((int) fabs(p[0])) - 1;
if(t < 0) continue;
int xx = m + m + ( (t %2)? 1 : 0);
int yy = k + k + ( (t <2)? 0 : 1);
if(xx ==0 || yy == 0) continue;
if(xx >= tex->GetImgWidth() - 1 || yy >= tex->GetImgHeight() - 1)continue;
list.push_back(xx + 0.5f + p[1]);
list.push_back(yy + 0.5f + p[2]);
list.push_back(GlobalUtil::_KeepExtremumSign && p[0] < 0 ? -1.0f : 1.0f);
list.push_back(p[3]);
fcount ++;
}
}
if(fcount==0)continue;
if(GlobalUtil::_timingL) std::cout<<fcount<<".";
GLTexImage * ftex = _featureTex+idx;
_levelFeatureNum[idx] = (fcount);
SetLevelFeatureNum(idx, fcount);
_featureNum += (fcount);
int fw = ftex->GetImgWidth();
int fh = ftex->GetImgHeight();
list.resize(4*fh*fw);
ftex->BindTex();
ftex->AttachToFBO(0);
// glTexImage2D(GlobalUtil::_texTarget, 0, GlobalUtil::_iTexFormat, fw, fh, 0, GL_BGRA, GL_FLOAT, &list[0]);
glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
//
}
}
GLTexImage::UnbindTex();
delete[] mem;
if(GlobalUtil::_verbose)
{
std::cout<<"#Features:\t"<<_featureNum<<"\n";
}
}
void PyramidPacked::GetFeatureOrientations()
{
GLTexImage * gtex, * otex;
GLTexImage * ftex = _featureTex;
GLTexImage * fotex = _orientationTex;
int * count = _levelFeatureNum;
float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
FrameBufferObject fbo;
if(_orientationTex)
{
GLenum buffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
glDrawBuffers(2, buffers);
}else
{
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
}
for(int i = 0; i < _octave_num; i++)
{
gtex = GetBaseLevel(i+_octave_min, DATA_GRAD) + 1;
otex = GetBaseLevel(i+_octave_min, DATA_ROT) + 1;
for(int j = 0; j < param._dog_level_num; j++, ftex++, otex++, count++, gtex++, fotex++)
{
if(*count<=0)continue;
sigma = param.GetLevelSigma(j+param._level_min+1);
ftex->FitTexViewPort();
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glActiveTexture(GL_TEXTURE1);
gtex->BindTex();
glActiveTexture(GL_TEXTURE2);
otex->BindTex();
//
ftex->AttachToFBO(0);
if(_orientationTex) fotex->AttachToFBO(1);
GlobalUtil::CheckFramebufferStatus();
ShaderMan::UseShaderOrientation(gtex->GetTexID(),
gtex->GetImgWidth(), gtex->GetImgHeight(),
sigma, otex->GetTexID(), sigma_step, _existing_keypoints);
ftex->DrawQuad();
}
}
GLTexImage::UnbindMultiTex(3);
if(GlobalUtil::_timingS)glFinish();
if(_orientationTex) fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
}
void PyramidPacked::GetSimplifiedOrientation()
{
//
int idx = 0;
// int n = _octave_num * param._dog_level_num;
float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
GLTexImage * ftex = _featureTex;
FrameBufferObject fbo;
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
for(int i = 0; i < _octave_num; i++)
{
GLTexImage *otex = GetBaseLevel(i + _octave_min, DATA_ROT) + 2;
for(int j = 0; j < param._dog_level_num; j++, ftex++, otex++, idx ++)
{
if(_levelFeatureNum[idx]<=0)continue;
sigma = param.GetLevelSigma(j+param._level_min+1);
//
ftex->AttachToFBO(0);
ftex->FitTexViewPort();
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glActiveTexture(GL_TEXTURE1);
otex->BindTex();
ShaderMan::UseShaderSimpleOrientation(otex->GetTexID(), sigma, sigma_step);
ftex->DrawQuad();
}
}
GLTexImage::UnbindMultiTex(2);
}
void PyramidPacked::InitPyramid(int w, int h, int ds)
{
int wp, hp, toobig = 0;
if(ds == 0)
{
_down_sample_factor = 0;
if(GlobalUtil::_octave_min_default>=0)
{
wp = w >> GlobalUtil::_octave_min_default;
hp = h >> GlobalUtil::_octave_min_default;
}else
{
wp = w << (-GlobalUtil::_octave_min_default);
hp = h << (-GlobalUtil::_octave_min_default);
}
_octave_min = _octave_min_default;
}else
{
//must use 0 as _octave_min;
_octave_min = 0;
_down_sample_factor = ds;
w >>= ds;
h >>= ds;
wp = w;
hp = h;
}
while(wp > GlobalUtil::_texMaxDim || hp > GlobalUtil::_texMaxDim )
{
_octave_min ++;
wp >>= 1;
hp >>= 1;
toobig = 1;
}
while(GlobalUtil::_MemCapGPU > 0 && GlobalUtil::_FitMemoryCap && (wp >_pyramid_width || hp > _pyramid_height) &&
max(max(wp, hp), max(_pyramid_width, _pyramid_height)) > 1024 * sqrt(GlobalUtil::_MemCapGPU / 96.0) )
{
_octave_min ++;
wp >>= 1;
hp >>= 1;
toobig = 2;
}
if(toobig && GlobalUtil::_verbose)
{
std::cout<<(toobig == 2 ? "[**SKIP OCTAVES**]:\tExceeding Memory Cap (-nomc)\n" :
"[**SKIP OCTAVES**]:\tReaching the dimension limit (-maxd)!\n");
}
if( wp == _pyramid_width && hp == _pyramid_height && _allocated )
{
FitPyramid(wp, hp);
}else if(GlobalUtil::_ForceTightPyramid || _allocated ==0)
{
ResizePyramid(wp, hp);
}
else if( wp > _pyramid_width || hp > _pyramid_height )
{
ResizePyramid(max(wp, _pyramid_width), max(hp, _pyramid_height));
if(wp < _pyramid_width || hp < _pyramid_height) FitPyramid(wp, hp);
}
else
{
//try use the pyramid allocated for large image on small input images
FitPyramid(wp, hp);
}
//select the initial smoothing filter according to the new _octave_min
ShaderMan::SelectInitialSmoothingFilter(_octave_min + _down_sample_factor, param);
}
void PyramidPacked::FitPyramid(int w, int h)
{
//(w, h) <= (_pyramid_width, _pyramid_height);
_pyramid_octave_first = 0;
//
_octave_num = GlobalUtil::_octave_num_default;
int _octave_num_max = GetRequiredOctaveNum(min(w, h));
if(_octave_num < 1 || _octave_num > _octave_num_max)
{
_octave_num = _octave_num_max;
}
int pw = _pyramid_width>>1, ph = _pyramid_height>>1;
while(_pyramid_octave_first + _octave_num < _pyramid_octave_num &&
pw >= w && ph >= h)
{
_pyramid_octave_first++;
pw >>= 1;
ph >>= 1;
}
for(int i = 0; i < _octave_num; i++)
{
GLTexImage * tex = GetBaseLevel(i + _octave_min);
GLTexImage * dog = GetBaseLevel(i + _octave_min, DATA_DOG);
GLTexImage * grd = GetBaseLevel(i + _octave_min, DATA_GRAD);
GLTexImage * rot = GetBaseLevel(i + _octave_min, DATA_ROT);
GLTexImage * key = GetBaseLevel(i + _octave_min, DATA_KEYPOINT);
for(int j = param._level_min; j <= param._level_max; j++, tex++, dog++, grd++, rot++, key++)
{
tex->SetImageSize(w, h);
if(j == param._level_min) continue;
dog->SetImageSize(w, h);
grd->SetImageSize(w, h);
rot->SetImageSize(w, h);
if(j == param._level_min + 1 || j == param._level_max) continue;
key->SetImageSize(w, h);
}
w>>=1;
h>>=1;
}
}
void PyramidPacked::ResizePyramid( int w, int h)
{
//
unsigned int totalkb = 0;
int _octave_num_new, input_sz, i, j;
//
if(_pyramid_width == w && _pyramid_height == h && _allocated) return;
if(w > GlobalUtil::_texMaxDim || h > GlobalUtil::_texMaxDim) return ;
if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<w<<"x"<<h<<endl;
//first octave does not change
_pyramid_octave_first = 0;
//compute # of octaves
input_sz = min(w,h) ;
_pyramid_width = w;
_pyramid_height = h;
//reset to preset parameters
_octave_num_new = GlobalUtil::_octave_num_default;
if(_octave_num_new < 1) _octave_num_new = GetRequiredOctaveNum(input_sz) ;
if(_pyramid_octave_num != _octave_num_new)
{
//destroy the original pyramid if the # of octave changes
if(_octave_num >0)
{
DestroyPerLevelData();
DestroyPyramidData();
}
_pyramid_octave_num = _octave_num_new;
}
_octave_num = _pyramid_octave_num;
int noct = _octave_num;
int nlev = param._level_num;
// //initialize the pyramid
if(_allPyramid==NULL) _allPyramid = new GLTexPacked[ noct* nlev * DATA_NUM];
GLTexPacked * gus = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_GAUSSIAN);
GLTexPacked * dog = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_DOG);
GLTexPacked * grd = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_GRAD);
GLTexPacked * rot = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_ROT);
GLTexPacked * key = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_KEYPOINT);
////////////there could be "out of memory" happening during the allocation
for(i = 0; i< noct; i++)
{
for( j = 0; j< nlev; j++, gus++, dog++, grd++, rot++, key++)
{
gus->InitTexture(w, h);
if(j==0)continue;
dog->InitTexture(w, h);
grd->InitTexture(w, h, 0);
rot->InitTexture(w, h);
if(j<=1 || j >=nlev -1) continue;
key->InitTexture(w, h, 0);
}
int tsz = (gus -1)->GetTexPixelCount() * 16;
totalkb += ((nlev *5 -6)* tsz / 1024);
//several auxilary textures are not actually required
w>>=1;
h>>=1;
}
totalkb += ResizeFeatureStorage();
_allocated = 1;
if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<(totalkb/1024)<<"MB\n";
}
void PyramidPacked::DestroyPyramidData()
{
if(_allPyramid)
{
delete [] _allPyramid;
_allPyramid = NULL;
}
}
GLTexImage* PyramidPacked::GetLevelTexture(int octave, int level, int dataName)
{
return _allPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num
+ param._level_num * _pyramid_octave_num * dataName
+ (level - param._level_min);
}
GLTexImage* PyramidPacked::GetLevelTexture(int octave, int level)
{
return _allPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num
+ (level - param._level_min);
}
//in the packed implementation( still in progress)
// DATA_GAUSSIAN, DATA_DOG, DATA_GAD will be stored in different textures.
GLTexImage* PyramidPacked::GetBaseLevel(int octave, int dataName)
{
if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
int offset = (_pyramid_octave_first + octave - _octave_min) * param._level_num;
int num = param._level_num * _pyramid_octave_num;
return _allPyramid + num *dataName + offset;
}
void PyramidPacked::FitHistogramPyramid()
{
GLTexImage * tex, *htex;
int hist_level_num = _hpLevelNum - _pyramid_octave_first;
tex = GetBaseLevel(_octave_min , DATA_KEYPOINT) + 2;
htex = _histoPyramidTex + hist_level_num - 1;
int w = (tex->GetImgWidth() + 2) >> 2;
int h = (tex->GetImgHeight() + 2)>> 2;
//4n+1 -> n; 4n+2,2, 3 -> n+1
for(int k = 0; k <hist_level_num -1; k++, htex--)
{
if(htex->GetImgHeight()!= h || htex->GetImgWidth() != w)
{
htex->SetImageSize(w, h);
htex->ZeroHistoMargin();
}
w = (w + 1)>>1; h = (h + 1) >> 1;
}
}