You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
537 lines
16 KiB
537 lines
16 KiB
/**********************************************************************
|
|
* Software License Agreement (BSD License)
|
|
*
|
|
* Copyright 2011 Andreas Muetzel (amuetzel@uni-koblenz.de). All rights reserved.
|
|
*
|
|
* THE BSD LICENSE
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*************************************************************************/
|
|
#ifndef FLANN_UTIL_CUDA_RESULTSET_H
|
|
#define FLANN_UTIL_CUDA_RESULTSET_H
|
|
|
|
#include <FLANN/util/cuda/heap.h>
|
|
#include <limits>
|
|
|
|
__device__ __forceinline__
|
|
float infinity()
|
|
{
|
|
return __int_as_float(0x7f800000);
|
|
}
|
|
|
|
#ifndef INFINITY
|
|
#define INFINITY infinity()
|
|
#endif
|
|
|
|
namespace flann
|
|
{
|
|
namespace cuda
|
|
{
|
|
//! result set for the 1nn search. Doesn't do any global memory accesses on its own,
|
|
template< typename DistanceType >
|
|
struct SingleResultSet
|
|
{
|
|
int bestIndex;
|
|
DistanceType bestDist;
|
|
const DistanceType epsError;
|
|
|
|
__device__ __host__
|
|
SingleResultSet( DistanceType eps ) : bestIndex(-1),bestDist(INFINITY), epsError(eps){ }
|
|
|
|
__device__
|
|
inline float
|
|
worstDist()
|
|
{
|
|
return bestDist;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
insert(int index, DistanceType dist)
|
|
{
|
|
if( dist <= bestDist ) {
|
|
bestIndex=index;
|
|
bestDist=dist;
|
|
}
|
|
}
|
|
|
|
DistanceType* resultDist;
|
|
int* resultIndex;
|
|
|
|
__device__
|
|
inline void
|
|
setResultLocation( DistanceType* dists, int* index, int thread, int stride )
|
|
{
|
|
resultDist=dists+thread*stride;
|
|
resultIndex=index+thread*stride;
|
|
if( stride != 1 ) {
|
|
for( int i=1; i<stride; i++ ) {
|
|
resultDist[i]=INFINITY;
|
|
resultIndex[i]=-1;
|
|
}
|
|
}
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
finish()
|
|
{
|
|
resultDist[0]=bestDist;
|
|
resultIndex[0]=bestIndex;
|
|
}
|
|
};
|
|
|
|
template< typename DistanceType >
|
|
struct GreaterThan
|
|
{
|
|
__device__
|
|
bool operator()(DistanceType a, DistanceType b)
|
|
{
|
|
return a>b;
|
|
}
|
|
};
|
|
|
|
|
|
// using this and the template uses 2 or 3 registers more than the direct implementation in the kNearestKernel, but
|
|
// there is no speed difference.
|
|
// Setting useHeap as a template parameter leads to a whole lot of things being
|
|
// optimized away by nvcc.
|
|
// Register counts are the same as when removing not-needed variables in explicit specializations
|
|
// and the "if( useHeap )" branches are eliminated at compile time.
|
|
// The downside of this: a bit more complex kernel launch code.
|
|
template< typename DistanceType, bool useHeap >
|
|
struct KnnResultSet
|
|
{
|
|
int foundNeighbors;
|
|
DistanceType largestHeapDist;
|
|
int maxDistIndex;
|
|
const int k;
|
|
const bool sorted;
|
|
const DistanceType epsError;
|
|
|
|
|
|
__device__ __host__
|
|
KnnResultSet(int knn, bool sortResults, DistanceType eps) : foundNeighbors(0),largestHeapDist(INFINITY),k(knn), sorted(sortResults), epsError(eps){ }
|
|
|
|
// __host__ __device__
|
|
// KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
|
|
|
|
__device__
|
|
inline DistanceType
|
|
worstDist()
|
|
{
|
|
return largestHeapDist;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
insert(int index, DistanceType dist)
|
|
{
|
|
if( foundNeighbors<k ) {
|
|
resultDist[foundNeighbors]=dist;
|
|
resultIndex[foundNeighbors]=index;
|
|
if( foundNeighbors==k-1) {
|
|
if( useHeap ) {
|
|
flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
|
|
largestHeapDist=resultDist[0];
|
|
}
|
|
else {
|
|
findLargestDistIndex();
|
|
}
|
|
|
|
}
|
|
foundNeighbors++;
|
|
}
|
|
else if( dist < largestHeapDist ) {
|
|
if( useHeap ) {
|
|
resultDist[0]=dist;
|
|
resultIndex[0]=index;
|
|
flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
|
|
largestHeapDist=resultDist[0];
|
|
}
|
|
else {
|
|
resultDist[maxDistIndex]=dist;
|
|
resultIndex[maxDistIndex]=index;
|
|
findLargestDistIndex();
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
__device__
|
|
void
|
|
findLargestDistIndex( )
|
|
{
|
|
largestHeapDist=resultDist[0];
|
|
maxDistIndex=0;
|
|
for( int i=1; i<k; i++ )
|
|
if( resultDist[i] > largestHeapDist ) {
|
|
maxDistIndex=i;
|
|
largestHeapDist=resultDist[i];
|
|
}
|
|
}
|
|
|
|
float* resultDist;
|
|
int* resultIndex;
|
|
|
|
__device__
|
|
inline void
|
|
setResultLocation( DistanceType* dists, int* index, int thread, int stride )
|
|
{
|
|
resultDist=dists+stride*thread;
|
|
resultIndex=index+stride*thread;
|
|
for( int i=0; i<stride; i++ ) {
|
|
resultDist[i]=INFINITY;
|
|
resultIndex[i]=-1;
|
|
// resultIndex[tid+i*blockDim.x]=-1;
|
|
// resultDist[tid+i*blockDim.x]=INFINITY;
|
|
}
|
|
}
|
|
|
|
__host__ __device__
|
|
inline void
|
|
finish()
|
|
{
|
|
if( sorted ) {
|
|
if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
|
|
for( int i=k-1; i>0; i-- ) {
|
|
flann::cuda::swap( resultDist[0], resultDist[i] );
|
|
flann::cuda::swap( resultIndex[0], resultIndex[i] );
|
|
flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename DistanceType>
|
|
struct CountingRadiusResultSet
|
|
{
|
|
int count_;
|
|
DistanceType radius_sq_;
|
|
int max_neighbors_;
|
|
|
|
__device__ __host__
|
|
CountingRadiusResultSet(DistanceType radius, int max_neighbors) : count_(0),radius_sq_(radius), max_neighbors_(max_neighbors){ }
|
|
|
|
__device__
|
|
inline DistanceType
|
|
worstDist()
|
|
{
|
|
return radius_sq_;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
insert(int index, float dist)
|
|
{
|
|
if( dist < radius_sq_ ) {
|
|
count_++;
|
|
}
|
|
}
|
|
|
|
int* resultIndex;
|
|
|
|
__device__
|
|
inline void
|
|
setResultLocation( DistanceType* /*dists*/, int* count, int thread, int stride )
|
|
{
|
|
resultIndex=count+thread*stride;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
finish()
|
|
{
|
|
if(( max_neighbors_<=0) ||( count_<=max_neighbors_) ) resultIndex[0]=count_;
|
|
else resultIndex[0]=max_neighbors_;
|
|
}
|
|
};
|
|
|
|
template<typename DistanceType, bool useHeap>
|
|
struct RadiusKnnResultSet
|
|
{
|
|
int foundNeighbors;
|
|
DistanceType largestHeapDist;
|
|
int maxDistElem;
|
|
const int k;
|
|
const bool sorted;
|
|
const DistanceType radius_sq_;
|
|
int* segment_starts_;
|
|
// int count_;
|
|
|
|
|
|
__device__ __host__
|
|
RadiusKnnResultSet(DistanceType radius, int knn, int* segment_starts, bool sortResults) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), radius_sq_(radius),segment_starts_(segment_starts) { }
|
|
|
|
// __host__ __device__
|
|
// KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
|
|
|
|
__device__
|
|
inline DistanceType
|
|
worstDist()
|
|
{
|
|
return largestHeapDist;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
insert(int index, DistanceType dist)
|
|
{
|
|
if( dist < radius_sq_ ) {
|
|
if( foundNeighbors<k ) {
|
|
resultDist[foundNeighbors]=dist;
|
|
resultIndex[foundNeighbors]=index;
|
|
if(( foundNeighbors==k-1) && useHeap) {
|
|
if( useHeap ) {
|
|
flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
|
|
largestHeapDist=resultDist[0];
|
|
}
|
|
else {
|
|
findLargestDistIndex();
|
|
}
|
|
}
|
|
foundNeighbors++;
|
|
|
|
}
|
|
else if( dist < largestHeapDist ) {
|
|
if( useHeap ) {
|
|
resultDist[0]=dist;
|
|
resultIndex[0]=index;
|
|
flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
|
|
largestHeapDist=resultDist[0];
|
|
}
|
|
else {
|
|
resultDist[maxDistElem]=dist;
|
|
resultIndex[maxDistElem]=index;
|
|
findLargestDistIndex();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
__device__
|
|
void
|
|
findLargestDistIndex( )
|
|
{
|
|
largestHeapDist=resultDist[0];
|
|
maxDistElem=0;
|
|
for( int i=1; i<k; i++ )
|
|
if( resultDist[i] > largestHeapDist ) {
|
|
maxDistElem=i;
|
|
largestHeapDist=resultDist[i];
|
|
}
|
|
}
|
|
|
|
|
|
DistanceType* resultDist;
|
|
int* resultIndex;
|
|
|
|
__device__
|
|
inline void
|
|
setResultLocation( DistanceType* dists, int* index, int thread, int /*stride*/ )
|
|
{
|
|
resultDist=dists+segment_starts_[thread];
|
|
resultIndex=index+segment_starts_[thread];
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
finish()
|
|
{
|
|
if( sorted ) {
|
|
if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
|
|
for( int i=foundNeighbors-1; i>0; i-- ) {
|
|
flann::cuda::swap( resultDist[0], resultDist[i] );
|
|
flann::cuda::swap( resultIndex[0], resultIndex[i] );
|
|
flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
// Difference to RadiusKnnResultSet: Works like KnnResultSet, doesn't pack the results densely (as the RadiusResultSet does)
|
|
template <typename DistanceType, bool useHeap>
|
|
struct KnnRadiusResultSet
|
|
{
|
|
int foundNeighbors;
|
|
DistanceType largestHeapDist;
|
|
int maxDistIndex;
|
|
const int k;
|
|
const bool sorted;
|
|
const DistanceType epsError;
|
|
const DistanceType radius_sq;
|
|
|
|
|
|
__device__ __host__
|
|
KnnRadiusResultSet(int knn, bool sortResults, DistanceType eps, DistanceType radius) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), epsError(eps),radius_sq(radius){ }
|
|
|
|
// __host__ __device__
|
|
// KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
|
|
|
|
__device__
|
|
inline DistanceType
|
|
worstDist()
|
|
{
|
|
return largestHeapDist;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
insert(int index, DistanceType dist)
|
|
{
|
|
if( dist < largestHeapDist ) {
|
|
if( foundNeighbors<k ) {
|
|
resultDist[foundNeighbors]=dist;
|
|
resultIndex[foundNeighbors]=index;
|
|
if( foundNeighbors==k-1 ) {
|
|
if( useHeap ) {
|
|
flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
|
|
largestHeapDist=resultDist[0];
|
|
}
|
|
else {
|
|
findLargestDistIndex();
|
|
}
|
|
}
|
|
foundNeighbors++;
|
|
}
|
|
else { //if( dist < largestHeapDist )
|
|
if( useHeap ) {
|
|
resultDist[0]=dist;
|
|
resultIndex[0]=index;
|
|
flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
|
|
largestHeapDist=resultDist[0];
|
|
}
|
|
else {
|
|
resultDist[maxDistIndex]=dist;
|
|
resultIndex[maxDistIndex]=index;
|
|
findLargestDistIndex();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
__device__
|
|
void
|
|
findLargestDistIndex( )
|
|
{
|
|
largestHeapDist=resultDist[0];
|
|
maxDistIndex=0;
|
|
for( int i=1; i<k; i++ )
|
|
if( resultDist[i] > largestHeapDist ) {
|
|
maxDistIndex=i;
|
|
largestHeapDist=resultDist[i];
|
|
}
|
|
}
|
|
|
|
DistanceType* resultDist;
|
|
int* resultIndex;
|
|
|
|
__device__
|
|
inline void
|
|
setResultLocation( DistanceType* dists, int* index, int thread, int stride )
|
|
{
|
|
resultDist=dists+stride*thread;
|
|
resultIndex=index+stride*thread;
|
|
for( int i=0; i<stride; i++ ) {
|
|
resultDist[i]=INFINITY;
|
|
resultIndex[i]=-1;
|
|
// resultIndex[tid+i*blockDim.x]=-1;
|
|
// resultDist[tid+i*blockDim.x]=INFINITY;
|
|
}
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
finish()
|
|
{
|
|
if( sorted ) {
|
|
if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
|
|
for( int i=k-1; i>0; i-- ) {
|
|
flann::cuda::swap( resultDist[0], resultDist[i] );
|
|
flann::cuda::swap( resultIndex[0], resultIndex[i] );
|
|
flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
//! fills the radius output buffer.
|
|
//! IMPORTANT ASSERTION: ASSUMES THAT THERE IS ENOUGH SPACE FOR EVERY NEIGHBOR! IF THIS ISN'T
|
|
//! TRUE, USE KnnRadiusResultSet! (Otherwise, the neighbors of one element might overflow into the next element, or past the buffer.)
|
|
template< typename DistanceType >
|
|
struct RadiusResultSet
|
|
{
|
|
DistanceType radius_sq_;
|
|
int* segment_starts_;
|
|
int count_;
|
|
bool sorted_;
|
|
|
|
__device__ __host__
|
|
RadiusResultSet(DistanceType radius, int* segment_starts, bool sorted) : radius_sq_(radius), segment_starts_(segment_starts), count_(0), sorted_(sorted){ }
|
|
|
|
__device__
|
|
inline DistanceType
|
|
worstDist()
|
|
{
|
|
return radius_sq_;
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
insert(int index, DistanceType dist)
|
|
{
|
|
if( dist < radius_sq_ ) {
|
|
resultIndex[count_]=index;
|
|
resultDist[count_]=dist;
|
|
count_++;
|
|
}
|
|
}
|
|
|
|
int* resultIndex;
|
|
DistanceType* resultDist;
|
|
|
|
__device__
|
|
inline void
|
|
setResultLocation( DistanceType* dists, int* index, int thread, int /*stride*/ )
|
|
{
|
|
resultIndex=index+segment_starts_[thread];
|
|
resultDist=dists+segment_starts_[thread];
|
|
}
|
|
|
|
__device__
|
|
inline void
|
|
finish()
|
|
{
|
|
if( sorted_ ) {
|
|
flann::cuda::heap::make_heap( resultDist,resultIndex, count_, GreaterThan<DistanceType>() );
|
|
for( int i=count_-1; i>0; i-- ) {
|
|
flann::cuda::swap( resultDist[0], resultDist[i] );
|
|
flann::cuda::swap( resultIndex[0], resultIndex[i] );
|
|
flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
|
|
}
|
|
}
|
|
}
|
|
};
|
|
}
|
|
}
|
|
|
|
#endif
|