exercise_2/colmap-dev/lib/VLFeat/slic.c

/** @file slic.c
 ** @brief SLIC superpixels - Definition
 ** @author Andrea Vedaldi
 **/

/*
Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
All rights reserved.

This file is part of the VLFeat library and is made available under
the terms of the BSD license (see the COPYING file).
*/

/**
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
@page slic Simple Linear Iterative Clustering (SLIC)
@author Andrea Vedaldi
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->

@ref slic.h implements the *Simple Linear Iterative Clustering* (SLIC)
algorithm, an image segmentation method described in @cite{achanta10slic}.

- @ref slic-overview
- @ref slic-usage
- @ref slic-tech

<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
@section slic-overview Overview
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->

SLIC @cite{achanta10slic} is a simple and efficient method to decompose
an image in visually homogeneous regions. It is based on a spatially
localized version of k-means clustering. Similar to mean shift or
quick shift (@ref quickshift.h), each pixel is associated to a feature
vector

@f[
\Psi(x,y) =
\left[
\begin{array}{c}
\lambda x \\
\lambda y \\
I(x,y)
\end{array}
\right]
@f]

and then k-means clustering is run on those. As discussed below, the
coefficient @f$ \lambda @f$ balances the spatial and appearance
components of the feature vectors, imposing a degree of spatial
regularization to the extracted regions.

SLIC takes two parameters: the nominal size of the regions
(superpixels) @c regionSize and the strength of the spatial
regularization @c regularizer. The image is first divided into a grid
with step @c regionSize. The center of each grid tile is then used to
initialize a corresponding k-means (up to a small shift to avoid
image edges). Finally, the k-means centers and clusters are refined by
using the Lloyd algorithm, yielding segmenting the image. As a
further restriction and simplification, during the k-means iterations
each pixel can be assigned to only the <em>2 x 2</em> centers
corresponding to grid tiles adjacent to the pixel.

The parameter @c regularizer sets the trade-off between clustering
appearance and spatial regularization. This is obtained by setting

@f[
 \lambda = \frac{\mathtt{regularizer}}{\mathtt{regionSize}}
@f]

in the definition of the feature @f$ \psi(x,y) @f$.

After the k-means step, SLIC optionally
removes any segment whose area is smaller than a threshld @c minRegionSize
by merging them into larger ones.

<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
@section slic-usage Usage from the C library
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->

To compute the SLIC superpixels of an image use the function
::vl_slic_segment.

<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
@section slic-tech Technical details
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->

SLIC starts by dividing the image domain into a regular grid with @f$
M \times N @f$ tiles, where

@f[
   M = \lceil \frac{\mathtt{imageWidth}}{\mathtt{regionSize}} \rceil,
   \quad
   N = \lceil \frac{\mathtt{imageHeight}}{\mathtt{regionSize}} \rceil.
@f]

A region (superpixel or k-means cluster) is initialized from each grid
center

@f[
  x_i = \operatorname{round} i \frac{\mathtt{imageWidth}}{\mathtt{regionSize}}
  \quad
  y_j = \operatorname{round} j \frac{\mathtt{imageWidth}}{\mathtt{regionSize}}.
@f]

In order to avoid placing these centers on top of image
discontinuities, the centers are then moved in a 3 x 3
neighbourohood to minimize the edge strength

@f[
   \operatorname{edge}(x,y) =
   \| I(x+1,y) - I(x-1,y) \|_2^2 +
   \| I(x,y+1) - I(x,y-1) \|_2^2.
@f]

Then the regions are obtained by running k-means clustering, started
from the centers

@f[
  C = \{ \Psi(x_i,y_j), i=0,1,\dots,M-1\ j=0,1,\dots,N-1 \}
@f]

thus obtained. K-means uses the standard LLoyd algorithm alternating
assigning pixels to the clostest centers a re-estiamting the centers
as the average of the corresponding feature vectors of the pixel
assigned to them. The only difference compared to standard k-means is
that each pixel can be assigned only to the center originated from the
neighbour tiles. This guarantees that there are exactly four
pixel-to-center comparisons at each round of minimization, which
threfore cost @f$ O(n) @f$, where @f$ n @f$ is the number of
superpixels.

After k-means has converged, SLIC eliminates any connected region whose
area is less than @c minRegionSize pixels. This is done by greedily
merging regions to neighbour ones: the pixels @f$ p @f$ are scanned in
lexicographical order and the corresponding connected components
are visited. If a region has already been visited, it is skipped; if not,
its area is computed and if this is less than  @c minRegionSize its label
is changed to the one of a neighbour
region at @f$ p @f$ that has already been vistied (there is always one
except for the very first pixel).

*/

#include "slic.h"
#include "mathop.h"
#include <math.h>
#include <string.h>

/** @brief SLIC superpixel segmentation
 ** @param segmentation segmentation.
 ** @param image image to segment.
 ** @param width image width.
 ** @param height image height.
 ** @param numChannels number of image channels (depth).
 ** @param regionSize nominal size of the regions.
 ** @param regularization trade-off between appearance and spatial terms.
 ** @param minRegionSize minimum size of a segment.
 **
 ** The function computes the SLIC superpixels of the specified image @a image.
 ** @a image is a pointer to an @c width by @c height by @c by numChannles array of @c float.
 ** @a segmentation is a pointer to a @c width by @c height array of @c vl_uint32.
 ** @a segmentation contain the labels of each image pixels, from 0 to
 ** the number of regions minus one.
 **
 ** @sa @ref slic-overview, @ref slic-tech
 **/

void
vl_slic_segment (vl_uint32 * segmentation,
                 float const * image,
                 vl_size width,
                 vl_size height,
                 vl_size numChannels,
                 vl_size regionSize,
                 float regularization,
                 vl_size minRegionSize)
{
  vl_index i, x, y, u, v, k, region ;
  vl_uindex iter ;
  vl_size const numRegionsX = (vl_size) ceil((double) width / regionSize) ;
  vl_size const numRegionsY = (vl_size) ceil((double) height / regionSize) ;
  vl_size const numRegions = numRegionsX * numRegionsY ;
  vl_size const numPixels = width * height ;
  float * centers ;
  float * edgeMap ;
  float previousEnergy = VL_INFINITY_F ;
  float startingEnergy ;
  vl_uint32 * masses ;
  vl_size const maxNumIterations = 100 ;

  assert(segmentation) ;
  assert(image) ;
  assert(width >= 1) ;
  assert(height >= 1) ;
  assert(numChannels >= 1) ;
  assert(regionSize >= 1) ;
  assert(regularization >= 0) ;

#define atimage(x,y,k) image[(x)+(y)*width+(k)*width*height]
#define atEdgeMap(x,y) edgeMap[(x)+(y)*width]

  edgeMap = vl_calloc(numPixels, sizeof(float)) ;
  masses = vl_malloc(sizeof(vl_uint32) * numPixels) ;
  centers = vl_malloc(sizeof(float) * (2 + numChannels) * numRegions) ;

  /* compute edge map (gradient strength) */
  for (k = 0 ; k < (signed)numChannels ; ++k) {
    for (y = 1 ; y < (signed)height-1 ; ++y) {
      for (x = 1 ; x < (signed)width-1 ; ++x) {
        float a = atimage(x-1,y,k) ;
        float b = atimage(x+1,y,k) ;
        float c = atimage(x,y+1,k) ;
        float d = atimage(x,y-1,k) ;
        atEdgeMap(x,y) += (a - b)  * (a - b) + (c - d) * (c - d) ;
      }
    }
  }

  /* initialize K-means centers */
  i = 0 ;
  for (v = 0 ; v < (signed)numRegionsY ; ++v) {
    for (u = 0 ; u < (signed)numRegionsX ; ++u) {
      vl_index xp ;
      vl_index yp ;
      vl_index centerx = 0 ;
      vl_index centery = 0 ;
      float minEdgeValue = VL_INFINITY_F ;

      x = (vl_index) vl_round_d(regionSize * (u + 0.5)) ;
      y = (vl_index) vl_round_d(regionSize * (v + 0.5)) ;

      x = VL_MAX(VL_MIN(x, (signed)width-1),0) ;
      y = VL_MAX(VL_MIN(y, (signed)height-1),0) ;

      /* search in a 3x3 neighbourhood the smallest edge response */
      for (yp = VL_MAX(0, y-1) ; yp <= VL_MIN((signed)height-1, y+1) ; ++ yp) {
        for (xp = VL_MAX(0, x-1) ; xp <= VL_MIN((signed)width-1, x+1) ; ++ xp) {
          float thisEdgeValue = atEdgeMap(xp,yp) ;
          if (thisEdgeValue < minEdgeValue) {
            minEdgeValue = thisEdgeValue ;
            centerx = xp ;
            centery = yp ;
          }
        }
      }

      /* initialize the new center at this location */
      centers[i++] = (float) centerx ;
      centers[i++] = (float) centery ;
      for (k  = 0 ; k < (signed)numChannels ; ++k) {
        centers[i++] = atimage(centerx,centery,k) ;
      }
    }
  }

  /* run k-means iterations */
  for (iter = 0 ; iter < maxNumIterations ; ++iter) {
    float factor = regularization / (regionSize * regionSize) ;
    float energy = 0 ;

    /* assign pixels to centers */
    for (y = 0 ; y < (signed)height ; ++y) {
      for (x = 0 ; x < (signed)width ; ++x) {
        vl_index u = floor((double)x / regionSize - 0.5) ;
        vl_index v = floor((double)y / regionSize - 0.5) ;
        vl_index up, vp ;
        float minDistance = VL_INFINITY_F ;

        for (vp = VL_MAX(0, v) ; vp <= VL_MIN((signed)numRegionsY-1, v+1) ; ++vp) {
          for (up = VL_MAX(0, u) ; up <= VL_MIN((signed)numRegionsX-1, u+1) ; ++up) {
            vl_index region = up  + vp * numRegionsX ;
            float centerx = centers[(2 + numChannels) * region + 0]  ;
            float centery = centers[(2 + numChannels) * region + 1] ;
            float spatial = (x - centerx) * (x - centerx) + (y - centery) * (y - centery) ;
            float appearance = 0 ;
            float distance ;
            for (k = 0 ; k < (signed)numChannels ; ++k) {
              float centerz = centers[(2 + numChannels) * region + k + 2]  ;
              float z = atimage(x,y,k) ;
              appearance += (z - centerz) * (z - centerz) ;
            }
            distance = appearance + factor * spatial ;
            if (minDistance > distance) {
              minDistance = distance ;
              segmentation[x + y * width] = (vl_uint32)region ;
            }
          }
        }
        energy += minDistance ;
      }
    }

    /*
     VL_PRINTF("vl:slic: iter %d: energy: %g\n", iter, energy) ;
    */

    /* check energy termination conditions */
    if (iter == 0) {
      startingEnergy = energy ;
    } else {
      if ((previousEnergy - energy) < 1e-5 * (startingEnergy - energy)) {
        break ;
      }
    }
    previousEnergy = energy ;

    /* recompute centers */
    memset(masses, 0, sizeof(vl_uint32) * width * height) ;
    memset(centers, 0, sizeof(float) * (2 + numChannels) * numRegions) ;

    for (y = 0 ; y < (signed)height ; ++y) {
      for (x = 0 ; x < (signed)width ; ++x) {
        vl_index pixel = x + y * width ;
        vl_index region = segmentation[pixel] ;
        masses[region] ++ ;
        centers[region * (2 + numChannels) + 0] += x ;
        centers[region * (2 + numChannels) + 1] += y ;
        for (k = 0 ; k < (signed)numChannels ; ++k) {
          centers[region * (2 + numChannels) + k + 2] += atimage(x,y,k) ;
        }
      }
    }

    for (region = 0 ; region < (signed)numRegions ; ++region) {
      float mass = VL_MAX(masses[region], 1e-8) ;
      for (i = (2 + numChannels) * region ;
           i < (signed)(2 + numChannels) * (region + 1) ;
           ++i) {
        centers[i] /= mass ;
      }
    }
  }

  vl_free(masses) ;
  vl_free(centers) ;
  vl_free(edgeMap) ;

  /* elimiate small regions */
  {
    vl_uint32 * cleaned = vl_calloc(numPixels, sizeof(vl_uint32)) ;
    vl_uindex * segment = vl_malloc(sizeof(vl_uindex) * numPixels) ;
    vl_size segmentSize ;
    vl_uint32 label ;
    vl_uint32 cleanedLabel ;
    vl_size numExpanded ;
    vl_index const dx [] = {+1, -1,  0,  0} ;
    vl_index const dy [] = { 0,  0, +1, -1} ;
    vl_index direction ;
    vl_index pixel ;

    for (pixel = 0 ; pixel < (signed)numPixels ; ++pixel) {
      if (cleaned[pixel]) continue ;
      label = segmentation[pixel] ;
      numExpanded = 0 ;
      segmentSize = 0 ;
      segment[segmentSize++] = pixel ;

      /*
       find cleanedLabel as the label of an already cleaned
       region neihbour of this pixel
       */
      cleanedLabel = label + 1 ;
      cleaned[pixel] = label + 1 ;
      x = pixel % width ;
      y = pixel / width ;
      for (direction = 0 ; direction < 4 ; ++direction) {
        vl_index xp = x + dx[direction] ;
        vl_index yp = y + dy[direction] ;
        vl_index neighbor = xp + yp * width ;
        if (0 <= xp && xp < (signed)width &&
            0 <= yp && yp < (signed)height &&
            cleaned[neighbor]) {
          cleanedLabel = cleaned[neighbor] ;
        }
      }

      /* expand the segment */
      while (numExpanded < segmentSize) {
        vl_index open = segment[numExpanded++] ;
        x = open % width ;
        y = open / width ;
        for (direction = 0 ; direction < 4 ; ++direction) {
          vl_index xp = x + dx[direction] ;
          vl_index yp = y + dy[direction] ;
          vl_index neighbor = xp + yp * width ;
          if (0 <= xp && xp < (signed)width &&
              0 <= yp && yp < (signed)height &&
              cleaned[neighbor] == 0 &&
              segmentation[neighbor] == label) {
            cleaned[neighbor] = label + 1 ;
            segment[segmentSize++] = neighbor ;
          }
        }
      }

      /* change label to cleanedLabel if the semgent is too small */
      if (segmentSize < minRegionSize) {
        while (segmentSize > 0) {
          cleaned[segment[--segmentSize]] = cleanedLabel ;
        }
      }
    }
    /* restore base 0 indexing of the regions */
    for (pixel = 0 ; pixel < (signed)numPixels ; ++pixel) cleaned[pixel] -- ;

    memcpy(segmentation, cleaned, numPixels * sizeof(vl_uint32)) ;
    vl_free(cleaned) ;
    vl_free(segment) ;
  }
}