You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
279 lines
6.7 KiB
279 lines
6.7 KiB
////////////////////////////////////////////////////////////////////////////
|
|
// File: CuTexImage.cpp
|
|
// Author: Changchang Wu
|
|
// Description : implementation of the CuTexImage class.
|
|
//
|
|
// Copyright (c) 2007 University of North Carolina at Chapel Hill
|
|
// All Rights Reserved
|
|
//
|
|
// Permission to use, copy, modify and distribute this software and its
|
|
// documentation for educational, research and non-profit purposes, without
|
|
// fee, and without a written agreement is hereby granted, provided that the
|
|
// above copyright notice and the following paragraph appear in all copies.
|
|
//
|
|
// The University of North Carolina at Chapel Hill make no representations
|
|
// about the suitability of this software for any purpose. It is provided
|
|
// 'as is' without express or implied warranty.
|
|
//
|
|
// Please send BUG REPORTS to ccwu@cs.unc.edu
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(CUDA_SIFTGPU_ENABLED)
|
|
|
|
#include "GL/glew.h"
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <algorithm>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
using namespace std;
|
|
|
|
|
|
#include <cuda.h>
|
|
#include <cuda_runtime_api.h>
|
|
#include <cuda_gl_interop.h>
|
|
|
|
#include "GlobalUtil.h"
|
|
#include "GLTexImage.h"
|
|
#include "CuTexImage.h"
|
|
#include "ProgramCU.h"
|
|
|
|
#if CUDA_VERSION <= 2010 && defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
|
|
#error "Require CUDA 2.2 or higher"
|
|
#endif
|
|
|
|
|
|
// Default constructor: builds an empty image that owns no device memory.
// Both device handles are NULL, no PBO is attached, and every size field
// (image size, texture size, channel count, byte count) is zero.
CuTexImage::CuTexImage()
{
    // image / texture dimensions
    _imgWidth = 0;
    _imgHeight = 0;
    _texWidth = 0;
    _texHeight = 0;

    // buffer bookkeeping
    _numBytes = 0;
    _numChannel = 0;
    _fromPBO = 0;

    // device handles
    _cuData2D = NULL;
    _cuData = NULL;
}
|
|
|
|
// Constructs an image whose linear device buffer is an OpenGL pixel buffer
// object mapped into CUDA.
//
//   width, height, nchannel : image layout; the PBO has to hold at least
//                             width * height * nchannel floats.
//   pbo                     : name of the GL buffer object to wrap.
//
// If the PBO is too small its storage is re-specified with glBufferData.
// The buffer is then registered with CUDA and mapped. Only when both steps
// succeed does the object record the mapping (_cuData, _fromPBO) and the
// image dimensions; on any failure the partial registration is rolled back
// and the object is left empty (_cuData == NULL, _fromPBO == 0, sizes 0),
// so the destructor never unmaps a buffer that was not mapped.
CuTexImage::CuTexImage(int width, int height, int nchannel, GLuint pbo)
{
    _cuData = NULL;
    _cuData2D = NULL;
    _fromPBO = 0;
    _texWidth = _texHeight = 0;

    //check size of pbo
    GLint bsize, esize = width * height * nchannel * sizeof(float);
    glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
    glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
    if(bsize < esize)
    {
        // grow the buffer, then re-read the size to see whether that worked
        glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize, NULL, GL_STATIC_DRAW_ARB);
        glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
    }
    glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);

    if(bsize >= esize)
    {
        void* mapped = NULL;
        const bool registered = (cudaGLRegisterBufferObject(pbo) == cudaSuccess);
        const bool isMapped = registered &&
                              (cudaGLMapBufferObject(&mapped, pbo) == cudaSuccess);

        // report before any cleanup call touches the runtime again
        ProgramCU::CheckErrorCUDA("cudaGLMapBufferObject");

        if(isMapped && mapped != NULL)
        {
            _cuData = mapped;
            _fromPBO = pbo;
        }else
        {
            // undo whatever part of the register/map sequence succeeded
            if(isMapped) cudaGLUnmapBufferObject(pbo);
            if(registered) cudaGLUnregisterBufferObject(pbo);
        }
    }

    if(_cuData)
    {
        _numBytes = bsize;
        _imgWidth = width;
        _imgHeight = height;
        _numChannel = nchannel;
    }else
    {
        _numBytes = 0;
        _imgWidth = 0;
        _imgHeight = 0;
        _numChannel = 0;
    }
}
|
|
|
|
// Destructor: gives back the device resources held by this image.
// A PBO-backed linear buffer is unmapped and unregistered; a linear buffer
// that is not PBO-backed is released with cudaFree. The 2D array, if any,
// is released afterwards with cudaFreeArray.
CuTexImage::~CuTexImage()
{
    const bool pboBacked = (_fromPBO != 0);

    if(!pboBacked)
    {
        if(_cuData != NULL)
        {
            cudaFree(_cuData);
        }
    }
    else
    {
        cudaGLUnmapBufferObject(_fromPBO);
        cudaGLUnregisterBufferObject(_fromPBO);
    }

    if(_cuData2D != NULL)
    {
        cudaFreeArray(_cuData2D);
    }
}
|
|
|
|
void CuTexImage::SetImageSize(int width, int height)
|
|
{
|
|
_imgWidth = width;
|
|
_imgHeight = height;
|
|
}
|
|
|
|
bool CuTexImage::InitTexture(int width, int height, int nchannel)
|
|
{
|
|
_imgWidth = width;
|
|
_imgHeight = height;
|
|
_numChannel = min(max(nchannel, 1), 4);
|
|
|
|
const size_t size = width * height * _numChannel * sizeof(float);
|
|
|
|
if (size < 0) {
|
|
return false;
|
|
}
|
|
|
|
// SiftGPU uses int for all indexes and
|
|
// this ensures that all elements can be accessed.
|
|
if (size >= INT_MAX * sizeof(float)) {
|
|
return false;
|
|
}
|
|
|
|
if(size <= _numBytes) return true;
|
|
|
|
if(_cuData) cudaFree(_cuData);
|
|
|
|
//allocate the array data
|
|
const cudaError_t status = cudaMalloc(&_cuData, _numBytes = size);
|
|
|
|
if (status != cudaSuccess) {
|
|
_cuData = NULL;
|
|
_numBytes = 0;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void CuTexImage::CopyFromHost(const void * buf)
|
|
{
|
|
if(_cuData == NULL) return;
|
|
cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyHostToDevice);
|
|
}
|
|
|
|
void CuTexImage::CopyToHost(void * buf)
|
|
{
|
|
if(_cuData == NULL) return;
|
|
cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost);
|
|
}
|
|
|
|
void CuTexImage::CopyToHost(void * buf, int stream)
|
|
{
|
|
if(_cuData == NULL) return;
|
|
cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost, (cudaStream_t)stream);
|
|
}
|
|
|
|
void CuTexImage::InitTexture2D()
|
|
{
|
|
#if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
|
|
if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight))
|
|
{
|
|
cudaFreeArray(_cuData2D);
|
|
_cuData2D = NULL;
|
|
}
|
|
|
|
if(_cuData2D == NULL)
|
|
{
|
|
_texWidth = max(_texWidth, _imgWidth);
|
|
_texHeight = max(_texHeight, _imgHeight);
|
|
cudaChannelFormatDesc desc;
|
|
desc.f = cudaChannelFormatKindFloat;
|
|
desc.x = sizeof(float) * 8;
|
|
desc.y = _numChannel >=2 ? sizeof(float) * 8 : 0;
|
|
desc.z = _numChannel >=3 ? sizeof(float) * 8 : 0;
|
|
desc.w = _numChannel >=4 ? sizeof(float) * 8 : 0;
|
|
const cudaError_t status = cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight);
|
|
|
|
if (status != cudaSuccess) {
|
|
_cuData = NULL;
|
|
_numBytes = 0;
|
|
}
|
|
|
|
ProgramCU::CheckErrorCUDA("CuTexImage::InitTexture2D");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void CuTexImage::CopyToTexture2D()
|
|
{
|
|
#if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
|
|
InitTexture2D();
|
|
|
|
if(_cuData2D)
|
|
{
|
|
cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel* sizeof(float) ,
|
|
_imgWidth * _numChannel*sizeof(float), _imgHeight, cudaMemcpyDeviceToDevice);
|
|
ProgramCU::CheckErrorCUDA("cudaMemcpy2DToArray");
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
// Debug hook for CopyToTexture2D. The round-trip experiment it used to run
// is disabled, so the function performs no work and always returns 1.
int CuTexImage::DebugCopyToTexture2D()
{
    // Disabled experiment, kept for reference:
    //
    //   CuTexImage tex;
    //   float data1[2][3] = {{1, 2, 5}, {3, 4, 5}}, data2[2][5];
    //   tex.InitTexture(3, 2, 1);
    //   cudaMemcpy(tex._cuData, data1[0], 6 * sizeof(float), cudaMemcpyHostToDevice);
    //   cudaMemcpy(data1, tex._cuData, 4 * sizeof(float) , cudaMemcpyDeviceToHost);
    //   tex._texWidth =5; tex._texHeight = 2;
    //   tex.CopyToTexture2D();
    //   cudaMemcpyFromArray(data2[0], tex._cuData2D, 0, 0, 10 * sizeof(float), cudaMemcpyDeviceToHost);

    return 1;
}
|
|
|
|
|
|
|
|
void CuTexImage::CopyFromPBO(int width, int height, GLuint pbo)
|
|
{
|
|
void* pbuf =NULL;
|
|
GLint esize = width * height * sizeof(float);
|
|
cudaGLRegisterBufferObject(pbo);
|
|
cudaGLMapBufferObject(&pbuf, pbo);
|
|
|
|
cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice);
|
|
|
|
cudaGLUnmapBufferObject(pbo);
|
|
cudaGLUnregisterBufferObject(pbo);
|
|
}
|
|
|
|
// Copies the whole image (_imgWidth * _imgHeight * _numChannel floats) from
// the linear device buffer into an OpenGL pixel buffer object.
//
//   pbo : name of the GL buffer object to write to.
//
// If the PBO is smaller than the image its storage is re-specified with
// glBufferData at 1.5x the required size. The PBO is registered with CUDA
// and mapped only for the duration of the copy.
//
// Returns 1 when the data was copied, 0 when the PBO could not be made
// large enough or when registering, mapping or copying failed. CUDA
// failures are reported through ProgramCU::CheckErrorCUDA.
int CuTexImage::CopyToPBO(GLuint pbo)
{
    GLint bsize, esize = _imgWidth * _imgHeight * sizeof(float) * _numChannel;
    glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
    glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
    if(bsize < esize)
    {
        // 1.5x the required size, computed in the wider GLsizeiptr type so
        // the headroom calculation does not overflow GLint
        const GLsizeiptr grown = static_cast<GLsizeiptr>(esize) + esize / 2;
        glBufferData(GL_PIXEL_PACK_BUFFER_ARB, grown, NULL, GL_STATIC_DRAW_ARB);
        glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
    }
    glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);

    // the buffer could not be grown to the required size
    if(bsize < esize) return 0;

    if(cudaGLRegisterBufferObject(pbo) != cudaSuccess)
    {
        ProgramCU::CheckErrorCUDA("CuTexImage::CopyToPBO");
        return 0;
    }

    void* pbuf = NULL;
    bool copied = false;
    if(cudaGLMapBufferObject(&pbuf, pbo) == cudaSuccess)
    {
        copied = (cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice) == cudaSuccess);
        cudaGLUnmapBufferObject(pbo);
    }
    cudaGLUnregisterBufferObject(pbo);

    if(!copied)
    {
        ProgramCU::CheckErrorCUDA("CuTexImage::CopyToPBO");
        return 0;
    }
    return 1;
}
|
|
|
|
#endif
|
|
|