//////////////////////////////////////////////////////////////////////////// // File: CuTexImage.cpp // Author: Changchang Wu // Description : implementation of the CuTexImage class. // // Copyright (c) 2007 University of North Carolina at Chapel Hill // All Rights Reserved // // Permission to use, copy, modify and distribute this software and its // documentation for educational, research and non-profit purposes, without // fee, and without a written agreement is hereby granted, provided that the // above copyright notice and the following paragraph appear in all copies. // // The University of North Carolina at Chapel Hill make no representations // about the suitability of this software for any purpose. It is provided // 'as is' without express or implied warranty. // // Please send BUG REPORTS to ccwu@cs.unc.edu // //////////////////////////////////////////////////////////////////////////// #if defined(CUDA_SIFTGPU_ENABLED) #include "GL/glew.h" #include #include #include #include #include using namespace std; #include #include #include #include "GlobalUtil.h" #include "GLTexImage.h" #include "CuTexImage.h" #include "ProgramCU.h" #if CUDA_VERSION <= 2010 && defined(SIFTGPU_ENABLE_LINEAR_TEX2D) #error "Require CUDA 2.2 or higher" #endif CuTexImage::CuTexImage() { _cuData = NULL; _cuData2D = NULL; _fromPBO = 0; _numChannel = _numBytes = 0; _imgWidth = _imgHeight = _texWidth = _texHeight = 0; } CuTexImage::CuTexImage(int width, int height, int nchannel, GLuint pbo) { _cuData = NULL; //check size of pbo GLint bsize, esize = width * height * nchannel * sizeof(float); glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); if(bsize < esize) { glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize, NULL, GL_STATIC_DRAW_ARB); glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); } glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); if(bsize >=esize) { cudaGLRegisterBufferObject(pbo); cudaGLMapBufferObject(&_cuData, pbo); ProgramCU::CheckErrorCUDA("cudaGLMapBufferObject"); _fromPBO = pbo; }else { _cuData = NULL; _fromPBO = 0; } if(_cuData) { _numBytes = bsize; _imgWidth = width; _imgHeight = height; _numChannel = nchannel; }else { _numBytes = 0; _imgWidth = 0; _imgHeight = 0; _numChannel = 0; } _texWidth = _texHeight =0; _cuData2D = NULL; } CuTexImage::~CuTexImage() { if(_fromPBO) { cudaGLUnmapBufferObject(_fromPBO); cudaGLUnregisterBufferObject(_fromPBO); }else if(_cuData) { cudaFree(_cuData); } if(_cuData2D) cudaFreeArray(_cuData2D); } void CuTexImage::SetImageSize(int width, int height) { _imgWidth = width; _imgHeight = height; } bool CuTexImage::InitTexture(int width, int height, int nchannel) { _imgWidth = width; _imgHeight = height; _numChannel = min(max(nchannel, 1), 4); const size_t size = width * height * _numChannel * sizeof(float); if (size < 0) { return false; } // SiftGPU uses int for all indexes and // this ensures that all elements can be accessed. if (size >= INT_MAX * sizeof(float)) { return false; } if(size <= _numBytes) return true; if(_cuData) cudaFree(_cuData); //allocate the array data const cudaError_t status = cudaMalloc(&_cuData, _numBytes = size); if (status != cudaSuccess) { _cuData = NULL; _numBytes = 0; return false; } return true; } void CuTexImage::CopyFromHost(const void * buf) { if(_cuData == NULL) return; cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyHostToDevice); } void CuTexImage::CopyToHost(void * buf) { if(_cuData == NULL) return; cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost); } void CuTexImage::CopyToHost(void * buf, int stream) { if(_cuData == NULL) return; cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost, (cudaStream_t)stream); } void CuTexImage::InitTexture2D() { #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D) if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight)) { cudaFreeArray(_cuData2D); _cuData2D = NULL; } if(_cuData2D == NULL) { _texWidth = max(_texWidth, _imgWidth); _texHeight = max(_texHeight, _imgHeight); cudaChannelFormatDesc desc; desc.f = cudaChannelFormatKindFloat; desc.x = sizeof(float) * 8; desc.y = _numChannel >=2 ? sizeof(float) * 8 : 0; desc.z = _numChannel >=3 ? sizeof(float) * 8 : 0; desc.w = _numChannel >=4 ? sizeof(float) * 8 : 0; const cudaError_t status = cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight); if (status != cudaSuccess) { _cuData = NULL; _numBytes = 0; } ProgramCU::CheckErrorCUDA("CuTexImage::InitTexture2D"); } #endif } void CuTexImage::CopyToTexture2D() { #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D) InitTexture2D(); if(_cuData2D) { cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel* sizeof(float) , _imgWidth * _numChannel*sizeof(float), _imgHeight, cudaMemcpyDeviceToDevice); ProgramCU::CheckErrorCUDA("cudaMemcpy2DToArray"); } #endif } int CuTexImage::DebugCopyToTexture2D() { /* CuTexImage tex; float data1[2][3] = {{1, 2, 5}, {3, 4, 5}}, data2[2][5]; tex.InitTexture(3, 2, 1); cudaMemcpy(tex._cuData, data1[0], 6 * sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(data1, tex._cuData, 4 * sizeof(float) , cudaMemcpyDeviceToHost); tex._texWidth =5; tex._texHeight = 2; tex.CopyToTexture2D(); cudaMemcpyFromArray(data2[0], tex._cuData2D, 0, 0, 10 * sizeof(float), cudaMemcpyDeviceToHost);*/ return 1; } void CuTexImage::CopyFromPBO(int width, int height, GLuint pbo) { void* pbuf =NULL; GLint esize = width * height * sizeof(float); cudaGLRegisterBufferObject(pbo); cudaGLMapBufferObject(&pbuf, pbo); cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice); cudaGLUnmapBufferObject(pbo); cudaGLUnregisterBufferObject(pbo); } int CuTexImage::CopyToPBO(GLuint pbo) { void* pbuf =NULL; GLint bsize, esize = _imgWidth * _imgHeight * sizeof(float) * _numChannel; glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); if(bsize < esize) { glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2, NULL, GL_STATIC_DRAW_ARB); glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); } glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); if(bsize >= esize) { cudaGLRegisterBufferObject(pbo); cudaGLMapBufferObject(&pbuf, pbo); cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice); cudaGLUnmapBufferObject(pbo); cudaGLUnregisterBufferObject(pbo); return 1; }else { return 0; } } #endif