//////////////////////////////////////////////////////////////////////////// // File: SparseBundleCU.cpp // Author: Changchang Wu // Description : implementation of the CUDA-based multicore bundle adjustment // // Copyright (c) 2011 Changchang Wu (ccwu@cs.washington.edu) // and the University of Washington at Seattle // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public // License as published by the Free Software Foundation; either // Version 3 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // //////////////////////////////////////////////////////////////////////////////// #include #include #include #include #include #include using std::vector; using std::cout; using std::pair; using std::ofstream; #include #include #include #include "pba.h" #include "SparseBundleCU.h" #include "ProgramCU.h" using namespace pba::ProgramCU; #ifdef _WIN32 #define finite _finite #endif namespace pba { typedef float float_t; // data type for host computation; double doesn't make // much difference #define CHECK_VEC(v1, v2) \ for (size_t j = 0; j < v1.size(); ++j) { \ if (v1[j] != v2[j]) { \ different++; \ std::cout << i << ' ' << j << ' ' << v1[j] << ' ' << v2[j] << '\n'; \ } \ } #define DEBUG_FUNCN(v, func, input, N) \ if (__debug_pba && v.IsValid()) { \ vector buf(v.GetLength()), buf_(v.GetLength()); \ for (int i = 0; i < N; ++i) { \ int different = 0; \ func input; \ ProgramCU::FinishWorkCUDA(); \ if (i > 0) { \ v.CopyToHost(&buf_[0]); \ CHECK_VEC(buf, buf_); \ } else { \ v.CopyToHost(&buf[0]); \ } \ if (different != 0) \ std::cout << #func << " : " << i << " : " << different << '\n'; \ } \ } #define DEBUG_FUNC(v, func, input) DEBUG_FUNCN(v, func, input, 2) 
SparseBundleCU::SparseBundleCU(int device) : ParallelBA(PBA_INVALID_DEVICE), _num_camera(0), _num_point(0), _num_imgpt(0), _num_imgpt_q(0), _camera_data(NULL), _point_data(NULL), _imgpt_data(NULL), _camera_idx(NULL), _point_idx(NULL), _projection_sse(0) { __selected_device = device; } size_t SparseBundleCU::GetMemCapacity() { if (__selected_device != __current_device) SetCudaDevice(__selected_device); size_t sz = ProgramCU::GetCudaMemoryCap(); if (sz < 1024) std::cout << "ERROR: CUDA is unlikely to be supported!\n"; return sz < 1024 ? 0 : sz; } void SparseBundleCU::SetCameraData(size_t ncam, CameraT* cams) { if (sizeof(CameraT) != 16 * sizeof(float)) exit(0); // never gonna happen...? _num_camera = (int)ncam; _camera_data = cams; _focal_mask = NULL; } void SparseBundleCU::SetFocalMask(const int* fmask, float weight) { _focal_mask = fmask; _weight_q = weight; } void SparseBundleCU::SetPointData(size_t npoint, Point3D* pts) { _num_point = (int)npoint; _point_data = (float*)pts; } void SparseBundleCU::SetProjection(size_t nproj, const Point2D* imgpts, const int* point_idx, const int* cam_idx) { _num_imgpt = (int)nproj; _imgpt_data = (float*)imgpts; _camera_idx = cam_idx; _point_idx = point_idx; _imgpt_datax.resize(0); } float SparseBundleCU::GetMeanSquaredError() { return float(_projection_sse / (_num_imgpt * __focal_scaling * __focal_scaling)); } void SparseBundleCU::BundleAdjustment() { if (ValidateInputData() != STATUS_SUCCESS) return; // //////////////////////// TimerBA timer(this, TIMER_OVERALL); NormalizeData(); if (InitializeBundle() != STATUS_SUCCESS) { // failed to allocate gpu storage } else if (__profile_pba) { // profiling some stuff RunProfileSteps(); } else { // real optimization AdjustBundleAdjsutmentMode(); NonlinearOptimizeLM(); TransferDataToHost(); } DenormalizeData(); } int SparseBundleCU::RunBundleAdjustment() { if (__warmup_device) WarmupDevice(); ResetBundleStatistics(); BundleAdjustment(); if (__num_lm_success > 0) 
SaveBundleStatistics(_num_camera, _num_point, _num_imgpt); if (__num_lm_success > 0) PrintBundleStatistics(); ResetTemporarySetting(); return __num_lm_success; } bool SparseBundleCU::InitializeBundleGPU() { bool previous_allocated = __memory_usage > 0; bool success = TransferDataToGPU() && InitializeStorageForCG(); if (!success && previous_allocated) { if (__verbose_level) std::cout << "WARNING: try clean allocation\n"; ClearPreviousError(); ReleaseAllocatedData(); success = TransferDataToGPU() && InitializeStorageForCG(); } if (!success && __jc_store_original) { if (__verbose_level) std::cout << "WARNING: try not storing original JC\n"; __jc_store_original = false; ClearPreviousError(); ReleaseAllocatedData(); success = TransferDataToGPU() && InitializeStorageForCG(); } if (!success && __jc_store_transpose) { if (__verbose_level) std::cout << "WARNING: try not storing transpose JC\n"; __jc_store_transpose = false; ClearPreviousError(); ReleaseAllocatedData(); success = TransferDataToGPU() && InitializeStorageForCG(); } if (!success && !__no_jacobian_store) { if (__verbose_level) std::cout << "WARNING: switch to memory saving mode\n"; __no_jacobian_store = true; ClearPreviousError(); ReleaseAllocatedData(); success = TransferDataToGPU() && InitializeStorageForCG(); } return success; } int SparseBundleCU::ValidateInputData() { if (_camera_data == NULL) return STATUS_CAMERA_MISSING; if (_point_data == NULL) return STATUS_POINT_MISSING; if (_imgpt_data == NULL) return STATUS_MEASURMENT_MISSING; if (_camera_idx == NULL || _point_idx == NULL) return STATUS_PROJECTION_MISSING; return STATUS_SUCCESS; } void SparseBundleCU::WarmupDevice() { std::cout << "Warm up device with storage allocation...\n"; if (__selected_device != __current_device) SetCudaDevice(__selected_device); CheckRequiredMemX(); InitializeBundleGPU(); } int SparseBundleCU::InitializeBundle() { ///////////////////////////////////////////////////// TimerBA timer(this, TIMER_GPU_ALLOCATION); if 
(__selected_device != __current_device) SetCudaDevice(__selected_device); CheckRequiredMemX(); ReserveStorageAuto(); if (!InitializeBundleGPU()) return STATUS_ALLOCATION_FAIL; return STATUS_SUCCESS; } int SparseBundleCU::GetParameterLength() { return _num_camera * 8 + 4 * _num_point; } bool SparseBundleCU::CheckRequiredMemX() { if (CheckRequiredMem(0)) return true; if (__jc_store_original) { if (__verbose_level) std::cout << "NOTE: not storing original JC\n"; __jc_store_original = false; if (CheckRequiredMem(1)) return true; } if (__jc_store_transpose) { if (__verbose_level) std::cout << "NOTE: not storing camera Jacobian\n"; __jc_store_transpose = false; if (CheckRequiredMem(1)) return true; } if (!__no_jacobian_store) { if (__verbose_level) std::cout << "NOTE: not storing any Jacobian\n"; __no_jacobian_store = true; if (CheckRequiredMem(1)) return true; } return false; } bool SparseBundleCU::CheckRequiredMem(int fresh) { int m = _num_camera, n = _num_point, k = _num_imgpt; #ifdef PBA_CUDA_ALLOCATE_MORE if (!fresh) { int m0 = _cuCameraData.GetReservedWidth(); m = std::max(m, m0); int n0 = _cuPointData.GetReservedWidth(); n = std::max(n, n0); int k0 = _cuMeasurements.GetReservedWidth(); k = std::max(k, k0); } #endif int p = 8 * m + 4 * n, q = _num_imgpt_q; size_t szn, total = GetCudaMemoryCap(); size_t sz0 = 800 * 600 * 2 * 4 * sizeof(float); // size_t szq = q > 0 ? (sizeof(float) * (q + m) * 4) : 0; size_t sz = sizeof(float) * (258 + 9 * n + 33 * m + 7 * k) + sz0; /////////////////////////////////// CG sz += p * 6 * sizeof(float); sz += ((__use_radial_distortion ? 64 : 56) * m + 12 * n) * sizeof(float); sz += (2 * (k + q) * sizeof(float)); if (sz > total) return false; ///////////////////////////////////// szn = (__no_jacobian_store ? 0 : (sizeof(float) * 8 * k)); if (sz + szn > total) __no_jacobian_store = false; else sz += szn; ///////////////////////////// szn = ((!__no_jacobian_store && __jc_store_transpose) ? 
16 * k * sizeof(float) : 0); if (sz + szn > total) __jc_store_transpose = false; else sz += szn; /////////////////////////// szn = ((!__no_jacobian_store && __jc_store_original) ? 16 * k * sizeof(float) : 0); if (sz + szn > total) __jc_store_original = false; else sz += szn; /////////////////////////////// szn = ((!__no_jacobian_store && __jc_store_transpose && !__jc_store_original) ? k * sizeof(int) : 0); if (sz + szn > total) { __jc_store_transpose = false; sz -= (16 * k * sizeof(float)); } else sz += szn; return sz <= total; } void SparseBundleCU::ReserveStorage(size_t ncam, size_t npt, size_t nproj) { if (ncam <= 1 || npt <= 1 || nproj <= 1) { ReleaseAllocatedData(); // Reset the memory strategy to the default. __jc_store_transpose = true; __jc_store_original = true; __no_jacobian_store = false; } else { const int* camidx = _camera_idx; const int* ptidx = _point_idx; int ncam_ = _num_camera; int npt_ = _num_point; int nproj_ = _num_imgpt; #ifdef PBA_CUDA_ALLOCATE_MORE size_t ncam_reserved = _cuCameraData.GetReservedWidth(); size_t npt_reserved = _cuPointData.GetReservedWidth(); size_t nproj_reserved = _cuMeasurements.GetReservedWidth(); ncam = std::max(ncam, ncam_reserved); npt = std::max(npt, npt_reserved); nproj = std::max(nproj, nproj_reserved); #endif _camera_idx = NULL; _point_idx = NULL; _num_camera = (int)ncam; _num_point = (int)npt; _num_imgpt = (int)nproj; if (__verbose_level) std::cout << "Reserving storage for ncam = " << ncam << "; npt = " << npt << "; nproj = " << nproj << '\n'; InitializeBundleGPU(); _num_camera = ncam_; _num_point = npt_; _num_imgpt = nproj_; _camera_idx = camidx; _point_idx = ptidx; } } static size_t upgrade_dimension(size_t sz) { size_t x = 1; while (x < sz) x <<= 1; return x; } void SparseBundleCU::ReserveStorageAuto() { if (_cuCameraData.data() == NULL || _cuPointData.data() == NULL || _cuMeasurements.data() == NULL) return; ReserveStorage(upgrade_dimension(_num_camera), upgrade_dimension(_num_point), 
upgrade_dimension(_num_imgpt)); } #define REPORT_ALLOCATION(NAME) \ if (__verbose_allocation && NAME.GetDataSize() > 1024) \ std::cout << (NAME.GetDataSize() > 1024 * 1024 \ ? NAME.GetDataSize() / 1024 / 1024 \ : NAME.GetDataSize() / 1024) \ << (NAME.GetDataSize() > 1024 * 1024 ? "MB" : "KB") \ << "\t allocated for " #NAME "\n"; #define ASSERT_ALLOCATION(NAME) \ if (!success) { \ std::cerr << "WARNING: failed to allocate " \ << (__verbose_allocation ? #NAME "; size = " : "") \ << (total_sz / 1024 / 1024) << "MB + " \ << (NAME.GetRequiredSize() / 1024 / 1024) << "MB\n"; \ return false; \ } else { \ total_sz += NAME.GetDataSize(); \ REPORT_ALLOCATION(NAME); \ } #define CHECK_ALLOCATION(NAME) \ if (NAME.GetDataSize() == 0 && NAME.GetRequiredSize() > 0) { \ ClearPreviousError(); \ std::cerr << "WARNING: unable to allocate " #NAME ": " \ << (NAME.GetRequiredSize() / 1024 / 1024) << "MB\n"; \ } else { \ total_sz += NAME.GetDataSize(); \ REPORT_ALLOCATION(NAME); \ } #define ALLOCATE_REQUIRED_DATA(NAME, num, channels) \ { \ success &= NAME.InitTexture(num, 1, channels); \ ASSERT_ALLOCATION(NAME); \ } #define ALLOCATE_OPTIONAL_DATA(NAME, num, channels, option) \ if (option) { \ option = NAME.InitTexture(num, 1, channels); \ CHECK_ALLOCATION(NAME); \ } else { \ NAME.InitTexture(0, 0, 0); \ } bool SparseBundleCU::TransferDataToGPU() { // given m camera, npoint, k measurements.. 
the number of float is bool success = true; size_t total_sz = 0; ///////////////////////////////////////////////////////////////////////////// vector qmap, qlist; vector qmapw, qlistw; ProcessIndexCameraQ(qmap, qlist); ////////////////////////////////////////////////////////////////////////////// ALLOCATE_REQUIRED_DATA(_cuBufferData, 256, 1); // 256 ALLOCATE_REQUIRED_DATA(_cuPointData, _num_point, 4); // 4n ALLOCATE_REQUIRED_DATA(_cuCameraData, _num_camera, 16); // 16m ALLOCATE_REQUIRED_DATA(_cuCameraDataEX, _num_camera, 16); // 16m //////////////////////////////////////////////////////////////// ALLOCATE_REQUIRED_DATA(_cuCameraMeasurementMap, _num_camera + 1, 1); // m ALLOCATE_REQUIRED_DATA(_cuCameraMeasurementList, _num_imgpt, 1); // k ALLOCATE_REQUIRED_DATA(_cuPointMeasurementMap, _num_point + 1, 1); // n ALLOCATE_REQUIRED_DATA(_cuProjectionMap, _num_imgpt, 2); // 2k ALLOCATE_REQUIRED_DATA(_cuImageProj, _num_imgpt + _num_imgpt_q, 2); // 2k ALLOCATE_REQUIRED_DATA(_cuPointDataEX, _num_point, 4); // 4n ALLOCATE_REQUIRED_DATA(_cuMeasurements, _num_imgpt, 2); // 2k // ALLOCATE_REQUIRED_DATA(_cuCameraQMap, _num_imgpt_q, 2); ALLOCATE_REQUIRED_DATA(_cuCameraQMapW, _num_imgpt_q, 2); ALLOCATE_REQUIRED_DATA(_cuCameraQList, (_num_imgpt_q > 0 ? _num_camera : 0), 2); ALLOCATE_REQUIRED_DATA(_cuCameraQListW, (_num_imgpt_q > 0 ? 
_num_camera : 0), 2); if (__no_jacobian_store) { _cuJacobianCamera.ReleaseData(); _cuJacobianCameraT.ReleaseData(); _cuJacobianPoint.ReleaseData(); _cuCameraMeasurementListT.ReleaseData(); } else { ALLOCATE_REQUIRED_DATA(_cuJacobianPoint, _num_imgpt * 2, 4); // 8k ALLOCATE_OPTIONAL_DATA(_cuJacobianCameraT, _num_imgpt * 2, 8, __jc_store_transpose); // 16k ALLOCATE_OPTIONAL_DATA(_cuJacobianCamera, _num_imgpt * 2, 8, __jc_store_original); // 16k if ((!__jc_store_original || __profile_pba) && __jc_store_transpose) { ALLOCATE_OPTIONAL_DATA(_cuCameraMeasurementListT, _num_imgpt, 1, __jc_store_transpose); // k if (!__jc_store_transpose) _cuJacobianCameraT.ReleaseData(); } else { _cuCameraMeasurementListT.ReleaseData(); } } ///////////////////////////////////////////////// if (_camera_idx && _point_idx) { ////////////////////////////////////////// BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); ////mapping from camera to measuremnts vector cpi(_num_camera + 1), cpidx(_num_imgpt); vector cpnum(_num_camera, 0); cpi[0] = 0; for (int i = 0; i < _num_imgpt; ++i) cpnum[_camera_idx[i]]++; for (int i = 1; i <= _num_camera; ++i) cpi[i] = cpi[i - 1] + cpnum[i - 1]; vector cptidx = cpi; for (int i = 0; i < _num_imgpt; ++i) cpidx[cptidx[_camera_idx[i]]++] = i; if (_num_imgpt_q > 0) ProcessWeightCameraQ(cpnum, qmap, qmapw, qlistw); BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); /////////////////////////////////////////////////////////////////////////////// BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); _cuMeasurements.CopyFromHost(_imgpt_datax.size() > 0 ? 
&_imgpt_datax[0] : _imgpt_data); _cuCameraData.CopyFromHost(_camera_data); _cuPointData.CopyFromHost(_point_data); _cuCameraMeasurementMap.CopyFromHost(&cpi[0]); _cuCameraMeasurementList.CopyFromHost(&cpidx[0]); if (_cuCameraMeasurementListT.IsValid()) { vector ridx(_num_imgpt); for (int i = 0; i < _num_imgpt; ++i) ridx[cpidx[i]] = i; _cuCameraMeasurementListT.CopyFromHost(&ridx[0]); } if (_num_imgpt_q > 0) { _cuCameraQMap.CopyFromHost(&qmap[0]); _cuCameraQMapW.CopyFromHost(&qmapw[0]); _cuCameraQList.CopyFromHost(&qlist[0]); _cuCameraQListW.CopyFromHost(&qlistw[0]); } BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); //////////////////////////////////////////// ///////mapping from point to measurment BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); vector ppi(_num_point + 1); for (int i = 0, last_point = -1; i < _num_imgpt; ++i) { int pt = _point_idx[i]; while (last_point < pt) ppi[++last_point] = i; } ppi[_num_point] = _num_imgpt; //////////projection map vector projection_map(_num_imgpt * 2); for (int i = 0; i < _num_imgpt; ++i) { int* imp = &projection_map[i * 2]; imp[0] = _camera_idx[i] * 2; imp[1] = _point_idx[i]; } BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); ////////////////////////////////////////////////////////////// BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); _cuPointMeasurementMap.CopyFromHost(&ppi[0]); _cuProjectionMap.CopyFromHost(&projection_map[0]); BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); } __memory_usage = total_sz; if (__verbose_level > 1) std::cout << "Memory for Motion/Structure/Jacobian:\t" << (total_sz / 1024 / 1024) << "MB\n"; return success; } bool SparseBundleCU::ProcessIndexCameraQ(vector& qmap, vector& qlist) { // reset q-data qmap.resize(0); qlist.resize(0); _num_imgpt_q = 0; // verify input if (_camera_idx == NULL) return true; if (_point_idx == NULL) return true; if (_focal_mask == NULL) return true; if (_num_camera == 0) return true; if (_weight_q <= 0) return true; 
/////////////////////////////////////// int error = 0; vector temp(_num_camera * 2, -1); for (int i = 0; i < _num_camera; ++i) { int iq = _focal_mask[i]; if (iq > i) { error = 1; break; } if (iq < 0) continue; if (iq == i) continue; int ip = temp[2 * iq]; // float ratio = _camera_data[i].f / _camera_data[iq].f; // if(ratio < 0.01 || ratio > 100) //{ // std::cout << "Warning: constaraints on largely different camreas\n"; // continue; //}else if (_focal_mask[iq] != iq) { error = 1; break; } else if (ip == -1) { temp[2 * iq] = i; temp[2 * iq + 1] = i; temp[2 * i] = iq; temp[2 * i + 1] = iq; } else { // maintain double-linked list temp[2 * i] = ip; temp[2 * i + 1] = iq; temp[2 * ip + 1] = i; temp[2 * iq] = i; } } if (error) { std::cout << "Error: incorrect constraints\n"; _focal_mask = NULL; return false; } qlist.resize(_num_camera * 2, -1); for (int i = 0; i < _num_camera; ++i) { int inext = temp[2 * i + 1]; if (inext == -1) continue; qlist[2 * i] = _num_imgpt + _num_imgpt_q; qlist[2 * inext + 1] = _num_imgpt + _num_imgpt_q; qmap.push_back(i); qmap.push_back(inext); _num_imgpt_q++; } return true; } void SparseBundleCU::ProcessWeightCameraQ(vector& cpnum, vector& qmap, vector& qmapw, vector& qlistw) { // set average focal length and average radial distortion vector qpnum(_num_camera, 0), qcnum(_num_camera, 0); vector fs(_num_camera, 0), rs(_num_camera, 0); for (int i = 0; i < _num_camera; ++i) { int qi = _focal_mask[i]; if (qi == -1) continue; // float ratio = _camera_data[i].f / _camera_data[qi].f; // if(ratio < 0.01 || ratio > 100) continue; fs[qi] += _camera_data[i].f; rs[qi] += _camera_data[i].radial; qpnum[qi] += cpnum[i]; qcnum[qi] += 1.0f; } // this seems not really matter..they will converge anyway for (int i = 0; i < _num_camera; ++i) { int qi = _focal_mask[i]; if (qi == -1) continue; // float ratio = _camera_data[i].f / _camera_data[qi].f; // if(ratio < 0.01 || ratio > 100) continue; _camera_data[i].f = fs[qi] / qcnum[qi]; _camera_data[i].radial = rs[qi] / 
qcnum[qi]; } qmapw.resize(_num_imgpt_q * 2, 0); qlistw.resize(_num_camera * 2, 0); for (int i = 0; i < _num_imgpt_q; ++i) { int cidx = qmap[i * 2], qi = _focal_mask[cidx]; float wi = sqrt(qpnum[qi] / qcnum[qi]) * _weight_q; float wr = (__use_radial_distortion ? wi * _camera_data[qi].f : 0.0); qmapw[i * 2] = wi; qmapw[i * 2 + 1] = wr; qlistw[cidx * 2] = wi; qlistw[cidx * 2 + 1] = wr; } } void SparseBundleCU::ReleaseAllocatedData() { _cuCameraData.ReleaseData(); _cuCameraDataEX.ReleaseData(); _cuPointData.ReleaseData(); _cuPointDataEX.ReleaseData(); _cuMeasurements.ReleaseData(); _cuImageProj.ReleaseData(); _cuJacobianCamera.ReleaseData(); _cuJacobianPoint.ReleaseData(); _cuJacobianCameraT.ReleaseData(); _cuProjectionMap.ReleaseData(); _cuPointMeasurementMap.ReleaseData(); _cuCameraMeasurementMap.ReleaseData(); _cuCameraMeasurementList.ReleaseData(); _cuCameraMeasurementListT.ReleaseData(); _cuBufferData.ReleaseData(); _cuBlockPC.ReleaseData(); _cuVectorJtE.ReleaseData(); _cuVectorJJ.ReleaseData(); _cuVectorJX.ReleaseData(); _cuVectorXK.ReleaseData(); _cuVectorPK.ReleaseData(); _cuVectorZK.ReleaseData(); _cuVectorRK.ReleaseData(); _cuVectorSJ.ReleaseData(); _cuCameraQList.ReleaseData(); _cuCameraQMap.ReleaseData(); _cuCameraQMapW.ReleaseData(); _cuCameraQListW.ReleaseData(); ProgramCU::ResetCurrentDevice(); } void SparseBundleCU::NormalizeDataF() { int incompatible_radial_distortion = 0; if (__focal_normalize) { if (__focal_scaling == 1.0f) { //------------------------------------------------------------------ ////////////////////////////////////////////////////////////// vector focals(_num_camera); for (int i = 0; i < _num_camera; ++i) focals[i] = _camera_data[i].f; std::nth_element(focals.begin(), focals.begin() + _num_camera / 2, focals.end()); float median_focal_length = focals[_num_camera / 2]; __focal_scaling = __data_normalize_median / median_focal_length; float radial_factor = median_focal_length * median_focal_length * 4.0f; /////////////////////////////// 
_imgpt_datax.resize(_num_imgpt * 2); for (int i = 0; i < _num_imgpt * 2; ++i) _imgpt_datax[i] = _imgpt_data[i] * __focal_scaling; for (int i = 0; i < _num_camera; ++i) { _camera_data[i].f *= __focal_scaling; if (!__use_radial_distortion) { } else if (__reset_initial_distortion) { _camera_data[i].radial = 0; } else if (_camera_data[i].distortion_type != __use_radial_distortion) { incompatible_radial_distortion++; _camera_data[i].radial = 0; } else if (__use_radial_distortion == -1) { _camera_data[i].radial *= radial_factor; } } if (__verbose_level > 2) std::cout << "Focal length normalized by " << __focal_scaling << '\n'; __reset_initial_distortion = false; } } else { if (__use_radial_distortion) { for (int i = 0; i < _num_camera; ++i) { if (__reset_initial_distortion) { _camera_data[i].radial = 0; } else if (_camera_data[i].distortion_type != __use_radial_distortion) { _camera_data[i].radial = 0; incompatible_radial_distortion++; } } __reset_initial_distortion = false; } _imgpt_datax.resize(0); } if (incompatible_radial_distortion) { std::cout << "ERROR: incompatible radial distortion input; reset to 0;\n"; } } void SparseBundleCU::NormalizeDataD() { if (__depth_scaling == 1.0f) { const float dist_bound = 1.0f; vector oz(_num_imgpt); vector cpdist1(_num_camera, dist_bound); vector cpdist2(_num_camera, -dist_bound); vector camnpj(_num_camera, 0), cambpj(_num_camera, 0); int bad_point_count = 0; for (int i = 0; i < _num_imgpt; ++i) { int cmidx = _camera_idx[i]; CameraT* cam = _camera_data + cmidx; float* rz = cam->m[2]; float* x = _point_data + 4 * _point_idx[i]; oz[i] = (rz[0] * x[0] + rz[1] * x[1] + rz[2] * x[2] + cam->t[2]); ///////////////////////////////////////////////// // points behind camera may causes big problem float ozr = oz[i] / cam->t[2]; if (fabs(ozr) < __depth_check_epsilon) { bad_point_count++; float px = cam->f * (cam->m[0][0] * x[0] + cam->m[0][1] * x[1] + cam->m[0][2] * x[2] + cam->t[0]); float py = cam->f * (cam->m[1][0] * x[0] + cam->m[1][1] * 
x[1] + cam->m[1][2] * x[2] + cam->t[1]); float mx = _imgpt_data[i * 2], my = _imgpt_data[2 * i + 1]; bool checkx = fabs(mx) > fabs(my); if ((checkx && px * oz[i] * mx < 0 && fabs(mx) > 64) || (!checkx && py * oz[i] * my < 0 && fabs(my) > 64)) { if (__verbose_level > 3) std::cout << "Warning: proj of #" << cmidx << " on the wrong side, oz = " << oz[i] << " (" << (px / oz[i]) << ',' << (py / oz[i]) << ") (" << mx << ',' << my << ")\n"; ///////////////////////////////////////////////////////////////////////// if (oz[i] > 0) cpdist2[cmidx] = 0; else cpdist1[cmidx] = 0; } if (oz[i] >= 0) cpdist1[cmidx] = std::min(cpdist1[cmidx], oz[i]); else cpdist2[cmidx] = std::max(cpdist2[cmidx], oz[i]); } if (oz[i] < 0) { __num_point_behind++; cambpj[cmidx]++; } camnpj[cmidx]++; } if (bad_point_count > 0 && __depth_degeneracy_fix) { if (!__focal_normalize || !__depth_normalize) std::cout << "Enable data normalization on degeneracy\n"; __focal_normalize = true; __depth_normalize = true; } if (__depth_normalize) { std::nth_element(oz.begin(), oz.begin() + _num_imgpt / 2, oz.end()); float oz_median = oz[_num_imgpt / 2]; float shift_min = std::min(oz_median * 0.001f, 1.0f); float dist_threshold = shift_min * 0.1f; __depth_scaling = (1.0f / oz_median) / __data_normalize_median; if (__verbose_level > 2) std::cout << "Depth normalized by " << __depth_scaling << " (" << oz_median << ")\n"; for (int i = 0; i < _num_camera; ++i) { // move the camera a little bit? if (!__depth_degeneracy_fix) { } else if ((cpdist1[i] < dist_threshold || cpdist2[i] > -dist_threshold)) { float shift = shift_min; //(cpdist1[i] <= -cpdist2[i] ? 
shift_min : //-shift_min); // if(cpdist1[i] < dist_bound && cpdist2[i] > - dist_bound) shift = - // 0.5f * (cpdist1[i] + cpdist2[i]); bool boths = cpdist1[i] < dist_threshold && cpdist2[i] > -dist_threshold; _camera_data[i].t[2] += shift; if (__verbose_level > 3) std::cout << "Adjust C" << std::setw(5) << i << " by " << std::setw(12) << shift << " [B" << std::setw(2) << cambpj[i] << "/" << std::setw(5) << camnpj[i] << "] [" << (boths ? 'X' : ' ') << "][" << cpdist1[i] << ", " << cpdist2[i] << "]\n"; __num_camera_modified++; } _camera_data[i].t[0] *= __depth_scaling; _camera_data[i].t[1] *= __depth_scaling; _camera_data[i].t[2] *= __depth_scaling; } for (int i = 0; i < _num_point; ++i) { ///////////////////////////////// _point_data[4 * i + 0] *= __depth_scaling; _point_data[4 * i + 1] *= __depth_scaling; _point_data[4 * i + 2] *= __depth_scaling; } } if (__num_point_behind > 0) std::cout << "WARNING: " << __num_point_behind << " points are behind cameras.\n"; if (__num_camera_modified > 0) std::cout << "WARNING: " << __num_camera_modified << " camera moved to avoid degeneracy.\n"; } } void SparseBundleCU::NormalizeData() { TimerBA timer(this, TIMER_PREPROCESSING); NormalizeDataD(); NormalizeDataF(); } void SparseBundleCU::DenormalizeData() { if (__focal_normalize && __focal_scaling != 1.0f) { float squared_focal_factor = (__focal_scaling * __focal_scaling); for (int i = 0; i < _num_camera; ++i) { _camera_data[i].f /= __focal_scaling; if (__use_radial_distortion == -1) _camera_data[i].radial *= squared_focal_factor; _camera_data[i].distortion_type = __use_radial_distortion; } _projection_sse /= squared_focal_factor; __focal_scaling = 1.0f; _imgpt_datax.resize(0); } else if (__use_radial_distortion) { for (int i = 0; i < _num_camera; ++i) _camera_data[i].distortion_type = __use_radial_distortion; } if (__depth_normalize && __depth_scaling != 1.0f) { for (int i = 0; i < _num_camera; ++i) { _camera_data[i].t[0] /= __depth_scaling; _camera_data[i].t[1] /= 
// (Continuation of SparseBundleCU::DenormalizeData(): right-hand side of the
// assignment that is opened immediately before this block.)
          __depth_scaling;
      _camera_data[i].t[2] /= __depth_scaling;
    }
    for (int i = 0; i < _num_point; ++i) {
      _point_data[4 * i + 0] /= __depth_scaling;
      _point_data[4 * i + 1] /= __depth_scaling;
      _point_data[4 * i + 2] /= __depth_scaling;
    }
    // mark the data as unscaled again
    __depth_scaling = 1.0f;
  }
}

// Copies the camera and point buffers from the GPU back into the
// caller-provided host arrays, timed as a GPU download.
void SparseBundleCU::TransferDataToHost() {
  TimerBA timer(this, TIMER_GPU_DOWNLOAD);
  _cuCameraData.CopyToHost(_camera_data);
  _cuPointData.CopyToHost(_point_data);
}

// Evaluates the projection residuals for the given camera/point buffers into
// `proj` (plus the focal-constraint terms when there are any) and returns the
// value of ComputeVectorNorm(proj) as float. Also counts the evaluation in
// __num_projection_eval.
float SparseBundleCU::EvaluateProjection(CuTexImage& cam, CuTexImage& point,
                                         CuTexImage& proj) {
  ++__num_projection_eval;
  ConfigBA::TimerBA timer(this, TIMER_FUNCTION_PJ, true);
  ComputeProjection(cam, point, _cuMeasurements, _cuProjectionMap, proj,
                    __use_radial_distortion);
  // constraint terms are passed _num_imgpt as the offset into proj
  if (_num_imgpt_q > 0)
    ComputeProjectionQ(cam, _cuCameraQMap, _cuCameraQMapW, proj, _num_imgpt);
  return (float)ComputeVectorNorm(proj, _cuBufferData);
}

// Same as EvaluateProjection() except that the residuals come from
// ComputeProjectionX instead of ComputeProjection.
// NOTE(review): how the two kernels differ is not visible in this file.
float SparseBundleCU::EvaluateProjectionX(CuTexImage& cam, CuTexImage& point,
                                          CuTexImage& proj) {
  ++__num_projection_eval;
  ConfigBA::TimerBA timer(this, TIMER_FUNCTION_PJ, true);
  ComputeProjectionX(cam, point, _cuMeasurements, _cuProjectionMap, proj,
                     __use_radial_distortion);
  if (_num_imgpt_q > 0)
    ComputeProjectionQ(cam, _cuCameraQMap, _cuCameraQMapW, proj, _num_imgpt);
  return (float)ComputeVectorNorm(proj, _cuBufferData);
}

// Host-side debugging aid: recomputes every reprojection residual from the
// host copies of the data and prints two mean squared errors, labelled
// "undistorted domain" (e1) and "distorted domain" (e2). Residuals that are
// not finite are reported together with the values that produced them.
void SparseBundleCU::DebugProjections() {
  double e1 = 0, e2 = 0;
  for (int i = 0; i < _num_imgpt; ++i) {
    // The camera is read as a flat float array: c[0] scales the projection,
    // c[1..3] is used as translation, c[4..12] as a row-major 3x3 rotation
    // and c[13] as the radial term.
    // NOTE(review): presumably this mirrors the CameraT member layout --
    // confirm against its declaration.
    float* c = (float*)(_camera_data + _camera_idx[i]);
    float* p = _point_data + 4 * _point_idx[i];
    // use the scaled measurement copy when focal normalization created one
    const float* m = _imgpt_datax.size() > 0 ? (&_imgpt_datax[i * 2])
                                             : (_imgpt_data + 2 * i);
    float* r = c + 4;
    float* t = c + 1;
    float dx1, dy1;

    ////////////////////////////////////////////////////////////////////////////////
    float z = r[6] * p[0] + r[7] * p[1] + r[8] * p[2] + t[2];
    float xx = (r[0] * p[0] + r[1] * p[1] + r[2] * p[2] + t[0]);
    float yy = (r[3] * p[0] + r[4] * p[1] + r[5] * p[2] + t[1]);
    float x = xx / z;
    float y = yy / z;

    if (__use_radial_distortion == -1) {
      // the radial factor is computed from the measurement and applied to it
      float rn = (m[0] * m[0] + m[1] * m[1]) * c[13] + 1.0f;
      dx1 = c[0] * x - m[0] * rn;
      dy1 = c[0] * y - m[1] * rn;
      e1 += (dx1 * dx1 + dy1 * dy1);
      e2 += (dx1 * dx1 + dy1 * dy1) / (rn * rn);
    } else if (__use_radial_distortion) {
      // the radial factor is computed from the projection and applied to it
      float rn = (x * x + y * y) * c[13] + 1.0f;
      dx1 = c[0] * x * rn - m[0];
      dy1 = c[0] * y * rn - m[1];
      e1 += (dx1 * dx1 + dy1 * dy1) / (rn * rn);
      e2 += (dx1 * dx1 + dy1 * dy1);
    } else {
      // no distortion: both error measures are identical
      dx1 = c[0] * x - m[0];
      dy1 = c[0] * y - m[1];
      e1 += (dx1 * dx1 + dy1 * dy1);
      e2 += (dx1 * dx1 + dy1 * dy1);
    }
    if (!isfinite(dx1) || !isfinite(dy1))
      std::cout << "x = " << xx << " y = " << yy << " z = " << z << '\n'
                << "t0 = " << t[0] << " t1 = " << t[1] << " t2 = " << t[2]
                << '\n'
                << "p0 = " << p[0] << " p1 = " << p[1] << " p2 = " << p[2]
                << '\n';
  }
  // undo the focal scaling and average over the measurements
  e1 = e1 / (__focal_scaling * __focal_scaling) / _num_imgpt;
  e2 = e2 / (__focal_scaling * __focal_scaling) / _num_imgpt;
  std::cout << "DEBUG: mean squared error = " << e1
            << " in undistorted domain;\n";
  std::cout << "DEBUG: mean squared error = " << e2
            << " in distorted domain.\n";
}

// NOTE: only the beginning of this function lies inside this block; the
// declaration at the end continues after the end of the block.
bool SparseBundleCU::InitializeStorageForCG() {
  bool success = true;
  size_t total_sz = 0;
  int plen = GetParameterLength();  // q = 8m + 4n

  //////////////////////////////////////////// 6q
  // ALLOCATE_REQUIRED_DATA returns false from this function on failure.
  ALLOCATE_REQUIRED_DATA(_cuVectorJtE, plen, 1);
  ALLOCATE_REQUIRED_DATA(_cuVectorXK, plen, 1);
  ALLOCATE_REQUIRED_DATA(_cuVectorPK, plen, 1);
  ALLOCATE_REQUIRED_DATA(_cuVectorRK, plen, 1);
  ALLOCATE_REQUIRED_DATA(_cuVectorJJ, plen, 1);
  ALLOCATE_REQUIRED_DATA(_cuVectorZK, plen, 1);

  /////////////////////////////////
  unsigned int cblock_len =
// (Continuation of SparseBundleCU::InitializeStorageForCG(): initializer of
// the `cblock_len` declaration that is opened immediately before this block.)
      (__use_radial_distortion ? 64 : 56);
  ALLOCATE_REQUIRED_DATA(_cuBlockPC, _num_camera * cblock_len + 12 * _num_point,
                         1);  // 64m + 12n

  if (__accurate_gain_ratio) {
    ALLOCATE_REQUIRED_DATA(_cuVectorJX, _num_imgpt + _num_imgpt_q, 2);  // 2k
  } else {
    // no separate buffer: JX is pointed at the data of _cuImageProj
    _cuVectorJX.SetTexture(_cuImageProj.data(), _num_imgpt + _num_imgpt_q, 2);
  }
  // ALLOCATE_OPTIONAL_DATA writes the outcome back into __jacobian_normalize.
  ALLOCATE_OPTIONAL_DATA(_cuVectorSJ, plen, 1, __jacobian_normalize);

  /////////////////////////////////////////
  // added on top of what TransferDataToGPU() recorded
  __memory_usage += total_sz;
  if (__verbose_level > 1)
    std::cout << "Memory for Conjugate Gradient Solver:\t"
              << (total_sz / 1024 / 1024) << "MB\n";
  return success;
}

// Fills _cuVectorSJ, the scaling vector used for Jacobian normalization. Does
// nothing when that buffer was not allocated. In both branches _cuVectorSJ is
// swapped with an empty image while the Jacobians are evaluated, so the
// evaluation itself sees an invalid SJ.
void SparseBundleCU::PrepareJacobianNormalization() {
  if (!_cuVectorSJ.IsValid()) return;

  if ((__jc_store_transpose || __jc_store_original) &&
      _cuJacobianPoint.IsValid() && !__bundle_current_mode) {
    // Stored Jacobians are available: take the diagonal from them and then
    // apply ComputeSQRT to SJ.
    CuTexImage null;
    null.SwapData(_cuVectorSJ);
    EvaluateJacobians();
    null.SwapData(_cuVectorSJ);
    ComputeDiagonal(_cuVectorJJ, _cuVectorSJ);
    ComputeSQRT(_cuVectorSJ);
  } else {
    // Otherwise let ComputeBlockPC fill _cuVectorJJ, move that data into SJ
    // by swapping and apply ComputeRSQRT.
    CuTexImage null;
    null.SwapData(_cuVectorSJ);
    EvaluateJacobians();
    ComputeBlockPC(0, true);
    null.SwapData(_cuVectorSJ);
    _cuVectorJJ.SwapData(_cuVectorSJ);
    ProgramCU::ComputeRSQRT(_cuVectorSJ);
  }
}

// Evaluates the Jacobians into the stored buffers. Skipped when Jacobians are
// not stored at all, or in motion-only mode when no camera Jacobian is
// stored. `shuffle` additionally fills the transposed copy from the original
// layout when both are kept. Counts the evaluation in __num_jacobian_eval.
void SparseBundleCU::EvaluateJacobians(bool shuffle) {
  if (__no_jacobian_store) return;
  if (__bundle_current_mode == BUNDLE_ONLY_MOTION && !__jc_store_original &&
      !__jc_store_transpose)
    return;

  ConfigBA::TimerBA timer(this, TIMER_FUNCTION_JJ, true);

  if (__jc_store_original || !__jc_store_transpose) {
    // write the camera Jacobian in the original layout
    ComputeJacobian(_cuCameraData, _cuPointData, _cuJacobianCamera,
                    _cuJacobianPoint, _cuProjectionMap, _cuVectorSJ,
                    _cuMeasurements, _cuCameraMeasurementList,
                    __fixed_intrinsics, __use_radial_distortion, false);
    if (shuffle && __jc_store_transpose && _cuJacobianCameraT.IsValid())
      ShuffleCameraJacobian(_cuJacobianCamera, _cuCameraMeasurementList,
                            _cuJacobianCameraT);
  } else {
    // only the transposed layout is stored: write it directly
    ComputeJacobian(_cuCameraData, _cuPointData, _cuJacobianCameraT,
                    _cuJacobianPoint, _cuProjectionMap, _cuVectorSJ,
                    _cuMeasurements, _cuCameraMeasurementListT,
                    __fixed_intrinsics, __use_radial_distortion, true);
  }
  ++__num_jacobian_eval;
}

// Computes JtE from E. `mode` 0 means "use __bundle_current_mode"; the mode
// is forwarded to the kernels. Without a stored camera Jacobian the product
// comes from ComputeJtE_ and is then multiplied by SJ (if valid) through
// ComputeVXY; otherwise the stored Jacobian (transposed layout preferred) is
// used. The focal-constraint term is added for every mode except 2.
void SparseBundleCU::ComputeJtE(CuTexImage& E, CuTexImage& JtE, int mode) {
  ConfigBA::TimerBA timer(this, TIMER_FUNCTION_JTE, true);
  if (mode == 0) mode = __bundle_current_mode;

  if (__no_jacobian_store || (!__jc_store_original && !__jc_store_transpose)) {
    ProgramCU::ComputeJtE_(E, JtE, _cuCameraData, _cuPointData,
                           _cuMeasurements, _cuCameraMeasurementMap,
                           _cuCameraMeasurementList, _cuPointMeasurementMap,
                           _cuProjectionMap, _cuJacobianPoint,
                           __fixed_intrinsics, __use_radial_distortion, mode);

    ////////////////////////////////////////////////////////////////////////////////////
    // Apply SJ to the part of JtE that has not been scaled yet.
    // NOTE(review): the trailing ComputeVXY arguments look like a length and
    // an offset into the parameter vector -- confirm against ProgramCU.
    if (!_cuVectorSJ.IsValid()) {
    } else if (mode == 2) {
      if (!_cuJacobianPoint.IsValid())
        ComputeVXY(JtE, _cuVectorSJ, JtE, _num_point * 4, _num_camera * 8);
    } else if (mode == 1)
      ComputeVXY(JtE, _cuVectorSJ, JtE, _num_camera * 8);
    else
      ComputeVXY(JtE, _cuVectorSJ, JtE,
                 _cuJacobianPoint.IsValid() ? _num_camera * 8 : 0);

  } else if (__jc_store_transpose) {
    ProgramCU::ComputeJtE(E, _cuJacobianCameraT, _cuCameraMeasurementMap,
                          _cuCameraMeasurementList, _cuJacobianPoint,
                          _cuPointMeasurementMap, JtE, true, mode);
  } else {
    ProgramCU::ComputeJtE(E, _cuJacobianCamera, _cuCameraMeasurementMap,
                          _cuCameraMeasurementList, _cuJacobianPoint,
                          _cuPointMeasurementMap, JtE, false, mode);
  }

  if (mode != 2 && _num_imgpt_q > 0)
    ProgramCU::ComputeJQtEC(E, _cuCameraQList, _cuCameraQListW, _cuVectorSJ,
                            JtE);
}

// NOTE: only the beginning of this function lies inside this block; the
// conditional expression at the end continues after the end of the block.
void SparseBundleCU::SaveBundleRecord(int iter, float res, float damping,
                                      float& g_norm, float& g_inf) {
  // do not really compute if parameter not specified...
  // for large dataset, it never converges..
  g_inf = __lm_check_gradient ? ComputeVectorMax(_cuVectorJtE, _cuBufferData)
                              : 0;
  g_norm = __save_gradient_norm ?
float(ComputeVectorNorm(_cuVectorJtE, _cuBufferData)) : g_inf; ConfigBA::SaveBundleRecord(iter, res, damping, g_norm, g_inf); } void SparseBundleCU::ComputeJX(CuTexImage& X, CuTexImage& JX, int mode) { ConfigBA::TimerBA timer(this, TIMER_FUNCTION_JX, true); if (__no_jacobian_store || (__multiply_jx_usenoj && mode != 2) || !__jc_store_original) { if (_cuVectorSJ.IsValid()) { if (mode == 0) ProgramCU::ComputeVXY(X, _cuVectorSJ, _cuVectorZK); else if (mode == 1) ProgramCU::ComputeVXY(X, _cuVectorSJ, _cuVectorZK, _num_camera * 8); else if (mode == 2) ProgramCU::ComputeVXY(X, _cuVectorSJ, _cuVectorZK, _num_point * 4, _num_camera * 8); ProgramCU::ComputeJX_(_cuVectorZK, JX, _cuCameraData, _cuPointData, _cuMeasurements, _cuProjectionMap, __fixed_intrinsics, __use_radial_distortion, mode); } else { ProgramCU::ComputeJX_(X, JX, _cuCameraData, _cuPointData, _cuMeasurements, _cuProjectionMap, __fixed_intrinsics, __use_radial_distortion, mode); } } else { ProgramCU::ComputeJX(_num_camera * 2, X, _cuJacobianCamera, _cuJacobianPoint, _cuProjectionMap, JX, mode); } if (_num_imgpt_q > 0 && mode != 2) { ProgramCU::ComputeJQX(X, _cuCameraQMap, _cuCameraQMapW, _cuVectorSJ, JX, _num_imgpt); } } void SparseBundleCU::ComputeBlockPC(float lambda, bool dampd) { ConfigBA::TimerBA timer(this, TIMER_FUNCTION_BC, true); bool use_diagonal_q = _cuCameraQListW.IsValid() && __bundle_current_mode != 2; if (use_diagonal_q) ComputeDiagonalQ(_cuCameraQListW, _cuVectorSJ, _cuVectorJJ); if (__no_jacobian_store || (!__jc_store_original && !__jc_store_transpose)) { ComputeDiagonalBlock_( lambda, dampd, _cuCameraData, _cuPointData, _cuMeasurements, _cuCameraMeasurementMap, _cuCameraMeasurementList, _cuPointMeasurementMap, _cuProjectionMap, _cuJacobianPoint, _cuVectorSJ, _cuVectorJJ, _cuBlockPC, __fixed_intrinsics, __use_radial_distortion, use_diagonal_q, __bundle_current_mode); } else if (__jc_store_transpose) { ComputeDiagonalBlock(lambda, dampd, _cuJacobianCameraT, _cuCameraMeasurementMap, 
_cuJacobianPoint, _cuPointMeasurementMap, _cuCameraMeasurementList, _cuVectorJJ, _cuBlockPC, __use_radial_distortion, true, use_diagonal_q, __bundle_current_mode); } else { ComputeDiagonalBlock(lambda, dampd, _cuJacobianCamera, _cuCameraMeasurementMap, _cuJacobianPoint, _cuPointMeasurementMap, _cuCameraMeasurementList, _cuVectorJJ, _cuBlockPC, __use_radial_distortion, false, use_diagonal_q, __bundle_current_mode); } } void SparseBundleCU::ApplyBlockPC(CuTexImage& v, CuTexImage& pv, int mode) { ConfigBA::TimerBA timer(this, TIMER_FUNCTION_MP, true); MultiplyBlockConditioner(_num_camera, _num_point, _cuBlockPC, v, pv, __use_radial_distortion, mode); } void SparseBundleCU::ComputeDiagonal(CuTexImage& JJ, CuTexImage& JJI) { ////////////////////checking the impossible. if (__no_jacobian_store) return; if (!__jc_store_transpose && !__jc_store_original) return; ConfigBA::TimerBA timer(this, TIMER_FUNCTION_DD, true); bool use_diagonal_q = _cuCameraQListW.IsValid(); if (use_diagonal_q) { CuTexImage null; ComputeDiagonalQ(_cuCameraQListW, null, JJ); } if (__jc_store_transpose) { ProgramCU::ComputeDiagonal(_cuJacobianCameraT, _cuCameraMeasurementMap, _cuJacobianPoint, _cuPointMeasurementMap, _cuCameraMeasurementList, JJ, JJI, true, __use_radial_distortion, use_diagonal_q); } else { ProgramCU::ComputeDiagonal(_cuJacobianCamera, _cuCameraMeasurementMap, _cuJacobianPoint, _cuPointMeasurementMap, _cuCameraMeasurementList, JJ, JJI, false, __use_radial_distortion, use_diagonal_q); } } int SparseBundleCU::SolveNormalEquationPCGX(float lambda) { //---------------------------------------------------------- //(Jt * J + lambda * diag(Jt * J)) X = Jt * e //------------------------------------------------------------- TimerBA timer(this, TIMER_CG_ITERATION); __recent_cg_status = ' '; // diagonal for jacobian preconditioning... int plen = GetParameterLength(); CuTexImage null; CuTexImage& VectorDP = __lm_use_diagonal_damp ? 
_cuVectorJJ : null; // diagonal ComputeBlockPC(lambda, __lm_use_diagonal_damp); /////////////////////////////////////////////////////// // B = [BC 0 ; 0 BP] // m = [mc 0; 0 mp]; // A x= BC * x - JcT * Jp * mp * JpT * Jc * x // = JcT * Jc x + lambda * D * x + ........ //////////////////////////////////////////////////////////// CuTexImage r; r.SetTexture(_cuVectorRK.data(), 8 * _num_camera); CuTexImage p; p.SetTexture(_cuVectorPK.data(), 8 * _num_camera); CuTexImage z; z.SetTexture(_cuVectorZK.data(), 8 * _num_camera); CuTexImage x; x.SetTexture(_cuVectorXK.data(), 8 * _num_camera); CuTexImage d; d.SetTexture(VectorDP.data(), 8 * _num_camera); CuTexImage& u = _cuVectorRK; CuTexImage& v = _cuVectorPK; CuTexImage up; up.SetTexture(u.data() + 8 * _num_camera, 4 * _num_point); CuTexImage vp; vp.SetTexture(v.data() + 8 * _num_camera, 4 * _num_point); CuTexImage uc; uc.SetTexture(z.data(), 8 * _num_camera); CuTexImage& e = _cuVectorJX; CuTexImage& e2 = _cuImageProj; ApplyBlockPC(_cuVectorJtE, u, 2); ComputeJX(u, e, 2); ComputeJtE(e, uc, 1); ComputeSAXPY(-1.0f, uc, _cuVectorJtE, r); // r ApplyBlockPC(r, p, 1); // z = p = M r float_t rtz0 = (float_t)ComputeVectorDot(r, p, _cuBufferData); // r(0)' * // z(0) ComputeJX(p, e, 1); // Jc * x ComputeJtE(e, u, 2); // JpT * jc * x ApplyBlockPC(u, v, 2); float_t qtq0 = (float_t)ComputeVectorNorm(e, _cuBufferData); // q(0)' * q(0) float_t pdp0 = (float_t)ComputeVectorNormW(p, d, _cuBufferData); // p(0)' * DDD * p(0) float_t uv0 = (float_t)ComputeVectorDot(up, vp, _cuBufferData); float_t alpha0 = rtz0 / (qtq0 + lambda * pdp0 - uv0); if (__verbose_cg_iteration) std::cout << " --0,\t alpha = " << alpha0 << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; if (!isfinite(alpha0)) { return 0; } if (alpha0 == 0) { __recent_cg_status = 'I'; return 1; } //////////////////////////////////////////////////////////// ComputeSAX((float)alpha0, p, x); // x(k+1) = x(k) + a(k) * p(k) ComputeJX(v, e2, 2); // //Jp * mp * JpT * JcT * p 
ComputeSAXPY(-1.0f, e2, e, e); ComputeJtE(e, uc, 1); // JcT * .... ComputeSXYPZ(lambda, d, p, uc, uc); ComputeSAXPY((float)-alpha0, uc, r, r); // r(k + 1) = r(k) - a(k) * A * pk ////////////////////////////////////////////////////////////////////////// float_t rtzk = rtz0, rtz_min = rtz0, betak; int iteration = 1; ++__num_cg_iteration; while (true) { ApplyBlockPC(r, z, 1); /////////////////////////////////////////////////////////////////////////// float_t rtzp = rtzk; rtzk = (float_t)ComputeVectorDot( r, z, _cuBufferData); //[r(k + 1) = M^(-1) * z(k + 1)] * z(k+1) float_t rtz_ratio = sqrt(fabs(rtzk / rtz0)); if (rtz_ratio < __cg_norm_threshold) { if (__recent_cg_status == ' ') __recent_cg_status = iteration < std::min(10, __cg_min_iteration) ? '0' + iteration : 'N'; if (iteration >= __cg_min_iteration) break; } //////////////////////////////////////////////////////////////////////////// betak = rtzk / rtzp; // beta rtz_min = std::min(rtz_min, rtzk); ComputeSAXPY((float)betak, p, z, p); // p(k) = z(k) + b(k) * p(k - 1) ComputeJX(p, e, 1); // Jc * p ComputeJtE(e, u, 2); // JpT * jc * p ApplyBlockPC(u, v, 2); ////////////////////////////////////////////////////////////////////// float_t qtqk = (float_t)ComputeVectorNorm(e, _cuBufferData); // q(k)' q(k) float_t pdpk = (float_t)ComputeVectorNormW(p, d, _cuBufferData); // p(k)' * DDD * p(k) float_t uvk = (float_t)ComputeVectorDot(up, vp, _cuBufferData); float_t alphak = rtzk / (qtqk + lambda * pdpk - uvk); ///////////////////////////////////////////////////// if (__verbose_cg_iteration) std::cout << " --" << iteration << ",\t alpha= " << alphak << ", rtzk/rtz0 = " << rtz_ratio << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; /////////////////////////////////////////////////// if (!isfinite(alphak) || rtz_ratio > __cg_norm_guard) { __recent_cg_status = 'X'; break; } // something doesn't converge.. 
//////////////////////////////////////////////// ComputeSAXPY((float)alphak, p, x, x); // x(k+1) = x(k) + a(k) * p(k) ///////////////////////////////////////////////// ++iteration; ++__num_cg_iteration; if (iteration >= std::min(__cg_max_iteration, plen)) break; ComputeJX(v, e2, 2); // //Jp * mp * JpT * JcT * p ComputeSAXPY(-1.0f, e2, e, e); ComputeJtE(e, uc, 1); // JcT * .... ComputeSXYPZ(lambda, d, p, uc, uc); ComputeSAXPY((float)-alphak, uc, r, r); // r(k + 1) = r(k) - a(k) * A * pk } // if(__recent_cg_status == 'X') return iteration; ComputeJX(x, e, 1); ComputeJtE(e, u, 2); CuTexImage jte_p; jte_p.SetTexture(_cuVectorJtE.data() + 8 * _num_camera, _num_point * 4); ComputeSAXPY(-1.0f, up, jte_p, vp); ApplyBlockPC(v, _cuVectorXK, 2); return iteration; } int SparseBundleCU::SolveNormalEquationPCGB(float lambda) { //---------------------------------------------------------- //(Jt * J + lambda * diag(Jt * J)) X = Jt * e //------------------------------------------------------------- TimerBA timer(this, TIMER_CG_ITERATION); __recent_cg_status = ' '; // diagonal for jacobian preconditioning... int plen = GetParameterLength(); CuTexImage null; CuTexImage& VectorDP = __lm_use_diagonal_damp ? 
_cuVectorJJ : null; // diagonal CuTexImage& VectorQK = _cuVectorZK; // temporary ComputeBlockPC(lambda, __lm_use_diagonal_damp); //////////////////////////////////////////////////////// ApplyBlockPC(_cuVectorJtE, _cuVectorPK); // z(0) = p(0) = M * r(0)//r(0) = Jt * e ComputeJX(_cuVectorPK, _cuVectorJX); // q(0) = J * p(0) ////////////////////////////////////////////////// float_t rtz0 = (float_t)ComputeVectorDot(_cuVectorJtE, _cuVectorPK, _cuBufferData); // r(0)' * z(0) float_t qtq0 = (float_t)ComputeVectorNorm(_cuVectorJX, _cuBufferData); // q(0)' * q(0) float_t ptdp0 = (float_t)ComputeVectorNormW( _cuVectorPK, VectorDP, _cuBufferData); // p(0)' * DDD * p(0) float_t alpha0 = rtz0 / (qtq0 + lambda * ptdp0); if (__verbose_cg_iteration) std::cout << " --0,\t alpha = " << alpha0 << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; if (!isfinite(alpha0)) { return 0; } if (alpha0 == 0) { __recent_cg_status = 'I'; return 1; } //////////////////////////////////////////////////////////// ComputeSAX((float)alpha0, _cuVectorPK, _cuVectorXK); // x(k+1) = x(k) + a(k) * p(k) ComputeJtE(_cuVectorJX, VectorQK); // Jt * (J * p0) ComputeSXYPZ(lambda, VectorDP, _cuVectorPK, VectorQK, VectorQK); // Jt * J * p0 + lambda * DDD * p0 ComputeSAXPY( (float)-alpha0, VectorQK, _cuVectorJtE, _cuVectorRK); // r(k+1) = r(k) - a(k) * (Jt * q(k) + DDD * p(k)) ; float_t rtzk = rtz0, rtz_min = rtz0, betak; int iteration = 1; ++__num_cg_iteration; while (true) { ApplyBlockPC(_cuVectorRK, _cuVectorZK); /////////////////////////////////////////////////////////////////////////// float_t rtzp = rtzk; rtzk = (float_t)ComputeVectorDot( _cuVectorRK, _cuVectorZK, _cuBufferData); //[r(k + 1) = M^(-1) * z(k + 1)] * z(k+1) float_t rtz_ratio = sqrt(fabs(rtzk / rtz0)); if (rtz_ratio < __cg_norm_threshold) { if (__recent_cg_status == ' ') __recent_cg_status = iteration < std::min(10, __cg_min_iteration) ? 
'0' + iteration : 'N'; if (iteration >= __cg_min_iteration) break; } //////////////////////////////////////////////////////////////////////////// betak = rtzk / rtzp; // beta rtz_min = std::min(rtz_min, rtzk); ComputeSAXPY((float)betak, _cuVectorPK, _cuVectorZK, _cuVectorPK); // p(k) = z(k) + b(k) * p(k - 1) ComputeJX(_cuVectorPK, _cuVectorJX); // q(k) = J * p(k) ////////////////////////////////////////////////////////////////////// float_t qtqk = (float_t)ComputeVectorNorm(_cuVectorJX, _cuBufferData); // q(k)' q(k) float_t ptdpk = (float_t)ComputeVectorNormW( _cuVectorPK, VectorDP, _cuBufferData); // p(k)' * DDD * p(k) float_t alphak = rtzk / (qtqk + lambda * ptdpk); ///////////////////////////////////////////////////// if (__verbose_cg_iteration) std::cout << " --" << iteration << ",\t alpha= " << alphak << ", rtzk/rtz0 = " << rtz_ratio << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; /////////////////////////////////////////////////// if (!isfinite(alphak) || rtz_ratio > __cg_norm_guard) { __recent_cg_status = 'X'; break; } // something doesn't converge.. 
//////////////////////////////////////////////// ComputeSAXPY((float)alphak, _cuVectorPK, _cuVectorXK, _cuVectorXK); // x(k+1) = x(k) + a(k) * p(k) ///////////////////////////////////////////////// ++iteration; ++__num_cg_iteration; if (iteration >= std::min(__cg_max_iteration, plen)) break; // if(iteration == 2 && rtz_ratio < __cg_norm_threshold) if (__cg_recalculate_freq > 0 && iteration % __cg_recalculate_freq == 0) { ////r = JtE - (Jt J + lambda * D) x ComputeJX(_cuVectorXK, _cuVectorJX); ComputeJtE(_cuVectorJX, VectorQK); ComputeSXYPZ(lambda, VectorDP, _cuVectorXK, VectorQK, VectorQK); ComputeSAXPY(-1.0f, VectorQK, _cuVectorJtE, _cuVectorRK); } else { ComputeJtE(_cuVectorJX, VectorQK); ComputeSXYPZ(lambda, VectorDP, _cuVectorPK, VectorQK, VectorQK); // ComputeSAXPY( (float)-alphak, VectorQK, _cuVectorRK, _cuVectorRK); // r(k+1) = r(k) - a(k) * (Jt * q(k) + DDD * p(k)) ; } } return iteration; } int SparseBundleCU::SolveNormalEquation(float lambda) { if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { ComputeBlockPC(lambda, __lm_use_diagonal_damp); ApplyBlockPC(_cuVectorJtE, _cuVectorXK, 1); return 1; } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { ComputeBlockPC(lambda, __lm_use_diagonal_damp); ApplyBlockPC(_cuVectorJtE, _cuVectorXK, 2); return 1; } else { ////solve linear system using Conjugate Gradients return __cg_schur_complement ? 
SolveNormalEquationPCGX(lambda) : SolveNormalEquationPCGB(lambda); } } void SparseBundleCU::RunTestIterationLM(bool reduced) { EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); EvaluateJacobians(); ComputeJtE(_cuImageProj, _cuVectorJtE); if (reduced) SolveNormalEquationPCGX(__lm_initial_damp); else SolveNormalEquationPCGB(__lm_initial_damp); UpdateCameraPoint(_cuVectorZK, _cuImageProj); ComputeVectorDot(_cuVectorXK, _cuVectorJtE, _cuBufferData); ComputeJX(_cuVectorXK, _cuVectorJX); ComputeVectorNorm(_cuVectorJX, _cuBufferData); } float SparseBundleCU::UpdateCameraPoint(CuTexImage& dx, CuTexImage& cuImageTempProj) { ConfigBA::TimerBA timer(this, TIMER_FUNCTION_UP, true); if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { if (__jacobian_normalize) ComputeVXY(_cuVectorXK, _cuVectorSJ, dx, 8 * _num_camera); ProgramCU::UpdateCameraPoint(_num_camera, _cuCameraData, _cuPointData, dx, _cuCameraDataEX, _cuPointDataEX, __bundle_current_mode); return EvaluateProjection(_cuCameraDataEX, _cuPointData, cuImageTempProj); } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { if (__jacobian_normalize) ComputeVXY(_cuVectorXK, _cuVectorSJ, dx, 4 * _num_point, 8 * _num_camera); ProgramCU::UpdateCameraPoint(_num_camera, _cuCameraData, _cuPointData, dx, _cuCameraDataEX, _cuPointDataEX, __bundle_current_mode); return EvaluateProjection(_cuCameraData, _cuPointDataEX, cuImageTempProj); } else { if (__jacobian_normalize) ComputeVXY(_cuVectorXK, _cuVectorSJ, dx); ProgramCU::UpdateCameraPoint(_num_camera, _cuCameraData, _cuPointData, dx, _cuCameraDataEX, _cuPointDataEX, __bundle_current_mode); return EvaluateProjection(_cuCameraDataEX, _cuPointDataEX, cuImageTempProj); } } float SparseBundleCU::SaveUpdatedSystem(float residual_reduction, float dx_sqnorm, float damping) { float expected_reduction; if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { CuTexImage xk; xk.SetTexture(_cuVectorXK.data(), 8 * _num_camera); CuTexImage jte; jte.SetTexture(_cuVectorJtE.data(), 8 * 
_num_camera); float dxtg = (float)ComputeVectorDot(xk, jte, _cuBufferData); if (__lm_use_diagonal_damp) { CuTexImage jj; jj.SetTexture(_cuVectorJJ.data(), 8 * _num_camera); float dq = (float)ComputeVectorNormW(xk, jj, _cuBufferData); expected_reduction = damping * dq + dxtg; } else { expected_reduction = damping * dx_sqnorm + dxtg; } _cuCameraData.SwapData(_cuCameraDataEX); } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { CuTexImage xk; xk.SetTexture(_cuVectorXK.data() + 8 * _num_camera, 4 * _num_point); CuTexImage jte; jte.SetTexture(_cuVectorJtE.data() + 8 * _num_camera, 4 * _num_point); float dxtg = (float)ComputeVectorDot(xk, jte, _cuBufferData); if (__lm_use_diagonal_damp) { CuTexImage jj; jj.SetTexture(_cuVectorJJ.data() + 8 * _num_camera, 4 * _num_point); float dq = (float)ComputeVectorNormW(xk, jj, _cuBufferData); expected_reduction = damping * dq + dxtg; } else { expected_reduction = damping * dx_sqnorm + dxtg; } _cuPointData.SwapData(_cuPointDataEX); } else { float dxtg = (float)ComputeVectorDot(_cuVectorXK, _cuVectorJtE, _cuBufferData); if (__accurate_gain_ratio) { ComputeJX(_cuVectorXK, _cuVectorJX); float njx = (float)ComputeVectorNorm(_cuVectorJX, _cuBufferData); expected_reduction = 2.0f * dxtg - njx; // could the expected reduction be negative??? 
not sure if (expected_reduction <= 0) expected_reduction = 0.001f * residual_reduction; } else if (__lm_use_diagonal_damp) { float dq = (float)ComputeVectorNormW(_cuVectorXK, _cuVectorJJ, _cuBufferData); expected_reduction = damping * dq + dxtg; } else { expected_reduction = damping * dx_sqnorm + dxtg; } /// save the new motion/struture _cuCameraData.SwapData(_cuCameraDataEX); _cuPointData.SwapData(_cuPointDataEX); //_cuCameraData.CopyToHost(_camera_data); //_cuPointData.CopyToHost(_point_data); // DebugProjections(); } //////////////////////////////////////////// return float(residual_reduction / expected_reduction); } void SparseBundleCU::AdjustBundleAdjsutmentMode() { if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { _cuJacobianCamera.InitTexture(0, 0); _cuJacobianCameraT.InitTexture(0, 0); } } float SparseBundleCU::EvaluateDeltaNorm() { if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { CuTexImage temp; temp.SetTexture(_cuVectorXK.data(), 8 * _num_camera); return ComputeVectorNorm(temp, _cuBufferData); } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { CuTexImage temp; temp.SetTexture(_cuVectorXK.data() + 8 * _num_camera, 4 * _num_point); return ComputeVectorNorm(temp, _cuBufferData); } else { return (float)ComputeVectorNorm(_cuVectorXK, _cuBufferData); } } void SparseBundleCU::NonlinearOptimizeLM() { //////////////////////////////////////// TimerBA timer(this, TIMER_OPTIMIZATION); //////////////////////////////////////////////// float mse_convert_ratio = 1.0f / (_num_imgpt * __focal_scaling * __focal_scaling); float error_display_ratio = __verbose_sse ? _num_imgpt : 1.0f; const int edwidth = __verbose_sse ? 
12 : 8; _projection_sse = EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); __initial_mse = __final_mse = _projection_sse * mse_convert_ratio; // compute jacobian diagonals for normalization if (__jacobian_normalize) PrepareJacobianNormalization(); // evalaute jacobian EvaluateJacobians(); ComputeJtE(_cuImageProj, _cuVectorJtE); /////////////////////////////////////////////////////////////// if (__verbose_level) std::cout << "Initial " << (__verbose_sse ? "sumed" : "mean") << " squared error = " << __initial_mse * error_display_ratio << "\n----------------------------------------------\n"; ////////////////////////////////////////////////// CuTexImage& cuImageTempProj = _cuVectorJX; // CuTexImage& cuVectorTempJX = _cuVectorJX; CuTexImage& cuVectorDX = _cuVectorSJ.IsValid() ? _cuVectorZK : _cuVectorXK; ////////////////////////////////////////////////// float damping_adjust = 2.0f, damping = __lm_initial_damp, g_norm, g_inf; SaveBundleRecord(0, _projection_sse * mse_convert_ratio, damping, g_norm, g_inf); //////////////////////////////////// std::cout << std::left; for (int i = 0; i < __lm_max_iteration && !__abort_flag; __current_iteration = (++i)) { ////solve linear system int num_cg_iteration = SolveNormalEquation(damping); // there must be NaN somewhere if (num_cg_iteration == 0) { if (__verbose_level) std::cout << "#" << std::setw(3) << i << " quit on numeric errors\n"; __pba_return_code = 'E'; break; } // there must be infinity somewhere if (__recent_cg_status == 'I') { std::cout << "#" << std::setw(3) << i << " 0 I e=" << std::setw(edwidth) << "------- " << " u=" << std::setprecision(3) << std::setw(9) << damping << '\n' << std::setprecision(6); /////////////increase damping factor damping = damping * damping_adjust; damping_adjust = 2.0f * damping_adjust; --i; continue; } ///////////////////// ++__num_lm_iteration; //////////////////////////////////// float dx_sqnorm = EvaluateDeltaNorm(), dx_norm = sqrt(dx_sqnorm); // In this library, we check 
absolute difference instead of realtive // difference if (dx_norm <= __lm_delta_threshold) { // damping factor must be way too big...or it converges if (__verbose_level > 1) std::cout << "#" << std::setw(3) << i << " " << std::setw(3) << num_cg_iteration << char(__recent_cg_status) << " quit on too small change (" << dx_norm << " < " << __lm_delta_threshold << ")\n"; __pba_return_code = 'S'; break; } /////////////////////////////////////////////////////////////////////// // update structure and motion, check reprojection error float new_residual = UpdateCameraPoint(cuVectorDX, cuImageTempProj); float average_residual = new_residual * mse_convert_ratio; float residual_reduction = _projection_sse - new_residual; // do we find a better solution? if (isfinite(new_residual) && residual_reduction > 0) { ////compute relative norm change float relative_reduction = 1.0f - (new_residual / _projection_sse); //////////////////////////////////// __num_lm_success++; // increase counter _projection_sse = new_residual; // save the new residual _cuImageProj.SwapData(cuImageTempProj); // save the new projection ///////////////gain ratio//////////////////// float gain_ratio = SaveUpdatedSystem(residual_reduction, dx_sqnorm, damping); ///////////////////////////////////// SaveBundleRecord(i + 1, _projection_sse * mse_convert_ratio, damping, g_norm, g_inf); ///////////////////////////////////////////// if (__verbose_level > 1) std::cout << "#" << std::setw(3) << i << " " << std::setw(3) << num_cg_iteration << char(__recent_cg_status) << " e=" << std::setw(edwidth) << average_residual * error_display_ratio << " u=" << std::setprecision(3) << std::setw(9) << damping << " r=" << std::setw(6) << floor(gain_ratio * 1000.f) * 0.001f << " g=" << std::setw(g_norm > 0 ? 
9 : 1) << g_norm << " " << std::setw(9) << relative_reduction << ' ' << std::setw(9) << dx_norm << " t=" << int(BundleTimerGetNow()) << "\n" << std::setprecision(6); ///////////////////////////// if (!IsTimeBudgetAvailable()) { if (__verbose_level > 1) std::cout << "#" << std::setw(3) << i << " used up time budget.\n"; __pba_return_code = 'T'; break; } else if (__lm_check_gradient && g_inf < __lm_gradient_threshold) { if (__verbose_level > 1) std::cout << "#" << std::setw(3) << i << " converged with small gradient\n"; __pba_return_code = 'G'; break; } else if (average_residual * error_display_ratio <= __lm_mse_threshold) { if (__verbose_level > 1) std::cout << "#" << std::setw(3) << i << " satisfies MSE threshold\n"; __pba_return_code = 'M'; break; } else { /////////////////////////////adjust damping factor float temp = gain_ratio * 2.0f - 1.0f; float adaptive_adjust = 1.0f - temp * temp * temp; // powf(, 3.0f); // float auto_adjust = std::max(1.0f / 3.0f, adaptive_adjust); ////////////////////////////////////////////////// damping = damping * auto_adjust; damping_adjust = 2.0f; if (damping < __lm_minimum_damp) damping = __lm_minimum_damp; else if (__lm_damping_auto_switch == 0 && damping > __lm_maximum_damp && __lm_use_diagonal_damp) damping = __lm_maximum_damp; EvaluateJacobians(); ComputeJtE(_cuImageProj, _cuVectorJtE); } } else { if (__verbose_level > 1) std::cout << "#" << std::setw(3) << i << " " << std::setw(3) << num_cg_iteration << char(__recent_cg_status) << " e=" << std::setw(edwidth) << std::left << average_residual * error_display_ratio << " u=" << std::setprecision(3) << std::setw(9) << damping << " r=----- " << (__lm_check_gradient || __save_gradient_norm ? 
" g=---------" : " g=0") << " --------- " << std::setw(9) << dx_norm << " t=" << int(BundleTimerGetNow()) << "\n" << std::setprecision(6); if (__lm_damping_auto_switch > 0 && __lm_use_diagonal_damp && damping > __lm_damping_auto_switch) { __lm_use_diagonal_damp = false; damping = __lm_damping_auto_switch; damping_adjust = 2.0f; if (__verbose_level > 1) std::cout << "NOTE: switch to damping with an identity matix\n"; } else { /////////////increase damping factor damping = damping * damping_adjust; damping_adjust = 2.0f * damping_adjust; } } if (__verbose_level == 1) std::cout << '.'; } __final_mse = float(_projection_sse * mse_convert_ratio); __final_mse_x = __use_radial_distortion ? EvaluateProjectionX(_cuCameraData, _cuPointData, _cuImageProj) * mse_convert_ratio : __final_mse; } #define PROFILE_(A, B) \ BundleTimerStart(TIMER_PROFILE_STEP); \ for (int i = 0; i < repeat; ++i) { \ B; \ FinishWorkCUDA(); \ } \ BundleTimerSwitch(TIMER_PROFILE_STEP); \ std::cout << std::setw(24) << A << ": " \ << (BundleTimerGet(TIMER_PROFILE_STEP) / repeat) << "\n"; #define PROFILE(A, B) PROFILE_(#A, A B) #define PROXILE(A, B) PROFILE_(A, B) void SparseBundleCU::RunProfileSteps() { const int repeat = __profile_pba; std::cout << "---------------------------------\n" "| Run profiling steps (" << repeat << ") |\n" "---------------------------------\n" << std::left; ; /////////////////////////////////////////////// PROXILE("Upload Measurements", _cuMeasurements.CopyFromHost( _imgpt_datax.size() > 0 ? 
&_imgpt_datax[0] : _imgpt_data)); PROXILE("Upload Point Data", _cuPointData.CopyToHost(_point_data)); std::cout << "---------------------------------\n"; ///////////////////////////////////////////// EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); PrepareJacobianNormalization(); EvaluateJacobians(); ComputeJtE(_cuImageProj, _cuVectorJtE); ComputeBlockPC(__lm_initial_damp, true); FinishWorkCUDA(); do { if (SolveNormalEquationPCGX(__lm_initial_damp) == 10 && SolveNormalEquationPCGB(__lm_initial_damp) == 10) break; __lm_initial_damp *= 2.0f; } while (__lm_initial_damp < 1024.0f); std::cout << "damping set to " << __lm_initial_damp << " for profiling\n" << "---------------------------------\n"; { int repeat = 10, cgmin = __cg_min_iteration, cgmax = __cg_max_iteration; __cg_max_iteration = __cg_min_iteration = 10; __num_cg_iteration = 0; PROFILE(SolveNormalEquationPCGX, (__lm_initial_damp)); if (__num_cg_iteration != 100) std::cout << __num_cg_iteration << " cg iterations in all\n"; ///////////////////////////////////////////////////////////////////// __num_cg_iteration = 0; PROFILE(SolveNormalEquationPCGB, (__lm_initial_damp)); if (__num_cg_iteration != 100) std::cout << __num_cg_iteration << " cg iterations in all\n"; std::cout << "---------------------------------\n"; ////////////////////////////////////////////////////// __num_cg_iteration = 0; PROXILE("Single iteration LMX", RunTestIterationLM(true)); if (__num_cg_iteration != 100) std::cout << __num_cg_iteration << " cg iterations in all\n"; //////////////////////////////////////////////////////// __num_cg_iteration = 0; PROXILE("Single iteration LMB", RunTestIterationLM(false)); if (__num_cg_iteration != 100) std::cout << __num_cg_iteration << " cg iterations in all\n"; std::cout << "---------------------------------\n"; __cg_max_iteration = cgmax; __cg_min_iteration = cgmin; } ///////////////////////////////////////////////////// PROFILE(UpdateCameraPoint, (_cuVectorZK, _cuImageProj)); 
PROFILE(ComputeVectorNorm, (_cuVectorXK, _cuBufferData)); PROFILE(ComputeVectorDot, (_cuVectorXK, _cuVectorRK, _cuBufferData)); PROFILE(ComputeVectorNormW, (_cuVectorXK, _cuVectorRK, _cuBufferData)); PROFILE(ComputeSAXPY, (0.01f, _cuVectorXK, _cuVectorRK, _cuVectorZK)); PROFILE(ComputeSXYPZ, (0.01f, _cuVectorXK, _cuVectorPK, _cuVectorRK, _cuVectorZK)); std::cout << "---------------------------------\n"; PROFILE(ComputeVectorNorm, (_cuImageProj, _cuBufferData)); PROFILE(ComputeSAXPY, (0.000f, _cuImageProj, _cuVectorJX, _cuVectorJX)); std::cout << "---------------------------------\n"; __multiply_jx_usenoj = false; /////////////////////////////////////////////////////// PROFILE(EvaluateProjection, (_cuCameraData, _cuPointData, _cuImageProj)); PROFILE(ApplyBlockPC, (_cuVectorJtE, _cuVectorPK)); ///////////////////////////////////////////////// if (!__no_jacobian_store) { if (__jc_store_original) { PROFILE(ComputeJX, (_cuVectorJtE, _cuVectorJX)); PROFILE(EvaluateJacobians, (false)); if (__jc_store_transpose) { PROFILE( ShuffleCameraJacobian, (_cuJacobianCamera, _cuCameraMeasurementList, _cuJacobianCameraT)); PROFILE(ComputeDiagonal, (_cuVectorJJ, _cuVectorPK)); PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); PROFILE(ComputeBlockPC, (0.001f, true)); std::cout << "---------------------------------\n" "| Not storing original JC | \n" "---------------------------------\n"; __jc_store_original = false; PROFILE(EvaluateJacobians, ()); __jc_store_original = true; } ////////////////////////////////////////////////// std::cout << "---------------------------------\n" "| Not storing transpose JC | \n" "---------------------------------\n"; __jc_store_transpose = false; PROFILE(ComputeDiagonal, (_cuVectorJJ, _cuVectorPK)); PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); PROFILE(ComputeBlockPC, (0.001f, true)); ////////////////////////////////////// } else if (__jc_store_transpose) { PROFILE(ComputeDiagonal, (_cuVectorJJ, _cuVectorPK)); PROFILE(ComputeJtE, (_cuImageProj, 
_cuVectorJtE)); PROFILE(ComputeBlockPC, (0.001f, true)); std::cout << "---------------------------------\n" "| Not storing original JC | \n" "---------------------------------\n"; PROFILE(EvaluateJacobians, ()); } } if (!__no_jacobian_store) { std::cout << "---------------------------------\n" "| Not storing Camera Jacobians | \n" "---------------------------------\n"; __jc_store_transpose = false; __jc_store_original = false; _cuJacobianCamera.ReleaseData(); _cuJacobianCameraT.ReleaseData(); PROFILE(EvaluateJacobians, ()); PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); PROFILE(ComputeBlockPC, (0.001f, true)); } /////////////////////////////////////////////// std::cout << "---------------------------------\n" "| Not storing any jacobians |\n" "---------------------------------\n"; __no_jacobian_store = true; _cuJacobianPoint.ReleaseData(); PROFILE(ComputeJX, (_cuVectorJtE, _cuVectorJX)); PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); PROFILE(ComputeBlockPC, (0.001f, true)); std::cout << "---------------------------------\n"; } void SparseBundleCU::RunDebugSteps() { EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); EvaluateJacobians(); ComputeJtE(_cuImageProj, _cuVectorJtE); // DEBUG_FUNCN(_cuVectorXK, SolveNormalEquationPCGB, (0.001f), 100); DEBUG_FUNCN(_cuVectorJtE, ComputeJtE, (_cuImageProj, _cuVectorJtE), 100); DEBUG_FUNCN(_cuVectorJX, ComputeJX, (_cuVectorJtE, _cuVectorJX), 100); } void SparseBundleCU::SaveNormalEquation(float lambda) { ofstream out1("../../matlab/cg_j.txt"); ofstream out2("../../matlab/cg_b.txt"); ofstream out3("../../matlab/cg_x.txt"); out1 << std::setprecision(20); out2 << std::setprecision(20); out3 << std::setprecision(20); int plen = GetParameterLength(); vector jc(16 * _num_imgpt); vector jp(8 * _num_imgpt); vector ee(2 * _num_imgpt); vector dx(plen); _cuJacobianCamera.CopyToHost(&jc[0]); _cuJacobianPoint.CopyToHost(&jp[0]); _cuImageProj.CopyToHost(&ee[0]); _cuVectorXK.CopyToHost(&dx[0]); for (int i = 0; i < 
_num_imgpt; ++i) { out2 << ee[i * 2] << ' ' << ee[i * 2 + 1] << ' '; int cidx = _camera_idx[i], pidx = _point_idx[i]; float *cp = &jc[i * 16], *pp = &jp[i * 8]; int cmin = cidx * 8, pmin = 8 * _num_camera + pidx * 4; for (int j = 0; j < 8; ++j) out1 << (i * 2 + 1) << ' ' << (cmin + j + 1) << ' ' << cp[j] << '\n'; for (int j = 0; j < 8; ++j) out1 << (i * 2 + 2) << ' ' << (cmin + j + 1) << ' ' << cp[j + 8] << '\n'; for (int j = 0; j < 4; ++j) out1 << (i * 2 + 1) << ' ' << (pmin + j + 1) << ' ' << pp[j] << '\n'; for (int j = 0; j < 4; ++j) out1 << (i * 2 + 2) << ' ' << (pmin + j + 1) << ' ' << pp[j + 4] << '\n'; } for (size_t i = 0; i < dx.size(); ++i) out3 << dx[i] << ' '; std::cout << "lambda = " << std::setprecision(20) << lambda << '\n'; } } // namespace pba