Skip to content

Commit

Permalink
Drop support of CUDA 6.5 and below. Includes deprecation warning fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
Heiko Thiel committed Dec 21, 2018
1 parent 214bb39 commit bbafa9d
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 71 deletions.
12 changes: 2 additions & 10 deletions cmake/pcl_find_cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ if(MSVC)
endif()

set(CUDA_FIND_QUIETLY TRUE)
find_package(CUDA)
find_package(CUDA 7.0)

if(CUDA_FOUND)
message(STATUS "Found CUDA Toolkit v${CUDA_VERSION_STRING}")
Expand Down Expand Up @@ -50,16 +50,8 @@ if(CUDA_FOUND)
set(__cuda_arch_bin "3.0 3.5 5.0 5.2 5.3 6.0 6.1 7.0")
elseif(NOT ${CUDA_VERSION_STRING} VERSION_LESS "8.0")
set(__cuda_arch_bin "2.0 2.1(2.0) 3.0 3.5 5.0 5.2 5.3 6.0 6.1")
elseif(NOT ${CUDA_VERSION_STRING} VERSION_LESS "6.5")
set(__cuda_arch_bin "2.0 2.1(2.0) 3.0 3.5 5.0 5.2")
elseif(NOT ${CUDA_VERSION_STRING} VERSION_LESS "6.0")
set(__cuda_arch_bin "2.0 2.1(2.0) 3.0 3.5 5.0")
elseif(NOT ${CUDA_VERSION_STRING} VERSION_LESS "5.0")
set(__cuda_arch_bin "2.0 2.1(2.0) 3.0 3.5")
elseif(${CUDA_VERSION_STRING} VERSION_GREATER "4.1")
set(__cuda_arch_bin "2.0 2.1(2.0) 3.0")
else()
set(__cuda_arch_bin "2.0 2.1(2.0)")
set(__cuda_arch_bin "2.0 2.1(2.0) 3.0 3.5 5.0 5.2")
endif()

set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
Expand Down
2 changes: 1 addition & 1 deletion cuda/apps/src/kinect_segmentation_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class Segmentation
normals = computeFastPointNormals<Storage> (data);
else
normals = computePointNormals<Storage> (data->points.begin (), data->points.end (), focallength, data, radius_cm / 100.0f, nr_neighbors);
cudaThreadSynchronize ();
cudaDeviceSynchronize ();
}

// retrieve normals as an image..
Expand Down
20 changes: 4 additions & 16 deletions cuda/common/include/pcl/cuda/cutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -735,18 +735,6 @@ extern "C" {
////////////////////////////////////////////////////////////////////////////
//! Macros

#if CUDART_VERSION >= 4000
#define CUT_DEVICE_SYNCHRONIZE( ) cudaDeviceSynchronize();
#else
#define CUT_DEVICE_SYNCHRONIZE( ) cudaThreadSynchronize();
#endif

#if CUDART_VERSION >= 4000
#define CUT_DEVICE_RESET( ) cudaDeviceReset();
#else
#define CUT_DEVICE_RESET( ) cudaThreadExit();
#endif

// This is for the CUTIL bank checker
#ifdef _DEBUG
#if __DEVICE_EMULATION__
Expand Down Expand Up @@ -792,7 +780,7 @@ extern "C" {
# define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); \

# define CUDA_SAFE_THREAD_SYNC( ) { \
cudaError err = CUT_DEVICE_SYNCHRONIZE(); \
cudaError err = cudaDeviceSynchronize(); \
if ( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
Expand Down Expand Up @@ -822,7 +810,7 @@ extern "C" {
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
} \
err = CUT_DEVICE_SYNCHRONIZE(); \
err = cudaDeviceSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
Expand Down Expand Up @@ -891,7 +879,7 @@ extern "C" {
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
} \
err = CUT_DEVICE_SYNCHRONIZE(); \
err = cudaDeviceSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
Expand All @@ -906,7 +894,7 @@ extern "C" {
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
} \
err = CUT_DEVICE_SYNCHRONIZE(); \
err = cudaDeviceSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
Expand Down
8 changes: 0 additions & 8 deletions cuda/common/include/pcl/cuda/cutil_inline_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,12 @@

inline cudaError cutilDeviceSynchronize()
{
#if CUDART_VERSION >= 4000
return cudaDeviceSynchronize();
#else
return cudaThreadSynchronize();
#endif
}

inline cudaError cutilDeviceReset()
{
#if CUDART_VERSION >= 4000
return cudaDeviceReset();
#else
return cudaThreadExit();
#endif
}

inline void __cutilCondition(int val, char *file, int line)
Expand Down
2 changes: 1 addition & 1 deletion cuda/common/include/pcl/cuda/time_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ namespace pcl
{
CUT_CHECK_ERROR ("dude");
// Measure time needed to copy data
cutilSafeCall (cudaThreadSynchronize ());
cutilSafeCall (cudaDeviceSynchronize ());
cutilSafeCall (cudaEventRecord (end_, 0));
cutilSafeCall (cudaEventSynchronize (end_));
cutilSafeCall (cudaEventElapsedTime (&elapsed_time_, start_, end_));
Expand Down
8 changes: 0 additions & 8 deletions gpu/containers/src/initialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ void pcl::gpu::printCudaDeviceInfo(int device)
printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, sm_cores, sm_cores * prop.multiProcessorCount);
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);

#if (CUDART_VERSION >= 4000)
// This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
int memoryClock, memBusWidth, L2CacheSize;
getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
Expand All @@ -187,7 +186,6 @@ void pcl::gpu::printCudaDeviceInfo(int device)
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
#endif
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
Expand All @@ -198,11 +196,7 @@ void pcl::gpu::printCudaDeviceInfo(int device)
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);

#if CUDART_VERSION >= 4000
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
#else
printf(" Concurrent copy and execution: %s\n", prop.deviceOverlap ? "Yes" : "No");
#endif
printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
Expand All @@ -211,10 +205,8 @@ void pcl::gpu::printCudaDeviceInfo(int device)
printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
#if CUDART_VERSION >= 4000
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
#endif
printf(" Compute Mode:\n");
printf(" %s \n", computeMode[prop.computeMode]);
}
Expand Down
28 changes: 12 additions & 16 deletions gpu/people/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,19 @@ REMOVE_VTK_DEFINITIONS()

#find NPP
unset(CUDA_npp_LIBRARY CACHE)
if(${CUDA_VERSION} VERSION_LESS "5.5")
find_cuda_helper_libs(npp)
find_cuda_helper_libs(nppc)
find_cuda_helper_libs(npps)
if(${CUDA_VERSION} VERSION_GREATER_EQUAL "9.0")
find_cuda_helper_libs(nppim)
find_cuda_helper_libs(nppidei)
else()
find_cuda_helper_libs(nppc)
find_cuda_helper_libs(npps)
if(${CUDA_VERSION} VERSION_GREATER_EQUAL "9.0")
find_cuda_helper_libs(nppim)
find_cuda_helper_libs(nppidei)
else()
find_cuda_helper_libs(nppi)
endif()

if(${CUDA_VERSION} VERSION_GREATER_EQUAL "9.0")
set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppim_LIBRARY} ${CUDA_nppidei_LIBRARY} ${CUDA_npps_LIBRARY} CACHE STRING "npp library")
else()
set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY} CACHE STRING "npp library")
endif()
find_cuda_helper_libs(nppi)
endif()

if(${CUDA_VERSION} VERSION_GREATER_EQUAL "9.0")
set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppim_LIBRARY} ${CUDA_nppidei_LIBRARY} ${CUDA_npps_LIBRARY} CACHE STRING "npp library")
else()
set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY} CACHE STRING "npp library")
endif()

#Label_skeleton
Expand Down
8 changes: 4 additions & 4 deletions gpu/people/src/cuda/multi_tree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ namespace pcl

KernelCUDA_runTree<<< grid, block >>>( focal, treeHeight, numNodes, nodes, leaves, labels);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void CUDA_runMultiTreePass ( int FGThresh,
Expand Down Expand Up @@ -198,7 +198,7 @@ namespace pcl
}

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

///////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -323,7 +323,7 @@ namespace pcl
KernelCUDA_MultiTreeMerge<<< grid, block >>>( numTrees, labels );

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

/** \brief This will merge the votes from the different trees into one final vote, including probabilistic's */
Expand All @@ -348,7 +348,7 @@ namespace pcl
KernelCUDA_MultiTreeCreateProb<<< grid, block >>>( numTrees, probabilities);

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions gpu/people/src/cuda/prob.cu
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ namespace pcl
KernelCUDA_SelectLabel<<< grid, block >>>( labels, probabilities );

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

/** \brief This will combine two probabilities according their weight **/
Expand All @@ -282,7 +282,7 @@ namespace pcl
KernelCUDA_CombineProb<<< grid, block >>>( probIn1, weight1, probIn2, weight2, probOut );

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

/** \brief This will combine two probabilities according their weight **/
Expand All @@ -299,7 +299,7 @@ namespace pcl
KernelCUDA_WeightedSumProb<<< grid, block >>>( probIn, weight, probOut );

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

/** \brief This will blur the input labelprobability with the given kernel **/
Expand Down Expand Up @@ -336,12 +336,12 @@ namespace pcl
KernelCUDA_GaussianBlurVer<<< grid, block >>>( probIn, kernel, kernel.size(), probTemp );
//KernelCUDA_GaussianBlurVer<<< grid, block >>>( probIn, kernel, kernel.size(), probOut );
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

// CUDA kernel call Horizontal
KernelCUDA_GaussianBlurHor<<< grid, block >>>( probTemp, kernel, kernel.size(), probOut );
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
return 1;
}
}
Expand Down
4 changes: 2 additions & 2 deletions gpu/people/src/cuda/utils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void pcl::device::colorLMap(const Labels& labels, const DeviceArray<uchar4>& map
colorKernel<<< grid, block >>>( labels, rgba );

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void pcl::device::mixedColorMap(const Labels& labels, const DeviceArray<uchar4>& map, const Image& rgba, Image& output)
Expand Down Expand Up @@ -170,7 +170,7 @@ void pcl::device::prepareForeGroundDepth(const Depth& depth1, Mask& inverse_mask
fgDepthKernel<<< grid, block >>>( depth1, inverse_mask, depth2 );

cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}


Expand Down

0 comments on commit bbafa9d

Please sign in to comment.