Skip to content

Commit 344cdcb

Browse files
authored
[src] CUDA 8 compatibility patch for _strided_reduction_fused_kernel (kaldi-asr#3990)
1 parent 36afe3b commit 344cdcb

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

src/cudamatrix/cu-kernels.cu

+5
Original file line numberDiff line numberDiff line change
@@ -1840,6 +1840,11 @@ inline __device__ void myAtomicReduce(float *address, float val, TransReduceOp<S
18401840
myAtomicAdd(address, val);
18411841
}
18421842

1843+
#if CUDA_VERSION < 9000
1844+
// Toolkits older than CUDA 9 do not provide __syncwarp(); supply a no-op replacement.
1845+
// Do-nothing stand-in for the warp barrier: accepts the lane mask
// (0xffffffff when omitted), ignores it, and returns immediately.
__device__ inline void __syncwarp(unsigned mask = 0xffffffff) {
}
1846+
#endif
1847+
18431848
// Reduce a matrix 'data' to a row vector 'dots'
18441849
template <EnumTransformReduce TransReduceType, typename Real, int unroll_count>
18451850
__global__ void _strided_reduction_fused_kernel(Real * __restrict__ dots, const Real * __restrict__ data,

0 commit comments

Comments
 (0)