Skip to content

Commit 08cada9

Browse files
Fix build error on low CUDA architectures without FP16 support (#44391)
1 parent dd0a07f commit 08cada9

File tree

1 file changed: 4 additions, 0 deletions

paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,12 @@ __global__ void ElementwiseMask(const T* a,
3838
const T* b,
3939
T* res,
4040
int num_elements) {
41+
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
4142
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
4243
if (tid >= num_elements) return;
4344
const T zero = 0;
4445
res[tid] = b[tid] >= zero ? a[tid] : zero;
46+
#endif
4547
}
4648

4749
template <typename T>
@@ -121,6 +123,7 @@ __global__ void ReduceSum2(
121123
template <>
122124
__global__ void ReduceSum2<half>(
123125
const half* src, half* dst, int bsz, int nb_head, int max_seq_len) {
126+
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
124127
int tid = threadIdx.x;
125128
int bid = blockIdx.x;
126129
int num_blocks_per_head = ((max_seq_len / blockDim.x) * max_seq_len);
@@ -152,6 +155,7 @@ __global__ void ReduceSum2<half>(
152155
static_cast<size_t>(bsz * max_seq_len),
153156
static_cast<platform::float16>(res_half[0]));
154157
}
158+
#endif
155159
}
156160

157161
template <typename T>

0 commit comments

Comments
 (0)