Skip to content

Commit bc854f4

Browse files
committed
slight SumRows optimization
1 parent af0655d commit bc854f4

File tree

3 files changed

+150
-73
lines changed

3 files changed

+150
-73
lines changed

kernels32.cu

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ void addScaler(float s, float * dest, int destLen) {
124124
}
125125
}
126126

127+
/*
 * setScaler: fill kernel that stores the scalar `s` into elements of `dest`.
 *
 *   s       - value written to each selected element
 *   dest    - destination array; the kernel writes dest[i] for the thread's
 *             flat index i (presumably a device-accessible buffer holding at
 *             least destLen floats - confirm against the caller)
 *   destLen - number of elements eligible to be written; threads whose flat
 *             index is >= destLen do nothing
 *
 * Each thread handles at most one element, indexed from the x dimension of
 * the launch only. Elements at indices not reached by any thread of the
 * launch are left untouched.
 */
extern "C" __global__
void setScaler(float s, float * dest, int destLen) {
	// Flat 1D thread index across the whole grid (x dimension only).
	const int idx = threadIdx.x + blockDim.x * blockIdx.x;

	// Surplus threads past the end of the array exit without writing.
	if (idx >= destLen) {
		return;
	}

	dest[idx] = s;
}
134+
127135
extern "C" __global__
128136
void addChunks(float * dest, float * source, int destLen, int chunkSize) {
129137
int tid = blockIdx.x * blockDim.x + threadIdx.x;

0 commit comments

Comments
 (0)