Skip to content

Commit 6831f30

Browse files
committed
Refactored DeEmbedOutOfPlace shader. See #716.
1 parent c93f271 commit 6831f30

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

scopeprotocols/CouplerDeEmbedFilter.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -446,7 +446,11 @@ void CouplerDeEmbedFilter::ApplySParameters(
446446
m_deEmbedComputePipeline.BindBufferNonblocking(1, samplesOut, cmdBuf, true);
447447
m_deEmbedComputePipeline.BindBufferNonblocking(2, params.m_resampledSparamSines, cmdBuf);
448448
m_deEmbedComputePipeline.BindBufferNonblocking(3, params.m_resampledSparamCosines, cmdBuf);
449-
m_deEmbedComputePipeline.DispatchNoRebind(cmdBuf, (uint32_t)nouts, GetComputeBlockCount(npoints, 64));
449+
const uint32_t compute_block_count = GetComputeBlockCount(npoints, 64);
450+
m_deEmbedComputePipeline.DispatchNoRebind(
451+
cmdBuf, (uint32_t)nouts,
452+
min(compute_block_count, 32768u),
453+
compute_block_count / 32768 + 1);
450454
m_deEmbedComputePipeline.AddComputeMemoryBarrier(cmdBuf);
451455
samplesOut.MarkModifiedFromGpu();
452456
}

scopeprotocols/shaders/DeEmbedOutOfPlace.glsl

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
* *
33
* libscopeprotocols *
44
* *
5-
* Copyright (c) 2012-2024 Andrew D. Zonenberg and contributors *
5+
* Copyright (c) 2012-2025 Andrew D. Zonenberg and contributors *
66
* All rights reserved. *
77
* *
88
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
@@ -60,18 +60,19 @@ layout(local_size_x=64, local_size_y=1, local_size_z=1) in;
6060
void main()
6161
{
6262
//If off end of array, stop
63-
if(gl_GlobalInvocationID.x >= len)
63+
uint nthread = (gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x) + gl_GlobalInvocationID.x;
64+
if(nthread >= len)
6465
return;
6566

6667
//Sin/cos values from rotation matrix
67-
float sinval = sines[gl_GlobalInvocationID.x];
68-
float cosval = cosines[gl_GlobalInvocationID.x];
68+
float sinval = sines[nthread];
69+
float cosval = cosines[nthread];
6970

7071
//Uncorrected complex value
71-
float real_orig = din[gl_GlobalInvocationID.x*2 + 0];
72-
float imag_orig = din[gl_GlobalInvocationID.x*2 + 1];
72+
float real_orig = din[nthread*2 + 0];
73+
float imag_orig = din[nthread*2 + 1];
7374

7475
//Apply the matrix and write back
75-
dout[gl_GlobalInvocationID.x*2 + 0] = real_orig*cosval - imag_orig*sinval;
76-
dout[gl_GlobalInvocationID.x*2 + 1] = real_orig*sinval + imag_orig*cosval;
76+
dout[nthread*2 + 0] = real_orig*cosval - imag_orig*sinval;
77+
dout[nthread*2 + 1] = real_orig*sinval + imag_orig*cosval;
7778
}

0 commit comments

Comments
 (0)