Skip to content

Commit

Permalink
[CLC][AMDGPU] Fix barrier calls in collective group algorithms to generate memory fences (intel#12873)
Browse files Browse the repository at this point in the history

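This PR fixes race conditions in the group algorithms implemented in
libclc for AMDGPU. The races occurred because the control barriers were
called with unspecified memory scope and semantics (both passed as 0),
so they did not emit any memory fences. The barriers now pass Workgroup
memory scope with AcquireRelease semantics.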
  • Loading branch information
GeorgeWeb authored Aug 30, 2024
1 parent 95f9bc0 commit 02a20e5
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, a, true)
if (sg_lid == sg_size - 1) { \
scratch[sg_id] = carry; \
} \
__spirv_ControlBarrier(Workgroup, 0, 0); \
__spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
/* Perform InclusiveScan over sub-group results */ \
TYPE sg_prefix; \
TYPE sg_aggregate = scratch[0]; \
Expand All @@ -235,7 +235,7 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, a, true)
result = OP(sg_x, sg_prefix); \
} \
} \
__spirv_ControlBarrier(Workgroup, 0, 0); \
__spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
return result; \
}

Expand Down Expand Up @@ -401,9 +401,9 @@ long __clc__3d_to_linear_local_id(ulong3 id) {
if (source) { \
*scratch = x; \
} \
__spirv_ControlBarrier(Workgroup, 0, 0); \
__spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
TYPE result = *scratch; \
__spirv_ControlBarrier(Workgroup, 0, 0); \
__spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
return result; \
} \
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \
Expand Down

0 comments on commit 02a20e5

Please sign in to comment.