Skip to content

Commit aad2051

Browse files
committed
[Enhancement] Update async intrinsic handling in inject_fence_proxy
* Added support for wgmma async intrinsics in the IsAsyncIntrinsic function.
* Changed handling of unknown externs to treat them as Generic instead of Async, improving accuracy in proxy kind determination.
1 parent b2acfc3 commit aad2051

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

src/transform/inject_fence_proxy.cc

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,11 @@ bool IsAsyncIntrinsic(const CallNode *call) {
9494
return true;
9595
}
9696

97+
// wgmma async intrinsics
98+
if (call->op.same_as(tl_gemm()) || call->op.same_as(tl_gemm_sp())) {
99+
return true;
100+
}
101+
97102
return false;
98103
}
99104

@@ -208,8 +213,10 @@ class ProxyFenceInjector : public StmtMutator {
208213
} else if (IsKnownGeneric(call)) {
209214
kind = ProxyKind::kGeneric;
210215
} else {
211-
// Treat unknown externs as async to avoid missing required fences.
212-
kind = ProxyKind::kAsync;
216+
// We can now treat extern as Generic, since gemm and gemm_sp are never
217+
// represented as call_extern nodes. They are call_intrin nodes and will
218+
// be handled by IsAsyncIntrinsic above.
219+
kind = ProxyKind::kGeneric;
213220
}
214221
}
215222

0 commit comments

Comments
 (0)