[AMDGPU] Produce ballot/icmp/fcmp lane masks at wavefront width by aobolensk · Pull Request #201358 · llvm/llvm-project

aobolensk · 2026-06-03T13:43:53Z

No description provided.

Reading read_register with the return type and a hardcoded "exec" name crashed isel on a width mismatch. Read EXEC at wave-size width with the matching register name, then zext/trunc to the actual return type.

llvmorg-github-actions · 2026-06-03T13:44:40Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-transforms

Author: Arseniy Obolenskiy (aobolensk)

Changes

Reading read_register with the return type and a hardcoded "exec" name crashed isel on a width mismatch.

Read EXEC at wave-size width with the matching register name, then zext/trunc to the actual return type.

Full diff: https://github.com/llvm/llvm-project/pull/201358.diff

3 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+10-6)
(added) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-cmp-exec-fold-wave-size.ll (+70)
(modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+4-2)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 2370c379e75f5..8457b2594d3fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1644,15 +1644,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
         // intrinsic exposes) is one bit per thread, masked with the EXEC
         // register (which contains the bitmask of live threads). So a
         // comparison that always returns true is the same as a read of the
-        // EXEC register.
-        Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
+        // EXEC register. EXEC is wave-size bits wide, so read it at that width
+        // and zext/trunc to the return type.
+        Type *ExecTy = IC.Builder.getIntNTy(ST->getWavefrontSize());
+        StringRef ExecName = ST->isWave32() ? "exec_lo" : "exec";
+        Metadata *MDArgs[] = {MDString::get(II.getContext(), ExecName)};
         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
-        CallInst *NewCall = IC.Builder.CreateIntrinsic(Intrinsic::read_register,
-                                                       II.getType(), Args);
+        CallInst *NewCall =
+            IC.Builder.CreateIntrinsic(Intrinsic::read_register, ExecTy, Args);
         NewCall->addFnAttr(Attribute::Convergent);
-        NewCall->takeName(&II);
-        return IC.replaceInstUsesWith(II, NewCall);
+        Value *Result = IC.Builder.CreateZExtOrTrunc(NewCall, II.getType());
+        Result->takeName(&II);
+        return IC.replaceInstUsesWith(II, Result);
       }
 
       // Canonicalize constants to RHS.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-cmp-exec-fold-wave-size.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-cmp-exec-fold-wave-size.ll
new file mode 100644
index 0000000000000..d9e3030e53c4d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-cmp-exec-fold-wave-size.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S < %s | FileCheck -check-prefix=WAVE64 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck -check-prefix=WAVE32 %s
+
+; The "always true" amdgcn.icmp/amdgcn.fcmp fold reads EXEC, which is wave-size
+; bits wide. The read_register must use that width and register name regardless
+; of the return type, then zext/trunc.
+
+; Return type wider than the wave size on wave32: read i32 exec_lo, then zext.
+define i64 @fcmp_true_i64(float %x, float %y) {
+; WAVE64-LABEL: @fcmp_true_i64(
+; WAVE64-NEXT:    [[R:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR3:[0-9]+]]
+; WAVE64-NEXT:    ret i64 [[R]]
+;
+; WAVE32-LABEL: @fcmp_true_i64(
+; WAVE32-NEXT:    [[TMP1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0:![0-9]+]]) #[[ATTR3:[0-9]+]]
+; WAVE32-NEXT:    [[R:%.*]] = zext i32 [[TMP1]] to i64
+; WAVE32-NEXT:    ret i64 [[R]]
+;
+  %r = call i64 @llvm.amdgcn.fcmp.i64.f32(float 0.0, float 0.0, i32 1)
+  ret i64 %r
+}
+
+define i64 @icmp_true_i64(i32 %x, i32 %y) {
+; WAVE64-LABEL: @icmp_true_i64(
+; WAVE64-NEXT:    [[R:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR3]]
+; WAVE64-NEXT:    ret i64 [[R]]
+;
+; WAVE32-LABEL: @icmp_true_i64(
+; WAVE32-NEXT:    [[TMP1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR3]]
+; WAVE32-NEXT:    [[R:%.*]] = zext i32 [[TMP1]] to i64
+; WAVE32-NEXT:    ret i64 [[R]]
+;
+  %r = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
+  ret i64 %r
+}
+
+; Return type narrower than the wave size on wave64: read i64 exec, then trunc.
+define i32 @fcmp_true_i32(float %x, float %y) {
+; WAVE64-LABEL: @fcmp_true_i32(
+; WAVE64-NEXT:    [[TMP1:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR3]]
+; WAVE64-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; WAVE64-NEXT:    ret i32 [[R]]
+;
+; WAVE32-LABEL: @fcmp_true_i32(
+; WAVE32-NEXT:    [[R:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR3]]
+; WAVE32-NEXT:    ret i32 [[R]]
+;
+  %r = call i32 @llvm.amdgcn.fcmp.i32.f32(float 0.0, float 0.0, i32 1)
+  ret i32 %r
+}
+
+declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg)
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg)
+declare i32 @llvm.amdgcn.fcmp.i32.f32(float, float, i32 immarg)
+;.
+; WAVE64: attributes #[[ATTR0:[0-9]+]] = { "target-cpu"="gfx900" }
+; WAVE64: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) "target-cpu"="gfx900" }
+; WAVE64: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) }
+; WAVE64: attributes #[[ATTR3]] = { convergent }
+;.
+; WAVE32: attributes #[[ATTR0:[0-9]+]] = { "target-cpu"="gfx1010" }
+; WAVE32: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) "target-cpu"="gfx1010" }
+; WAVE32: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) }
+; WAVE32: attributes #[[ATTR3]] = { convergent }
+;.
+; WAVE64: [[META0]] = !{!"exec"}
+;.
+; WAVE32: [[META0]] = !{!"exec_lo"}
+;.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 87164cff4276e..bc43aaac6c567 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1882,7 +1882,8 @@ define i64 @icmp_constant_inputs_false() {
 
 define i64 @icmp_constant_inputs_true() {
 ; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR21:[0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0:![0-9]+]]) #[[ATTR21:[0-9]+]]
+; CHECK-NEXT:    [[RESULT:%.*]] = zext i32 [[TMP1]] to i64
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2589,7 +2590,8 @@ define i64 @fcmp_constant_inputs_false() {
 
 define i64 @fcmp_constant_inputs_true() {
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR21]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR21]]
+; CHECK-NEXT:    [[RESULT:%.*]] = zext i32 [[TMP1]] to i64
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)

arsenm

LGTM but we probably should just delete all of this code

arsenm · 2026-06-03T13:47:40Z

-        CallInst *NewCall = IC.Builder.CreateIntrinsic(Intrinsic::read_register,
-                                                       II.getType(), Args);
+        CallInst *NewCall =
+            IC.Builder.CreateIntrinsic(Intrinsic::read_register, ExecTy, Args);


Why do we still have this combine? We switched to preferring ballot(true) over read_register for getting exec a long time ago. Also, we should really remove the icmp/fcmp intrinsics.

I have removed the code, please re-check

github-actions · 2026-06-03T14:02:44Z

🪟 Windows x64 Test Results

135375 tests passed
3360 tests skipped

✅ The build succeeded and all tests passed.

github-actions · 2026-06-03T14:02:44Z

🐧 Linux x64 Test Results

175054 tests passed
3426 tests skipped
1 test failed

Failed Tests

(click on a test name to see its output)

Clang

Clang.Driver/hipspv-toolchain.hip

Exit Code: 1

Command Output (stdout):
--
# RUN: at line 4
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang -### -target x86_64-linux-gnu --offload=spirv64    --no-offload-new-driver --hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv -nohipwrapperinc /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip  2>&1 | /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck --check-prefixes=CHECK,OLD    -DTRIPLE=spirv64 /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang '-###' -target x86_64-linux-gnu --offload=spirv64 --no-offload-new-driver --hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv -nohipwrapperinc /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# note: command had no output on stdout or stderr
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck --check-prefixes=CHECK,OLD -DTRIPLE=spirv64 /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# note: command had no output on stdout or stderr
# RUN: at line 9
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang -### -target x86_64-linux-gnu    --offload=spirv64-unknown-chipstar    --offload-new-driver --hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv -nohipwrapperinc /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip  2>&1 | /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck --check-prefixes=CHECK,NEW    -DTRIPLE=spirv64-unknown-chipstar -DHIP_PATH=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang '-###' -target x86_64-linux-gnu --offload=spirv64-unknown-chipstar --offload-new-driver --hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv -nohipwrapperinc /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# note: command had no output on stdout or stderr
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck --check-prefixes=CHECK,NEW -DTRIPLE=spirv64-unknown-chipstar -DHIP_PATH=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# .---command stderr------------
# | /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip:41:14: error: NEW-SAME: expected string not found in input
# | // NEW-SAME: "--emit-fatbin-only" "--no-lto" "-o" "[[BUNDLE:.*hipfb]]"
# |              ^
# | <stdin>:8:236: note: scanning from here
# |  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-linker-wrapper" "--device-compiler=spirv64-unknown-chipstar=--hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv" "--device-linker=spirv64-unknown-chipstar=--allow-partial-linkage" "--device-linker=spirv64-unknown-chipstar=--create-library" "--no-lto" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-offload-bundler" "--emit-fatbin-only" "-o" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-d3464b.hipfb" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-7bcce6.out"
# |                                                                                                                                                                                                                                            ^
# | <stdin>:8:521: note: possible intended match here
# |  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-linker-wrapper" "--device-compiler=spirv64-unknown-chipstar=--hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv" "--device-linker=spirv64-unknown-chipstar=--allow-partial-linkage" "--device-linker=spirv64-unknown-chipstar=--create-library" "--no-lto" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-offload-bundler" "--emit-fatbin-only" "-o" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-d3464b.hipfb" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-7bcce6.out"
# |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         ^
# | 
# | Input file: <stdin>
# | Check file: /home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |            1: clang version 23.0.0git (https://github.com/llvm/llvm-project 16971370218f772be83f8962be656532da9d673f) 
# |            2: Target: x86_64-unknown-linux-gnu 
# |            3: Thread model: posix 
# |            4: InstalledDir: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin 
# |            5: Build config: +assertions 
# |            6:  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-23" "-cc1" "-triple" "spirv64-unknown-chipstar" "-aux-triple" "x86_64-unknown-linux-gnu" "-Wspir-compat" "-emit-llvm-bc" "-emit-llvm-uselists" "-dumpdir" "a-" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "hipspv-toolchain.hip" "-mrelocation-model" "static" "-mframe-pointer=all" "-fno-rounding-math" "-mconstructor-aliases" "-aux-target-cpu" "x86-64" "-fcuda-is-device" "-mllvm" "-vectorize-loops=false" "-mllvm" "-vectorize-slp=false" "-fvisibility=hidden" "-fapply-global-visibility-to-externs" "-mlink-builtin-bitcode" "/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv/lib/hip-device-lib/hipspv-spirv64-unknown-chipstar.bc" "-debugger-tuning=gdb" "-fdebug-compilation-dir=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver" "-fcoverage-compilation-dir=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver" "-resource-dir" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23" "-isystem" "/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv/include" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward" "-internal-isystem" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23/include" "-internal-isystem" "/usr/local/include" "-internal-isystem" 
"/usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include" "-internal-externc-isystem" "/usr/include/x86_64-linux-gnu" "-internal-externc-isystem" "/include" "-internal-externc-isystem" "/usr/include" "-internal-isystem" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23/include" "-internal-isystem" "/usr/local/include" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include" "-internal-externc-isystem" "/usr/include/x86_64-linux-gnu" "-internal-externc-isystem" "/include" "-internal-externc-isystem" "/usr/include" "-fdeprecated-macro" "-fno-autolink" "-ferror-limit" "19" "--offload-new-driver" "-fhip-new-launch-api" "-fgnuc-version=4.2.1" "-fskip-odr-check-in-gmf" "-fcxx-exceptions" "-fexceptions" "-cuid=e3a3f2e488ec96c3" "-fcuda-allow-variadic-functions" "-o" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-generic-1562ca.bc" "-x" "hip" "/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip" 
# |            7:  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llvm-offload-binary" "-o" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-7bcce6.out" "--image=file=/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-generic-1562ca.bc,triple=spirv64-unknown-chipstar,arch=generic,kind=hip" 
# |            8:  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-linker-wrapper" "--device-compiler=spirv64-unknown-chipstar=--hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv" "--device-linker=spirv64-unknown-chipstar=--allow-partial-linkage" "--device-linker=spirv64-unknown-chipstar=--create-library" "--no-lto" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-offload-bundler" "--emit-fatbin-only" "-o" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-d3464b.hipfb" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-7bcce6.out" 
# | same:41'0                                                                                                                                                                                                                                                X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# | same:41'1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             ?                                                                                                                                    possible intended match
# |            9:  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-23" "-cc1" "-triple" "x86_64-unknown-linux-gnu" "-aux-triple" "spirv64-unknown-chipstar" "-emit-obj" "-dumpdir" "a-" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "hipspv-toolchain.hip" "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie" "-mframe-pointer=all" "-fmath-errno" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-tune-cpu" "generic" "-debugger-tuning=gdb" "-fdebug-compilation-dir=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver" "-fcoverage-compilation-dir=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver" "-resource-dir" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23" "-internal-isystem" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23" "-idirafter" "/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv/include" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward" "-internal-isystem" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23/include" "-internal-isystem" "/usr/local/include" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include" "-internal-externc-isystem" "/usr/include/x86_64-linux-gnu" "-internal-externc-isystem" "/include" "-internal-externc-isystem" "/usr/include" 
"-internal-isystem" "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/clang/23/include" "-internal-isystem" "/usr/local/include" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include" "-internal-externc-isystem" "/usr/include/x86_64-linux-gnu" "-internal-externc-isystem" "/include" "-internal-externc-isystem" "/usr/include" "-fdeprecated-macro" "-ferror-limit" "19" "--offload-new-driver" "-fhip-new-launch-api" "-fgnuc-version=4.2.1" "-fskip-odr-check-in-gmf" "-fcxx-exceptions" "-fexceptions" "-fcuda-include-gpubinary" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-d3464b.hipfb" "-cuid=e3a3f2e488ec96c3" "-fcuda-allow-variadic-functions" "-faddrsig" "-fdwarf2-cfi-asm" "-o" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-d1566e.o" "-x" "hip" "/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/hipspv-toolchain.hip" 
# | same:41'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           10:  "/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-linker-wrapper" "--device-compiler=spirv64-unknown-chipstar=--hip-path=/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv" "--device-linker=spirv64-unknown-chipstar=--allow-partial-linkage" "--device-linker=spirv64-unknown-chipstar=--create-library" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=/usr/bin/x86_64-linux-gnu-ld" "-z" "relro" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "a.out" "/lib/x86_64-linux-gnu/Scrt1.o" "/lib/x86_64-linux-gnu/crti.o" "/usr/lib/gcc/x86_64-linux-gnu/13/crtbeginS.o" "-L/usr/lib/gcc/x86_64-linux-gnu/13" "-L/usr/lib/gcc/x86_64-linux-gnu/13/../../../../lib64" "-L/lib/x86_64-linux-gnu" "-L/lib/../lib64" "-L/usr/lib/x86_64-linux-gnu" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "/tmp/lit-tmp-zi22bv4u/hipspv-toolchain-d1566e.o" "/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv/lib/libamdhip64.so" "-L/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/Inputs/hipspv/lib" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "-lc" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "/usr/lib/gcc/x86_64-linux-gnu/13/crtendS.o" "/lib/x86_64-linux-gnu/crtn.o" 
# | same:41'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

…-wave-width-fix

arsenm

Could you still include the cases that were broken, or replace them with ballot instead?

…-wave-width-fix

github-actions · 2026-06-03T15:14:21Z

✅ With the latest revision this PR passed the C/C++ code formatter.

arsenm · 2026-06-03T15:21:31Z

@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 < %s | FileCheck %s


-global-isel=0 should only be used when testing both selectors

Not relevant anymore

arsenm · 2026-06-03T15:23:17Z

+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0)


Suggested change

%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0)

%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 false)

Not relevant anymore

arsenm · 2026-06-03T15:23:28Z

+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)


Suggested change

%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)

%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 true)

Not relevant anymore

arsenm · 2026-06-03T15:24:26Z

  SDValue Src = N->getOperand(1);
  SDLoc SL(N);

+  // AMDGPUISD::SETCC produces a wave-sized mask; emit it at that width and


I think the i32 return type on wave64 intentionally doesn't work. This is also now going beyond what the title suggests

Fair enough, reverted

…-wave-width-fix

[AMDGPU] Read EXEC at wave-size width in icmp/fcmp instcombine fold

3b49926

Reading read_register with the return type and a hardcoded "exec" name crashed isel on a width mismatch. Read EXEC at wave-size width with the matching register name, then zext/trunc to the actual return type.

llvmorg-github-actions Bot added backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jun 3, 2026

arsenm approved these changes Jun 3, 2026

View reviewed changes

Remove code

434000b

aobolensk changed the title ~~[AMDGPU] Read EXEC at wave-size width in icmp/fcmp instcombine fold~~ [AMDGPU] Remove always-true icmp/fcmp instcombine fold to EXEC read Jun 3, 2026

Merge remote-tracking branch 'origin/main' into llvm-amdgpu-read-exec…

c7cb3f4

…-wave-width-fix

aobolensk requested a review from arsenm June 3, 2026 14:17

arsenm reviewed Jun 3, 2026

View reviewed changes

Address comments

5bc38ed

aobolensk changed the title ~~[AMDGPU] Remove always-true icmp/fcmp instcombine fold to EXEC read~~ [AMDGPU] Produce ballot/icmp/fcmp lane masks at wavefront width Jun 3, 2026

Merge remote-tracking branch 'origin/main' into llvm-amdgpu-read-exec…

00dd340

…-wave-width-fix

fmt

311c381

arsenm reviewed Jun 3, 2026

View reviewed changes

aobolensk added 2 commits June 3, 2026 18:06

Revert sdag changes

5a6c104

Merge remote-tracking branch 'origin/main' into llvm-amdgpu-read-exec…

03b0553

…-wave-width-fix

		@@ -0,0 +1,65 @@
		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
		; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 < %s \| FileCheck %s

	%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0)
	%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 false)

Conversation

aobolensk commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmorg-github-actions Bot commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

github-actions Bot commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🪟 Windows x64 Test Results

Uh oh!

github-actions Bot commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Failed Tests

Clang

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

github-actions Bot commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

aobolensk commented Jun 3, 2026 •

edited

Loading

llvmorg-github-actions Bot commented Jun 3, 2026 •

edited

Loading

github-actions Bot commented Jun 3, 2026 •

edited

Loading

github-actions Bot commented Jun 3, 2026 •

edited

Loading

github-actions Bot commented Jun 3, 2026 •

edited

Loading