Skip to content

Commit

Permalink
[AMDGCN] Use ZExt when handling indices in insertelement (#85718)
Browse files Browse the repository at this point in the history
When i1 true is used as an index, SExt extends it to i32 -1. This would
cause BitVector to overflow.
The language reference specifies that the index shall be treated as an
unsigned number; this patch makes the code follow that rule.
(https://llvm.org/docs/LangRef.html#insertelement-instruction)

This patch fixes #85717

---------

Signed-off-by: Peter Rong <PeterRong96@gmail.com>
  • Loading branch information
DataCorrupted authored Mar 20, 2024
1 parent 3eb9ff3 commit 4a026b5
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1749,7 +1749,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
// Non constant index/out of bounds index -> folding is unlikely.
// The latter is more of a sanity check because canonical IR should just
// have replaced those with poison.
if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
return false;

const auto *VecSrc = IE->getOperand(0);
Expand All @@ -1761,7 +1761,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
return false;

CurVal = VecSrc;
EltsCovered.set(Idx->getSExtValue());
EltsCovered.set(Idx->getZExtValue());

// All elements covered.
if (EltsCovered.all())
Expand Down
51 changes: 51 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1197,3 +1197,54 @@ reallyfinally:
store <5 x double> %val, ptr %out, align 1
ret void
}

; Regression test for issue #85717. The insertelement below uses `i1 true` as
; its index. Insertelement indices are unsigned, so this index is 1; reading
; it as a signed value would yield -1 and overflow the bit vector that tracks
; which vector elements have been covered while inspecting PHI incoming values.
define amdgpu_kernel void @pr85718(i1 %Bool, ptr %Ptr, <4 x float> %Vec1, <4 x float> %Vec2) {
; OPT-LABEL: @pr85718(
; OPT-NEXT: BB0:
; OPT-NEXT: [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: BB1:
; OPT-NEXT: [[TMP0:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE0:%.*]], [[BB2:%.*]] ], [ [[LARGEPHI_EXTRACTSLICE1:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0:%.*]] ]
; OPT-NEXT: [[TMP1:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE3:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE4:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
; OPT-NEXT: [[TMP2:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE6:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE7:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
; OPT-NEXT: [[TMP3:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE9:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE10:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE0]], float [[TMP1]], i64 1
; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE1]], float [[TMP2]], i64 2
; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE2]], float [[TMP3]], i64 3
; OPT-NEXT: store <4 x float> [[LARGEPHI_INSERTSLICE3]], ptr [[PTR:%.*]], align 128
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1]] = extractelement <4 x float> [[VEC2:%.*]], i64 0
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4]] = extractelement <4 x float> [[VEC2]], i64 1
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7]] = extractelement <4 x float> [[VEC2]], i64 2
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10]] = extractelement <4 x float> [[VEC2]], i64 3
; OPT-NEXT: br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
; OPT: BB2:
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0]] = extractelement <4 x float> [[I]], i64 0
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3]] = extractelement <4 x float> [[I]], i64 1
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6]] = extractelement <4 x float> [[I]], i64 2
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9]] = extractelement <4 x float> [[I]], i64 3
; OPT-NEXT: br label [[BB1]]
;
; NOOPT-LABEL: @pr85718(
; NOOPT-NEXT: BB0:
; NOOPT-NEXT: [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
; NOOPT-NEXT: br label [[BB1:%.*]]
; NOOPT: BB1:
; NOOPT-NEXT: [[PHI:%.*]] = phi <4 x float> [ [[I]], [[BB2:%.*]] ], [ [[VEC2:%.*]], [[BB1]] ], [ zeroinitializer, [[BB0:%.*]] ]
; NOOPT-NEXT: store <4 x float> [[PHI]], ptr [[PTR:%.*]], align 128
; NOOPT-NEXT: br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
; NOOPT: BB2:
; NOOPT-NEXT: br label [[BB1]]
;
BB0:
%I = insertelement <4 x float> %Vec1, float 4.200000e+01, i1 true
br label %BB1

BB1: ; preds = %BB0, %BB1, %BB2
%PHI = phi <4 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
store <4 x float> %PHI, ptr %Ptr, align 128
br i1 %Bool, label %BB1, label %BB2

BB2: ; preds = %BB1
br label %BB1
}

0 comments on commit 4a026b5

Please sign in to comment.