Skip to content

Commit

Permalink
[AMDGPU] Fix crash in allowsMisalignedMemoryAccesses with i1 (#105794)
Browse files Browse the repository at this point in the history
  • Loading branch information
kerbowa authored Aug 23, 2024
1 parent caa844e commit ceb587a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 1 deletion.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1695,7 +1695,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
return false;

Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
Align RequiredAlignment(
PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
Alignment < RequiredAlignment)
return false;
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/AMDGPU/load-local-i1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -462,4 +462,17 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out,
ret void
}

; Regression test accompanying the fix for a crash in
; allowsMisalignedMemoryAccesses when the accessed type is built from i1.
; The kernel performs two i1-vector loads from address space 3 with different
; alignments and stores both results back to address space 3.
; Expectations: under the SICIVI prefix an s_mov_b32 writing m0 must be
; emitted; under the GFX9 prefix m0 must not appear after the label.
; FUNC-LABEL: {{^}}local_load_i1_misaligned:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_i1_misaligned(ptr addrspace(3) %in, ptr addrspace (3) %out) #0 {
; Address one i1 element past %in.
%in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
; <16 x i1> load from the offset pointer, declared with 4-byte alignment.
%load.1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
; <8 x i1> load from the base pointer, declared with only 1-byte alignment.
%load.2 = load <8 x i1>, ptr addrspace(3) %in, align 1
; Address sixteen i1 elements past %out for the second store.
%out.gep.1 = getelementptr i1, ptr addrspace(3) %out, i32 16
; Store both loaded values so the loads are not dead.
store <16 x i1> %load.1, ptr addrspace(3) %out
store <8 x i1> %load.2, ptr addrspace(3) %out.gep.1
ret void
}

attributes #0 = { nounwind }
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx940 -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; Don't crash when checking for misaligned accesses with sub-byte size.

; Runs the load-store-vectorizer over two i1-vector loads from address space 3
; that carry different alignments. The test passes when opt completes without
; crashing; the autogenerated assertions below expect both loads to be left
; as two separate loads in the output.
define void @misaligned_access_i1(ptr addrspace(3) %in) #0 {
; CHECK-LABEL: define void @misaligned_access_i1(
; CHECK-SAME: ptr addrspace(3) [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[IN_GEP_1:%.*]] = getelementptr i1, ptr addrspace(3) [[IN]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i1>, ptr addrspace(3) [[IN_GEP_1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i1>, ptr addrspace(3) [[IN]], align 1
; CHECK-NEXT: ret void
;
; Address one i1 element past %in.
%in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1

; <16 x i1> load from the offset pointer, declared with 4-byte alignment.
%1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
; <8 x i1> load from the base pointer, declared with only 1-byte alignment.
%2 = load <8 x i1>, ptr addrspace(3) %in, align 1
ret void
}

0 comments on commit ceb587a

Please sign in to comment.