# Patch summary (free text ahead of the first "diff --git" header; patch
# tools skip it).
#
# 1. llvm/lib/Target/AMDGPU/SIISelLowering.cpp
#    In SITargetLowering::allowsMisalignedMemoryAccessesImpl the natural
#    alignment passed to Align was computed as PowerOf2Ceil(Size/8). Size is
#    compared against 32 and divided by 8, i.e. it is used as a bit count, and
#    the integer division truncates, so any Size below 8 produced
#    PowerOf2Ceil(0). The hunk switches to PowerOf2Ceil(divideCeil(Size, 8)),
#    which rounds the byte count up, so a sub-byte access now yields 1.
#    NOTE(review): presumably the crash mentioned in the new test came from
#    constructing Align from 0 -- confirm against llvm/Support/Alignment.h.
#
# 2. llvm/test/CodeGen/AMDGPU/load-local-i1.ll
#    Adds kernel @local_load_i1_misaligned, which loads <16 x i1> (align 4)
#    from %in + 1 x i1 and <8 x i1> (align 1) from %in, both in addrspace(3),
#    and stores them to %out. It checks that SICIVI emits "s_mov_b32 m0" and
#    that GFX9 output does not mention m0.
#
# 3. llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
#    New test running -passes=load-store-vectorizer for gfx940 on the same two
#    loads; the autogenerated CHECK lines expect both loads to be left as
#    they are. Its own comment states the intent: do not crash when checking
#    for misaligned accesses with sub-byte size.
#
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ecd4451c50472..1437f3d58b5e7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1695,7 +1695,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
     if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
       return false;
 
-    Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
+    Align RequiredAlignment(
+        PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
     if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
         Alignment < RequiredAlignment)
       return false;
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
index 578170941efaa..43d102e4655b2 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@@ -462,4 +462,17 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out,
   ret void
 }
 
+; FUNC-LABEL: {{^}}local_load_i1_misaligned:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+define amdgpu_kernel void @local_load_i1_misaligned(ptr addrspace(3) %in, ptr addrspace (3) %out) #0 {
+  %in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
+  %load.1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
+  %load.2 = load <8 x i1>, ptr addrspace(3) %in, align 1
+  %out.gep.1 = getelementptr i1, ptr addrspace(3) %out, i32 16
+  store <16 x i1> %load.1, ptr addrspace(3) %out
+  store <8 x i1> %load.2, ptr addrspace(3) %out.gep.1
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
new file mode 100644
index 0000000000000..6f3d2cb69090e
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx940 -passes=load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Don't crash when checking for misaligned accesses with sub-byte size.
+
+define void @misaligned_access_i1(ptr addrspace(3) %in) #0 {
+; CHECK-LABEL: define void @misaligned_access_i1(
+; CHECK-SAME: ptr addrspace(3) [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[IN_GEP_1:%.*]] = getelementptr i1, ptr addrspace(3) [[IN]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i1>, ptr addrspace(3) [[IN_GEP_1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i1>, ptr addrspace(3) [[IN]], align 1
+; CHECK-NEXT:    ret void
+;
+  %in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
+
+  %1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
+  %2 = load <8 x i1>, ptr addrspace(3) %in, align 1
+  ret void
+}
+