Skip to content

Commit 8f6dc44

Browse files
committed
Check if strict alignment is required
1 parent 9628541 commit 8f6dc44

File tree

5 files changed

+66
-27
lines changed

5 files changed

+66
-27
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,8 @@ def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
393393
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
394394
SDTCisInt<1>]>>;
395395

396+
def AllowMisalignedMemAccesses : Predicate<"!Subtarget->requiresStrictAlign()">; // Satisfied only when Subtarget->requiresStrictAlign() is false.
397+
396398

397399
//===----------------------------------------------------------------------===//
398400
// AArch64-specific DAG Nodes.

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2977,14 +2977,25 @@ let Predicates = [HasSVE_or_SME] in {
29772977
// Allow using the reg+reg form of ld1b/st1b for memory accesses with the
29782978
// same width as nxv16i8. This saves an add in cases where we would
29792979
// otherwise compute the address separately.
2980-
// Also allow using LDR/STR to avoid the predicate dependence.
29812980
multiclass unpred_loadstore_bitcast<ValueType Ty> {
29822981
let Predicates = [IsLE] in {
29832982
def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
29842983
(LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
29852984
def : Pat<(store Ty:$val, (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
29862985
(ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
2986+
}
2987+
}
2988+
defm : unpred_loadstore_bitcast<nxv8i16>;
2989+
defm : unpred_loadstore_bitcast<nxv8f16>;
2990+
defm : unpred_loadstore_bitcast<nxv8bf16>;
2991+
defm : unpred_loadstore_bitcast<nxv4f32>;
2992+
defm : unpred_loadstore_bitcast<nxv4i32>;
2993+
defm : unpred_loadstore_bitcast<nxv2i64>;
2994+
defm : unpred_loadstore_bitcast<nxv2f64>;
29872995

2996+
// Allow using LDR/STR to avoid the predicate dependence.
2997+
let Predicates = [IsLE, AllowMisalignedMemAccesses] in
2998+
foreach Ty = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in {
29882999
let AddedComplexity = 2 in {
29893000
def : Pat<(Ty (load (am_sve_indexed_s9 GPR64sp:$base, simm9:$offset))),
29903001
(LDR_ZXI GPR64sp:$base, simm9:$offset)>;
@@ -2997,15 +3008,6 @@ let Predicates = [HasSVE_or_SME] in {
29973008
def : Pat<(store Ty:$val, GPR64sp:$base),
29983009
(STR_ZXI ZPR:$val, GPR64sp:$base, (i64 0))>;
29993010
}
3000-
}
3001-
defm : unpred_loadstore_bitcast<nxv16i8>;
3002-
defm : unpred_loadstore_bitcast<nxv8i16>;
3003-
defm : unpred_loadstore_bitcast<nxv8f16>;
3004-
defm : unpred_loadstore_bitcast<nxv8bf16>;
3005-
defm : unpred_loadstore_bitcast<nxv4f32>;
3006-
defm : unpred_loadstore_bitcast<nxv4i32>;
3007-
defm : unpred_loadstore_bitcast<nxv2i64>;
3008-
defm : unpred_loadstore_bitcast<nxv2f64>;
30093011

30103012
multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
30113013
def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),

llvm/test/CodeGen/AArch64/nontemporal-load.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -621,11 +621,12 @@ define <vscale x 20 x float> @test_ldnp_v20f32_vscale(ptr %A) {
621621
;
622622
; CHECK-BE-LABEL: test_ldnp_v20f32_vscale:
623623
; CHECK-BE: // %bb.0:
624-
; CHECK-BE-NEXT: ldr z0, [x0]
625-
; CHECK-BE-NEXT: ldr z1, [x0, #1, mul vl]
626-
; CHECK-BE-NEXT: ldr z2, [x0, #2, mul vl]
627-
; CHECK-BE-NEXT: ldr z3, [x0, #3, mul vl]
628-
; CHECK-BE-NEXT: ldr z4, [x0, #4, mul vl]
624+
; CHECK-BE-NEXT: ptrue p0.s
625+
; CHECK-BE-NEXT: ld1w { z0.s }, p0/z, [x0]
626+
; CHECK-BE-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
627+
; CHECK-BE-NEXT: ld1w { z2.s }, p0/z, [x0, #2, mul vl]
628+
; CHECK-BE-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl]
629+
; CHECK-BE-NEXT: ld1w { z4.s }, p0/z, [x0, #4, mul vl]
629630
; CHECK-BE-NEXT: ret
630631
%lv = load<vscale x 20 x float>, ptr %A, align 8, !nontemporal !0
631632
ret <vscale x 20 x float> %lv

llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,86 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix CHECK-NO-STRICT-ALIGN %s
33
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s | FileCheck %s
44

55
define void @nxv16i8(ptr %ldptr, ptr %stptr) {
6+
; CHECK-NO-STRICT-ALIGN-LABEL: nxv16i8:
7+
; CHECK-NO-STRICT-ALIGN: // %bb.0:
8+
; CHECK-NO-STRICT-ALIGN-NEXT: ldr z0, [x0]
9+
; CHECK-NO-STRICT-ALIGN-NEXT: str z0, [x1]
10+
; CHECK-NO-STRICT-ALIGN-NEXT: ret
11+
;
612
; CHECK-LABEL: nxv16i8:
713
; CHECK: // %bb.0:
8-
; CHECK-NEXT: ldr z0, [x0]
9-
; CHECK-NEXT: str z0, [x1]
14+
; CHECK-NEXT: ptrue p0.b
15+
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
16+
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
1017
; CHECK-NEXT: ret
1118
%l3 = load <vscale x 16 x i8>, ptr %ldptr, align 1
1219
store <vscale x 16 x i8> %l3, ptr %stptr, align 1
1320
ret void
1421
}
1522

1623
define void @nxv8i16(ptr %ldptr, ptr %stptr) {
24+
; CHECK-NO-STRICT-ALIGN-LABEL: nxv8i16:
25+
; CHECK-NO-STRICT-ALIGN: // %bb.0:
26+
; CHECK-NO-STRICT-ALIGN-NEXT: ldr z0, [x0]
27+
; CHECK-NO-STRICT-ALIGN-NEXT: str z0, [x1]
28+
; CHECK-NO-STRICT-ALIGN-NEXT: ret
29+
;
1730
; CHECK-LABEL: nxv8i16:
1831
; CHECK: // %bb.0:
19-
; CHECK-NEXT: ldr z0, [x0]
20-
; CHECK-NEXT: str z0, [x1]
32+
; CHECK-NEXT: ptrue p0.h
33+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
34+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
2135
; CHECK-NEXT: ret
2236
%l3 = load <vscale x 8 x i16>, ptr %ldptr, align 2
2337
store <vscale x 8 x i16> %l3, ptr %stptr, align 2
2438
ret void
2539
}
2640

2741
define void @nxv4i32(ptr %ldptr, ptr %stptr) {
42+
; CHECK-NO-STRICT-ALIGN-LABEL: nxv4i32:
43+
; CHECK-NO-STRICT-ALIGN: // %bb.0:
44+
; CHECK-NO-STRICT-ALIGN-NEXT: ldr z0, [x0]
45+
; CHECK-NO-STRICT-ALIGN-NEXT: str z0, [x1]
46+
; CHECK-NO-STRICT-ALIGN-NEXT: ret
47+
;
2848
; CHECK-LABEL: nxv4i32:
2949
; CHECK: // %bb.0:
30-
; CHECK-NEXT: ldr z0, [x0]
31-
; CHECK-NEXT: str z0, [x1]
50+
; CHECK-NEXT: ptrue p0.s
51+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
52+
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
3253
; CHECK-NEXT: ret
3354
%l3 = load <vscale x 4 x i32>, ptr %ldptr, align 4
3455
store <vscale x 4 x i32> %l3, ptr %stptr, align 4
3556
ret void
3657
}
3758

3859
define void @nxv2i64(ptr %ldptr, ptr %stptr) {
60+
; CHECK-NO-STRICT-ALIGN-LABEL: nxv2i64:
61+
; CHECK-NO-STRICT-ALIGN: // %bb.0:
62+
; CHECK-NO-STRICT-ALIGN-NEXT: ldr z0, [x0]
63+
; CHECK-NO-STRICT-ALIGN-NEXT: str z0, [x1]
64+
; CHECK-NO-STRICT-ALIGN-NEXT: ret
65+
;
3966
; CHECK-LABEL: nxv2i64:
4067
; CHECK: // %bb.0:
41-
; CHECK-NEXT: ldr z0, [x0]
42-
; CHECK-NEXT: str z0, [x1]
68+
; CHECK-NEXT: ptrue p0.d
69+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
70+
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
4371
; CHECK-NEXT: ret
4472
%l3 = load <vscale x 2 x i64>, ptr %ldptr, align 8
4573
store <vscale x 2 x i64> %l3, ptr %stptr, align 8
4674
ret void
4775
}
4876

4977
define void @nxv16i1(ptr %ldptr, ptr %stptr) {
78+
; CHECK-NO-STRICT-ALIGN-LABEL: nxv16i1:
79+
; CHECK-NO-STRICT-ALIGN: // %bb.0:
80+
; CHECK-NO-STRICT-ALIGN-NEXT: ldr p0, [x0]
81+
; CHECK-NO-STRICT-ALIGN-NEXT: str p0, [x1]
82+
; CHECK-NO-STRICT-ALIGN-NEXT: ret
83+
;
5084
; CHECK-LABEL: nxv16i1:
5185
; CHECK: // %bb.0:
5286
; CHECK-NEXT: ldr p0, [x0]

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2006,9 +2006,9 @@ define void @zext_v16i8_to_v16i32_in_loop_scalable_vectors(ptr %src, ptr %dst) {
20062006
; CHECK-BE-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
20072007
; CHECK-BE-NEXT: add x8, x8, #16
20082008
; CHECK-BE-NEXT: cmp x8, #128
2009-
; CHECK-BE-NEXT: str z1, [x9, #2, mul vl]
2010-
; CHECK-BE-NEXT: str z2, [x9, #3, mul vl]
2011-
; CHECK-BE-NEXT: str z3, [x9, #1, mul vl]
2009+
; CHECK-BE-NEXT: st1w { z1.s }, p0, [x9, #2, mul vl]
2010+
; CHECK-BE-NEXT: st1w { z2.s }, p0, [x9, #3, mul vl]
2011+
; CHECK-BE-NEXT: st1w { z3.s }, p0, [x9, #1, mul vl]
20122012
; CHECK-BE-NEXT: b.ne .LBB19_1
20132013
; CHECK-BE-NEXT: // %bb.2: // %exit
20142014
; CHECK-BE-NEXT: ret

0 commit comments

Comments
 (0)