|
5 | 5 | define <16 x i8> @load_v3i8(ptr %src) { |
6 | 6 | ; CHECK-LABEL: load_v3i8: |
7 | 7 | ; CHECK: ; %bb.0: |
8 | | -; CHECK-NEXT: sub sp, sp, #16 |
9 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
10 | | -; CHECK-NEXT: ldrh w8, [x0] |
11 | | -; CHECK-NEXT: strh w8, [sp, #12] |
12 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
13 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
14 | | -; CHECK-NEXT: umov.h w8, v0[0] |
15 | | -; CHECK-NEXT: umov.h w9, v0[1] |
| 8 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 9 | +; CHECK-NEXT: ldrh w9, [x0] |
| 10 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
16 | 11 | ; CHECK-NEXT: fmov s0, w8 |
17 | | -; CHECK-NEXT: add x8, x0, #2 |
18 | | -; CHECK-NEXT: mov.b v0[1], w9 |
19 | | -; CHECK-NEXT: ld1.b { v0 }[2], [x8] |
20 | | -; CHECK-NEXT: add sp, sp, #16 |
21 | 12 | ; CHECK-NEXT: ret |
22 | 13 | ; |
23 | 14 | ; BE-LABEL: load_v3i8: |
@@ -47,19 +38,14 @@ define <16 x i8> @load_v3i8(ptr %src) { |
47 | 38 | define <4 x i32> @load_v3i8_to_4xi32(ptr %src) { |
48 | 39 | ; CHECK-LABEL: load_v3i8_to_4xi32: |
49 | 40 | ; CHECK: ; %bb.0: |
50 | | -; CHECK-NEXT: sub sp, sp, #16 |
51 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
52 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 41 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 42 | +; CHECK-NEXT: ldrh w9, [x0] |
53 | 43 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
54 | | -; CHECK-NEXT: strh w8, [sp, #12] |
55 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
56 | | -; CHECK-NEXT: ldrsb w8, [x0, #2] |
57 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
58 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
59 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 44 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 45 | +; CHECK-NEXT: fmov s0, w8 |
| 46 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
60 | 47 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
61 | 48 | ; CHECK-NEXT: and.16b v0, v0, v1 |
62 | | -; CHECK-NEXT: add sp, sp, #16 |
63 | 49 | ; CHECK-NEXT: ret |
64 | 50 | ; |
65 | 51 | ; BE-LABEL: load_v3i8_to_4xi32: |
@@ -90,19 +76,14 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) { |
90 | 76 | define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) { |
91 | 77 | ; CHECK-LABEL: load_v3i8_to_4xi32_align_2: |
92 | 78 | ; CHECK: ; %bb.0: |
93 | | -; CHECK-NEXT: sub sp, sp, #16 |
94 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
95 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 79 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 80 | +; CHECK-NEXT: ldrh w9, [x0] |
96 | 81 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
97 | | -; CHECK-NEXT: strh w8, [sp, #12] |
98 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
99 | | -; CHECK-NEXT: ldrsb w8, [x0, #2] |
100 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
101 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
102 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 82 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 83 | +; CHECK-NEXT: fmov s0, w8 |
| 84 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
103 | 85 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
104 | 86 | ; CHECK-NEXT: and.16b v0, v0, v1 |
105 | | -; CHECK-NEXT: add sp, sp, #16 |
106 | 87 | ; CHECK-NEXT: ret |
107 | 88 | ; |
108 | 89 | ; BE-LABEL: load_v3i8_to_4xi32_align_2: |
@@ -160,19 +141,14 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) { |
160 | 141 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) { |
161 | 142 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1: |
162 | 143 | ; CHECK: ; %bb.0: |
163 | | -; CHECK-NEXT: sub sp, sp, #16 |
164 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
165 | | -; CHECK-NEXT: ldurh w8, [x0, #1] |
| 144 | +; CHECK-NEXT: ldrb w8, [x0, #3] |
| 145 | +; CHECK-NEXT: ldurh w9, [x0, #1] |
166 | 146 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
167 | | -; CHECK-NEXT: strh w8, [sp, #12] |
168 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
169 | | -; CHECK-NEXT: ldrsb w8, [x0, #3] |
170 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
171 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
172 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 147 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 148 | +; CHECK-NEXT: fmov s0, w8 |
| 149 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
173 | 150 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
174 | 151 | ; CHECK-NEXT: and.16b v0, v0, v1 |
175 | | -; CHECK-NEXT: add sp, sp, #16 |
176 | 152 | ; CHECK-NEXT: ret |
177 | 153 | ; |
178 | 154 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_1: |
@@ -204,19 +180,14 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) { |
204 | 180 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) { |
205 | 181 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3: |
206 | 182 | ; CHECK: ; %bb.0: |
207 | | -; CHECK-NEXT: sub sp, sp, #16 |
208 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
209 | | -; CHECK-NEXT: ldurh w8, [x0, #3] |
| 183 | +; CHECK-NEXT: ldrb w8, [x0, #5] |
| 184 | +; CHECK-NEXT: ldurh w9, [x0, #3] |
210 | 185 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
211 | | -; CHECK-NEXT: strh w8, [sp, #12] |
212 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
213 | | -; CHECK-NEXT: ldrsb w8, [x0, #5] |
214 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
215 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
216 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 186 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 187 | +; CHECK-NEXT: fmov s0, w8 |
| 188 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
217 | 189 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
218 | 190 | ; CHECK-NEXT: and.16b v0, v0, v1 |
219 | | -; CHECK-NEXT: add sp, sp, #16 |
220 | 191 | ; CHECK-NEXT: ret |
221 | 192 | ; |
222 | 193 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_3: |
@@ -348,18 +319,14 @@ define <3 x i32> @load_v3i32(ptr %src) { |
348 | 319 | define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) { |
349 | 320 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32: |
350 | 321 | ; CHECK: ; %bb.0: |
351 | | -; CHECK-NEXT: sub sp, sp, #16 |
352 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
353 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 322 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 323 | +; CHECK-NEXT: ldrh w9, [x0] |
354 | 324 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
355 | | -; CHECK-NEXT: strh w8, [sp, #12] |
356 | | -; CHECK-NEXT: add x8, x0, #2 |
357 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
358 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
359 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 325 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 326 | +; CHECK-NEXT: fmov s0, w8 |
| 327 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
360 | 328 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
361 | 329 | ; CHECK-NEXT: and.16b v0, v0, v1 |
362 | | -; CHECK-NEXT: add sp, sp, #16 |
363 | 330 | ; CHECK-NEXT: ret |
364 | 331 | ; |
365 | 332 | ; BE-LABEL: load_v3i8_zext_to_3xi32: |
@@ -388,18 +355,14 @@ define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) { |
388 | 355 | define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) { |
389 | 356 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32: |
390 | 357 | ; CHECK: ; %bb.0: |
391 | | -; CHECK-NEXT: sub sp, sp, #16 |
392 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
393 | | -; CHECK-NEXT: ldrh w8, [x0] |
394 | | -; CHECK-NEXT: strh w8, [sp, #12] |
395 | | -; CHECK-NEXT: add x8, x0, #2 |
396 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
397 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
398 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 358 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 359 | +; CHECK-NEXT: ldrh w9, [x0] |
| 360 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 361 | +; CHECK-NEXT: fmov s0, w8 |
| 362 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
399 | 363 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
400 | 364 | ; CHECK-NEXT: shl.4s v0, v0, #24 |
401 | 365 | ; CHECK-NEXT: sshr.4s v0, v0, #24 |
402 | | -; CHECK-NEXT: add sp, sp, #16 |
403 | 366 | ; CHECK-NEXT: ret |
404 | 367 | ; |
405 | 368 | ; BE-LABEL: load_v3i8_sext_to_3xi32: |
@@ -513,19 +476,15 @@ entry: |
513 | 476 | define void @load_ext_to_64bits(ptr %src, ptr %dst) { |
514 | 477 | ; CHECK-LABEL: load_ext_to_64bits: |
515 | 478 | ; CHECK: ; %bb.0: ; %entry |
516 | | -; CHECK-NEXT: sub sp, sp, #16 |
517 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
518 | | -; CHECK-NEXT: ldrh w8, [x0] |
519 | | -; CHECK-NEXT: strh w8, [sp, #12] |
520 | | -; CHECK-NEXT: add x8, x0, #2 |
521 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
522 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
523 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 479 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 480 | +; CHECK-NEXT: ldrh w9, [x0] |
| 481 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 482 | +; CHECK-NEXT: fmov s0, w8 |
524 | 483 | ; CHECK-NEXT: add x8, x1, #4 |
| 484 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
525 | 485 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8 |
526 | 486 | ; CHECK-NEXT: st1.h { v0 }[2], [x8] |
527 | 487 | ; CHECK-NEXT: str s0, [x1] |
528 | | -; CHECK-NEXT: add sp, sp, #16 |
529 | 488 | ; CHECK-NEXT: ret |
530 | 489 | ; |
531 | 490 | ; BE-LABEL: load_ext_to_64bits: |
@@ -614,24 +573,20 @@ entry: |
614 | 573 | define void @load_ext_add_to_64bits(ptr %src, ptr %dst) { |
615 | 574 | ; CHECK-LABEL: load_ext_add_to_64bits: |
616 | 575 | ; CHECK: ; %bb.0: ; %entry |
617 | | -; CHECK-NEXT: sub sp, sp, #16 |
618 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
619 | | -; CHECK-NEXT: ldrh w9, [x0] |
| 576 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
| 577 | +; CHECK-NEXT: ldrh w10, [x0] |
620 | 578 | ; CHECK-NEXT: Lloh2: |
621 | 579 | ; CHECK-NEXT: adrp x8, lCPI15_0@PAGE |
622 | 580 | ; CHECK-NEXT: Lloh3: |
623 | 581 | ; CHECK-NEXT: ldr d1, [x8, lCPI15_0@PAGEOFF] |
624 | 582 | ; CHECK-NEXT: add x8, x1, #4 |
625 | | -; CHECK-NEXT: strh w9, [sp, #12] |
626 | | -; CHECK-NEXT: add x9, x0, #2 |
627 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
628 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
629 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 583 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 584 | +; CHECK-NEXT: fmov s0, w9 |
| 585 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
630 | 586 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8 |
631 | 587 | ; CHECK-NEXT: add.4h v0, v0, v1 |
632 | 588 | ; CHECK-NEXT: st1.h { v0 }[2], [x8] |
633 | 589 | ; CHECK-NEXT: str s0, [x1] |
634 | | -; CHECK-NEXT: add sp, sp, #16 |
635 | 590 | ; CHECK-NEXT: ret |
636 | 591 | ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 |
637 | 592 | ; |
@@ -880,24 +835,21 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) { |
880 | 835 | define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) { |
881 | 836 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store: |
882 | 837 | ; CHECK: ; %bb.0: |
883 | | -; CHECK-NEXT: sub sp, sp, #16 |
884 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
885 | | -; CHECK-NEXT: ldrh w9, [x0] |
| 838 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
| 839 | +; CHECK-NEXT: ldrh w10, [x0] |
886 | 840 | ; CHECK-NEXT: Lloh4: |
887 | 841 | ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE |
888 | 842 | ; CHECK-NEXT: Lloh5: |
889 | 843 | ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF] |
890 | | -; CHECK-NEXT: add x8, x0, #1 |
891 | | -; CHECK-NEXT: strh w9, [sp, #12] |
892 | | -; CHECK-NEXT: add x9, x0, #2 |
893 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
894 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
895 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 844 | +; CHECK-NEXT: add x8, x0, #2 |
| 845 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 846 | +; CHECK-NEXT: fmov s0, w9 |
| 847 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
896 | 848 | ; CHECK-NEXT: uaddw.4s v0, v1, v0 |
897 | | -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
898 | | -; CHECK-NEXT: st1.b { v0 }[8], [x9] |
| 849 | +; CHECK-NEXT: st1.b { v0 }[8], [x8] |
| 850 | +; CHECK-NEXT: add x8, x0, #1 |
899 | 851 | ; CHECK-NEXT: st1.b { v0 }[0], [x0] |
900 | | -; CHECK-NEXT: add sp, sp, #16 |
| 852 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
901 | 853 | ; CHECK-NEXT: ret |
902 | 854 | ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5 |
903 | 855 | ; |
@@ -936,24 +888,21 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) { |
936 | 888 | define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) { |
937 | 889 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store: |
938 | 890 | ; CHECK: ; %bb.0: |
939 | | -; CHECK-NEXT: sub sp, sp, #16 |
940 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
941 | | -; CHECK-NEXT: ldrh w9, [x0] |
| 891 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
| 892 | +; CHECK-NEXT: ldrh w10, [x0] |
942 | 893 | ; CHECK-NEXT: Lloh6: |
943 | 894 | ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE |
944 | 895 | ; CHECK-NEXT: Lloh7: |
945 | 896 | ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF] |
946 | | -; CHECK-NEXT: add x8, x0, #1 |
947 | | -; CHECK-NEXT: strh w9, [sp, #12] |
948 | | -; CHECK-NEXT: add x9, x0, #2 |
949 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
950 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
951 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 897 | +; CHECK-NEXT: add x8, x0, #2 |
| 898 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 899 | +; CHECK-NEXT: fmov s0, w9 |
| 900 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
952 | 901 | ; CHECK-NEXT: uaddw.4s v0, v1, v0 |
953 | | -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
954 | | -; CHECK-NEXT: st1.b { v0 }[8], [x9] |
| 902 | +; CHECK-NEXT: st1.b { v0 }[8], [x8] |
| 903 | +; CHECK-NEXT: add x8, x0, #1 |
955 | 904 | ; CHECK-NEXT: st1.b { v0 }[0], [x0] |
956 | | -; CHECK-NEXT: add sp, sp, #16 |
| 905 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
957 | 906 | ; CHECK-NEXT: ret |
958 | 907 | ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7 |
959 | 908 | ; |
|
0 commit comments