Skip to content

[AArch64] Enable aggressivelyPreferBuildVectorSources #142729

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

davemgreen
Copy link
Collaborator

This helps remove some inefficient buildvector lowering by replacing extract_vector_elt(buildvector, idx) with the corresponding original source operand of the buildvector. It seems to be a general improvement.

This helps remove some inefficient buildvector lowering by replacing
extract_vector_elt(buildvector, idx) with the corresponding original source
operand of the buildvector. It seems to be a general improvement.
@llvmbot
Copy link
Member

llvmbot commented Jun 4, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

This helps to remove some inefficient buildvector lowering by converting extract_vector_elt(buildvector) to the original source. It seems to be a general improvement.


Patch is 328.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142729.diff

16 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+4)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir (+2-6)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+1-3)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+271-305)
  • (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+177-211)
  • (modified) llvm/test/CodeGen/AArch64/fptrunc.ll (+9-19)
  • (modified) llvm/test/CodeGen/AArch64/itofp.ll (+112-108)
  • (modified) llvm/test/CodeGen/AArch64/sext.ll (+2-4)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll (+7-12)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+78-182)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+84-150)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+1343-2613)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+140-236)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (+7-13)
  • (modified) llvm/test/CodeGen/AArch64/zext-to-tbl.ll (+78-89)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index b2174487c2fe8..f193fa05dd161 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -449,6 +449,10 @@ class AArch64TargetLowering : public TargetLowering {
   /// Enable aggressive FMA fusion on targets that want it.
   bool enableAggressiveFMAFusion(EVT VT) const override;
 
+  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
+    return true;
+  }
+
   /// Returns the size of the platform's va_list object.
   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b..5189582d0b6ac 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -55,13 +55,9 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
     ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
-    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
-    ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64)
-    ; CHECK-NEXT: $x0 = COPY %extract(s64)
-    ; CHECK-NEXT: $x1 = COPY %extract2(s64)
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+    ; CHECK-NEXT: $x1 = COPY %arg2(s64)
     ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index e2933690c7c55..e81447a1de4b6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -175,10 +175,8 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
     ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
-    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
     ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
-    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
     ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
index c000a8e635bc6..86c0575961a17 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -278,8 +278,8 @@ body:             |
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
     ; CHECK-NEXT: RET_ReallyLR
     %3:_(s8) = G_CONSTANT i8 127
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index a33b1ef569fc3..04dfdedb42752 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -726,7 +726,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-LABEL: test_signed_v3f128_v3i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #128
-; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x23, [sp, #80] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
@@ -734,13 +734,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
-; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    stp q2, q1, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -755,15 +755,15 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    mov w22, #2147483647 // =0x7fffffff
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w22, wzr, w19, ne
+; CHECK-SD-NEXT:    csel w21, wzr, w19, ne
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -775,16 +775,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
-; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w22
-; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w23, wzr, w19, ne
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -796,16 +793,17 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w23
+; CHECK-SD-NEXT:    ldp x30, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[2], w21
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.s[2], w8
 ; CHECK-SD-NEXT:    add sp, sp, #128
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4890,63 +4888,61 @@ define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
 define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-LABEL: test_signed_v8f64_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d4, v3.d[1]
-; CHECK-SD-NEXT:    fcvtzs w11, d3
-; CHECK-SD-NEXT:    mov w9, #127 // =0x7f
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d2
-; CHECK-SD-NEXT:    fcvtzs w15, d1
-; CHECK-SD-NEXT:    fcvtzs w17, d0
-; CHECK-SD-NEXT:    fcvtzs w8, d4
-; CHECK-SD-NEXT:    mov d4, v2.d[1]
-; CHECK-SD-NEXT:    mov d2, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzs w14, d3
-; CHECK-SD-NEXT:    cmp w8, #127
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    fcvtzs w16, d2
-; CHECK-SD-NEXT:    csel w10, w8, w9, lt
-; CHECK-SD-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w9, d4
+; CHECK-SD-NEXT:    cmp w9, #127
+; CHECK-SD-NEXT:    csel w11, w9, w8, lt
+; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    csel w10, w10, w8, gt
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    csel w11, w11, w9, lt
+; CHECK-SD-NEXT:    mov v0.b[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    csel w11, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    csel w12, w12, w9, lt
-; CHECK-SD-NEXT:    fmov s3, w11
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    csel w12, w12, w8, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w13, w13, w9, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    cmn w13, #128
-; CHECK-SD-NEXT:    csel w13, w13, w8, gt
-; CHECK-SD-NEXT:    cmp w14, #127
-; CHECK-SD-NEXT:    csel w14, w14, w9, lt
-; CHECK-SD-NEXT:    fmov s2, w13
-; CHECK-SD-NEXT:    cmn w14, #128
-; CHECK-SD-NEXT:    csel w14, w14, w8, gt
-; CHECK-SD-NEXT:    cmp w15, #127
-; CHECK-SD-NEXT:    csel w15, w15, w9, lt
-; CHECK-SD-NEXT:    mov v2.s[1], w12
-; CHECK-SD-NEXT:    cmn w15, #128
-; CHECK-SD-NEXT:    csel w15, w15, w8, gt
-; CHECK-SD-NEXT:    cmp w16, #127
-; CHECK-SD-NEXT:    csel w11, w16, w9, lt
-; CHECK-SD-NEXT:    fmov s1, w15
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[5], w10
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    csel w10, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w17, #127
-; CHECK-SD-NEXT:    csel w9, w17, w9, lt
-; CHECK-SD-NEXT:    mov v1.s[1], w14
-; CHECK-SD-NEXT:    cmn w9, #128
-; CHECK-SD-NEXT:    csel w8, w9, w8, gt
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
-; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT:    mov v0.s[1], w10
-; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #128
+; CHECK-SD-NEXT:    csel w8, w8, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[7], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v8f64_v8i8:
@@ -4990,11 +4986,9 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    mov d16, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w10, d0
 ; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
-; CHECK-SD-NEXT:    mov d0, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d1
-; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w9, d16
-; CHECK-SD-NEXT:    fcvtzs w12, d0
 ; CHECK-SD-NEXT:    cmp w9, #127
 ; CHECK-SD-NEXT:    csel w11, w9, w8, lt
 ; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
@@ -5006,115 +5000,94 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
 ; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    csel w10, w12, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w12, w13, w8, lt
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    fcvtzs w11, d1
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    fmov s1, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d2
-; CHECK-SD-NEXT:    mov d2, v3.d[1]
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov w13, v0.s[1]
-; CHECK-SD-NEXT:    mov v1.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[1], w13
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v1.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[5], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s2, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d3
-; CHECK-SD-NEXT:    mov d3, v4.d[1]
-; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-SD-NEXT:    mov v2.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v4.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d4
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d3
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[3], w13
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v2.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s3, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT:    mov d4, v5.d[1]
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[7], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v5.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[8], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d5
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    mov v0.b[5], w13
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w11, d4
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v3.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
-; CHECK-SD-NEXT:    fmov s4, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d5
+; CHECK-SD-NEXT:    mov v0.b[9], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov d5, v6.d[1]
-; CHECK-SD-NEXT:    mov v4.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
-; CHECK-SD-NEXT:    mov v0.b[7], w13
-; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d5
-; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v6.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov w12, v4.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s5, w11
+; CHECK-SD-NEXT:    mov v0.b[10], w11
 ; CHECK-SD-NEXT:    fcvtzs w11, d6
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov d6, v7.d[1]
-; CHECK-SD-NEXT:    mov v0.b[9], w12
-; CHECK-SD-NEXT:    mov v5.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d6
+; CHECK-SD-NEXT:    mov v0.b[11], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v7.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov v0.b[10], v5.b[0]
-; CHECK-SD-NEXT:    mov w12, v5.s[1]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s6, w11
+; CHECK-SD-NEXT:    mov v0.b[12], w11
+; CHECK-SD-NEXT:    cmp w10, #127
 ; CHECK-SD-NEXT:    fcvtzs w11, d7
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov v0.b[11], w12
-; CHECK-SD-NEXT:    mov v6.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[13], w10
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    csel w8, w11, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[14], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w8, #128
-; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT:    mov w11, v6.s[1]
 ; CHECK-SD-NEXT:    csel w8, w8, w9, gt
-; CHECK-SD-NEXT:    fmov s7, w8
-; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v7.s[1], w10
-; CHECK-SD-NEXT:    mov v0.b[14], v7.b[0]
-; CHECK-SD-NEXT:    mov w8, v7.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5179,63 +5152,60 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Jun 4, 2025

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

This helps to remove some inefficient buildvector lowering by converting extract_vector_elt(buildvector) to the original source. It seems to be a general improvement.


Patch is 328.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142729.diff

16 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+4)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir (+2-6)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+1-3)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+271-305)
  • (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+177-211)
  • (modified) llvm/test/CodeGen/AArch64/fptrunc.ll (+9-19)
  • (modified) llvm/test/CodeGen/AArch64/itofp.ll (+112-108)
  • (modified) llvm/test/CodeGen/AArch64/sext.ll (+2-4)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll (+7-12)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+78-182)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+84-150)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+1343-2613)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+140-236)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (+7-13)
  • (modified) llvm/test/CodeGen/AArch64/zext-to-tbl.ll (+78-89)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index b2174487c2fe8..f193fa05dd161 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -449,6 +449,10 @@ class AArch64TargetLowering : public TargetLowering {
   /// Enable aggressive FMA fusion on targets that want it.
   bool enableAggressiveFMAFusion(EVT VT) const override;
 
+  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
+    return true;
+  }
+
   /// Returns the size of the platform's va_list object.
   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b..5189582d0b6ac 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -55,13 +55,9 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
     ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
-    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
-    ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64)
-    ; CHECK-NEXT: $x0 = COPY %extract(s64)
-    ; CHECK-NEXT: $x1 = COPY %extract2(s64)
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+    ; CHECK-NEXT: $x1 = COPY %arg2(s64)
     ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index e2933690c7c55..e81447a1de4b6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -175,10 +175,8 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
     ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
-    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
     ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
-    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
     ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
index c000a8e635bc6..86c0575961a17 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -278,8 +278,8 @@ body:             |
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
     ; CHECK-NEXT: RET_ReallyLR
     %3:_(s8) = G_CONSTANT i8 127
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index a33b1ef569fc3..04dfdedb42752 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -726,7 +726,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-LABEL: test_signed_v3f128_v3i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #128
-; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x23, [sp, #80] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
@@ -734,13 +734,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
-; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    stp q2, q1, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -755,15 +755,15 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    mov w22, #2147483647 // =0x7fffffff
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w22, wzr, w19, ne
+; CHECK-SD-NEXT:    csel w21, wzr, w19, ne
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -775,16 +775,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
-; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w22
-; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w23, wzr, w19, ne
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -796,16 +793,17 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w23
+; CHECK-SD-NEXT:    ldp x30, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[2], w21
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.s[2], w8
 ; CHECK-SD-NEXT:    add sp, sp, #128
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4890,63 +4888,61 @@ define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
 define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-LABEL: test_signed_v8f64_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d4, v3.d[1]
-; CHECK-SD-NEXT:    fcvtzs w11, d3
-; CHECK-SD-NEXT:    mov w9, #127 // =0x7f
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d2
-; CHECK-SD-NEXT:    fcvtzs w15, d1
-; CHECK-SD-NEXT:    fcvtzs w17, d0
-; CHECK-SD-NEXT:    fcvtzs w8, d4
-; CHECK-SD-NEXT:    mov d4, v2.d[1]
-; CHECK-SD-NEXT:    mov d2, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzs w14, d3
-; CHECK-SD-NEXT:    cmp w8, #127
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    fcvtzs w16, d2
-; CHECK-SD-NEXT:    csel w10, w8, w9, lt
-; CHECK-SD-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w9, d4
+; CHECK-SD-NEXT:    cmp w9, #127
+; CHECK-SD-NEXT:    csel w11, w9, w8, lt
+; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    csel w10, w10, w8, gt
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    csel w11, w11, w9, lt
+; CHECK-SD-NEXT:    mov v0.b[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    csel w11, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    csel w12, w12, w9, lt
-; CHECK-SD-NEXT:    fmov s3, w11
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    csel w12, w12, w8, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w13, w13, w9, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    cmn w13, #128
-; CHECK-SD-NEXT:    csel w13, w13, w8, gt
-; CHECK-SD-NEXT:    cmp w14, #127
-; CHECK-SD-NEXT:    csel w14, w14, w9, lt
-; CHECK-SD-NEXT:    fmov s2, w13
-; CHECK-SD-NEXT:    cmn w14, #128
-; CHECK-SD-NEXT:    csel w14, w14, w8, gt
-; CHECK-SD-NEXT:    cmp w15, #127
-; CHECK-SD-NEXT:    csel w15, w15, w9, lt
-; CHECK-SD-NEXT:    mov v2.s[1], w12
-; CHECK-SD-NEXT:    cmn w15, #128
-; CHECK-SD-NEXT:    csel w15, w15, w8, gt
-; CHECK-SD-NEXT:    cmp w16, #127
-; CHECK-SD-NEXT:    csel w11, w16, w9, lt
-; CHECK-SD-NEXT:    fmov s1, w15
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[5], w10
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    csel w10, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w17, #127
-; CHECK-SD-NEXT:    csel w9, w17, w9, lt
-; CHECK-SD-NEXT:    mov v1.s[1], w14
-; CHECK-SD-NEXT:    cmn w9, #128
-; CHECK-SD-NEXT:    csel w8, w9, w8, gt
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
-; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT:    mov v0.s[1], w10
-; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #128
+; CHECK-SD-NEXT:    csel w8, w8, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[7], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v8f64_v8i8:
@@ -4990,11 +4986,9 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    mov d16, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w10, d0
 ; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
-; CHECK-SD-NEXT:    mov d0, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d1
-; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w9, d16
-; CHECK-SD-NEXT:    fcvtzs w12, d0
 ; CHECK-SD-NEXT:    cmp w9, #127
 ; CHECK-SD-NEXT:    csel w11, w9, w8, lt
 ; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
@@ -5006,115 +5000,94 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
 ; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    csel w10, w12, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w12, w13, w8, lt
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    fcvtzs w11, d1
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    fmov s1, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d2
-; CHECK-SD-NEXT:    mov d2, v3.d[1]
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov w13, v0.s[1]
-; CHECK-SD-NEXT:    mov v1.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[1], w13
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v1.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[5], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s2, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d3
-; CHECK-SD-NEXT:    mov d3, v4.d[1]
-; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-SD-NEXT:    mov v2.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v4.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d4
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d3
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[3], w13
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v2.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s3, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT:    mov d4, v5.d[1]
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[7], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v5.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[8], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d5
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    mov v0.b[5], w13
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w11, d4
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v3.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
-; CHECK-SD-NEXT:    fmov s4, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d5
+; CHECK-SD-NEXT:    mov v0.b[9], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov d5, v6.d[1]
-; CHECK-SD-NEXT:    mov v4.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
-; CHECK-SD-NEXT:    mov v0.b[7], w13
-; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d5
-; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v6.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov w12, v4.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s5, w11
+; CHECK-SD-NEXT:    mov v0.b[10], w11
 ; CHECK-SD-NEXT:    fcvtzs w11, d6
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov d6, v7.d[1]
-; CHECK-SD-NEXT:    mov v0.b[9], w12
-; CHECK-SD-NEXT:    mov v5.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d6
+; CHECK-SD-NEXT:    mov v0.b[11], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v7.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov v0.b[10], v5.b[0]
-; CHECK-SD-NEXT:    mov w12, v5.s[1]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s6, w11
+; CHECK-SD-NEXT:    mov v0.b[12], w11
+; CHECK-SD-NEXT:    cmp w10, #127
 ; CHECK-SD-NEXT:    fcvtzs w11, d7
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov v0.b[11], w12
-; CHECK-SD-NEXT:    mov v6.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[13], w10
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    csel w8, w11, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[14], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w8, #128
-; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT:    mov w11, v6.s[1]
 ; CHECK-SD-NEXT:    csel w8, w8, w9, gt
-; CHECK-SD-NEXT:    fmov s7, w8
-; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v7.s[1], w10
-; CHECK-SD-NEXT:    mov v0.b[14], v7.b[0]
-; CHECK-SD-NEXT:    mov w8, v7.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5179,63 +5152,60 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-...
[truncated]

Copy link
Collaborator

@SamTebbs33 SamTebbs33 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants