Skip to content

[LLVM][SVE] Add isel for bfloat based (de)interleave operations. #128875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 27, 2025

Conversation

paulwalker-arm
Copy link
Collaborator

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Feb 26, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/128875.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2)
  • (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+2)
  • (modified) llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll (+35)
  • (modified) llvm/test/CodeGen/AArch64/sve-vector-interleave.ll (+33)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b00aa11f8499d..35b222e2d55eb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1702,6 +1702,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
 
       if (Subtarget->hasSVEB16B16()) {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e443c5ab150bd..5af2f98f18132 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3079,6 +3079,8 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
   def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 
   def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4bf16, op, nxv4bf16, nxv4bf16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2bf16, op, nxv2bf16, nxv2bf16, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
index fd1365d56fee4..86b840b38a550 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -70,6 +70,41 @@ define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f
   ret {<vscale x 2 x double>, <vscale x 2 x double>}   %retval
 }
 
+define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
+; CHECK-NEXT:    uzp2 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z2.s
+; CHECK-NEXT:    ret
+  %retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
+  ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>}   %retval
+}
+
+define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
+; CHECK-NEXT:    uzp2 z2.h, z0.h, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z1.h
+; CHECK-NEXT:    uunpklo z1.s, z2.h
+; CHECK-NEXT:    ret
+  %retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
+  ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>}   %retval
+}
+
+define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    uzp2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
+  ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>}   %retval
+}
+
 ; Integers
 
 define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
index e2c3b0abe21aa..220697cbcbbba 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -67,6 +67,39 @@ define <vscale x 4 x double> @interleave2_nxv4f64(<vscale x 2 x double> %vec0, <
   ret  <vscale x 4 x double>   %retval
 }
 
+define <vscale x 4 x bfloat> @interleave2_nxv4bf16(<vscale x 2 x bfloat> %vec0, <vscale x 2 x bfloat> %vec1) {
+; CHECK-LABEL: interleave2_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %vec0, <vscale x 2 x bfloat> %vec1)
+  ret  <vscale x 4 x bfloat>   %retval
+}
+
+define <vscale x 8 x bfloat> @interleave2_nxv8bf16(<vscale x 4 x bfloat> %vec0, <vscale x 4 x bfloat> %vec1) {
+; CHECK-LABEL: interleave2_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %vec0, <vscale x 4 x bfloat> %vec1)
+  ret  <vscale x 8 x bfloat>   %retval
+}
+
+define <vscale x 16 x bfloat> @interleave2_nxv16bf16(<vscale x 8 x bfloat> %vec0, <vscale x 8 x bfloat> %vec1) {
+; CHECK-LABEL: interleave2_nxv16bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %vec0, <vscale x 8 x bfloat> %vec1)
+  ret  <vscale x 16 x bfloat>   %retval
+}
+
 ; Integers
 
 define <vscale x 32 x i8> @interleave2_nxv32i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1) {

Copy link
Contributor

@david-arm david-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: There seems to be excess whitespace in %retval and similarly for the other tests in both files. Perhaps remove some whitespace before landing?

Copy link
Collaborator

@huntergr-arm huntergr-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@paulwalker-arm paulwalker-arm merged commit 63caaa2 into llvm:main Feb 27, 2025
11 checks passed
@paulwalker-arm paulwalker-arm deleted the sve-bfloat-lowering branch February 27, 2025 11:05
joaosaffran pushed a commit to joaosaffran/llvm-project that referenced this pull request Mar 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants