-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LoongArch] use TypeWidenVector for most illegal vector types #126456
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac) Changes
Previous action is `TypePromoteInteger`. Patch is 23.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126456.diff 4 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 2282dc8955613ad..dceb3c682d2df49 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6570,3 +6570,12 @@ bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
return true;
}
+
+TargetLoweringBase::LegalizeTypeAction
+LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
+ if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
+ VT.getVectorElementType() != MVT::i1)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index a215ab523874b26..f8d4cef76b9551c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -281,6 +281,7 @@ class LoongArchTargetLowering : public TargetLowering {
Align &PrefAlign) const override;
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const;
+ LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
private:
/// Target-specific function used to lower LoongArch calling conventions.
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
new file mode 100644
index 000000000000000..84e629825d78992
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
@@ -0,0 +1,322 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+
+define void @load_sext_2i8_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_2i8_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.h $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI0_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 56
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <2 x i8>, ptr %ptr
+ %B = sext <2 x i8> %A to <2 x i64>
+ store <2 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_4i8_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 24
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <4 x i8>, ptr %ptr
+ %B = sext <4 x i8> %A to <4 x i32>
+ store <4 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_8i8_to_8i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_8i8_to_8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.h $vr0, $vr0, 8
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <8 x i8>, ptr %ptr
+ %B = sext <8 x i8> %A to <8 x i16>
+ store <8 x i16> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_2i16_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_2i16_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vshuf.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 48
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <2 x i16>, ptr %ptr
+ %B = sext <2 x i16> %A to <2 x i64>
+ store <2 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_4i16_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_4i16_to_4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 16
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <4 x i16>, ptr %ptr
+ %B = sext <4 x i16> %A to <4 x i32>
+ store <4 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_2i32_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_2i32_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
+; CHECK-NEXT: vslli.d $vr0, $vr0, 32
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <2 x i32>, ptr %ptr
+ %B = sext <2 x i32> %A to <2 x i64>
+ store <2 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_16i8_to_16i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT: vilvl.b $vr1, $vr1, $vr1
+; CHECK-NEXT: vslli.h $vr1, $vr1, 8
+; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.h $vr0, $vr0, 8
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <16 x i8>, ptr %ptr
+ %B = sext <16 x i8> %A to <16 x i16>
+ store <16 x i16> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_16i8_to_16i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_1)
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_1)
+; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_2)
+; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI7_2)
+; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr1, $vr2
+; CHECK-NEXT: vslli.w $vr1, $vr1, 24
+; CHECK-NEXT: vsrai.w $vr1, $vr1, 24
+; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_3)
+; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI7_3)
+; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr3, $vr2
+; CHECK-NEXT: vslli.w $vr3, $vr3, 24
+; CHECK-NEXT: vsrai.w $vr3, $vr3, 24
+; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr0, $vr4
+; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr4, $vr2
+; CHECK-NEXT: vslli.w $vr4, $vr4, 24
+; CHECK-NEXT: vsrai.w $vr4, $vr4, 24
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr2
+; CHECK-NEXT: vslli.w $vr0, $vr0, 24
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr4, $a1, 48
+; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vst $vr1, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <16 x i8>, ptr %ptr
+ %B = sext <16 x i8> %A to <16 x i32>
+ store <16 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_16i8_to_16i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT: vshuf4i.b $vr2, $vr0, 14
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1)
+; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI8_1)
+; CHECK-NEXT: vshuf.b $vr2, $vr0, $vr2, $vr1
+; CHECK-NEXT: vslli.d $vr2, $vr2, 56
+; CHECK-NEXT: vsrai.d $vr2, $vr2, 56
+; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_2)
+; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI8_2)
+; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr3, $vr1
+; CHECK-NEXT: vslli.d $vr3, $vr3, 56
+; CHECK-NEXT: vsrai.d $vr3, $vr3, 56
+; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr0, $vr4
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_3)
+; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI8_3)
+; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr4, $vr1
+; CHECK-NEXT: vslli.d $vr4, $vr4, 56
+; CHECK-NEXT: vsrai.d $vr4, $vr4, 56
+; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_4)
+; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI8_4)
+; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr5, $vr1
+; CHECK-NEXT: vslli.d $vr5, $vr5, 56
+; CHECK-NEXT: vsrai.d $vr5, $vr5, 56
+; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr0, $vr6
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_5)
+; CHECK-NEXT: vld $vr7, $a0, %pc_lo12(.LCPI8_5)
+; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr6, $vr1
+; CHECK-NEXT: vslli.d $vr6, $vr6, 56
+; CHECK-NEXT: vsrai.d $vr6, $vr6, 56
+; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr0, $vr7
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_6)
+; CHECK-NEXT: vld $vr8, $a0, %pc_lo12(.LCPI8_6)
+; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr7, $vr1
+; CHECK-NEXT: vslli.d $vr7, $vr7, 56
+; CHECK-NEXT: vsrai.d $vr7, $vr7, 56
+; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr0, $vr8
+; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr8, $vr1
+; CHECK-NEXT: vslli.d $vr8, $vr8, 56
+; CHECK-NEXT: vsrai.d $vr8, $vr8, 56
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 56
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr8, $a1, 112
+; CHECK-NEXT: vst $vr7, $a1, 96
+; CHECK-NEXT: vst $vr6, $a1, 80
+; CHECK-NEXT: vst $vr5, $a1, 64
+; CHECK-NEXT: vst $vr4, $a1, 48
+; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vst $vr2, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <16 x i8>, ptr %ptr
+ %B = sext <16 x i8> %A to <16 x i64>
+ store <16 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_8i16_to_8i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT: vilvl.h $vr1, $vr1, $vr1
+; CHECK-NEXT: vslli.w $vr1, $vr1, 16
+; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 16
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <8 x i16>, ptr %ptr
+ %B = sext <8 x i16> %A to <8 x i32>
+ store <8 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_8i16_to_8i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0)
+; CHECK-NEXT: vshuf4i.h $vr2, $vr0, 14
+; CHECK-NEXT: vori.b $vr3, $vr1, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1)
+; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI10_1)
+; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr2
+; CHECK-NEXT: vslli.d $vr2, $vr3, 48
+; CHECK-NEXT: vsrai.d $vr2, $vr2, 48
+; CHECK-NEXT: vshuf.h $vr4, $vr0, $vr0
+; CHECK-NEXT: vori.b $vr3, $vr1, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_2)
+; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI10_2)
+; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr4
+; CHECK-NEXT: vslli.d $vr3, $vr3, 48
+; CHECK-NEXT: vsrai.d $vr3, $vr3, 48
+; CHECK-NEXT: vshuf.h $vr5, $vr0, $vr0
+; CHECK-NEXT: vori.b $vr4, $vr1, 0
+; CHECK-NEXT: vshuf.h $vr4, $vr0, $vr5
+; CHECK-NEXT: vslli.d $vr4, $vr4, 48
+; CHECK-NEXT: vsrai.d $vr4, $vr4, 48
+; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT: vslli.d $vr0, $vr1, 48
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr4, $a1, 48
+; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vst $vr2, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <8 x i16>, ptr %ptr
+ %B = sext <8 x i16> %A to <8 x i64>
+ store <8 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_sext_4i32_to_4i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
+; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 16
+; CHECK-NEXT: vslli.d $vr1, $vr1, 32
+; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
+; CHECK-NEXT: vslli.d $vr0, $vr0, 32
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <4 x i32>, ptr %ptr
+ %B = sext <4 x i32> %A to <4 x i64>
+ store <4 x i64> %B, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
new file mode 100644
index 000000000000000..40f102bed97a557
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
@@ -0,0 +1,301 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+
+define void @load_zext_2i8_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_2i8_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.h $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI0_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <2 x i8>, ptr %ptr
+ %B = zext <2 x i8> %A to <2 x i64>
+ store <2 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_4i8_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <4 x i8>, ptr %ptr
+ %B = zext <4 x i8> %A to <4 x i32>
+ store <4 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_8i8_to_8i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_8i8_to_8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <8 x i8>, ptr %ptr
+ %B = zext <8 x i8> %A to <8 x i16>
+ store <8 x i16> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_2i16_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_2i16_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <2 x i16>, ptr %ptr
+ %B = zext <2 x i16> %A to <2 x i64>
+ store <2 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_4i16_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_4i16_to_4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <4 x i16>, ptr %ptr
+ %B = zext <4 x i16> %A to <4 x i32>
+ store <4 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_2i32_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_2i32_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI5_0)
+; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+ %A = load <2 x i32>, ptr %ptr
+ %B = zext <2 x i32> %A to <2 x i64>
+ store <2 x i64> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_16i8_to_16i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1)
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_1)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_2)
+; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI6_2)
+; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.b $vr4, 0
+; CHECK-NEXT: vshuf.b $vr1, $vr4, $vr1, $vr2
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr4, $vr3
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <16 x i8>, ptr %ptr
+ %B = zext <16 x i8> %A to <16 x i16>
+ store <16 x i16> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_16i8_to_16i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_1)
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_1)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_2)
+; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI7_2)
+; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.b $vr4, 0
+; CHECK-NEXT: vshuf.b $vr1, $vr4, $vr1, $vr2
+; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_3)
+; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI7_3)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_4)
+; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI7_4)
+; CHECK-NEXT: vshuf.b $vr3, $vr4, $vr3, $vr2
+; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5
+; CHECK-NEXT: vshuf.b $vr2, $vr4, $vr5, $vr2
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr4, $vr6
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr2, $a1, 48
+; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vst $vr1, $a1, 16
+; CHECK-NEXT: ret
+entry:
+ %A = load <16 x i8>, ptr %ptr
+ %B = zext <16 x i8> %A to <16 x i32>
+ store <16 x i32> %B, ptr %dst
+ ret void
+}
+
+define void @load_zext_16i8_to_16i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1)
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI8_1)
+; CHECK-NEXT: vshuf4i.b $vr3, $vr0, 14
+; CHECK-NEXT: vrepli.b $vr4, 0
+; CHECK-NEXT: vshuf.b $vr3, $vr4, $vr3, $vr1
+; CHECK-NEXT: vshuf.b $vr2, $vr0, $vr0, $vr2
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_2)
+; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI8_2)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_3)
+; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI8_3)
+; CHECK-NEXT: vshuf.b $vr2, $vr4, $vr2, $vr1
+; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5
+; CHECK-NEXT: vshuf.b $vr5, $vr4, $vr5, $vr1
+; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr0, $vr6
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_4)
+; CHECK-NEXT: vld $vr7, $a0, %pc_lo12(.LCPI8_4)
+; CHECK-NE...
[truncated]
|
8d03631
to
ea080fb
Compare
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be even better if you could pre-commit the test cases so that we can easily observe the differences in code generation.
I will do it in a new pr soon. |
ea080fb
to
840a177
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks.
…26456) `TypeWidenVector` makes an illegal vector a larger one e.g. in lsx v2i32 -> v4i32 v4i16 -> v8i16 With this we can make good use of `vilvh`, `vilvl` instructions in vector `sext`, `zext` in later pr. Previous action is `TypePromoteInteger`, which replaces integer with a larger one e.g. in lsx v2i32 -> v2i64 v4i16 -> v4i32
…r lasx (#137129) Previous `fp_to_uint/fp_to_sint` patterns for `v4f64 -> v4i32` are wrong. Conversion error was triggered after pr llvm/llvm-project#126456.
…m#137129) Previous `fp_to_uint/fp_to_sint` patterns for `v4f64 -> v4i32` are wrong. Conversion error was triggered after pr llvm#126456.
…m#137129) Previous `fp_to_uint/fp_to_sint` patterns for `v4f64 -> v4i32` are wrong. Conversion error was triggered after pr llvm#126456. (cherry picked from commit b5c7724)
…r lasx (#137129) Previous `fp_to_uint/fp_to_sint` patterns for `v4f64 -> v4i32` are wrong. Conversion error was triggered after pr llvm/llvm-project#126456. (cherry picked from commit b5c7724)
`TypeWidenVector` makes an illegal vector a larger one, e.g. in lsx:
v2i32 -> v4i32
v4i16 -> v8i16
With this we can make good use of `vilvh`, `vilvl` instructions in vector `sext`, `zext` in a later pr. Previous action is `TypePromoteInteger`, which replaces the integer with a larger one, e.g. in lsx:
v2i32 -> v2i64
v4i16 -> v4i32