Skip to content

Commit 9e92f77

Browse files
committed
[DAG] visitEXTRACT_SUBVECTOR - accumulate SimplifyDemandedVectorElts demanded elts across all EXTRACT_SUBVECTOR uses (REAPPLIED)
Similar to what is done for visitEXTRACT_VECTOR_ELT - if all uses of a vector are EXTRACT_SUBVECTOR nodes, then determine the accumulated demanded elts across all users and call SimplifyDemandedVectorElts in "AssumeSingleUse" mode. Second attempt, after llvm#133130 was reverted by llvm#133331 because it affected reverted test files.
1 parent 50d4ae4 commit 9e92f77

10 files changed

+80
-75
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25557,8 +25557,31 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
2555725557
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
2555825558
return NarrowBOp;
2555925559

25560-
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25561-
return SDValue(N, 0);
25560+
// If only EXTRACT_SUBVECTOR nodes use the source vector we can
25561+
// simplify it based on the (valid) extractions.
25562+
if (!V.getValueType().isScalableVector() &&
25563+
llvm::all_of(V->users(), [&](SDNode *Use) {
25564+
return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25565+
Use->getOperand(0) == V;
25566+
})) {
25567+
unsigned NumElts = V.getValueType().getVectorNumElements();
25568+
APInt DemandedElts = APInt::getZero(NumElts);
25569+
for (SDNode *User : V->users()) {
25570+
unsigned ExtIdx = User->getConstantOperandVal(1);
25571+
unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
25572+
DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
25573+
}
25574+
if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
25575+
// We simplified the vector operand of this extract subvector. If this
25576+
// extract is not dead, visit it again so it is folded properly.
25577+
if (N->getOpcode() != ISD::DELETED_NODE)
25578+
AddToWorklist(N);
25579+
return SDValue(N, 0);
25580+
}
25581+
} else {
25582+
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25583+
return SDValue(N, 0);
25584+
}
2556225585

2556325586
return SDValue();
2556425587
}

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2573,7 +2573,6 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
25732573
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
25742574
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
25752575
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2576-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
25772576
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
25782577
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
25792578
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2591,7 +2590,6 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
25912590
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
25922591
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
25932592
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2594-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
25952593
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
25962594
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
25972595
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2837,7 +2835,6 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28372835
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28382836
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28392837
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2840-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
28412838
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28422839
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28432840
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2855,7 +2852,6 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28552852
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28562853
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28572854
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2858-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
28592855
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28602856
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28612857
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3100,7 +3096,6 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31003096
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31013097
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31023098
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3103-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
31043099
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31053100
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31063101
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3118,7 +3113,6 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31183113
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31193114
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31203115
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3121-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
31223116
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31233117
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31243118
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3614,10 +3608,9 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
36143608
; AVX512F: # %bb.0:
36153609
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
36163610
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3611+
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
36173612
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
36183613
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3619-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3620-
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
36213614
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
36223615
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
36233616
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3631,10 +3624,9 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
36313624
; AVX512DQ: # %bb.0:
36323625
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
36333626
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3627+
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
36343628
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
36353629
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3636-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3637-
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
36383630
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
36393631
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
36403632
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3868,10 +3860,9 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38683860
; AVX512F: # %bb.0:
38693861
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
38703862
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3863+
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38713864
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38723865
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3873-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3874-
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38753866
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
38763867
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
38773868
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3885,10 +3876,9 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38853876
; AVX512DQ: # %bb.0:
38863877
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
38873878
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3879+
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38883880
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38893881
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3890-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3891-
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38923882
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
38933883
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
38943884
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1

llvm/test/CodeGen/X86/pr42905.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,10 @@
44
define <4 x double> @autogen_SD30452(i1 %L230) {
55
; CHECK-LABEL: autogen_SD30452:
66
; CHECK: # %bb.0: # %BB
7-
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829]
8-
; CHECK-NEXT: movq %xmm0, %rax
9-
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
10-
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
11-
; CHECK-NEXT: movq %xmm2, %rax
12-
; CHECK-NEXT: xorps %xmm2, %xmm2
13-
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
14-
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
15-
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
16-
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
7+
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [151829,151829]
8+
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
9+
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
10+
; CHECK-NEXT: movaps %xmm0, %xmm1
1711
; CHECK-NEXT: retq
1812
BB:
1913
%I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3

llvm/test/CodeGen/X86/sad.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -927,8 +927,7 @@ define dso_local i32 @sad_nonloop_64i8(ptr nocapture readonly %p, i64, ptr nocap
927927
; AVX512F-NEXT: vmovdqu 32(%rdi), %ymm1
928928
; AVX512F-NEXT: vpsadbw 32(%rdx), %ymm1, %ymm1
929929
; AVX512F-NEXT: vpsadbw (%rdx), %ymm0, %ymm0
930-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
931-
; AVX512F-NEXT: vpaddq %zmm1, %zmm0, %zmm0
930+
; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm0
932931
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
933932
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm0
934933
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2079,7 +2079,7 @@ define void @store_i16_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
20792079
; AVX-NEXT: vpsrld $16, %xmm8, %xmm10
20802080
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm10 = xmm3[2],xmm10[2],xmm3[3],xmm10[3]
20812081
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm12 = xmm8[4],xmm3[4],xmm8[5],xmm3[5],xmm8[6],xmm3[6],xmm8[7],xmm3[7]
2082-
; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm12[2,2,2,2,4,5,6,7]
2082+
; AVX-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[1,1,2,3]
20832083
; AVX-NEXT: vpshufhw {{.*#+}} xmm12 = xmm12[0,1,2,3,4,5,5,4]
20842084
; AVX-NEXT: vinsertf128 $1, %xmm10, %ymm12, %ymm10
20852085
; AVX-NEXT: vandnps %ymm10, %ymm6, %ymm6

llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ define float @test_v16f32(<16 x float> %a0) {
170170
; AVX512-LABEL: test_v16f32:
171171
; AVX512: # %bb.0:
172172
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
173-
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
173+
; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
174174
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
175175
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
176176
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -264,7 +264,7 @@ define double @test_v8f64(<8 x double> %a0) {
264264
; AVX512-LABEL: test_v8f64:
265265
; AVX512: # %bb.0:
266266
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
267-
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
267+
; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
268268
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
269269
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
270270
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -306,7 +306,7 @@ define double @test_v16f64(<16 x double> %a0) {
306306
; AVX512: # %bb.0:
307307
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
308308
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
309-
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
309+
; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
310310
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
311311
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
312312
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]

llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
175175
; AVX512-LABEL: test_v16f32:
176176
; AVX512: # %bb.0:
177177
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
178-
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
178+
; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
179179
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
180180
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
181181
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -311,7 +311,7 @@ define double @test_v8f64(<8 x double> %a0) {
311311
; AVX512-LABEL: test_v8f64:
312312
; AVX512: # %bb.0:
313313
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
314-
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
314+
; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
315315
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
316316
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
317317
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -353,7 +353,7 @@ define double @test_v16f64(<16 x double> %a0) {
353353
; AVX512: # %bb.0:
354354
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
355355
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
356-
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
356+
; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
357357
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
358358
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
359359
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]

llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ define float @test_v16f32(<16 x float> %a0) {
216216
; AVX512-LABEL: test_v16f32:
217217
; AVX512: # %bb.0:
218218
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
219-
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
219+
; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
220220
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
221221
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
222222
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -310,7 +310,7 @@ define double @test_v8f64(<8 x double> %a0) {
310310
; AVX512-LABEL: test_v8f64:
311311
; AVX512: # %bb.0:
312312
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
313-
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
313+
; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
314314
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
315315
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
316316
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -352,7 +352,7 @@ define double @test_v16f64(<16 x double> %a0) {
352352
; AVX512: # %bb.0:
353353
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
354354
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
355-
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
355+
; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
356356
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
357357
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
358358
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]

llvm/test/CodeGen/X86/vector-reduce-mul.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -357,14 +357,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
357357
; AVX512BW-LABEL: test_v8i64:
358358
; AVX512BW: # %bb.0:
359359
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
360-
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
361-
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
362-
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
363-
; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
364-
; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
365-
; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
366-
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
367-
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
360+
; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
361+
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
362+
; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
363+
; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
364+
; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
365+
; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
366+
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
367+
; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
368368
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
369369
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
370370
; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -390,14 +390,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
390390
; AVX512BWVL-LABEL: test_v8i64:
391391
; AVX512BWVL: # %bb.0:
392392
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
393-
; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
394-
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
395-
; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
396-
; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
397-
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
398-
; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
399-
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
400-
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
393+
; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
394+
; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
395+
; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
396+
; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
397+
; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
398+
; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
399+
; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
400+
; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
401401
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
402402
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
403403
; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -667,14 +667,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
667667
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
668668
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
669669
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
670-
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
671-
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
672-
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
673-
; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
674-
; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
675-
; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
676-
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
677-
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
670+
; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
671+
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
672+
; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
673+
; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
674+
; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
675+
; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
676+
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
677+
; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
678678
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
679679
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
680680
; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -708,14 +708,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
708708
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
709709
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
710710
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
711-
; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
712-
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
713-
; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
714-
; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
715-
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
716-
; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
717-
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
718-
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
711+
; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
712+
; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
713+
; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
714+
; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
715+
; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
716+
; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
717+
; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
718+
; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
719719
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
720720
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
721721
; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3862,15 +3862,14 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
38623862
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
38633863
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
38643864
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
3865-
; AVX-NEXT: vbroadcastss (%rdi), %ymm3
3865+
; AVX-NEXT: vbroadcastss (%rdi), %xmm3
38663866
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm0
3867+
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
38673868
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
38683869
; AVX-NEXT: vpaddb 16(%rsi), %xmm2, %xmm2
3869-
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3870-
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
38713870
; AVX-NEXT: vmovdqa %xmm2, 16(%rdx)
3871+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
38723872
; AVX-NEXT: vmovdqa %xmm0, 32(%rdx)
3873-
; AVX-NEXT: vzeroupper
38743873
; AVX-NEXT: retq
38753874
;
38763875
; AVX2-SLOW-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
@@ -4116,7 +4115,7 @@ define void @vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2(ptr %i
41164115
; AVX: # %bb.0:
41174116
; AVX-NEXT: vmovdqa 48(%rdi), %xmm0
41184117
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1],xmm0[2,3,4,5,6,7]
4119-
; AVX-NEXT: vbroadcastss (%rdi), %ymm1
4118+
; AVX-NEXT: vbroadcastss (%rdi), %xmm1
41204119
; AVX-NEXT: vmovaps 32(%rsi), %ymm2
41214120
; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
41224121
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]

0 commit comments

Comments
 (0)