Skip to content

Commit 6d3ec56

Browse files
committed
[X86] combineExtractWithShuffle - use combineExtractFromVectorLoad to extract scalar load from shuffled vector load
Improves #85419
1 parent 9f84594 commit 6d3ec56

File tree

4 files changed

+397
-664
lines changed

4 files changed

+397
-664
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44234,6 +44234,12 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
4423444234
if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
4423544235
return DAG.getZExtOrTrunc(V, dl, VT);
4423644236

44237+
if (N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && ExtractVT == SrcVT &&
44238+
SrcOp.getValueType() == SrcVT)
44239+
if (SDValue V =
44240+
combineExtractFromVectorLoad(N, SrcOp, ExtractIdx, dl, DAG, DCI))
44241+
return V;
44242+
4423744243
return SDValue();
4423844244
}
4423944245

llvm/test/CodeGen/X86/extractelement-load.ll

Lines changed: 54 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -10,20 +10,13 @@ define i32 @t(ptr %val) nounwind {
1010
; X86-SSE2-LABEL: t:
1111
; X86-SSE2: # %bb.0:
1212
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
13-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
14-
; X86-SSE2-NEXT: movd %xmm0, %eax
13+
; X86-SSE2-NEXT: movl 8(%eax), %eax
1514
; X86-SSE2-NEXT: retl
1615
;
17-
; X64-SSSE3-LABEL: t:
18-
; X64-SSSE3: # %bb.0:
19-
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
20-
; X64-SSSE3-NEXT: movd %xmm0, %eax
21-
; X64-SSSE3-NEXT: retq
22-
;
23-
; X64-AVX-LABEL: t:
24-
; X64-AVX: # %bb.0:
25-
; X64-AVX-NEXT: movl 8(%rdi), %eax
26-
; X64-AVX-NEXT: retq
16+
; X64-LABEL: t:
17+
; X64: # %bb.0:
18+
; X64-NEXT: movl 8(%rdi), %eax
19+
; X64-NEXT: retq
2720
%tmp2 = load <2 x i64>, ptr %val, align 16 ; <<2 x i64>> [#uses=1]
2821
%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
2922
%tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1]
@@ -286,15 +279,14 @@ entry:
286279
define i32 @PR85419(ptr %p0) {
287280
; X86-SSE2-LABEL: PR85419:
288281
; X86-SSE2: # %bb.0:
289-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
290-
; X86-SSE2-NEXT: movdqa (%eax), %xmm0
291-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
292-
; X86-SSE2-NEXT: movd %xmm1, %ecx
293-
; X86-SSE2-NEXT: xorl %edx, %edx
294-
; X86-SSE2-NEXT: orl (%eax), %ecx
295-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
296-
; X86-SSE2-NEXT: movd %xmm0, %eax
297-
; X86-SSE2-NEXT: cmovel %edx, %eax
282+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
283+
; X86-SSE2-NEXT: movl (%ecx), %edx
284+
; X86-SSE2-NEXT: xorl %eax, %eax
285+
; X86-SSE2-NEXT: orl 4(%ecx), %edx
286+
; X86-SSE2-NEXT: je .LBB8_2
287+
; X86-SSE2-NEXT: # %bb.1:
288+
; X86-SSE2-NEXT: movl 8(%ecx), %eax
289+
; X86-SSE2-NEXT: .LBB8_2:
298290
; X86-SSE2-NEXT: retl
299291
;
300292
; X64-SSSE3-LABEL: PR85419:
@@ -443,35 +435,35 @@ define i32 @main() nounwind {
443435
; X86-SSE2: # %bb.0:
444436
; X86-SSE2-NEXT: pushl %ebp
445437
; X86-SSE2-NEXT: movl %esp, %ebp
438+
; X86-SSE2-NEXT: pushl %edi
446439
; X86-SSE2-NEXT: pushl %esi
447440
; X86-SSE2-NEXT: andl $-32, %esp
448441
; X86-SSE2-NEXT: subl $64, %esp
449-
; X86-SSE2-NEXT: movdqa zero, %xmm0
450-
; X86-SSE2-NEXT: movaps n1+16, %xmm1
451-
; X86-SSE2-NEXT: movaps n1, %xmm2
452-
; X86-SSE2-NEXT: movaps %xmm2, zero
453-
; X86-SSE2-NEXT: movaps %xmm1, zero+16
454-
; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
455-
; X86-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
456-
; X86-SSE2-NEXT: movaps %xmm1, (%esp)
457-
; X86-SSE2-NEXT: movdqa (%esp), %xmm1
458-
; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2
459-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
460-
; X86-SSE2-NEXT: movd %xmm2, %eax
461-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
462-
; X86-SSE2-NEXT: movd %xmm2, %ecx
442+
; X86-SSE2-NEXT: movaps n1+16, %xmm0
443+
; X86-SSE2-NEXT: movaps n1, %xmm1
444+
; X86-SSE2-NEXT: movl zero+4, %ecx
445+
; X86-SSE2-NEXT: movl zero+8, %eax
446+
; X86-SSE2-NEXT: movaps %xmm1, zero
447+
; X86-SSE2-NEXT: movaps %xmm0, zero+16
448+
; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2]
449+
; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
450+
; X86-SSE2-NEXT: movaps %xmm0, (%esp)
451+
; X86-SSE2-NEXT: movdqa (%esp), %xmm0
452+
; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm1
453+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
454+
; X86-SSE2-NEXT: movd %xmm1, %esi
463455
; X86-SSE2-NEXT: xorl %edx, %edx
464-
; X86-SSE2-NEXT: divl %ecx
465-
; X86-SSE2-NEXT: movl %eax, %ecx
456+
; X86-SSE2-NEXT: divl %esi
457+
; X86-SSE2-NEXT: movl %eax, %esi
466458
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
467-
; X86-SSE2-NEXT: movd %xmm0, %eax
468-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
469-
; X86-SSE2-NEXT: movd %xmm0, %esi
459+
; X86-SSE2-NEXT: movd %xmm0, %edi
460+
; X86-SSE2-NEXT: movl %ecx, %eax
470461
; X86-SSE2-NEXT: xorl %edx, %edx
471-
; X86-SSE2-NEXT: divl %esi
472-
; X86-SSE2-NEXT: addl %ecx, %eax
473-
; X86-SSE2-NEXT: leal -4(%ebp), %esp
462+
; X86-SSE2-NEXT: divl %edi
463+
; X86-SSE2-NEXT: addl %esi, %eax
464+
; X86-SSE2-NEXT: leal -8(%ebp), %esp
474465
; X86-SSE2-NEXT: popl %esi
466+
; X86-SSE2-NEXT: popl %edi
475467
; X86-SSE2-NEXT: popl %ebp
476468
; X86-SSE2-NEXT: retl
477469
;
@@ -481,31 +473,29 @@ define i32 @main() nounwind {
481473
; X64-SSSE3-NEXT: movq %rsp, %rbp
482474
; X64-SSSE3-NEXT: andq $-32, %rsp
483475
; X64-SSSE3-NEXT: subq $64, %rsp
484-
; X64-SSSE3-NEXT: movdqa zero(%rip), %xmm0
485476
; X64-SSSE3-NEXT: movq n1@GOTPCREL(%rip), %rax
486-
; X64-SSSE3-NEXT: movaps (%rax), %xmm1
487-
; X64-SSSE3-NEXT: movaps 16(%rax), %xmm2
488-
; X64-SSSE3-NEXT: movaps %xmm1, zero(%rip)
489-
; X64-SSSE3-NEXT: movaps %xmm2, zero+16(%rip)
490-
; X64-SSSE3-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
491-
; X64-SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
492-
; X64-SSSE3-NEXT: movaps %xmm1, (%rsp)
493-
; X64-SSSE3-NEXT: movdqa (%rsp), %xmm1
494-
; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
495-
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
496-
; X64-SSSE3-NEXT: movd %xmm2, %eax
497-
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
498-
; X64-SSSE3-NEXT: movd %xmm2, %ecx
477+
; X64-SSSE3-NEXT: movaps (%rax), %xmm0
478+
; X64-SSSE3-NEXT: movaps 16(%rax), %xmm1
479+
; X64-SSSE3-NEXT: movl zero+4(%rip), %ecx
480+
; X64-SSSE3-NEXT: movl zero+8(%rip), %eax
481+
; X64-SSSE3-NEXT: movaps %xmm0, zero(%rip)
482+
; X64-SSSE3-NEXT: movaps %xmm1, zero+16(%rip)
483+
; X64-SSSE3-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2]
484+
; X64-SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
485+
; X64-SSSE3-NEXT: movaps %xmm0, (%rsp)
486+
; X64-SSSE3-NEXT: movdqa (%rsp), %xmm0
487+
; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
488+
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
489+
; X64-SSSE3-NEXT: movd %xmm1, %esi
499490
; X64-SSSE3-NEXT: xorl %edx, %edx
500-
; X64-SSSE3-NEXT: divl %ecx
501-
; X64-SSSE3-NEXT: movl %eax, %ecx
491+
; X64-SSSE3-NEXT: divl %esi
492+
; X64-SSSE3-NEXT: movl %eax, %esi
502493
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
503-
; X64-SSSE3-NEXT: movd %xmm0, %eax
504-
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
505-
; X64-SSSE3-NEXT: movd %xmm0, %esi
494+
; X64-SSSE3-NEXT: movd %xmm0, %edi
495+
; X64-SSSE3-NEXT: movl %ecx, %eax
506496
; X64-SSSE3-NEXT: xorl %edx, %edx
507-
; X64-SSSE3-NEXT: divl %esi
508-
; X64-SSSE3-NEXT: addl %ecx, %eax
497+
; X64-SSSE3-NEXT: divl %edi
498+
; X64-SSSE3-NEXT: addl %esi, %eax
509499
; X64-SSSE3-NEXT: movq %rbp, %rsp
510500
; X64-SSSE3-NEXT: popq %rbp
511501
; X64-SSSE3-NEXT: retq

0 commit comments

Comments
 (0)