Skip to content

Commit 6506952

Browse files
committed
[X86][NFC] Precommit test for llvm#136520
1 parent 6ba704a commit 6506952

File tree

1 file changed

+67
-0
lines changed

1 file changed

+67
-0
lines changed

llvm/test/CodeGen/X86/extractelement-load.ll

+67
Original file line numberDiff line numberDiff line change
@@ -528,3 +528,70 @@ define i32 @main() nounwind {
528528
%r = add i32 %e1, %e2
529529
ret i32 %r
530530
}
531+
532+
; Test for an incorrect combine when a single value is extracted from a VBROADCAST_LOAD.
533+
; The wrong combine makes the second call (%t8) take its argument from the
534+
; result stored by the preceding instructions instead of from %t4.
535+
declare <2 x float> @ccosf(<2 x float>)
536+
define dso_local <2 x float> @multiuse_of_single_value_from_vbroadcast_load(ptr %p, ptr %arr) nounwind {
537+
; X86-SSE2-LABEL: multiuse_of_single_value_from_vbroadcast_load:
538+
; X86-SSE2: # %bb.0:
539+
; X86-SSE2-NEXT: pushl %esi
540+
; X86-SSE2-NEXT: subl $16, %esp
541+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
542+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
543+
; X86-SSE2-NEXT: movups 24(%esi), %xmm0
544+
; X86-SSE2-NEXT: movups %xmm0, (%esp) # 16-byte Spill
545+
; X86-SSE2-NEXT: movhps %xmm0, (%eax)
546+
; X86-SSE2-NEXT: movaps 32(%esi), %xmm0
547+
; X86-SSE2-NEXT: calll ccosf@PLT
548+
; X86-SSE2-NEXT: movlps %xmm0, 32(%esi)
549+
; X86-SSE2-NEXT: movups (%esp), %xmm0 # 16-byte Reload
550+
; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
551+
; X86-SSE2-NEXT: calll ccosf@PLT
552+
; X86-SSE2-NEXT: addl $16, %esp
553+
; X86-SSE2-NEXT: popl %esi
554+
; X86-SSE2-NEXT: retl
555+
;
556+
; X64-SSSE3-LABEL: multiuse_of_single_value_from_vbroadcast_load:
557+
; X64-SSSE3: # %bb.0:
558+
; X64-SSSE3-NEXT: pushq %rbx
559+
; X64-SSSE3-NEXT: subq $16, %rsp
560+
; X64-SSSE3-NEXT: movq %rsi, %rbx
561+
; X64-SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
562+
; X64-SSSE3-NEXT: movapd %xmm0, (%rsp) # 16-byte Spill
563+
; X64-SSSE3-NEXT: movlpd %xmm0, (%rdi)
564+
; X64-SSSE3-NEXT: movaps 32(%rsi), %xmm0
565+
; X64-SSSE3-NEXT: callq ccosf@PLT
566+
; X64-SSSE3-NEXT: movlps %xmm0, 32(%rbx)
567+
; X64-SSSE3-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
568+
; X64-SSSE3-NEXT: callq ccosf@PLT
569+
; X64-SSSE3-NEXT: addq $16, %rsp
570+
; X64-SSSE3-NEXT: popq %rbx
571+
; X64-SSSE3-NEXT: retq
572+
;
573+
; X64-AVX-LABEL: multiuse_of_single_value_from_vbroadcast_load:
574+
; X64-AVX: # %bb.0:
575+
; X64-AVX-NEXT: pushq %rbx
576+
; X64-AVX-NEXT: movq %rsi, %rbx
577+
; X64-AVX-NEXT: vmovsd 32(%rsi), %xmm0 # xmm0 = mem[0],zero
578+
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
579+
; X64-AVX-NEXT: vmovaps 32(%rsi), %xmm0
580+
; X64-AVX-NEXT: callq ccosf@PLT
581+
; X64-AVX-NEXT: vmovlps %xmm0, 32(%rbx)
582+
; X64-AVX-NEXT: vmovddup 32(%rbx), %xmm0 # xmm0 = mem[0,0]
583+
; X64-AVX-NEXT: callq ccosf@PLT
584+
; X64-AVX-NEXT: popq %rbx
585+
; X64-AVX-NEXT: retq
586+
; Both pointers index into the [5 x <2 x float>] array at %arr: %p1 is
; element 3 and %p2 is element 4 (its first float).
%p1 = getelementptr [5 x <2 x float>], ptr %arr, i64 0, i64 3
587+
%p2 = getelementptr inbounds [5 x <2 x float>], ptr %arr, i64 0, i64 4, i32 0
588+
; %t4 is lanes 2-3 of the <4 x float> loaded from %p1; it is stored to %p and
; is also the argument of the second @ccosf call below.
%t3 = load <4 x float>, ptr %p1, align 8
589+
%t4 = shufflevector <4 x float> %t3, <4 x float> poison, <2 x i32> <i32 2, i32 3>
590+
store <2 x float> %t4, ptr %p, align 16
591+
; %t6 is lanes 0-1 of the <4 x float> loaded from %p2 and feeds the first call.
%t5 = load <4 x float>, ptr %p2, align 32
592+
%t6 = shufflevector <4 x float> %t5, <4 x float> poison, <2 x i32> <i32 0, i32 1>
593+
%t7 = call <2 x float> @ccosf(<2 x float> %t6)
594+
; The first call's result is written back through %p2.
store <2 x float> %t7, ptr %p2, align 32
595+
; The second call takes %t4, which was loaded before the store through %p2.
; NOTE(review): the X64-AVX checks above reload the argument from 32(%rbx)
; right after vmovlps stores to it - presumably the miscompile this precommit
; test records; confirm against llvm#136520.
%t8 = call <2 x float> @ccosf(<2 x float> %t4)
596+
ret <2 x float> %t8
597+
}

0 commit comments

Comments
 (0)