Skip to content

Commit ed866d9

Browse files
authored
[X86][Combine] Ensure single use chain in extract-load combine (#136520)
The problem is that `SrcBC = peekThroughBitcasts(Src)` does not ensure a single-use chain. As a result, a cast may have multiple users, and instead of replacing a load we introduce a new one. The situation is made worse by the fact that we have replaced the token from the original load, so its correct memory ordering is no longer guaranteed.
1 parent e3eee9e commit ed866d9

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -46268,7 +46268,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
4626846268

4626946269
// If we're extracting a single element from a broadcast load and there are
4627046270
// no other users, just create a single load.
46271-
if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) {
46271+
if (peekThroughOneUseBitcasts(Src).getOpcode() == X86ISD::VBROADCAST_LOAD &&
46272+
SrcBC.hasOneUse()) {
4627246273
auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
4627346274
unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
4627446275
if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&

llvm/test/CodeGen/X86/extractelement-load.ll

+6-3
Original file line numberDiff line numberDiff line change
@@ -573,14 +573,17 @@ define dso_local <2 x float> @multiuse_of_single_value_from_vbroadcast_load(ptr
573573
; X64-AVX-LABEL: multiuse_of_single_value_from_vbroadcast_load:
574574
; X64-AVX: # %bb.0:
575575
; X64-AVX-NEXT: pushq %rbx
576+
; X64-AVX-NEXT: subq $16, %rsp
576577
; X64-AVX-NEXT: movq %rsi, %rbx
577-
; X64-AVX-NEXT: vmovsd 32(%rsi), %xmm0 # xmm0 = mem[0],zero
578-
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
578+
; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
579+
; X64-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
580+
; X64-AVX-NEXT: vmovlps %xmm0, (%rdi)
579581
; X64-AVX-NEXT: vmovaps 32(%rsi), %xmm0
580582
; X64-AVX-NEXT: callq ccosf@PLT
581583
; X64-AVX-NEXT: vmovlps %xmm0, 32(%rbx)
582-
; X64-AVX-NEXT: vmovddup 32(%rbx), %xmm0 # xmm0 = mem[0,0]
584+
; X64-AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
583585
; X64-AVX-NEXT: callq ccosf@PLT
586+
; X64-AVX-NEXT: addq $16, %rsp
584587
; X64-AVX-NEXT: popq %rbx
585588
; X64-AVX-NEXT: retq
586589
%p1 = getelementptr [5 x <2 x float>], ptr %arr, i64 0, i64 3

0 commit comments

Comments
 (0)