Skip to content

Commit 9a77af3

Browse files
authored
[X86] lowerV4F64Shuffle - prefer BLEND before UNPCK shuffle matching (#141073)
Use the same matching order as other 128/256-bit shuffles Fixes regression identified in #139741
1 parent aac843c commit 9a77af3

File tree

3 files changed

+43
-17
lines changed

3 files changed

+43
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16444,15 +16444,14 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  DAG, Subtarget);
   }
 
-  // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
-    return V;
-
   if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Blend;
 
-  // Check if the blend happens to exactly fit that of SHUFPD.
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
+    return V;
+
   if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Op;

llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define void @foo(<2 x float> %0) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
 ; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
 ; CHECK-NEXT:    vmovlps %xmm0, 0
 ; CHECK-NEXT:    vzeroupper

llvm/test/CodeGen/X86/subvector-broadcast.ll

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,19 +1662,46 @@ define <4 x double> @broadcast_v4f64_v2f64_4u61(ptr %vp, <4 x double> %default)
   ret <4 x double> %res
 }
 
+; TODO: prefer vblend vs vunpckh on AVX1 targets
 define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(ptr %vp, <8 x float> %default) {
-; X86-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vbroadcastsd (%eax), %ymm1
-; X86-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X86-NEXT:    retl
+; X86-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:    retl
 ;
-; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X64:       # %bb.0:
-; X64-NEXT:    vbroadcastsd (%rdi), %ymm1
-; X64-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X64-NEXT:    retq
+; X86-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX512-NEXT:    retq
   %vec = load <2 x float>, ptr %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 2, i32 3, i32 undef>
   %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default

0 commit comments

Comments
 (0)