diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f4f37a894620e..e1a6d22f06165 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13884,7 +13884,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
   int NumEltsPerLane = NumElts / NumLanes;
 
   SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
-  SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
   SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
 
   for (int i = 0; i != NumElts; ++i) {
@@ -13899,10 +13898,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
       return SDValue();
 
     SrcLaneMask[DstLane] = SrcLane;
-    LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
     PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
   }
 
+  // Make sure we set all elements of the lane mask, to avoid undef propagation.
+  SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+  for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+    int SrcLane = SrcLaneMask[DstLane];
+    if (0 <= SrcLane)
+      for (int j = 0; j != NumEltsPerLane; ++j) {
+        LaneMask[(DstLane * NumEltsPerLane) + j] =
+            (SrcLane * NumEltsPerLane) + j;
+      }
+  }
+
   // If we're only shuffling a single lowest lane and the rest are identity
   // then don't bother.
   // TODO - isShuffleMaskInputInPlace could be extended to something like this.
diff --git a/llvm/test/CodeGen/X86/pr40730.ll b/llvm/test/CodeGen/X86/pr40730.ll
new file mode 100644
index 0000000000000..12b372dea33bb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr40730.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shuffle_v8i32_0dcd3f14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; CHECK-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
+; CHECK-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
+; CHECK-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK:      .LCPI1_0:
+; CHECK-NEXT: .quad 60129542157
+; CHECK-NEXT: .quad 60129542157
+; CHECK-NEXT: .quad 68719476736
+; CHECK-NEXT: .quad 60129542157
+
+define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
+; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
+; CHECK-NEXT:    retq
+  %res = shufflevector <8 x i32> %a0, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 14, i32 undef, i32 16>, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
+  ret <8 x i32> %res
+}