From 81e63f9e0c4b86ca1a00be7aeeffb1519a74226e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 20 Dec 2024 09:55:11 +0000 Subject: [PATCH] [CostModel][X86] getShuffleCost - use processShuffleMasks to split SK_PermuteTwoSrc shuffles to legal types (#120599) processShuffleMasks can now correctly handle 2 src shuffles, so we can use the existing SK_PermuteSingleSrc splitting cost logic to handle SK_PermuteTwoSrc as well and correctly recognise the number of active subvectors per legalised shuffle. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 11 +- .../X86/shuffle-concat_subvector-codesize.ll | 108 ++++----- .../X86/shuffle-concat_subvector-latency.ll | 108 ++++----- .../shuffle-concat_subvector-sizelatency.ll | 108 ++++----- .../CostModel/X86/shuffle-concat_subvector.ll | 108 ++++----- .../X86/shuffle-insert_subvector-codesize.ll | 152 ++++++------- .../X86/shuffle-insert_subvector-latency.ll | 152 ++++++------- .../shuffle-insert_subvector-sizelatency.ll | 152 ++++++------- .../CostModel/X86/shuffle-insert_subvector.ll | 152 ++++++------- .../CostModel/X86/shuffle-two-src-codesize.ll | 210 +++++++++++------- .../X86/shuffle-two-src-fp16-codesize.ll | 2 +- .../X86/shuffle-two-src-fp16-latency.ll | 2 +- .../X86/shuffle-two-src-fp16-sizelatency.ll | 2 +- .../CostModel/X86/shuffle-two-src-fp16.ll | 2 +- .../CostModel/X86/shuffle-two-src-latency.ll | 210 +++++++++++------- .../X86/shuffle-two-src-sizelatency.ll | 210 +++++++++++------- .../Analysis/CostModel/X86/shuffle-two-src.ll | 210 +++++++++++------- .../Transforms/PhaseOrdering/X86/pr94546.ll | 39 ++-- .../SLPVectorizer/X86/horizontal-minmax.ll | 19 +- .../X86/minbitwidth-transformed-operand.ll | 7 +- .../VectorCombine/X86/shuffle-of-casts.ll | 16 +- .../VectorCombine/X86/shuffle-of-shuffles.ll | 27 ++- 22 files changed, 1073 insertions(+), 934 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 54c9998c0ead2..808f48eb92a61 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1698,7 +1698,8 @@ InstructionCost X86TTIImpl::getShuffleCost( // We are going to permute multiple sources and the result will be in multiple // destinations. Providing an accurate cost only for splits where the element // type remains the same. - if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) { + if ((Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_PermuteTwoSrc) && + LT.first != 1) { MVT LegalVT = LT.second; if (LegalVT.isVector() && LegalVT.getVectorElementType().getSizeInBits() == @@ -1784,14 +1785,6 @@ InstructionCost X86TTIImpl::getShuffleCost( return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp); } - // For 2-input shuffles, we must account for splitting the 2 inputs into many. - if (Kind == TTI::SK_PermuteTwoSrc && !IsInLaneShuffle && LT.first != 1) { - // We assume that source and destination have the same vector type. - InstructionCost NumOfDests = LT.first; - InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1; - LT.first = NumOfDests * NumOfShufflesPerDest; - } - static const CostTblEntry AVX512VBMIShuffleTbl[] = { {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll index c78023e24572c..176a794ea666f 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,24 +77,17 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -113,24 +106,17 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -147,19 +133,19 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE-LABEL: 'test_vXi16' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> @@ -190,19 +176,19 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE-LABEL: 'test_vXi8' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll index 78aaf6e15cd2d..a99b1f0d7dbe8 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,24 +77,17 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -113,24 +106,17 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -147,19 +133,19 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE-LABEL: 'test_vXi16' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> @@ -190,19 +176,19 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE-LABEL: 'test_vXi8' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll index 4edfa8c1384d4..85996551710ee 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,24 +77,17 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -113,24 +106,17 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -147,19 +133,19 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE-LABEL: 'test_vXi16' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> @@ -190,19 +176,19 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE-LABEL: 'test_vXi8' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll index 3d5c2cfb2143d..d530d11432d96 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -77,24 +77,17 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -113,24 +106,17 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -147,19 +133,19 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'test_vXi16' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> @@ -190,19 +176,19 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'test_vXi8' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll index 57f72056f9c8d..4e4235198b45e 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll @@ -143,19 +143,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -174,19 +174,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -205,19 +205,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -242,13 +242,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -273,13 +273,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -364,19 +364,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -395,19 +395,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -426,19 +426,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -463,13 +463,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -494,13 +494,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -595,17 +595,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -630,17 +630,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -665,17 +665,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll index ae91a70f67aee..61978badb34d4 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll @@ -143,19 +143,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -174,19 +174,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -205,19 +205,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -242,13 +242,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -273,13 +273,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -364,19 +364,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -395,19 +395,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -426,19 +426,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -463,13 +463,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -494,13 +494,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -595,17 +595,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -630,17 +630,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -665,17 +665,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll index c84b2847cf3e9..94e3bc3a610bd 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll @@ -143,19 +143,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -174,19 +174,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -205,19 +205,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -242,13 +242,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -273,13 +273,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -364,19 +364,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -395,19 +395,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -426,19 +426,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -463,13 +463,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -494,13 +494,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -595,17 +595,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -630,17 +630,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -665,17 +665,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll index c4cbd6141d1f0..fe3e61d23397d 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -143,19 +143,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -174,19 +174,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -205,19 +205,19 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -242,13 +242,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -273,13 +273,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> @@ -364,19 +364,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -395,19 +395,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -426,19 +426,19 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -463,13 +463,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -494,13 +494,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> @@ -595,17 +595,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -630,17 +630,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -665,17 +665,17 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll index 6b2029e71a6e1..027af628ea322 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll @@ -3,15 +3,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop | FileCheck %s -check-prefixes=XOP -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 ; ; Verify the cost model for 2 src shuffles @@ -20,30 +20,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> @@ -56,30 +63,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> @@ -90,36 +104,52 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, } define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -127,7 +157,7 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> @@ -139,36 +169,52 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr } define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { -; SSE-LABEL: 'test_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -176,7 +222,7 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> @@ -192,27 +238,27 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi16' @@ -220,8 +266,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' @@ -229,8 +275,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' @@ -238,8 +284,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' @@ -248,7 +294,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' @@ -257,7 +303,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' @@ -266,7 +312,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> @@ -284,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -293,8 +339,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -302,8 +348,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -312,7 +358,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -321,7 +367,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -330,7 +376,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll index ae05855337b6c..dc7e8292c6eef 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll @@ -6,7 +6,7 @@ define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %sr ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll index ca13511c32707..8ad1fa46f59e6 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll @@ -6,7 +6,7 @@ define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %sr ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll index 5312e8a87930e..dc7f4c04d0f35 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll @@ -6,7 +6,7 @@ define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %sr ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16.ll index 9e444b6888a49..8ed785453d4e0 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16.ll @@ -6,7 +6,7 @@ define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %sr ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll index 1c07247914263..f9f045f3a172b 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll @@ -3,15 +3,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop | FileCheck %s -check-prefixes=XOP -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 ; ; Verify the cost model for 2 src shuffles @@ -20,30 +20,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> @@ -56,30 +63,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> @@ -90,36 +104,52 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, } define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -127,7 +157,7 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> @@ -139,36 +169,52 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr } define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { -; SSE-LABEL: 'test_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -176,7 +222,7 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> @@ -192,27 +238,27 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi16' @@ -220,8 +266,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' @@ -229,8 +275,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' @@ -238,8 +284,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' @@ -248,7 +294,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' @@ -257,7 +303,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' @@ -266,7 +312,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> @@ -284,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -293,8 +339,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -302,8 +348,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -312,7 +358,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -321,7 +367,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -330,7 +376,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll index 8f40881800687..76690afecabdd 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll @@ -3,15 +3,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop | FileCheck %s -check-prefixes=XOP -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 ; ; Verify the cost model for 2 src shuffles @@ -20,30 +20,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> @@ -56,30 +63,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> @@ -90,36 +104,52 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, } define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -127,7 +157,7 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> @@ -139,36 +169,52 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr } define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { -; SSE-LABEL: 'test_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -176,7 +222,7 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> @@ -192,27 +238,27 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi16' @@ -220,8 +266,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' @@ -229,8 +275,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' @@ -238,8 +284,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' @@ -248,7 +294,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' @@ -257,7 +303,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' @@ -266,7 +312,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> @@ -284,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -293,8 +339,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -302,8 +348,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -312,7 +358,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -321,7 +367,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -330,7 +376,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll index 32c06d4c44cec..034ec0acf79d9 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -3,15 +3,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+xop | FileCheck %s -check-prefixes=XOP -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 ; ; Verify the cost model for 2 src shuffles @@ -20,30 +20,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> @@ -56,30 +63,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> @@ -90,36 +104,52 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, } define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -127,7 +157,7 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> @@ -139,36 +169,52 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr } define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { -; SSE-LABEL: 'test_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -176,7 +222,7 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> @@ -192,27 +238,27 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'test_vXi16' @@ -220,8 +266,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' @@ -229,8 +275,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' @@ -238,8 +284,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' @@ -248,7 +294,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' @@ -257,7 +303,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' @@ -266,7 +312,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> @@ -284,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -293,8 +339,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -302,8 +348,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -312,7 +358,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -321,7 +367,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -330,7 +376,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll index cbdafde5e421b..354a988416c3e 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll @@ -1,19 +1,32 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default" -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default" -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default" -S < %s | FileCheck %s -; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default" -S < %s | FileCheck %s +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE4 +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE4 +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @PR94546( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] +; SSE2-LABEL: @PR94546( +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE2-NEXT: ret <4 x double> [[TMP4]] +; +; SSE4-LABEL: @PR94546( +; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> +; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; SSE4-NEXT: ret <4 x double> [[TMP3]] +; +; AVX-LABEL: @PR94546( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: ret <4 x double> [[TMP3]] ; %vecext = extractelement <4 x double> %a, i32 0 %vecext1 = extractelement <4 x double> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 0bc91d42b0f13..7626eea85f219 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -972,22 +972,15 @@ define i32 @maxi8_wrong_parent(i32) { ; SSE2-NEXT: ret i32 [[TMP23]] ; ; SSE4-LABEL: @maxi8_wrong_parent( -; SSE4-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16 -; SSE4-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4 +; SSE4-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16 ; SSE4-NEXT: br label [[PP:%.*]] ; SSE4: pp: ; SSE4-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 -; SSE4-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; SSE4-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4 -; SSE4-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) -; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP5]] -; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP5]] -; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP6]], [[TMP2]] -; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP6]], i32 [[TMP2]] -; SSE4-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]] -; SSE4-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]] -; SSE4-NEXT: [[OP_RDX6:%.*]] = icmp sgt i32 [[OP_RDX5]], [[TMP3]] -; SSE4-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[TMP3]] +; SSE4-NEXT: [[TMP8:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 +; SSE4-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP4]], i64 0) +; SSE4-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP8]], i64 4) +; SSE4-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6) +; SSE4-NEXT: [[OP_RDX7:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]]) ; SSE4-NEXT: ret i32 [[OP_RDX7]] ; ; AVX-LABEL: @maxi8_wrong_parent( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 57b5d2af48ee6..76104efc1bb78 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -5,12 +5,11 @@ define void @test(i64 %d.promoted.i) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> , i64 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[D_PROMOTED_I]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i1> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i1> , <16 x i1> [[TMP4]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v2i1(<16 x i1> poison, <2 x i1> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i1> [[TMP4]], <16 x i1> , <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i1> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 179e11136c883..fba4b60ef417b 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -165,11 +165,17 @@ define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> ; commuted vector concatenation define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: define <16 x i32> @rconcat_sext_v8i16_v16i32( -; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> -; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> -; CHECK-NEXT: ret <16 x i32> [[R]] +; SSE-LABEL: define <16 x i32> @rconcat_sext_v8i16_v16i32( +; SSE-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A1]], <8 x i16> [[A0]], <16 x i32> +; SSE-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> +; SSE-NEXT: ret <16 x i32> [[R]] +; +; AVX-LABEL: define <16 x i32> @rconcat_sext_v8i16_v16i32( +; AVX-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> +; AVX-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> +; AVX-NEXT: ret <16 x i32> [[R]] ; %x0 = sext <8 x i16> %a0 to <8 x i32> %x1 = sext <8 x i16> %a1 to <8 x i32> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll index 7dc70cd8b3d77..b30dc9ffdc596 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s -; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX ; fold to identity @@ -47,14 +47,21 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) { ; broadcast loads are free on AVX (and blends are much cheap than general 2-operand shuffles) define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) { -; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64( -; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32 -; CHECK-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32 -; CHECK-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> -; CHECK-NEXT: ret <4 x double> [[BLEND]] +; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64( +; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32 +; SSE-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32 +; SSE-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[BLEND]] +; +; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64( +; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32 +; AVX-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32 +; AVX-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> +; AVX-NEXT: ret <4 x double> [[BLEND]] ; %ld0 = load <4 x double>, ptr %p0, align 32 %ld1 = load <4 x double>, ptr %p1, align 32