@@ -1888,15 +1888,14 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
18881888;
18891889; AVX2-LABEL: vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24:
18901890; AVX2: # %bb.0:
1891- ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
1892- ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
1893- ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
1894- ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
1895- ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
1896- ; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
1897- ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
1898- ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
1899- ; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
1891+ ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
1892+ ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
1893+ ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
1894+ ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1895+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
1896+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
1897+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
1898+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
19001899; AVX2-NEXT: vzeroupper
19011900; AVX2-NEXT: retq
19021901;
@@ -2112,15 +2111,14 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
21122111;
21132112; AVX2-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
21142113; AVX2: # %bb.0:
2115- ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2116- ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2117- ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
2118- ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
2119- ; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
2120- ; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2121- ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2122- ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
2123- ; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
2114+ ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2115+ ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
2116+ ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
2117+ ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2118+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2119+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2120+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2121+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
21242122; AVX2-NEXT: vzeroupper
21252123; AVX2-NEXT: retq
21262124;
@@ -2237,33 +2235,29 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22372235;
22382236; AVX512F-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
22392237; AVX512F: # %bb.0:
2240- ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
2241- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2242- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
2243- ; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2244- ; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2245- ; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm2
2246- ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2247- ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2248- ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2249- ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2250- ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
2238+ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2239+ ; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2240+ ; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2241+ ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2242+ ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2243+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2244+ ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2245+ ; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2246+ ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
22512247; AVX512F-NEXT: vzeroupper
22522248; AVX512F-NEXT: retq
22532249;
22542250; AVX512DQ-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
22552251; AVX512DQ: # %bb.0:
2256- ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
2257- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2258- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
2259- ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2260- ; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2261- ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm2
2262- ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2263- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2264- ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2265- ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2266- ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
2252+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
2253+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2254+ ; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2255+ ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2256+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2257+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2258+ ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2259+ ; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2260+ ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
22672261; AVX512DQ-NEXT: vzeroupper
22682262; AVX512DQ-NEXT: retq
22692263;
@@ -2272,9 +2266,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22722266; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
22732267; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22742268; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2275- ; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm1
2276- ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
22772269; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2270+ ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
22782271; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
22792272; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
22802273; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2339,15 +2332,14 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
23392332;
23402333; AVX2-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
23412334; AVX2: # %bb.0:
2342- ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2343- ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2344- ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
2345- ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
2346- ; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
2347- ; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2348- ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2349- ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
2350- ; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
2335+ ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2336+ ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
2337+ ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
2338+ ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2339+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2340+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2341+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2342+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
23512343; AVX2-NEXT: vzeroupper
23522344; AVX2-NEXT: retq
23532345;
@@ -2462,33 +2454,29 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24622454;
24632455; AVX512F-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
24642456; AVX512F: # %bb.0:
2465- ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
2466- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2467- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
2468- ; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2469- ; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2470- ; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm2
2471- ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2472- ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2473- ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2474- ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2475- ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
2457+ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2458+ ; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2459+ ; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2460+ ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2461+ ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2462+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2463+ ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2464+ ; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2465+ ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
24762466; AVX512F-NEXT: vzeroupper
24772467; AVX512F-NEXT: retq
24782468;
24792469; AVX512DQ-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
24802470; AVX512DQ: # %bb.0:
2481- ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
2482- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2483- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
2484- ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2485- ; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2486- ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm2
2487- ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2488- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2489- ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2490- ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2491- ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
2471+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
2472+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2473+ ; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2474+ ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2475+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2476+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2477+ ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2478+ ; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2479+ ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
24922480; AVX512DQ-NEXT: vzeroupper
24932481; AVX512DQ-NEXT: retq
24942482;
@@ -2497,9 +2485,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24972485; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
24982486; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24992487; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2500- ; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm1
2501- ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
25022488; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2489+ ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
25032490; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
25042491; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
25052492; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2788,14 +2775,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
27882775;
27892776; AVX2-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
27902777; AVX2: # %bb.0:
2791- ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2792- ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2793- ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm2
2794- ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
2795- ; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2796- ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2797- ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
2798- ; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
2778+ ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2779+ ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
2780+ ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2781+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2782+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2783+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2784+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
27992785; AVX2-NEXT: vzeroupper
28002786; AVX2-NEXT: retq
28012787;
@@ -2990,14 +2976,13 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
29902976;
29912977; AVX2-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
29922978; AVX2: # %bb.0:
2993- ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2994- ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2995- ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm2
2996- ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
2997- ; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2998- ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2999- ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
3000- ; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
2979+ ; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2980+ ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
2981+ ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7],ymm1[8],ymm0[9,10,11],ymm1[12],ymm0[13,14,15]
2982+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2983+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2984+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2985+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
30012986; AVX2-NEXT: vzeroupper
30022987; AVX2-NEXT: retq
30032988;
@@ -3108,27 +3093,25 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31083093;
31093094; AVX512F-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31103095; AVX512F: # %bb.0:
3111- ; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm0
3096+ ; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
31123097; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3113- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
3114- ; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
3115- ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3116- ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3117- ; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
3118- ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
3098+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3099+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3100+ ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3101+ ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
3102+ ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
31193103; AVX512F-NEXT: vzeroupper
31203104; AVX512F-NEXT: retq
31213105;
31223106; AVX512DQ-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31233107; AVX512DQ: # %bb.0:
3124- ; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm0
3108+ ; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
31253109; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3126- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
3127- ; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
3128- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3129- ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3130- ; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
3131- ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
3110+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3111+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3112+ ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3113+ ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
3114+ ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
31323115; AVX512DQ-NEXT: vzeroupper
31333116; AVX512DQ-NEXT: retq
31343117;
0 commit comments