@@ -2837,8 +2837,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2837
2837
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2838
2838
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2839
2839
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2840
- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2841
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2840
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2842
2841
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2843
2842
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2844
2843
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2855,8 +2854,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2855
2854
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2856
2855
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2857
2856
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2858
- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2859
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2857
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2860
2858
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2861
2859
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2862
2860
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2872,7 +2870,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2872
2870
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2873
2871
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2874
2872
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2875
- ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2873
+ ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2876
2874
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2877
2875
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
2878
2876
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3100,8 +3098,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3100
3098
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3101
3099
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3102
3100
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3103
- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3104
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3101
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3105
3102
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3106
3103
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3107
3104
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3118,8 +3115,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3118
3115
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3119
3116
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3120
3117
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3121
- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3122
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3118
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3123
3119
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3124
3120
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3125
3121
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3135,7 +3131,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3135
3131
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3136
3132
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3137
3133
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3138
- ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3134
+ ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3139
3135
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
3140
3136
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
3141
3137
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3868,12 +3864,11 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3868
3864
; AVX512F: # %bb.0:
3869
3865
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3870
3866
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3867
+ ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3871
3868
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3872
3869
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3873
- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3874
- ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3875
3870
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3876
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3871
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3877
3872
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3878
3873
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3879
3874
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3885,12 +3880,11 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3885
3880
; AVX512DQ: # %bb.0:
3886
3881
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3887
3882
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3883
+ ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3888
3884
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3889
3885
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3890
- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3891
- ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3892
3886
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3893
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3887
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3894
3888
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3895
3889
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3896
3890
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
0 commit comments