@@ -1684,15 +1684,14 @@ define <64 x i8> @var_shl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
16841684;
16851685; GFNIAVX512BW-LABEL: var_shl_v64i8:
16861686; GFNIAVX512BW: # %bb.0:
1687- ; GFNIAVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
1688- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
1689- ; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 {%k1}
1690- ; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
1691- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
1692- ; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 {%k1}
1693- ; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
1694- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
1695- ; GFNIAVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
1687+ ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm2
1688+ ; GFNIAVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
1689+ ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
1690+ ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1691+ ; GFNIAVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1692+ ; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
1693+ ; GFNIAVX512BW-NEXT: kmovq %rax, %k1
1694+ ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
16961695; GFNIAVX512BW-NEXT: retq
16971696 %shift = shl <64 x i8 > %a , %b
16981697 ret <64 x i8 > %shift
@@ -1876,15 +1875,15 @@ define <64 x i8> @var_lshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
18761875;
18771876; GFNIAVX512BW-LABEL: var_lshr_v64i8:
18781877; GFNIAVX512BW: # %bb.0:
1879- ; GFNIAVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
1880- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
1881- ; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 {%k1}
1882- ; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
1883- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
1884- ; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 {%k1}
1885- ; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
1886- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
1887- ; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 {%k1}
1878+ ; GFNIAVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
1879+ ; GFNIAVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
1880+ ; GFNIAVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm2
1881+ ; GFNIAVX512BW-NEXT: vpsrlvw %zmm3, %zmm2, %zmm2
1882+ ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
1883+ ; GFNIAVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
1884+ ; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
1885+ ; GFNIAVX512BW-NEXT: kmovq %rax, %k1
1886+ ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
18881887; GFNIAVX512BW-NEXT: retq
18891888 %shift = lshr <64 x i8 > %a , %b
18901889 ret <64 x i8 > %shift
@@ -2232,36 +2231,16 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
22322231;
22332232; GFNIAVX512BW-LABEL: var_ashr_v64i8:
22342233; GFNIAVX512BW: # %bb.0:
2235- ; GFNIAVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
2236- ; GFNIAVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3
2237- ; GFNIAVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
2238- ; GFNIAVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
2239- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
2240- ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
2241- ; GFNIAVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
2242- ; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm5
2243- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm5, %k1
2244- ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
2245- ; GFNIAVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
2246- ; GFNIAVX512BW-NEXT: vpsllw $2, %zmm4, %zmm4
2247- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
2248- ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
2249- ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
2250- ; GFNIAVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
2251- ; GFNIAVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3
2252- ; GFNIAVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
2253- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
2254- ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
2255- ; GFNIAVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
2256- ; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm4
2257- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
2258- ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
2259- ; GFNIAVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
2260- ; GFNIAVX512BW-NEXT: vpsllw $2, %zmm1, %zmm1
2261- ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
2262- ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
2263- ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
2264- ; GFNIAVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
2234+ ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm2
2235+ ; GFNIAVX512BW-NEXT: vpsravw %zmm2, %zmm0, %zmm2
2236+ ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
2237+ ; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
2238+ ; GFNIAVX512BW-NEXT: vpsraw $8, %zmm0, %zmm0
2239+ ; GFNIAVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
2240+ ; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
2241+ ; GFNIAVX512BW-NEXT: kmovq %rax, %k1
2242+ ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
2243+ ; GFNIAVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
22652244; GFNIAVX512BW-NEXT: retq
22662245 %shift = ashr <64 x i8 > %a , %b
22672246 ret <64 x i8 > %shift
0 commit comments