@@ -389,57 +389,28 @@ define <32 x i8> @trunc_shuffle_v32i16_v32i8_ofs1(<32 x i16> %a0) {
389389; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
390390; AVX512VL-FAST-PERLANE-NEXT: retq
391391;
392- ; AVX512BW-FAST-ALL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
393- ; AVX512BW-FAST-ALL: # %bb.0:
394- ; AVX512BW-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,9,11]
395- ; AVX512BW-FAST-ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
396- ; AVX512BW-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31]
397- ; AVX512BW-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
398- ; AVX512BW-FAST-ALL-NEXT: vpermt2q %zmm2, %zmm1, %zmm0
399- ; AVX512BW-FAST-ALL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
400- ; AVX512BW-FAST-ALL-NEXT: retq
401- ;
402- ; AVX512BW-FAST-PERLANE-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
403- ; AVX512BW-FAST-PERLANE: # %bb.0:
404- ; AVX512BW-FAST-PERLANE-NEXT: vextracti64x4 $1, %zmm0, %ymm1
405- ; AVX512BW-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
406- ; AVX512BW-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm1, %ymm1
407- ; AVX512BW-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm0, %ymm0
408- ; AVX512BW-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
409- ; AVX512BW-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
410- ; AVX512BW-FAST-PERLANE-NEXT: retq
411- ;
412- ; AVX512BWVL-FAST-ALL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
413- ; AVX512BWVL-FAST-ALL: # %bb.0:
414- ; AVX512BWVL-FAST-ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
415- ; AVX512BWVL-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31]
416- ; AVX512BWVL-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
417- ; AVX512BWVL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [0,2,5,7]
418- ; AVX512BWVL-FAST-ALL-NEXT: vpermi2q %ymm1, %ymm2, %ymm0
419- ; AVX512BWVL-FAST-ALL-NEXT: retq
392+ ; AVX512BW-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
393+ ; AVX512BW: # %bb.0:
394+ ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
395+ ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
396+ ; AVX512BW-NEXT: retq
420397;
421- ; AVX512BWVL-FAST-PERLANE-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
422- ; AVX512BWVL-FAST-PERLANE: # %bb.0:
423- ; AVX512BWVL-FAST-PERLANE-NEXT: vextracti64x4 $1, %zmm0, %ymm1
424- ; AVX512BWVL-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
425- ; AVX512BWVL-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm1, %ymm1
426- ; AVX512BWVL-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm0, %ymm0
427- ; AVX512BWVL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
428- ; AVX512BWVL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
429- ; AVX512BWVL-FAST-PERLANE-NEXT: retq
398+ ; AVX512BWVL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
399+ ; AVX512BWVL: # %bb.0:
400+ ; AVX512BWVL-NEXT: vpsrlw $8, %zmm0, %zmm0
401+ ; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
402+ ; AVX512BWVL-NEXT: retq
430403;
431404; AVX512VBMI-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
432405; AVX512VBMI: # %bb.0:
433- ; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15,97,99,101,103,105,107,109,111,17,19,21,23,25,27,29,31,113,115,117,119,121,123,125,127]
434- ; AVX512VBMI-NEXT: vpermi2b %zmm0, %zmm0, %zmm1
435- ; AVX512VBMI-NEXT: vpermq {{.*#+}} ymm0 = ymm1[0,2,1,3]
406+ ; AVX512VBMI-NEXT: vpsrlw $8, %zmm0, %zmm0
407+ ; AVX512VBMI-NEXT: vpmovwb %zmm0, %ymm0
436408; AVX512VBMI-NEXT: retq
437409;
438410; AVX512VBMIVL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
439411; AVX512VBMIVL: # %bb.0:
440- ; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63]
441- ; AVX512VBMIVL-NEXT: vpermb %zmm0, %zmm1, %zmm0
442- ; AVX512VBMIVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
412+ ; AVX512VBMIVL-NEXT: vpsrlw $8, %zmm0, %zmm0
413+ ; AVX512VBMIVL-NEXT: vpmovwb %zmm0, %ymm0
443414; AVX512VBMIVL-NEXT: retq
444415 %bc = bitcast <32 x i16 > %a0 to <64 x i8 >
445416 %res = shufflevector <64 x i8 > %bc , <64 x i8 > poison, <32 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 , i32 17 , i32 19 , i32 21 , i32 23 , i32 25 , i32 27 , i32 29 , i32 31 , i32 33 , i32 35 , i32 37 , i32 39 , i32 41 , i32 43 , i32 45 , i32 47 , i32 49 , i32 51 , i32 53 , i32 55 , i32 57 , i32 59 , i32 61 , i32 63 >
@@ -523,3 +494,8 @@ define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
523494 ret <16 x i8 > %result
524495}
525496
497+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
498+ ; AVX512BW-FAST-ALL: {{.*}}
499+ ; AVX512BW-FAST-PERLANE: {{.*}}
500+ ; AVX512BWVL-FAST-ALL: {{.*}}
501+ ; AVX512BWVL-FAST-PERLANE: {{.*}}
0 commit comments