@@ -135,37 +135,17 @@ define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline
135135}
136136
137137define <4 x double > @merge_4f64_f64_34z6 (double * %ptr ) nounwind uwtable noinline ssp {
138- ; AVX1-LABEL: merge_4f64_f64_34z6:
139- ; AVX1: # BB#0:
140- ; AVX1-NEXT: vmovups 24(%rdi), %xmm0
141- ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
142- ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
143- ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
144- ; AVX1-NEXT: retq
145- ;
146- ; AVX2-LABEL: merge_4f64_f64_34z6:
147- ; AVX2: # BB#0:
148- ; AVX2-NEXT: vmovdqu 24(%rdi), %xmm0
149- ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
150- ; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
151- ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
152- ; AVX2-NEXT: retq
153- ;
154- ; AVX512F-LABEL: merge_4f64_f64_34z6:
155- ; AVX512F: # BB#0:
156- ; AVX512F-NEXT: vmovdqu 24(%rdi), %xmm0
157- ; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
158- ; AVX512F-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
159- ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
160- ; AVX512F-NEXT: retq
138+ ; AVX-LABEL: merge_4f64_f64_34z6:
139+ ; AVX: # BB#0:
140+ ; AVX-NEXT: vxorpd %ymm0, %ymm0, %ymm0
141+ ; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
142+ ; AVX-NEXT: retq
161143;
162144; X32-AVX-LABEL: merge_4f64_f64_34z6:
163145; X32-AVX: # BB#0:
164146; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
165- ; X32-AVX-NEXT: vmovups 24(%eax), %xmm0
166- ; X32-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
167- ; X32-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
168- ; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
147+ ; X32-AVX-NEXT: vxorpd %ymm0, %ymm0, %ymm0
148+ ; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
169149; X32-AVX-NEXT: retl
170150 %ptr0 = getelementptr inbounds double , double * %ptr , i64 3
171151 %ptr1 = getelementptr inbounds double , double * %ptr , i64 4
@@ -298,11 +278,8 @@ define <8 x float> @merge_8f32_2f32_23z5(<2 x float>* %ptr) nounwind uwtable noi
298278; X32-AVX-LABEL: merge_8f32_2f32_23z5:
299279; X32-AVX: # BB#0:
300280; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
301- ; X32-AVX-NEXT: vmovupd 16(%eax), %xmm0
302- ; X32-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
303- ; X32-AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
304- ; X32-AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
305- ; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
281+ ; X32-AVX-NEXT: vxorpd %ymm0, %ymm0, %ymm0
282+ ; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
306283; X32-AVX-NEXT: retl
307284 %ptr0 = getelementptr inbounds <2 x float >, <2 x float >* %ptr , i64 2
308285 %ptr1 = getelementptr inbounds <2 x float >, <2 x float >* %ptr , i64 3
@@ -360,45 +337,17 @@ define <8 x float> @merge_8f32_f32_12zzuuzz(float* %ptr) nounwind uwtable noinli
360337}
361338
362339define <8 x float > @merge_8f32_f32_1u3u5zu8 (float * %ptr ) nounwind uwtable noinline ssp {
363- ; AVX1-LABEL: merge_8f32_f32_1u3u5zu8:
364- ; AVX1: # BB#0:
365- ; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
366- ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
367- ; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
368- ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,1],xmm0[1,0]
369- ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
370- ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
371- ; AVX1-NEXT: retq
372- ;
373- ; AVX2-LABEL: merge_8f32_f32_1u3u5zu8:
374- ; AVX2: # BB#0:
375- ; AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
376- ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
377- ; AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
378- ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,1],xmm0[1,0]
379- ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
380- ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
381- ; AVX2-NEXT: retq
382- ;
383- ; AVX512F-LABEL: merge_8f32_f32_1u3u5zu8:
384- ; AVX512F: # BB#0:
385- ; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
386- ; AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
387- ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[1,0]
388- ; AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
389- ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
390- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
391- ; AVX512F-NEXT: retq
340+ ; AVX-LABEL: merge_8f32_f32_1u3u5zu8:
341+ ; AVX: # BB#0:
342+ ; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
343+ ; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
344+ ; AVX-NEXT: retq
392345;
393346; X32-AVX-LABEL: merge_8f32_f32_1u3u5zu8:
394347; X32-AVX: # BB#0:
395348; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
396- ; X32-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
397- ; X32-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
398- ; X32-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
399- ; X32-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,1],xmm0[1,0]
400- ; X32-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
401- ; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
349+ ; X32-AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
350+ ; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
402351; X32-AVX-NEXT: retl
403352 %ptr0 = getelementptr inbounds float , float * %ptr , i64 1
404353 %ptr2 = getelementptr inbounds float , float * %ptr , i64 3
@@ -483,47 +432,27 @@ define <8 x i32> @merge_8i32_i32_56zz9uzz(i32* %ptr) nounwind uwtable noinline s
483432define <8 x i32 > @merge_8i32_i32_1u3u5zu8 (i32* %ptr ) nounwind uwtable noinline ssp {
484433; AVX1-LABEL: merge_8i32_i32_1u3u5zu8:
485434; AVX1: # BB#0:
486- ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
487- ; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
488- ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,1,0]
489- ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
490- ; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
491- ; AVX1-NEXT: vpinsrd $2, 12(%rdi), %xmm1, %xmm1
492- ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
435+ ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
436+ ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
493437; AVX1-NEXT: retq
494438;
495439; AVX2-LABEL: merge_8i32_i32_1u3u5zu8:
496440; AVX2: # BB#0:
497- ; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
498- ; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
499- ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,1,0]
500- ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
501- ; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
502- ; AVX2-NEXT: vpinsrd $2, 12(%rdi), %xmm1, %xmm1
503- ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
441+ ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
442+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
504443; AVX2-NEXT: retq
505444;
506445; AVX512F-LABEL: merge_8i32_i32_1u3u5zu8:
507446; AVX512F: # BB#0:
508- ; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
509- ; AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
510- ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,1,0]
511- ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
512- ; AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
513- ; AVX512F-NEXT: vpinsrd $2, 12(%rdi), %xmm1, %xmm1
514- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
447+ ; AVX512F-NEXT: vpxor %ymm0, %ymm0, %ymm0
448+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
515449; AVX512F-NEXT: retq
516450;
517451; X32-AVX-LABEL: merge_8i32_i32_1u3u5zu8:
518452; X32-AVX: # BB#0:
519453; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
520- ; X32-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
521- ; X32-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
522- ; X32-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,1,0]
523- ; X32-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
524- ; X32-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
525- ; X32-AVX-NEXT: vpinsrd $2, 12(%eax), %xmm1, %xmm1
526- ; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
454+ ; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
455+ ; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
527456; X32-AVX-NEXT: retl
528457 %ptr0 = getelementptr inbounds i32 , i32* %ptr , i64 1
529458 %ptr2 = getelementptr inbounds i32 , i32* %ptr , i64 3
@@ -620,43 +549,27 @@ define <16 x i16> @merge_16i16_i16_0uu3uuuuuuuuCuEF(i16* %ptr) nounwind uwtable
620549define <16 x i16 > @merge_16i16_i16_0uu3zzuuuuuzCuEF (i16* %ptr ) nounwind uwtable noinline ssp {
621550; AVX1-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
622551; AVX1: # BB#0:
623- ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
624- ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
625- ; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm1, %xmm1
626- ; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm1, %xmm1
627- ; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1
628- ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
552+ ; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [65535,0,0,65535,0,0,0,0,0,0,0,0,65535,0,65535,65535]
553+ ; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
629554; AVX1-NEXT: retq
630555;
631556; AVX2-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
632557; AVX2: # BB#0:
633- ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
634- ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
635- ; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm1, %xmm1
636- ; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm1, %xmm1
637- ; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1
638- ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
558+ ; AVX2-NEXT: vmovups (%rdi), %ymm0
559+ ; AVX2-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
639560; AVX2-NEXT: retq
640561;
641562; AVX512F-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
642563; AVX512F: # BB#0:
643- ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
644- ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
645- ; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm1, %xmm1
646- ; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm1, %xmm1
647- ; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1
648- ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
564+ ; AVX512F-NEXT: vmovups (%rdi), %ymm0
565+ ; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
649566; AVX512F-NEXT: retq
650567;
651568; X32-AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
652569; X32-AVX: # BB#0:
653570; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
654- ; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
655- ; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
656- ; X32-AVX-NEXT: vpinsrw $4, 24(%eax), %xmm1, %xmm1
657- ; X32-AVX-NEXT: vpinsrw $6, 28(%eax), %xmm1, %xmm1
658- ; X32-AVX-NEXT: vpinsrw $7, 30(%eax), %xmm1, %xmm1
659- ; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
571+ ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,0,0,65535,0,0,0,0,0,0,0,0,65535,0,65535,65535]
572+ ; X32-AVX-NEXT: vandps (%eax), %ymm0, %ymm0
660573; X32-AVX-NEXT: retl
661574 %ptr0 = getelementptr inbounds i16 , i16* %ptr , i64 0
662575 %ptr3 = getelementptr inbounds i16 , i16* %ptr , i64 3
0 commit comments