@@ -172,8 +172,9 @@ define <8 x half> @test_insert_v8f16_insert_1(half %a) {
172172; CHECK-LABEL: test_insert_v8f16_insert_1:
173173; CHECK: // %bb.0:
174174; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
175+ ; CHECK-NEXT: movi d1, #0000000000000000
175176; CHECK-NEXT: dup.8h v0, v0[0]
176- ; CHECK-NEXT: mov.h v0[7], wzr
177+ ; CHECK-NEXT: mov.h v0[7], v1[0]
177178; CHECK-NEXT: ret
178179 %v.0 = insertelement <8 x half > <half undef , half undef , half undef , half undef , half undef , half undef , half undef , half 0 .0 >, half %a , i32 0
179180 %v.1 = insertelement <8 x half > %v.0 , half %a , i32 1
@@ -278,8 +279,9 @@ define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
278279; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
279280; CHECK: // %bb.0:
280281; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
282+ ; CHECK-NEXT: movi d1, #0000000000000000
281283; CHECK-NEXT: dup.4s v0, v0[0]
282- ; CHECK-NEXT: mov.s v0[3], wzr
284+ ; CHECK-NEXT: mov.s v0[3], v1[0]
283285; CHECK-NEXT: ret
284286 %v.0 = insertelement <4 x float > <float undef , float undef , float undef , float 0 .000000e+00 >, float %a , i32 0
285287 %v.1 = insertelement <4 x float > %v.0 , float %a , i32 1
@@ -347,12 +349,12 @@ define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
347349 ret <8 x i16 > %v.0
348350}
349351
350- ; TODO: This should jsut be a mov.s v0[3], wzr
351352define <4 x half > @test_insert_v4f16_f16_zero (<4 x half > %a ) {
352353; CHECK-LABEL: test_insert_v4f16_f16_zero:
353354; CHECK: // %bb.0:
355+ ; CHECK-NEXT: movi d1, #0000000000000000
354356; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
355- ; CHECK-NEXT: mov.h v0[0], wzr
357+ ; CHECK-NEXT: mov.h v0[0], v1[0]
356358; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
357359; CHECK-NEXT: ret
358360 %v.0 = insertelement <4 x half > %a , half 0 .000000e+00 , i32 0
@@ -362,7 +364,8 @@ define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
362364define <8 x half > @test_insert_v8f16_f16_zero (<8 x half > %a ) {
363365; CHECK-LABEL: test_insert_v8f16_f16_zero:
364366; CHECK: // %bb.0:
365- ; CHECK-NEXT: mov.h v0[6], wzr
367+ ; CHECK-NEXT: movi d1, #0000000000000000
368+ ; CHECK-NEXT: mov.h v0[6], v1[0]
366369; CHECK-NEXT: ret
367370 %v.0 = insertelement <8 x half > %a , half 0 .000000e+00 , i32 6
368371 ret <8 x half > %v.0
@@ -371,8 +374,9 @@ define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
371374define <2 x float > @test_insert_v2f32_f32_zero (<2 x float > %a ) {
372375; CHECK-LABEL: test_insert_v2f32_f32_zero:
373376; CHECK: // %bb.0:
377+ ; CHECK-NEXT: movi d1, #0000000000000000
374378; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
375- ; CHECK-NEXT: mov.s v0[0], wzr
379+ ; CHECK-NEXT: mov.s v0[0], v1[0]
376380; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
377381; CHECK-NEXT: ret
378382 %v.0 = insertelement <2 x float > %a , float 0 .000000e+00 , i32 0
@@ -382,7 +386,8 @@ define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
382386define <4 x float > @test_insert_v4f32_f32_zero (<4 x float > %a ) {
383387; CHECK-LABEL: test_insert_v4f32_f32_zero:
384388; CHECK: // %bb.0:
385- ; CHECK-NEXT: mov.s v0[3], wzr
389+ ; CHECK-NEXT: movi d1, #0000000000000000
390+ ; CHECK-NEXT: mov.s v0[3], v1[0]
386391; CHECK-NEXT: ret
387392 %v.0 = insertelement <4 x float > %a , float 0 .000000e+00 , i32 3
388393 ret <4 x float > %v.0
@@ -391,8 +396,60 @@ define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
391396define <2 x double > @test_insert_v2f64_f64_zero (<2 x double > %a ) {
392397; CHECK-LABEL: test_insert_v2f64_f64_zero:
393398; CHECK: // %bb.0:
399+ ; CHECK-NEXT: movi d1, #0000000000000000
400+ ; CHECK-NEXT: mov.d v0[1], v1[0]
401+ ; CHECK-NEXT: ret
402+ %v.0 = insertelement <2 x double > %a , double 0 .000000e+00 , i32 1
403+ ret <2 x double > %v.0
404+ }
405+
406+ define <4 x half > @test_insert_v4f16_f16_zero_wzr (<4 x half > %a ) #1 {
407+ ; CHECK-LABEL: test_insert_v4f16_f16_zero_wzr:
408+ ; CHECK: // %bb.0:
409+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
410+ ; CHECK-NEXT: mov.h v0[0], wzr
411+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
412+ ; CHECK-NEXT: ret
413+ %v.0 = insertelement <4 x half > %a , half 0 .000000e+00 , i32 0
414+ ret <4 x half > %v.0
415+ }
416+
417+ define <8 x half > @test_insert_v8f16_f16_zero_wzr (<8 x half > %a ) #1 {
418+ ; CHECK-LABEL: test_insert_v8f16_f16_zero_wzr:
419+ ; CHECK: // %bb.0:
420+ ; CHECK-NEXT: mov.h v0[6], wzr
421+ ; CHECK-NEXT: ret
422+ %v.0 = insertelement <8 x half > %a , half 0 .000000e+00 , i32 6
423+ ret <8 x half > %v.0
424+ }
425+
426+ define <2 x float > @test_insert_v2f32_f32_zero_wzr (<2 x float > %a ) #1 {
427+ ; CHECK-LABEL: test_insert_v2f32_f32_zero_wzr:
428+ ; CHECK: // %bb.0:
429+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
430+ ; CHECK-NEXT: mov.s v0[0], wzr
431+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
432+ ; CHECK-NEXT: ret
433+ %v.0 = insertelement <2 x float > %a , float 0 .000000e+00 , i32 0
434+ ret <2 x float > %v.0
435+ }
436+
437+ define <4 x float > @test_insert_v4f32_f32_zero_wzr (<4 x float > %a ) #1 {
438+ ; CHECK-LABEL: test_insert_v4f32_f32_zero_wzr:
439+ ; CHECK: // %bb.0:
440+ ; CHECK-NEXT: mov.s v0[3], wzr
441+ ; CHECK-NEXT: ret
442+ %v.0 = insertelement <4 x float > %a , float 0 .000000e+00 , i32 3
443+ ret <4 x float > %v.0
444+ }
445+
446+ define <2 x double > @test_insert_v2f64_f64_zero_xzr (<2 x double > %a ) #1 {
447+ ; CHECK-LABEL: test_insert_v2f64_f64_zero_xzr:
448+ ; CHECK: // %bb.0:
394449; CHECK-NEXT: mov.d v0[1], xzr
395450; CHECK-NEXT: ret
396451 %v.0 = insertelement <2 x double > %a , double 0 .000000e+00 , i32 1
397452 ret <2 x double > %v.0
398453}
454+
455+ attributes #1 = {"tune-cpu" ="cortex-a55" }
0 commit comments