Skip to content

Commit 530b3ed

Browse files
author
Asiri Rathnayake
committed
Add missing natural vector cast.
Summary: The natural vector cast node (similar to bitcast) AArch64ISD::NVCAST was introduced in r217159 and r217138. This patch adds a missing cast from v2f32 to v1i64 which is causing some compilation failures. Also added test cases to cover various modimm types and BUILD_VECTORs with i64 elements. llvm-svn: 218751
1 parent 36301a0 commit 530b3ed

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
823823
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
824824
case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
825825
case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
826+
case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
826827
case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
827828
case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
828829
case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4986,6 +4986,7 @@ def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
49864986
def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
49874987
def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
49884988
def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
4989+
def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
49894990

49904991
// Natural vector casts (128 bit)
49914992
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;

llvm/test/CodeGen/AArch64/aarch64-be-bv.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,11 @@ define i16 @orr_modimm_t6() nounwind {
377377
declare i8 @f_v8i8(<8 x i8> %arg)
378378
declare i16 @f_v4i16(<4 x i16> %arg)
379379
declare i32 @f_v2i32(<2 x i32> %arg)
380+
declare i64 @f_v1i64(<1 x i64> %arg)
380381
declare i8 @f_v16i8(<16 x i8> %arg)
381382
declare i16 @f_v8i16(<8 x i16> %arg)
382383
declare i32 @f_v4i32(<4 x i32> %arg)
384+
declare i64 @f_v2i64(<2 x i64> %arg)
383385

384386
; CHECK-LABEL: modimm_t1_call:
385387
define void @modimm_t1_call() {
@@ -395,6 +397,9 @@ define void @modimm_t1_call() {
395397
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
396398
; CHECK-NEXT: bl f_v2i32
397399
call i32 @f_v2i32(<2 x i32> <i32 6, i32 6>)
400+
; CHECK: movi v{{[0-9]+}}.2s, #0x5
401+
; CHECK-NEXT: bl f_v1i64
402+
call i64 @f_v1i64(<1 x i64> <i64 21474836485>)
398403
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5
399404
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
400405
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -410,6 +415,10 @@ define void @modimm_t1_call() {
410415
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
411416
; CHECK-NEXT: bl f_v4i32
412417
call i32 @f_v4i32(<4 x i32> <i32 3, i32 3, i32 3, i32 3>)
418+
; CHECK: movi v[[REG:[0-9]+]].4s, #0x2
419+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
420+
; CHECK-NEXT: bl f_v2i64
421+
call i64 @f_v2i64(<2 x i64> <i64 8589934594, i64 8589934594>)
413422

414423
ret void
415424
}
@@ -428,6 +437,9 @@ define void @modimm_t2_call() {
428437
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
429438
; CHECK-NEXT: bl f_v2i32
430439
call i32 @f_v2i32(<2 x i32> <i32 1536, i32 1536>)
440+
; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #8
441+
; CHECK-NEXT: bl f_v1i64
442+
call i64 @f_v1i64(<1 x i64> <i64 5497558140160>)
431443
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #8
432444
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
433445
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -443,6 +455,10 @@ define void @modimm_t2_call() {
443455
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
444456
; CHECK-NEXT: bl f_v4i32
445457
call i32 @f_v4i32(<4 x i32> <i32 768, i32 768, i32 768, i32 768>)
458+
; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #8
459+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
460+
; CHECK-NEXT: bl f_v2i64
461+
call i64 @f_v2i64(<2 x i64> <i64 2199023256064, i64 2199023256064>)
446462

447463
ret void
448464
}
@@ -461,6 +477,9 @@ define void @modimm_t3_call() {
461477
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
462478
; CHECK-NEXT: bl f_v2i32
463479
call i32 @f_v2i32(<2 x i32> <i32 393216, i32 393216>)
480+
; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #16
481+
; CHECK-NEXT: bl f_v1i64
482+
call i64 @f_v1i64(<1 x i64> <i64 1407374883880960>)
464483
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #16
465484
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
466485
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -476,6 +495,10 @@ define void @modimm_t3_call() {
476495
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
477496
; CHECK-NEXT: bl f_v4i32
478497
call i32 @f_v4i32(<4 x i32> <i32 196608, i32 196608, i32 196608, i32 196608>)
498+
; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #16
499+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
500+
; CHECK-NEXT: bl f_v2i64
501+
call i64 @f_v2i64(<2 x i64> <i64 562949953552384, i64 562949953552384>)
479502

480503
ret void
481504
}
@@ -494,6 +517,9 @@ define void @modimm_t4_call() {
494517
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
495518
; CHECK-NEXT: bl f_v2i32
496519
call i32 @f_v2i32(<2 x i32> <i32 100663296, i32 100663296>)
520+
; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #24
521+
; CHECK-NEXT: bl f_v1i64
522+
call i64 @f_v1i64(<1 x i64> <i64 360287970273525760>)
497523
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #24
498524
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
499525
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -509,6 +535,10 @@ define void @modimm_t4_call() {
509535
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
510536
; CHECK-NEXT: bl f_v4i32
511537
call i32 @f_v4i32(<4 x i32> <i32 50331648, i32 50331648, i32 50331648, i32 50331648>)
538+
; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #24
539+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
540+
; CHECK-NEXT: bl f_v2i64
541+
call i64 @f_v2i64(<2 x i64> <i64 144115188109410304, i64 144115188109410304>)
512542

513543
ret void
514544
}
@@ -527,6 +557,9 @@ define void @modimm_t5_call() {
527557
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
528558
; CHECK-NEXT: bl f_v2i32
529559
call i32 @f_v2i32(<2 x i32> <i32 393222, i32 393222>)
560+
; CHECK: movi v{{[0-9]+}}.4h, #0x5
561+
; CHECK-NEXT: bl f_v1i64
562+
call i64 @f_v1i64(<1 x i64> <i64 1407396358717445>)
530563
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5
531564
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
532565
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -542,6 +575,10 @@ define void @modimm_t5_call() {
542575
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
543576
; CHECK-NEXT: bl f_v4i32
544577
call i32 @f_v4i32(<4 x i32> <i32 196611, i32 196611, i32 196611, i32 196611>)
578+
; CHECK: movi v[[REG:[0-9]+]].8h, #0x2
579+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
580+
; CHECK-NEXT: bl f_v2i64
581+
call i64 @f_v2i64(<2 x i64> <i64 562958543486978, i64 562958543486978>)
545582

546583
ret void
547584
}
@@ -560,6 +597,9 @@ define void @modimm_t6_call() {
560597
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
561598
; CHECK-NEXT: bl f_v2i32
562599
call i32 @f_v2i32(<2 x i32> <i32 100664832, i32 100664832>)
600+
; CHECK: movi v{{[0-9]+}}.4h, #0x5, lsl #8
601+
; CHECK-NEXT: bl f_v1i64
602+
call i64 @f_v1i64(<1 x i64> <i64 360293467831665920>)
563603
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5, lsl #8
564604
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
565605
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -575,6 +615,10 @@ define void @modimm_t6_call() {
575615
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
576616
; CHECK-NEXT: bl f_v4i32
577617
call i32 @f_v4i32(<4 x i32> <i32 50332416, i32 50332416, i32 50332416, i32 50332416>)
618+
; CHECK: movi v[[REG:[0-9]+]].8h, #0x2, lsl #8
619+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
620+
; CHECK-NEXT: bl f_v2i64
621+
call i64 @f_v2i64(<2 x i64> <i64 144117387132666368, i64 144117387132666368>)
578622

579623
ret void
580624
}
@@ -593,6 +637,9 @@ define void @modimm_t7_call() {
593637
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
594638
; CHECK-NEXT: bl f_v2i32
595639
call i32 @f_v2i32(<2 x i32> <i32 1791, i32 1791>)
640+
; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #8
641+
; CHECK-NEXT: bl f_v1i64
642+
call i64 @f_v1i64(<1 x i64> <i64 6592774800895>)
596643
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #8
597644
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
598645
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -608,6 +655,10 @@ define void @modimm_t7_call() {
608655
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
609656
; CHECK-NEXT: bl f_v4i32
610657
call i32 @f_v4i32(<4 x i32> <i32 1023, i32 1023, i32 1023, i32 1023>)
658+
; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #8
659+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
660+
; CHECK-NEXT: bl f_v2i64
661+
call i64 @f_v2i64(<2 x i64> <i64 3294239916799, i64 3294239916799>)
611662

612663
ret void
613664
}
@@ -626,6 +677,9 @@ define void @modimm_t8_call() {
626677
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
627678
; CHECK-NEXT: bl f_v2i32
628679
call i32 @f_v2i32(<2 x i32> <i32 458751, i32 458751>)
680+
; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #16
681+
; CHECK-NEXT: bl f_v1i64
682+
call i64 @f_v1i64(<1 x i64> <i64 1688845565689855>)
629683
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #16
630684
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
631685
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -641,6 +695,10 @@ define void @modimm_t8_call() {
641695
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
642696
; CHECK-NEXT: bl f_v4i32
643697
call i32 @f_v4i32(<4 x i32> <i32 262143, i32 262143, i32 262143, i32 262143>)
698+
; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #16
699+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
700+
; CHECK-NEXT: bl f_v2i64
701+
call i64 @f_v2i64(<2 x i64> <i64 844420635361279, i64 844420635361279>)
644702

645703
ret void
646704
}
@@ -725,6 +783,9 @@ define void @modimm_t11_call() {
725783
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
726784
; CHECK-NEXT: bl f_v2i32
727785
call i32 @f_v2i32(<2 x i32> <i32 1080033280, i32 1080033280>)
786+
; CHECK: fmov v{{[0-9]+}}.2s, #0.39062500
787+
; CHECK-NEXT: bl f_v1i64
788+
call i64 @f_v1i64(<1 x i64> <i64 4523865826746957824>)
728789
; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.25000000
729790
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
730791
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -740,6 +801,10 @@ define void @modimm_t11_call() {
740801
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
741802
; CHECK-NEXT: bl f_v4i32
742803
call i32 @f_v4i32(<4 x i32> <i32 1076887552, i32 1076887552, i32 1076887552, i32 1076887552>)
804+
; CHECK: fmov v[[REG:[0-9]+]].4s, #2.5000000
805+
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
806+
; CHECK-NEXT: bl f_v2i64
807+
call i64 @f_v2i64(<2 x i64> <i64 4620693218757967872, i64 4620693218757967872>)
743808

744809
ret void
745810
}

0 commit comments

Comments
 (0)