@@ -282,6 +282,28 @@ multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
282
282
MaskingConstraint, itin, IsCommutable,
283
283
IsKCommutable>;
284
284
285
+ // This multiclass generates the unconditional/non-masking, the masking and
286
+ // the zero-masking variant of the vector instruction. In the masking case, the
287
+ // perserved vector elements come from a new dummy input operand tied to $dst.
288
+ // This version uses a separate dag for non-masking and masking.
289
+ multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
290
+ dag Outs, dag Ins, string OpcodeStr,
291
+ string AttSrcAsm, string IntelSrcAsm,
292
+ dag RHS, dag MaskRHS,
293
+ InstrItinClass itin = NoItinerary,
294
+ bit IsCommutable = 0, bit IsKCommutable = 0,
295
+ SDNode Select = vselect> :
296
+ AVX512_maskable_custom<O, F, Outs, Ins,
297
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
298
+ !con((ins _.KRCWM:$mask), Ins),
299
+ OpcodeStr, AttSrcAsm, IntelSrcAsm,
300
+ [(set _.RC:$dst, RHS)],
301
+ [(set _.RC:$dst,
302
+ (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
303
+ [(set _.RC:$dst,
304
+ (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
305
+ "$src0 = $dst", itin, IsCommutable, IsKCommutable>;
306
+
285
307
// This multiclass generates the unconditional/non-masking, the masking and
286
308
// the zero-masking variant of the vector instruction. In the masking case, the
287
309
// preserved vector elements come from a new dummy input operand tied to $dst.
@@ -512,28 +534,45 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
512
534
//===----------------------------------------------------------------------===//
513
535
// AVX-512 - VECTOR INSERT
514
536
//
515
- multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
516
- PatFrag vinsert_insert> {
537
+
538
+ // Supports two different pattern operators for mask and unmasked ops. Allows
539
+ // null_frag to be passed for one.
540
+ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
541
+ X86VectorVTInfo To,
542
+ SDPatternOperator vinsert_insert,
543
+ SDPatternOperator vinsert_for_mask> {
517
544
let ExeDomain = To.ExeDomain in {
518
- defm rr : AVX512_maskable <Opcode, MRMSrcReg, To, (outs To.RC:$dst),
545
+ defm rr : AVX512_maskable_split <Opcode, MRMSrcReg, To, (outs To.RC:$dst),
519
546
(ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
520
547
"vinsert" # From.EltTypeName # "x" # From.NumElts,
521
548
"$src3, $src2, $src1", "$src1, $src2, $src3",
522
549
(vinsert_insert:$src3 (To.VT To.RC:$src1),
523
550
(From.VT From.RC:$src2),
524
- (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
551
+ (iPTR imm)),
552
+ (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
553
+ (From.VT From.RC:$src2),
554
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
525
555
526
- defm rm : AVX512_maskable <Opcode, MRMSrcMem, To, (outs To.RC:$dst),
556
+ defm rm : AVX512_maskable_split <Opcode, MRMSrcMem, To, (outs To.RC:$dst),
527
557
(ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
528
558
"vinsert" # From.EltTypeName # "x" # From.NumElts,
529
559
"$src3, $src2, $src1", "$src1, $src2, $src3",
530
560
(vinsert_insert:$src3 (To.VT To.RC:$src1),
561
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
562
+ (iPTR imm)),
563
+ (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
531
564
(From.VT (bitconvert (From.LdFrag addr:$src2))),
532
565
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
533
566
EVEX_CD8<From.EltSize, From.CD8TupleForm>;
534
567
}
535
568
}
536
569
570
+ // Passes the same pattern operator for masked and unmasked ops.
571
+ multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
572
+ X86VectorVTInfo To,
573
+ SDPatternOperator vinsert_insert> :
574
+ vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert>;
575
+
537
576
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
538
577
X86VectorVTInfo To, PatFrag vinsert_insert,
539
578
SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
@@ -573,44 +612,46 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
573
612
X86VectorVTInfo< 8, EltVT64, VR512>,
574
613
vinsert256_insert>, VEX_W, EVEX_V512;
575
614
615
+ // Even with DQI we'd like to only use these instructions for masking.
576
616
let Predicates = [HasVLX, HasDQI] in
577
- defm NAME # "64x2Z256" : vinsert_for_size <Opcode128,
617
+ defm NAME # "64x2Z256" : vinsert_for_size_split <Opcode128,
578
618
X86VectorVTInfo< 2, EltVT64, VR128X>,
579
619
X86VectorVTInfo< 4, EltVT64, VR256X>,
580
- vinsert128_insert>, VEX_W, EVEX_V256;
620
+ null_frag, vinsert128_insert>, VEX_W, EVEX_V256;
581
621
622
+ // Even with DQI we'd like to only use these instructions for masking.
582
623
let Predicates = [HasDQI] in {
583
- defm NAME # "64x2Z" : vinsert_for_size <Opcode128,
624
+ defm NAME # "64x2Z" : vinsert_for_size_split <Opcode128,
584
625
X86VectorVTInfo< 2, EltVT64, VR128X>,
585
626
X86VectorVTInfo< 8, EltVT64, VR512>,
586
- vinsert128_insert>, VEX_W, EVEX_V512;
627
+ null_frag, vinsert128_insert>, VEX_W, EVEX_V512;
587
628
588
- defm NAME # "32x8Z" : vinsert_for_size <Opcode256,
629
+ defm NAME # "32x8Z" : vinsert_for_size_split <Opcode256,
589
630
X86VectorVTInfo< 8, EltVT32, VR256X>,
590
631
X86VectorVTInfo<16, EltVT32, VR512>,
591
- vinsert256_insert>, EVEX_V512;
632
+ null_frag, vinsert256_insert>, EVEX_V512;
592
633
}
593
634
}
594
635
595
636
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
596
637
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
597
638
598
639
// Codegen pattern with the alternative types,
599
- // Only add this if 64x2 and its friends are not supported natively via AVX512DQ .
640
+ // Even with AVX512DQ we'll still use these for unmasked operations.
600
641
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
601
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI ]>;
642
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
602
643
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
603
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI ]>;
644
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
604
645
605
646
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
606
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI ]>;
647
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
607
648
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
608
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI ]>;
649
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
609
650
610
651
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
611
- vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI ]>;
652
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
612
653
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
613
- vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI ]>;
654
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
614
655
615
656
// Codegen pattern with the alternative types insert VEC128 into VEC256
616
657
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
@@ -647,16 +688,20 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
647
688
// AVX-512 VECTOR EXTRACT
648
689
//---
649
690
650
- multiclass vextract_for_size<int Opcode,
651
- X86VectorVTInfo From, X86VectorVTInfo To,
652
- PatFrag vextract_extract> {
691
+ // Supports two different pattern operators for mask and unmasked ops. Allows
692
+ // null_frag to be passed for one.
693
+ multiclass vextract_for_size_split<int Opcode,
694
+ X86VectorVTInfo From, X86VectorVTInfo To,
695
+ SDPatternOperator vextract_extract,
696
+ SDPatternOperator vextract_for_mask> {
653
697
654
698
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
655
- defm rr : AVX512_maskable <Opcode, MRMDestReg, To, (outs To.RC:$dst),
699
+ defm rr : AVX512_maskable_split <Opcode, MRMDestReg, To, (outs To.RC:$dst),
656
700
(ins From.RC:$src1, u8imm:$idx),
657
701
"vextract" # To.EltTypeName # "x" # To.NumElts,
658
702
"$idx, $src1", "$src1, $idx",
659
- (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm))>,
703
+ (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
704
+ (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
660
705
AVX512AIi8Base, EVEX;
661
706
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
662
707
(ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
@@ -677,6 +722,12 @@ multiclass vextract_for_size<int Opcode,
677
722
}
678
723
}
679
724
725
+ // Passes the same pattern operator for masked and unmasked ops.
726
+ multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
727
+ X86VectorVTInfo To,
728
+ SDPatternOperator vextract_extract> :
729
+ vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract>;
730
+
680
731
// Codegen pattern for the alternative types
681
732
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
682
733
X86VectorVTInfo To, PatFrag vextract_extract,
@@ -713,22 +764,26 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
713
764
X86VectorVTInfo< 4, EltVT32, VR128X>,
714
765
vextract128_extract>,
715
766
EVEX_V256, EVEX_CD8<32, CD8VT4>;
767
+
768
+ // Even with DQI we'd like to only use these instructions for masking.
716
769
let Predicates = [HasVLX, HasDQI] in
717
- defm NAME # "64x2Z256" : vextract_for_size <Opcode128,
770
+ defm NAME # "64x2Z256" : vextract_for_size_split <Opcode128,
718
771
X86VectorVTInfo< 4, EltVT64, VR256X>,
719
772
X86VectorVTInfo< 2, EltVT64, VR128X>,
720
- vextract128_extract>,
773
+ null_frag, vextract128_extract>,
721
774
VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
775
+
776
+ // Even with DQI we'd like to only use these instructions for masking.
722
777
let Predicates = [HasDQI] in {
723
- defm NAME # "64x2Z" : vextract_for_size <Opcode128,
778
+ defm NAME # "64x2Z" : vextract_for_size_split <Opcode128,
724
779
X86VectorVTInfo< 8, EltVT64, VR512>,
725
780
X86VectorVTInfo< 2, EltVT64, VR128X>,
726
- vextract128_extract>,
781
+ null_frag, vextract128_extract>,
727
782
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
728
- defm NAME # "32x8Z" : vextract_for_size <Opcode256,
783
+ defm NAME # "32x8Z" : vextract_for_size_split <Opcode256,
729
784
X86VectorVTInfo<16, EltVT32, VR512>,
730
785
X86VectorVTInfo< 8, EltVT32, VR256X>,
731
- vextract256_extract>,
786
+ null_frag, vextract256_extract>,
732
787
EVEX_V512, EVEX_CD8<32, CD8VT8>;
733
788
}
734
789
}
@@ -737,21 +792,21 @@ defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
737
792
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
738
793
739
794
// extract_subvector codegen patterns with the alternative types.
740
- // Only add this if 64x2 and its friends are not supported natively via AVX512DQ .
795
+ // Even with AVX512DQ we'll still use these for unmasked operations.
741
796
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
742
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI ]>;
797
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
743
798
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
744
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI ]>;
799
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
745
800
746
801
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
747
- vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI ]>;
802
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
748
803
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
749
- vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI ]>;
804
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
750
805
751
806
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
752
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI ]>;
807
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
753
808
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
754
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI ]>;
809
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
755
810
756
811
// Codegen pattern with the alternative types extract VEC128 from VEC256
757
812
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
0 commit comments