@@ -1094,7 +1094,7 @@ def : Pat <
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
 
-multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
+multiclass f16_to_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
   // f16_to_fp patterns
   def : GCNPat <
     (f32 (any_f16_to_fp i32:$src0)),
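The hunk above renames the multiclass because its scope narrows in this patch: f16_to_fp_Pats keeps only the f16/f32 conversion patterns, which differ across subtargets solely in which convert instruction they use, while the remaining patterns move into a re-parameterized f16_fp_Pats (next hunk). As an illustrative sketch, not literal patch text and reconstructed only from the patterns visible in this diff, the NotHasTrue16BitInsts instantiation expands to roughly:

def : GCNPat <
  (f32 (any_f16_to_fp i32:$src0)),
  (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0)
>;
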
@@ -1121,25 +1121,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
     (cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
   >;
 
+  // fp_to_fp16 patterns
   def : GCNPat <
-    (f64 (any_fpextend f16:$src)),
-    (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
+    (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+    (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
   >;
 
-  // fp_to_fp16 patterns
+  // This is only used on targets without half support
+  // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
   def : GCNPat <
-    (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+    (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
     (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
   >;
+}
+
+let True16Predicate = NotHasTrue16BitInsts in
+defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
+
+let True16Predicate = UseFakeTrue16Insts in
+defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
+
+multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
+                       Instruction cvt_f32_f16_inst_e64,
+                       RegOrImmOperand VSrc> {
+  def : GCNPat <
+    (f64 (any_fpextend f16:$src)),
+    (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
+  >;
 
   def : GCNPat <
     (i32 (fp_to_sint f16:$src)),
-    (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32:$src))
+    (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc:$src))
   >;
 
   def : GCNPat <
     (i32 (fp_to_uint f16:$src)),
-    (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32:$src))
+    (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc:$src))
   >;
 
   def : GCNPat <
@@ -1151,20 +1168,16 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
     (f16 (uint_to_fp i32:$src)),
     (cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src))
   >;
-
-  // This is only used on targets without half support
-  // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
-  def : GCNPat <
-    (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
-    (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
-  >;
 }
 
 let True16Predicate = NotHasTrue16BitInsts in
-defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
+defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64, VSrc_b32>;
+
+let True16Predicate = UseRealTrue16Insts in
+defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64, VSrcT_b16>;
 
 let True16Predicate = UseFakeTrue16Insts in
-defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
+defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64, VSrc_b16>;
 
 //===----------------------------------------------------------------------===//
 // VOP2 Patterns
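The two hunks above split the old multiclass in two. The fp16 extend/round and strict_fp_to_f16 patterns, which never name a source operand class, now live in f16_to_fp_Pats and are instantiated directly for the non-True16 and fake16 cases. f16_fp_Pats gains a RegOrImmOperand VSrc parameter so each instantiation can pick the matching operand class: VSrc_b32 for targets without 16-bit instructions, VSrcT_b16 for real True16, and VSrc_b16 for fake16. For example, the fp_to_sint pattern under UseRealTrue16Insts expands to roughly the following (an illustrative expansion, not literal patch text):

def : GCNPat <
  (i32 (fp_to_sint f16:$src)),
  (V_CVT_I32_F32_e32 (V_CVT_F32_F16_t16_e64 SRCMODS.NONE, VSrcT_b16:$src))
>;
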
@@ -2774,30 +2787,53 @@ def : GCNPat <
                       SSrc_i1:$src))
 >;
 
-let SubtargetPredicate = HasTrue16BitInsts in
+let True16Predicate = UseRealTrue16Insts in
 def : GCNPat <
   (f16 (sint_to_fp i1:$src)),
-  (V_CVT_F16_F32_fake16_e32 (
-      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+  (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                         /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
-                        SSrc_i1:$src))
+                        SSrc_i1:$src),
+      /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
 >;
 
-let SubtargetPredicate = NotHasTrue16BitInsts in
+let True16Predicate = UseFakeTrue16Insts in
+def : GCNPat <
+  (f16 (sint_to_fp i1:$src)),
+  (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                        SSrc_i1:$src),
+      /*clamp*/ 0, /*omod*/ 0)
+>;
+
+let True16Predicate = NotHasTrue16BitInsts in
 def : GCNPat <
   (f16 (uint_to_fp i1:$src)),
   (V_CVT_F16_F32_e32 (
       V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                         /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
                         SSrc_i1:$src))
 >;
-let SubtargetPredicate = HasTrue16BitInsts in
+
+let True16Predicate = UseRealTrue16Insts in
 def : GCNPat <
   (f16 (uint_to_fp i1:$src)),
-  (V_CVT_F16_F32_fake16_e32 (
-      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+  (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                         /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
-                        SSrc_i1:$src))
+                        SSrc_i1:$src),
+      /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
+>;
+
+let True16Predicate = UseFakeTrue16Insts in
+def : GCNPat <
+  (f16 (uint_to_fp i1:$src)),
+  (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                        SSrc_i1:$src),
+      /*clamp*/ 0, /*omod*/ 0)
 >;
 
 def : GCNPat <
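In the hunk above, each i1-to-f16 conversion now has three variants keyed on True16Predicate instead of two on SubtargetPredicate, and the t16/fake16 results switch from the _e32 to the _e64 pseudos, which take the full VOP3 operand list. A minimal sketch of the operand-order difference, assuming only what the patterns above show (the t16 form carries an op_sel immediate that the fake16 form lacks):

// e32 result: just the source operand.
(V_CVT_F16_F32_e32 $src)
// t16 e64 result: explicit immediates for modifiers, clamp, omod, op_sel.
(V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0, $src,
                       /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)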