@@ -1079,7 +1079,7 @@ def CVTFP6TypeAttr : EnumAttr<NVVM_Dialect, CVTFP6Type, "cvt_fp6_type"> {
1079
1079
let assemblyFormat = "`<` $value `>`";
1080
1080
}
1081
1081
1082
- def NVVM_CvtToF6x2Op : NVVM_Op<"cvt.to.f6x2"> {
1082
+ def NVVM_CvtF32x2ToF6x2Op : NVVM_Op<"cvt.f32x2.to.f6x2"> {
1083
1083
let summary = "Convert a pair of float inputs to f6x2";
1084
1084
let description = [{
1085
1085
This Op converts each of the given float inputs to the specified fp6 type.
@@ -1110,7 +1110,7 @@ def NVVM_CvtToF6x2Op : NVVM_Op<"cvt.to.f6x2"> {
1110
1110
}];
1111
1111
1112
1112
string llvmBuilder = [{
1113
- auto intId = NVVM::CvtToF6x2Op::getIntrinsicID($type, $relu);
1113
+ auto intId = NVVM::CvtF32x2ToF6x2Op::getIntrinsicID($type, $relu);
1114
1114
llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a, $b});
1115
1115
if(op.getDst().getType().isInteger(16))
1116
1116
$dst = packedI16;
@@ -1120,6 +1120,153 @@ def NVVM_CvtToF6x2Op : NVVM_Op<"cvt.to.f6x2"> {
1120
1120
}];
1121
1121
}
1122
1122
1123
+ def CVTFP8E4M3 : I32EnumAttrCase<"E4M3", 0, "e4m3">;
1124
+ def CVTFP8E5M2 : I32EnumAttrCase<"E5M2", 1, "e5m2">;
1125
+ def CVTFP8UE8M0 : I32EnumAttrCase<"UE8M0", 2, "ue8m0">;
1126
+
1127
+ def CVTFP8Type : I32EnumAttr<"CVTFP8Type", "NVVM CVTFP8Type kind",
1128
+ [CVTFP8E4M3, CVTFP8E5M2, CVTFP8UE8M0]> {
1129
+ let genSpecializedAttr = 0;
1130
+ let cppNamespace = "::mlir::NVVM";
1131
+ }
1132
+ def CVTFP8TypeAttr : EnumAttr<NVVM_Dialect, CVTFP8Type, "cvt_fp8_type"> {
1133
+ let assemblyFormat = "`<` $value `>`";
1134
+ }
1135
+
1136
+ def NVVM_CvtF32x2ToF8x2Op : NVVM_Op<"cvt.f32x2.to.f8x2"> {
1137
+ let summary = "Convert a pair of float inputs to f8x2";
1138
+ let description = [{
1139
+ This Op converts each of the given float inputs to the specified fp8 type.
1140
+ The result `dst` is represented as an i16 type or as a vector
1141
+ of two i8 types.
1142
+ If `dst` is returned as an i16 type, the converted values are packed such
1143
+ that the value converted from `a` is stored in the upper 8 bits of `dst`
1144
+ and the value converted from `b` is stored in the lower 8 bits of `dst`.
1145
+ If `dst` is returned as a vector type, each converted value is stored as an
1146
+ i8 element in the vector.
1147
+ The `rnd` and `sat` attributes specify the rounding and saturation modes respectively.
1148
+ The `relu` attribute, when set, lowers to the '.relu' variant of
1149
+ the cvt instruction.
1150
+
1151
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
1152
+ }];
1153
+ let results = (outs AnyTypeOf<[I16, VectorOfLengthAndType<[2], [I8]>]>:$dst);
1154
+ let arguments = (ins
1155
+ CVTFP8TypeAttr:$type,
1156
+ F32:$a,
1157
+ F32:$b,
1158
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
1159
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
1160
+ DefaultValuedAttr<BoolAttr, "false">:$relu);
1161
+ let assemblyFormat = "$type $a `,` $b attr-dict `:` type($dst)";
1162
+
1163
+ let extraClassDeclaration = [{
1164
+ static llvm::Intrinsic::ID getIntrinsicID(NVVM::CVTFP8Type to,
1165
+ NVVM::FPRoundingMode rnd,
1166
+ NVVM::SaturationMode sat,
1167
+ bool hasRelu);
1168
+ }];
1169
+
1170
+ string llvmBuilder = [{
1171
+ auto intId = NVVM::CvtF32x2ToF8x2Op::getIntrinsicID($type, $rnd, $sat, $relu);
1172
+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a, $b});
1173
+ if(op.getDst().getType().isInteger(16))
1174
+ $dst = packedI16;
1175
+ else
1176
+ $dst = builder.CreateBitCast(packedI16,
1177
+ llvm::FixedVectorType::get(llvm::Type::getInt8Ty(builder.getContext()), 2));
1178
+ }];
1179
+
1180
+ let hasVerifier = 1;
1181
+ }
1182
+
1183
+ def NVVM_CvtF16x2ToF8x2Op : NVVM_Op<"cvt.f16x2.to.f8x2"> {
1184
+ let summary = "Convert an f16x2 input to f8x2";
1185
+ let description = [{
1186
+ This Op converts the given f16 inputs in an f16x2 vector to the specified
1187
+ f8 type.
1188
+ The result `dst` is represented as an i16 type or as a vector
1189
+ of two i8 types.
1190
+ If `dst` is returned as an i16 type, the converted values from `a`
1191
+ are packed such that the value converted from the first element of `a`
1192
+ is stored in the upper 8 bits of `dst` and the value converted from the
1193
+ second element of `a` is stored in the lower 8 bits of `dst`.
1194
+ If `dst` is returned as a vector type, each converted value is stored as an
1195
+ i8 element in the vector.
1196
+ The `relu` attribute, when set, lowers to the '.relu' variant of
1197
+ the cvt instruction.
1198
+
1199
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
1200
+ }];
1201
+ let results = (outs AnyTypeOf<[I16, VectorOfLengthAndType<[2], [I8]>]>:$dst);
1202
+ let arguments = (ins
1203
+ CVTFP8TypeAttr:$type,
1204
+ VectorOfLengthAndType<[2], [F16]>:$a,
1205
+ DefaultValuedAttr<BoolAttr, "false">:$relu);
1206
+ let assemblyFormat = "$type $a attr-dict `:` type($a) `->` type($dst)";
1207
+
1208
+ let extraClassDeclaration = [{
1209
+ static llvm::Intrinsic::ID getIntrinsicID(NVVM::CVTFP8Type to,
1210
+ bool hasRelu);
1211
+ }];
1212
+
1213
+ string llvmBuilder = [{
1214
+ auto intId = NVVM::CvtF16x2ToF8x2Op::getIntrinsicID($type, $relu);
1215
+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a});
1216
+ if(op.getDst().getType().isInteger(16))
1217
+ $dst = packedI16;
1218
+ else
1219
+ $dst = builder.CreateBitCast(packedI16,
1220
+ llvm::FixedVectorType::get(llvm::Type::getInt8Ty(builder.getContext()), 2));
1221
+ }];
1222
+
1223
+ let hasVerifier = 1;
1224
+ }
1225
+
1226
+ def NVVM_CvtBF16x2ToF8x2Op : NVVM_Op<"cvt.bf16x2.to.f8x2"> {
1227
+ let summary = "Convert a pair of bf16 inputs to f8x2";
1228
+ let description = [{
1229
+ This Op converts the given bf16 inputs in a bf16x2 vector to the specified
1230
+ f8 type.
1231
+ The result `dst` is represented as an i16 type or as a vector
1232
+ of two i8 types.
1233
+ If `dst` is returned as an i16 type, the converted values from `a`
1234
+ are packed such that the value converted from the first element of `a`
1235
+ is stored in the upper 8 bits of `dst` and the value converted from the
1236
+ second element of `a` is stored in the lower 8 bits of `dst`.
1237
+ If `dst` is returned as a vector type, each converted value is stored as an
1238
+ i8 element in the vector.
1239
+ The `rnd` and `sat` attributes specify the rounding and saturation modes
1240
+ respectively.
1241
+
1242
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
1243
+ }];
1244
+ let results = (outs AnyTypeOf<[I16, VectorOfLengthAndType<[2], [I8]>]>:$dst);
1245
+ let arguments = (ins
1246
+ CVTFP8TypeAttr:$type,
1247
+ VectorOfLengthAndType<[2], [BF16]>:$a,
1248
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
1249
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat);
1250
+ let assemblyFormat = "$type $a attr-dict `:` type($a) `->` type($dst)";
1251
+
1252
+ let extraClassDeclaration = [{
1253
+ static llvm::Intrinsic::ID getIntrinsicID(NVVM::FPRoundingMode rnd,
1254
+ NVVM::SaturationMode sat);
1255
+ }];
1256
+
1257
+ string llvmBuilder = [{
1258
+ auto intId = NVVM::CvtBF16x2ToF8x2Op::getIntrinsicID($rnd, $sat);
1259
+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a});
1260
+ if(op.getDst().getType().isInteger(16))
1261
+ $dst = packedI16;
1262
+ else
1263
+ $dst = builder.CreateBitCast(packedI16,
1264
+ llvm::FixedVectorType::get(llvm::Type::getInt8Ty(builder.getContext()), 2));
1265
+ }];
1266
+
1267
+ let hasVerifier = 1;
1268
+ }
1269
+
1123
1270
//===----------------------------------------------------------------------===//
1124
1271
// NVVM MMA Ops
1125
1272
//===----------------------------------------------------------------------===//
0 commit comments