Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 90 additions & 19 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5234,6 +5234,32 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}

// Scalar FP-to-integer conversions that read one FP register class and write
// a different one. Each instantiation produces four defs, covering
// FPR64->FPR32, FPR16->FPR32, FPR16->FPR64 and FPR32->FPR64. Every def forwards
// rmode/opcode/asm to BaseFPToIntegerUnscaled with an empty pattern list and
// then sets Inst{31} to 0 for an FPR32 destination or 1 for an FPR64 one.
// NOTE(review): the leading 2-bit literal is presumably the source FP type
// field of BaseFPToIntegerUnscaled, which is defined outside this view.
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
  // FPR64 source (double-precision) -> FPR32 destination
  def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode,
                                    FPR64, FPR32, asm, []> {
    let Inst{31} = 0; // 32-bit FPR flag
  }

  // FPR16 source (half-precision) -> FPR32 destination
  def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode,
                                    FPR16, FPR32, asm, []> {
    let Inst{31} = 0; // 32-bit FPR flag
  }

  // FPR16 source (half-precision) -> FPR64 destination
  def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode,
                                    FPR16, FPR64, asm, []> {
    let Inst{31} = 1; // 64-bit FPR flag
  }

  // FPR32 source (single-precision) -> FPR64 destination
  def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode,
                                    FPR32, FPR64, asm, []> {
    let Inst{31} = 1; // 64-bit FPR flag
  }
}

multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
// Scaled half-precision to 32-bit
Expand Down Expand Up @@ -5295,7 +5321,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
//---

let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in
class BaseIntegerToFP<bit isUnsigned,
class BaseIntegerToFP<bits<2> rmode, bits<3> opcode,
RegisterClass srcType, RegisterClass dstType,
Operand immType, string asm, list<dag> pattern>
: I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
Expand All @@ -5305,15 +5331,16 @@ class BaseIntegerToFP<bit isUnsigned,
bits<5> Rn;
bits<6> scale;
let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b00001;
let Inst{16} = isUnsigned;
let Inst{21} = 0b0;
let Inst{20-19} = rmode;
let Inst{18-16} = opcode;
let Inst{15-10} = scale;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

let mayRaiseFPException = 1, Uses = [FPCR] in
class BaseIntegerToFPUnscaled<bit isUnsigned,
class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
RegisterClass srcType, RegisterClass dstType,
ValueType dvt, string asm, SDPatternOperator node>
: I<(outs dstType:$Rd), (ins srcType:$Rn),
Expand All @@ -5323,49 +5350,50 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
bits<5> Rn;
bits<6> scale;
let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b10001;
let Inst{16} = isUnsigned;
let Inst{21} = 0b1;
let Inst{20-19} = rmode;
let Inst{18-16} = opcode;
let Inst{15-10} = 0b000000;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
multiclass IntegerToFP<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node> {
// Unscaled
def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
def UWHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR16, f16, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let Predicates = [HasFullFP16];
}

def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
def UWSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR32, f32, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
def UWDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR64, f64, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}

def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
def UXHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR16, f16, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let Predicates = [HasFullFP16];
}

def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
def UXSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR32, f32, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
def UXDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR64, f64, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}

// Scaled
def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
def SWHri: BaseIntegerToFP<rmode, opcode, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
[(set (f16 FPR16:$Rd),
(fmul (node GPR32:$Rn),
fixedpoint_recip_f16_i32:$scale))]> {
Expand All @@ -5375,7 +5403,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let Predicates = [HasFullFP16];
}

def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
def SWSri: BaseIntegerToFP<rmode, opcode, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
[(set FPR32:$Rd,
(fmul (node GPR32:$Rn),
fixedpoint_recip_f32_i32:$scale))]> {
Expand All @@ -5384,7 +5412,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let scale{5} = 1;
}

def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
def SWDri: BaseIntegerToFP<rmode, opcode, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
[(set FPR64:$Rd,
(fmul (node GPR32:$Rn),
fixedpoint_recip_f64_i32:$scale))]> {
Expand All @@ -5393,7 +5421,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let scale{5} = 1;
}

def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
def SXHri: BaseIntegerToFP<rmode, opcode, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
[(set (f16 FPR16:$Rd),
(fmul (node GPR64:$Rn),
fixedpoint_recip_f16_i64:$scale))]> {
Expand All @@ -5402,15 +5430,15 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let Predicates = [HasFullFP16];
}

def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
def SXSri: BaseIntegerToFP<rmode, opcode, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
[(set FPR32:$Rd,
(fmul (node GPR64:$Rn),
fixedpoint_recip_f32_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
def SXDri: BaseIntegerToFP<rmode, opcode, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
[(set FPR64:$Rd,
(fmul (node GPR64:$Rn),
fixedpoint_recip_f64_i64:$scale))]> {
Expand All @@ -5419,6 +5447,32 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
}
}

// Scalar integer-to-FP conversions whose source operand lives in an FP
// register class (FPR32 or FPR64). Each instantiation produces four defs in
// which source and destination register classes differ: FPR32->FPR16,
// FPR32->FPR64, FPR64->FPR16 and FPR64->FPR32. Every def forwards
// rmode/opcode/asm/node to BaseIntegerToFPUnscaled, then sets Inst{31} from
// the source width and Inst{23-22} from the destination FP format.
// `node` defaults to null_frag when the caller does not supply an operator.
multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
                                 SDPatternOperator node = null_frag> {
  // 32-bit source -> half-precision result
  def HSr : BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR16, f16,
                                    asm, node> {
    let Inst{23-22} = 0b11; // 16-bit FPR flag
    let Inst{31} = 0;       // 32-bit FPR flag
  }

  // 32-bit source -> double-precision result
  def DSr : BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR64, f64,
                                    asm, node> {
    let Inst{23-22} = 0b01; // 64-bit FPR flag
    let Inst{31} = 0;       // 32-bit FPR flag
  }

  // 64-bit source -> half-precision result
  def HDr : BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR16, f16,
                                    asm, node> {
    let Inst{23-22} = 0b11; // 16-bit FPR flag
    let Inst{31} = 1;       // 64-bit FPR flag
  }

  // 64-bit source -> single-precision result
  def SDr : BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR32, f32,
                                    asm, node> {
    let Inst{23-22} = 0b00; // 32-bit FPR flag
    let Inst{31} = 1;       // 64-bit FPR flag
  }
}

//---
// Unscaled integer <-> floating point conversion (i.e. FMOV)
//---
Expand Down Expand Up @@ -13126,3 +13180,20 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
def S : BaseAtomicFPStore<FPR32, 0b10, R, op0, asm>;
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
}

// Base record for the FP8 matrix-multiply vector forms. It derives from
// BaseSIMDThreeSameVectorTied over V128 with an empty pattern list, placing
// `size` in the upper two bits of the 3-bit {size, 0} argument.
//   asm  - mnemonic
//   size - 2-bit value that distinguishes the instantiations
//   kind - arrangement suffix printed on the destination register
class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
  : BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101, V128, asm, ".16b",
                                []> {
  // Replace the inherited assembly string: the destination is printed with
  // `kind`, while both source registers are always printed as .16b.
  let AsmString = asm # "{\t$Rd" # kind
                      # ", $Rn" # ".16b"
                      # ", $Rm" # ".16b"
                      # "}";
}

// Instantiates the two FP8 matrix-multiply variants for mnemonic `asm`.
// Each variant is guarded by HasNEON plus its own feature predicate.
multiclass SIMDThreeSameVectorFP8MatrixMul<string asm> {
  // size = 0b00, destination printed as .8h; needs HasF8F16MM.
  def v8f16 : BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h"> {
    let Predicates = [HasNEON, HasF8F16MM];
  }

  // size = 0b10, destination printed as .4s; needs HasF8F32MM.
  def v4f32 : BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s"> {
    let Predicates = [HasNEON, HasF8F32MM];
  }
}

25 changes: 23 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -4838,6 +4838,19 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// FP-to-integer conversions instantiated through FPToIntegerSIMDScalar.
// Template arguments are <rmode (2 bits), opcode (3 bits), mnemonic>; no
// selection pattern operator is passed. All records in this group are
// guarded by HasNEON and HasFPRCVT.
let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
let Predicates = [HasFullFP16] in {
Expand Down Expand Up @@ -4996,8 +5009,13 @@ def : Pat<(i64 (any_llround f64:$Rn)),
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;

// Integer-to-FP conversions instantiated through IntegerToFPSIMDScalar.
// Template arguments are <rmode (2 bits), opcode (3 bits), mnemonic>; the
// optional `node` argument is omitted, so the multiclass default applies.
// Both records are guarded by HasNEON and HasFPRCVT.
let Predicates = [HasNEON, HasFPRCVT] in {
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
}

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
(SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
Expand Down Expand Up @@ -10547,6 +10565,9 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}

// FMMLA variants from SIMDThreeSameVectorFP8MatrixMul; every record created
// here is marked as using the FPMR and FPCR registers.
let Uses = [FPMR, FPCR] in
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Negative assembler test for FMMLA: with both f8f16mm and f8f32mm enabled,
// every instruction below must be rejected. Each case expects one diagnostic
// on the instruction's own line followed by the echoed source line.
// RUN: not llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm 2>&1 < %s| FileCheck %s

// Destination arrangement .4h is rejected.
fmmla v0.4h, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4h, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Destination arrangement .8s is not a valid vector kind at all.
fmmla v0.8s, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
// CHECK-NEXT: fmmla v0.8s, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Source arrangement .4s is rejected.
fmmla v0.4s, v1.4s, v2.4s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Source arrangement .8h is rejected.
fmmla v0.8h, v1.8h, v2.8h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Destination arrangement .16b is rejected.
fmmla v0.16b, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.16b, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Destination written as .d (no lane count) is rejected.
fmmla v0.d, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.d, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Destination arrangement .2d is rejected.
fmmla v0.2d, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.2d, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Source arrangement .8b with a .8h destination is rejected.
fmmla v0.8h, v1.8b, v2.8b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.8h, v1.8b, v2.8b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// Source arrangement .8b with a .4s destination is rejected.
fmmla v0.4s, v1.8b, v2.8b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4s, v1.8b, v2.8b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
25 changes: 25 additions & 0 deletions llvm/test/MC/AArch64/FP8/fmmla.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Assembler/disassembler test for the two FMMLA forms that take .16b sources:
// the .8h destination form (reported as requiring f8f16mm) and the .4s
// destination form (reported as requiring f8f32mm). The commands below check,
// in order: printed instruction and encoding with both features enabled; the
// error produced when neither feature is enabled; objdump disassembly with
// the features enabled; objdump output with the features disabled
// (<unknown>); and that disassembling the emitted encoding bytes reproduces
// the same instruction text and encoding.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | llvm-objdump -d --mattr=+f8f16mm,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=-f8f16mm,-f8f32mm - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST

// .8h destination form.
fmmla v0.8h, v1.16b, v2.16b
// CHECK-INST: fmmla v0.8h, v1.16b, v2.16b
// CHECK-ENCODING: [0x20,0xec,0x02,0x6e]
// CHECK-ERROR: instruction requires: f8f16mm
// CHECK-UNKNOWN: 6e02ec20 <unknown>

// .4s destination form.
fmmla v0.4s, v1.16b, v2.16b
// CHECK-INST: fmmla v0.4s, v1.16b, v2.16b
// CHECK-ENCODING: [0x20,0xec,0x82,0x6e]
// CHECK-ERROR: instruction requires: f8f32mm
// CHECK-UNKNOWN: 6e82ec20 <unknown>
Loading
Loading