Skip to content

Commit

Permalink
[AArch64] Add convert and multiply-add SIMD&FP assembly/disassembly instructions (#113296)
Browse files Browse the repository at this point in the history

This patch adds the following instructions:
Conversion between floating-point and integer:
  FCVT{AS, AU, MS, MU, NS, NU, PS, PU, ZS, ZU}
  {S,U}CVTF
Advanced SIMD three-register extension:
  FMMLA

The instructions are implemented according to the Arm architecture documentation: https://developer.arm.com/documentation/ddi0602

Co-authored-by: Marian Lukac <marian.lukac@arm.com>
Co-authored-by: Spencer Abson <spencer.abson@arm.com>
  • Loading branch information
CarolineConcatto authored Oct 28, 2024
1 parent 7a71011 commit 1062595
Show file tree
Hide file tree
Showing 12 changed files with 637 additions and 62 deletions.
109 changes: 90 additions & 19 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5234,6 +5234,32 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}

// Floating-point to integer conversions where both the source and the
// destination are FP/SIMD scalar registers of different widths.
//
// Template arguments:
//   rmode, opcode - forwarded unchanged to BaseFPToIntegerUnscaled.
//   asm           - assembly mnemonic.
//
// Each variant passes a 2-bit constant as the first base-class argument
// (0b01 for a double source, 0b11 for half, 0b00 for single); presumably this
// is the FP type field of the encoding - confirm against
// BaseFPToIntegerUnscaled. The trailing `[]` argument is empty, so, assuming it
// is the pattern list, no selection patterns are attached here.
//
// In the def suffixes the first letter matches the destination register class
// and the second the source register class (S = FPR32, D = FPR64, H = FPR16).
// Inst{31} follows the destination width: 0 for FPR32, 1 for FPR64.
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 32-bit SIMD/FPR
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
[]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 64-bit SIMD/FPR
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
[]> {
let Inst{31} = 1; // 64-bit FPR flag
}

// single-precision to 64-bit SIMD/FPR
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
[]> {
let Inst{31} = 1; // 64-bit FPR flag
}
}

multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
// Scaled half-precision to 32-bit
Expand Down Expand Up @@ -5295,7 +5321,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
//---

let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in
class BaseIntegerToFP<bit isUnsigned,
class BaseIntegerToFP<bits<2> rmode, bits<3> opcode,
RegisterClass srcType, RegisterClass dstType,
Operand immType, string asm, list<dag> pattern>
: I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
Expand All @@ -5305,15 +5331,16 @@ class BaseIntegerToFP<bit isUnsigned,
bits<5> Rn;
bits<6> scale;
let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b00001;
let Inst{16} = isUnsigned;
let Inst{21} = 0b0;
let Inst{20-19} = rmode;
let Inst{18-16} = opcode;
let Inst{15-10} = scale;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

let mayRaiseFPException = 1, Uses = [FPCR] in
class BaseIntegerToFPUnscaled<bit isUnsigned,
class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
RegisterClass srcType, RegisterClass dstType,
ValueType dvt, string asm, SDPatternOperator node>
: I<(outs dstType:$Rd), (ins srcType:$Rn),
Expand All @@ -5323,49 +5350,50 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
bits<5> Rn;
bits<6> scale;
let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b10001;
let Inst{16} = isUnsigned;
let Inst{21} = 0b1;
let Inst{20-19} = rmode;
let Inst{18-16} = opcode;
let Inst{15-10} = 0b000000;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
multiclass IntegerToFP<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node> {
// Unscaled
def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
def UWHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR16, f16, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let Predicates = [HasFullFP16];
}

def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
def UWSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR32, f32, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
def UWDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR64, f64, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}

def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
def UXHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR16, f16, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let Predicates = [HasFullFP16];
}

def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
def UXSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR32, f32, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
def UXDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR64, f64, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}

// Scaled
def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
def SWHri: BaseIntegerToFP<rmode, opcode, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
[(set (f16 FPR16:$Rd),
(fmul (node GPR32:$Rn),
fixedpoint_recip_f16_i32:$scale))]> {
Expand All @@ -5375,7 +5403,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let Predicates = [HasFullFP16];
}

def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
def SWSri: BaseIntegerToFP<rmode, opcode, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
[(set FPR32:$Rd,
(fmul (node GPR32:$Rn),
fixedpoint_recip_f32_i32:$scale))]> {
Expand All @@ -5384,7 +5412,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let scale{5} = 1;
}

def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
def SWDri: BaseIntegerToFP<rmode, opcode, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
[(set FPR64:$Rd,
(fmul (node GPR32:$Rn),
fixedpoint_recip_f64_i32:$scale))]> {
Expand All @@ -5393,7 +5421,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let scale{5} = 1;
}

def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
def SXHri: BaseIntegerToFP<rmode, opcode, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
[(set (f16 FPR16:$Rd),
(fmul (node GPR64:$Rn),
fixedpoint_recip_f16_i64:$scale))]> {
Expand All @@ -5402,15 +5430,15 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
let Predicates = [HasFullFP16];
}

def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
def SXSri: BaseIntegerToFP<rmode, opcode, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
[(set FPR32:$Rd,
(fmul (node GPR64:$Rn),
fixedpoint_recip_f32_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
def SXDri: BaseIntegerToFP<rmode, opcode, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
[(set FPR64:$Rd,
(fmul (node GPR64:$Rn),
fixedpoint_recip_f64_i64:$scale))]> {
Expand All @@ -5419,6 +5447,32 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
}
}

// Integer to floating-point conversions where the integer source is held in an
// FP/SIMD scalar register (FPR32 or FPR64) rather than a GPR, and the
// destination is an FP register of a different width.
//
// Template arguments:
//   rmode, opcode - forwarded unchanged to BaseIntegerToFPUnscaled.
//   asm           - assembly mnemonic.
//   node          - selection node forwarded to the base class; defaults to
//                   null_frag.
//
// In the def suffixes the first letter matches the destination register class
// and the second the source register class (H = FPR16, S = FPR32, D = FPR64).
// Inst{31} follows the source width (0 for FPR32, 1 for FPR64) and
// Inst{23-22} follows the destination FP type (0b11 half, 0b00 single,
// 0b01 double).
multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node = null_frag> {
// 32-bit to half-precision
def HSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR16, f16, asm, node> {
let Inst{31} = 0; // 32-bit FPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
}

// 32-bit to double-precision
def DSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR64, f64, asm, node> {
let Inst{31} = 0; // 32-bit FPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}

// 64-bit to half-precision
def HDr: BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR16, f16, asm, node> {
let Inst{31} = 1; // 64-bit FPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
}

// 64-bit to single-precision
def SDr: BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR32, f32, asm, node> {
let Inst{31} = 1; // 64-bit FPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}
}

//---
// Unscaled integer <-> floating point conversion (i.e. FMOV)
//---
Expand Down Expand Up @@ -13126,3 +13180,20 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
def S : BaseAtomicFPStore<FPR32, 0b10, R, op0, asm>;
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
}

// Base class for the FP8 matrix multiply instructions, built on
// BaseSIMDThreeSameVectorTied over V128 registers with an empty pattern list.
// NOTE(review): the parent's name suggests $Rd is a tied (read-modify-write)
// operand - confirm against BaseSIMDThreeSameVectorTied.
//
// Template arguments:
//   asm  - assembly mnemonic.
//   size - 2-bit value; concatenated with a trailing 0 bit to form the 3-bit
//          argument `{size, 0}` passed to the parent.
//   kind - arrangement suffix printed on the destination register.
class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
: BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101,
V128, asm, ".16b", []> {
// Override the assembly string: the destination uses `kind`, while both
// source operands are always printed with the ".16b" arrangement.
let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn", ".16b",
", $Rm", ".16b", "}");
}

// Instantiates the two FP8 matrix multiply variants for mnemonic `asm`:
//   v8f16 - size 0b00, destination arrangement ".8h", requires NEON and F8F16MM.
//   v4f32 - size 0b10, destination arrangement ".4s", requires NEON and F8F32MM.
// Each variant carries its own predicate list because the two destination
// types are gated by different features.
multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h">{
let Predicates = [HasNEON, HasF8F16MM];
}
def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s">{
let Predicates = [HasNEON, HasF8F32MM];
}
}

25 changes: 23 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -4838,6 +4838,19 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// FP-to-integer conversions between FP/SIMD scalar registers of different
// widths. All of them require both NEON and FPRCVT.
// Arguments to FPToIntegerSIMDScalar are <rmode, opcode, mnemonic>; every
// instruction below uses a distinct (rmode, opcode) pair.
// NOTE(review): the individual encoding values should be confirmed against the
// Arm ISA documentation; they cannot be checked from this file alone.
let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
let Predicates = [HasFullFP16] in {
Expand Down Expand Up @@ -4996,8 +5009,13 @@ def : Pat<(i64 (any_llround f64:$Rn)),
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;

// Signed/unsigned integer-to-FP conversions whose integer source lives in an
// FP/SIMD scalar register. Both require NEON and FPRCVT.
// Arguments are <rmode, opcode, mnemonic>; `node` is left at its null_frag
// default, so no selection node is supplied for these variants.
let Predicates = [HasNEON, HasFPRCVT] in {
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
}

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
(SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
Expand Down Expand Up @@ -10547,6 +10565,9 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}

// FP8 matrix multiply (fmmla). Both variants produced by the multiclass are
// marked as reading the FPMR and FPCR registers.
let Uses = [FPMR, FPCR] in
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Negative assembler test for fmmla: with both f8f16mm and f8f32mm enabled,
// every instruction below uses an operand arrangement that must be rejected.
// Each case expects exactly one diagnostic on the instruction's own line,
// followed by the echoed source line, and no second diagnostic for that line.
// RUN: not llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm 2>&1 < %s| FileCheck %s

fmmla v0.4h, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4h, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.8s, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
// CHECK-NEXT: fmmla v0.8s, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.4s, v1.4s, v2.4s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.8h, v1.8h, v2.8h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.16b, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.16b, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.d, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.d, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.2d, v1.16b, v2.16b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.2d, v1.16b, v2.16b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.8h, v1.8b, v2.8b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.8h, v1.8b, v2.8b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.4s, v1.8b, v2.8b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4s, v1.8b, v2.8b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
25 changes: 25 additions & 0 deletions llvm/test/MC/AArch64/FP8/fmmla.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Positive assembler/disassembler test for the two fmmla forms (.8h and .4s
// destination, .16b sources). The invocations below cover, in order:
//   1. assembling with both features enabled and checking text and encoding;
//   2. assembling with no features, expecting a "requires" error per form;
//   3. object-file round trip through llvm-objdump with the features enabled;
//   4. the same round trip with the features disabled, expecting <unknown>;
//   5. re-disassembling the emitted encodings and checking they re-encode
//      identically.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | llvm-objdump -d --mattr=+f8f16mm,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=-f8f16mm,-f8f32mm - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST

fmmla v0.8h, v1.16b, v2.16b
// CHECK-INST: fmmla v0.8h, v1.16b, v2.16b
// CHECK-ENCODING: [0x20,0xec,0x02,0x6e]
// CHECK-ERROR: instruction requires: f8f16mm
// CHECK-UNKNOWN: 6e02ec20 <unknown>

fmmla v0.4s, v1.16b, v2.16b
// CHECK-INST: fmmla v0.4s, v1.16b, v2.16b
// CHECK-ENCODING: [0x20,0xec,0x82,0x6e]
// CHECK-ERROR: instruction requires: f8f32mm
// CHECK-UNKNOWN: 6e82ec20 <unknown>
Loading

0 comments on commit 1062595

Please sign in to comment.