-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV] Implement base scheduling model for andes 45 series processor. #141008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This patch implements a scheduling model for the IMAFD and Zb extensions. The latency and throughput of all instructions, except load/store, are measured by llvm-exegesis. A scheduling model for the V and other extensions will be added in a follow-up patch.
@llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) Changes: This patch implements a scheduling model for the IMAFD and Zb extensions. The latency and throughput of all instructions, except load/store, are measured by llvm-exegesis. A scheduling model for the V and other extensions will be added in a follow-up patch. Patch is 28.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141008.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index e322ae340349c..b24d8637cb27f 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -51,6 +51,7 @@ include "RISCVMacroFusion.td"
//===----------------------------------------------------------------------===//
// RISC-V Scheduling Models
//===----------------------------------------------------------------------===//
+include "RISCVSchedAndes45.td"
include "RISCVSchedGenericOOO.td"
include "RISCVSchedMIPSP8700.td"
include "RISCVSchedRocket.td"
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 735997de94e81..47968c7afcd96 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -649,7 +649,7 @@ def RP2350_HAZARD3 : RISCVProcessorModel<"rp2350-hazard3",
FeatureStdExtZcmp]>;
def ANDES_N45 : RISCVProcessorModel<"andes-n45",
- NoSchedModel,
+ Andes45Model,
[Feature32Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
@@ -662,7 +662,7 @@ def ANDES_N45 : RISCVProcessorModel<"andes-n45",
FeatureStdExtB]>;
def ANDES_NX45 : RISCVProcessorModel<"andes-nx45",
- NoSchedModel,
+ Andes45Model,
[Feature64Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
@@ -675,7 +675,7 @@ def ANDES_NX45 : RISCVProcessorModel<"andes-nx45",
FeatureStdExtB]>;
def ANDES_A45 : RISCVProcessorModel<"andes-a45",
- NoSchedModel,
+ Andes45Model,
[Feature32Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
@@ -688,7 +688,7 @@ def ANDES_A45 : RISCVProcessorModel<"andes-a45",
FeatureStdExtB]>;
def ANDES_AX45 : RISCVProcessorModel<"andes-ax45",
- NoSchedModel,
+ Andes45Model,
[Feature64Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
new file mode 100644
index 0000000000000..f42b48a1d8b83
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -0,0 +1,339 @@
+//==- RISCVSchedAndes45.td - Andes45 Scheduling Definitions --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+// FIXME: Implement scheduling model for V and other extensions.
+def Andes45Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Andes45 is an in-order processor
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle
+ let LoadLatency = 2;
+ let MispredictPenalty = 5;
+ let CompleteModel = 0; // V and other extensions are not modeled yet
+}
+
+let SchedModel = Andes45Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+//===----------------------------------------------------------------------===//
+// Andes 45 series CPU
+// - 2 Integer Arithmetic and Logical Units (ALU)
+// - Multiply / Divide Unit (MDU)
+// - Load Store Unit (LSU)
+// - Control and Status Register Unit (CSR)
+// - Floating Point Multiply-Accumulate Unit (FMAC)
+// - Floating Point Divide / SQRT Unit (FDIV)
+// - Floating Point Move Unit (FMV)
+// - Floating Point Misc Unit (FMISC)
+//===----------------------------------------------------------------------===//
+
+let BufferSize = 0 in { // No buffering in front of these units (in-order core)
+def Andes45ALU : ProcResource<2>; // 2 integer arithmetic and logical units
+def Andes45MDU : ProcResource<1>; // Multiply / divide unit
+def Andes45LSU : ProcResource<1>; // Load store unit
+def Andes45CSR : ProcResource<1>; // Control and status register unit
+
+def Andes45FMAC : ProcResource<1>; // Floating point multiply-accumulate unit
+def Andes45FDIV : ProcResource<1>; // Floating point divide / sqrt unit
+def Andes45FMV : ProcResource<1>; // Floating point move unit
+def Andes45FMISC : ProcResource<1>; // Floating point misc unit
+}
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU, [Andes45ALU]>;
+def : WriteRes<WriteIALU32, [Andes45ALU]>;
+def : WriteRes<WriteShiftImm, [Andes45ALU]>;
+def : WriteRes<WriteShiftImm32, [Andes45ALU]>;
+def : WriteRes<WriteShiftReg, [Andes45ALU]>;
+def : WriteRes<WriteShiftReg32, [Andes45ALU]>;
+
+// Branching
+def : WriteRes<WriteJmp, [Andes45ALU]>;
+def : WriteRes<WriteJal, [Andes45ALU]>;
+def : WriteRes<WriteJalr, [Andes45ALU]>;
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [Andes45MDU]>;
+def : WriteRes<WriteIMul32, [Andes45MDU]>;
+}
+
+// Integer division
+let Latency = 39, ReleaseAtCycles = [39] in {
+def : WriteRes<WriteIDiv, [Andes45MDU]>;
+def : WriteRes<WriteIDiv32, [Andes45MDU]>;
+}
+
+// Integer remainder
+let Latency = 39, ReleaseAtCycles = [39] in {
+def : WriteRes<WriteIRem, [Andes45MDU]>;
+def : WriteRes<WriteIRem32, [Andes45MDU]>;
+}
+
+// Memory
+let Latency = 5 in {
+def : WriteRes<WriteLDB, [Andes45LSU]>;
+def : WriteRes<WriteLDH, [Andes45LSU]>;
+def : WriteRes<WriteFLD16, [Andes45LSU]>;
+}
+
+let Latency = 3 in {
+def : WriteRes<WriteLDW, [Andes45LSU]>;
+def : WriteRes<WriteLDD, [Andes45LSU]>;
+def : WriteRes<WriteFLD32, [Andes45LSU]>;
+def : WriteRes<WriteFLD64, [Andes45LSU]>;
+}
+
+let Latency = 1 in {
+def : WriteRes<WriteSTB, [Andes45LSU]>;
+def : WriteRes<WriteSTH, [Andes45LSU]>;
+def : WriteRes<WriteSTW, [Andes45LSU]>;
+def : WriteRes<WriteSTD, [Andes45LSU]>;
+def : WriteRes<WriteFST16, [Andes45LSU]>;
+def : WriteRes<WriteFST32, [Andes45LSU]>;
+def : WriteRes<WriteFST64, [Andes45LSU]>;
+}
+
+// Atomic Memory
+let Latency = 9 in {
+def : WriteRes<WriteAtomicW, [Andes45LSU]>;
+def : WriteRes<WriteAtomicD, [Andes45LSU]>;
+def : WriteRes<WriteAtomicLDW, [Andes45LSU]>;
+def : WriteRes<WriteAtomicLDD, [Andes45LSU]>;
+}
+
+let Latency = 3 in {
+def : WriteRes<WriteAtomicSTW, [Andes45LSU]>;
+def : WriteRes<WriteAtomicSTD, [Andes45LSU]>;
+}
+
+// FMAC
+let Latency = 4 in {
+def : WriteRes<WriteFAdd16, [Andes45FMAC]>;
+def : WriteRes<WriteFAdd32, [Andes45FMAC]>;
+def : WriteRes<WriteFAdd64, [Andes45FMAC]>;
+def : WriteRes<WriteFMul16, [Andes45FMAC]>;
+def : WriteRes<WriteFMul32, [Andes45FMAC]>;
+def : WriteRes<WriteFMul64, [Andes45FMAC]>;
+def : WriteRes<WriteFMA16, [Andes45FMAC]>;
+def : WriteRes<WriteFMA32, [Andes45FMAC]>;
+def : WriteRes<WriteFMA64, [Andes45FMAC]>;
+}
+
+// FDIV
+let Latency = 12, ReleaseAtCycles = [12] in
+def : WriteRes<WriteFDiv16, [Andes45FDIV]>;
+let Latency = 11, ReleaseAtCycles = [11] in
+def : WriteRes<WriteFSqrt16, [Andes45FDIV]>;
+
+let Latency = 19, ReleaseAtCycles = [19] in
+def : WriteRes<WriteFDiv32, [Andes45FDIV]>;
+let Latency = 18, ReleaseAtCycles = [18] in
+def : WriteRes<WriteFSqrt32, [Andes45FDIV]>;
+
+let Latency = 33, ReleaseAtCycles = [33] in
+def : WriteRes<WriteFDiv64, [Andes45FDIV]>;
+let Latency = 32, ReleaseAtCycles = [32] in
+def : WriteRes<WriteFSqrt64, [Andes45FDIV]>;
+
+// FMV
+def : WriteRes<WriteFSGNJ16, [Andes45FMV]>;
+def : WriteRes<WriteFSGNJ32, [Andes45FMV]>;
+def : WriteRes<WriteFSGNJ64, [Andes45FMV]>;
+def : WriteRes<WriteFMovF16ToI16, [Andes45FMV]>;
+def : WriteRes<WriteFMovI16ToF16, [Andes45FMV]>;
+def : WriteRes<WriteFMovF32ToI32, [Andes45FMV]>;
+def : WriteRes<WriteFMovI32ToF32, [Andes45FMV]>;
+def : WriteRes<WriteFMovF64ToI64, [Andes45FMV]>;
+def : WriteRes<WriteFMovI64ToF64, [Andes45FMV]>;
+
+// FMISC
+let Latency = 2 in {
+def : WriteRes<WriteFMinMax16, [Andes45FMISC]>;
+def : WriteRes<WriteFMinMax32, [Andes45FMISC]>;
+def : WriteRes<WriteFMinMax64, [Andes45FMISC]>;
+def : WriteRes<WriteFClass16, [Andes45FMISC]>;
+def : WriteRes<WriteFClass32, [Andes45FMISC]>;
+def : WriteRes<WriteFClass64, [Andes45FMISC]>;
+def : WriteRes<WriteFCmp16, [Andes45FMISC]>;
+def : WriteRes<WriteFCmp32, [Andes45FMISC]>;
+def : WriteRes<WriteFCmp64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToI32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToI64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToI32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToI64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToI32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToI64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI32ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI32ToF32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI32ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI64ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI64ToF32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI64ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToF32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToF32, [Andes45FMISC]>;
+}
+
+// Bitmanip
+// Zba extension
+def : WriteRes<WriteSHXADD, [Andes45ALU]>;
+def : WriteRes<WriteSHXADD32, [Andes45ALU]>;
+
+// Zbb extension
+def : WriteRes<WriteRotateImm, [Andes45ALU]>;
+def : WriteRes<WriteRotateImm32, [Andes45ALU]>;
+def : WriteRes<WriteRotateReg, [Andes45ALU]>;
+def : WriteRes<WriteRotateReg32, [Andes45ALU]>;
+def : WriteRes<WriteREV8, [Andes45ALU]>;
+def : WriteRes<WriteORCB, [Andes45ALU]>;
+def : WriteRes<WriteIMinMax, [Andes45ALU]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteCLZ, [Andes45ALU]>;
+def : WriteRes<WriteCLZ32, [Andes45ALU]>;
+def : WriteRes<WriteCTZ, [Andes45ALU]>;
+def : WriteRes<WriteCTZ32, [Andes45ALU]>;
+def : WriteRes<WriteCPOP, [Andes45ALU]>;
+def : WriteRes<WriteCPOP32, [Andes45ALU]>;
+}
+
+// Zbc extension
+let Latency = 3 in
+def : WriteRes<WriteCLMUL, [Andes45ALU]>;
+
+// Zbs extension
+def : WriteRes<WriteSingleBit, [Andes45ALU]>;
+def : WriteRes<WriteSingleBitImm, [Andes45ALU]>;
+def : WriteRes<WriteBEXT, [Andes45ALU]>;
+def : WriteRes<WriteBEXTI, [Andes45ALU]>;
+
+// Others
+def : WriteRes<WriteCSR, [Andes45CSR]>;
+def : WriteRes<WriteNop, []>;
+
+//===----------------------------------------------------------------------===//
+
+// Bypass and advance
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShiftImm, 0>;
+def : ReadAdvance<ReadShiftImm32, 0>;
+def : ReadAdvance<ReadShiftReg, 0>;
+def : ReadAdvance<ReadShiftReg32, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIRem, 0>;
+def : ReadAdvance<ReadIRem32, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd16, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul16, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
+def : ReadAdvance<ReadFDiv16, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt16, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFSGNJ16, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMovF16ToI16, 0>;
+def : ReadAdvance<ReadFMovI16ToF16, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFMinMax16, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFClass16, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+def : ReadAdvance<ReadFCmp16, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtF16ToI32, 0>;
+def : ReadAdvance<ReadFCvtF16ToI64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF16, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF16, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF16ToF32, 0>;
+def : ReadAdvance<ReadFCvtF16ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF16, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF16, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadSHXADD, 0>;
+def : ReadAdvance<ReadSHXADD32, 0>;
+def : ReadAdvance<ReadRotateImm, 1>;
+def : ReadAdvance<ReadRotateImm32, 1>;
+def : ReadAdvance<ReadRotateReg, 1>;
+def : ReadAdvance<ReadRotateReg32, 1>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : ReadAdvance<ReadREV8, 0>;
+def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
+def : ReadAdvance<ReadCLMUL, 0>;
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedQ;
+defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedXsfvcp;
+defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedZbkb;
+defm : UnsupportedSchedZbkx;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZvk;
+}
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
new file mode 100644
index 0000000000000..d1ab4b3b6a7e0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s
+
+// Can be dispatched in the same cycle if different functional units are used.
+fadd.s ft0, fa0, fa1
+fdiv.s ft0, fa0, fa1
+
+// FMAC
+fadd.s ft0, fa0, fa1
+fmul.s ft0, fa0, fa1
+fmadd.s ft0, fa0, fa1, fa2
+
+// FDIV
+fdiv.s ft0, fa0, fa1
+fsqrt.s ft0, fa0
+
+// FMV
+fsgnj.s ft0, fa0, fa1
+fmv.x.w a0, fa0
+
+// FMISC
+fmin.s ft0, fa0, fa1
+fclass.s a0, fa0
+feq.s a0, fa0, fa1
+fcvt.s.w ft0, a0
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 13
+# CHECK-NEXT: Total Cycles: 63
+# CHECK-NEXT: Total uOps: 13
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 56.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 4 1.00 fadd.s ft0, fa0, fa1
+# CHECK-NEXT: 1 19 19.00 fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: 1 4 1.00 fadd.s ft0, fa0, fa1
+# CHECK-NEXT: 1 4 1.00 fmul.s ft0, fa0, fa1
+# CHECK-NEXT: 1 4 1.00 fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: 1 19 19.00 fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: 1 18 18.00 fsqrt.s ft0, fa0
+# CHECK-NEXT: 1 1 1.00 fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: 1 1 1.00 fmv.x.w a0, fa0
+# CHECK-NEXT: 1 2 1.00 fmin.s ft0, fa0, fa1
+# CHECK-NEXT: 1 2 1.00 fclass.s a0, fa0
+# CHECK-NEXT: 1 2 1.00 feq.s a0, fa0, fa1
+# CHECK-NEXT: 1 2 1.00 fcvt.s.w ft0, a0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - Andes45ALU
+# CHECK-NEXT: [0.1] - Andes45ALU
+# CHECK-NEXT: [1] - Andes45CSR
+# CHECK-NEXT: [2] - Andes45FDIV
+# CHECK-NEXT: [3] - Andes45FMAC
+# CHECK-NEXT: [4] - Andes45FMISC
+# CHECK-NEXT: [5] - Andes45FMV
+# CHECK-NEXT: [6] - Andes45LSU
+# CHECK-NEXT: [7] - Andes45MDU
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 56.00 4.00 4.00 2.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1
+# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - fmul.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: - - - 18.00 - - - - - fsqrt.s ft0, fa0
+# CHECK-NEXT: - - - - - - 1.00 - - fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - - - 1.00 - - fmv.x.w a0, fa0
+# CHECK-NEXT: - - - - - 1.00 - - - fmin.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - - 1.00 - - - fclass.s a0, fa0
+# CHECK-NEXT: - - - - - 1.00 - - - feq.s a0, fa0, fa1
+# CHECK-NEXT: - - - - - 1.00 - - - fcvt.s.w ft0, a0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeE. . . . . . . . . . . . . fadd.s ft0, fa0, fa1
+# CHECK-NEXT: [0,1] DeeeeeeeeeeeeeeeeeeE. . . . . . . . . . fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: [0,2] . . . DeeeE. . . . . . . . . . fadd.s ft0, fa0, fa1
+# CHECK-NEXT: [0,3] . . . .DeeeE . . . . . . . . . fmul.s ft0, fa0, fa1
+# CHECK-NEXT: [0,4] . . . . DeeeE . . . . . . . . . fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: [0,5] . . . . DeeeeeeeeeeeeeeeeeeE . . . . . . fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeeeeeeeeE . . fsqrt.s ft0, fa0
+# CHECK-NEXT: [0,7] . . . . . . . . . . . DE . . fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: [0,8] . . . . . . . . . . . .DE . . fmv.x.w a0, fa0
+# CHECK-NEXT: [0,9] . . . . . . . . . . . .DeE . . fmin.s ft0, fa0, fa1
+# CHECK-NEXT: [0,10] . . . . ...
[truncated]
|
This patch implements a scheduling model for the IMAFD and Zb extensions. The latency and throughput of all instructions, except load/store, are measured by llvm-exegesis.
A scheduling model for the V and other extensions will be added in a follow-up patch.