diff --git a/README.md b/README.md index 6b48f089b..e63003cfc 100644 --- a/README.md +++ b/README.md @@ -182,8 +182,13 @@ First, you have to generate the SystemVerilog package and C header file of the c make mcu-gen ``` -To change the default cpu type (i.e., cv32e20), the default bus type (i.e., onetoM), -the default continuous memory size (i.e., 2 continuous banks) or the default interleaved memory size (i.e., 0 interleaved banks): +By default, `X-HEEP` deploys the [cv32e20](https://github.com/openhwgroup/cve2) RISC-V CPU. +Other supported CPUs are: the [cv32e40p](https://github.com/openhwgroup/cv32e40p), [cv32e40x](https://github.com/openhwgroup/cv32e40x), and the [cv32e40px](https://github.com/esl-epfl/cv32e40px). +The default bus type of `X-HEEP` is a single-master-at-a-time architecture (called `onetoM`), but the cross-bar architecture is also supported by setting +the bus to `NtoM`. Also, the user can select the number of 32kB banks addressed in continuous mode and/or the interleaved mode. +By default, `X-HEEP` is generated with 2 continuous banks and 0 interleaved banks. + +Below is an example that changes the default configuration: ``` make mcu-gen CPU=cv32e40p BUS=NtoM MEMORY_BANKS=12 MEMORY_BANKS_IL=4 diff --git a/core-v-mini-mcu.core b/core-v-mini-mcu.core index 7f9af49c0..52b58b04a 100644 --- a/core-v-mini-mcu.core +++ b/core-v-mini-mcu.core @@ -73,6 +73,7 @@ filesets: tb-utils: depend: - x-heep::tb-utils + - x-heep::tb-fpu-utils files_verilator_waiver: depend: @@ -230,6 +231,12 @@ parameters: description: | Enables testbench JTAG DIPs. Admitted values: 1|0. default: 0 + X_EXT: + datatype: int + paramtype: vlogparam + description: | + Enables CORE-V-XIF interface for the CV32E40X and CV32E40PX cores. Admitted values: 1|0. 
+ default: 0 USE_EXTERNAL_DEVICE_EXAMPLE: datatype: int paramtype: vlogparam @@ -324,6 +331,7 @@ targets: - COREV_PULP - FPU - JTAG_DPI + - X_EXT - USE_EXTERNAL_DEVICE_EXAMPLE - USE_UPF - REMOVE_OBI_FIFO @@ -382,6 +390,7 @@ targets: parameters: - COREV_PULP - FPU + - X_EXT - SYNTHESIS=true - REMOVE_OBI_FIFO tools: @@ -401,6 +410,7 @@ targets: parameters: - COREV_PULP - FPU + - X_EXT - SYNTHESIS=true - REMOVE_OBI_FIFO tools: @@ -420,6 +430,7 @@ targets: parameters: - COREV_PULP - FPU + - X_EXT - SYNTHESIS=true - REMOVE_OBI_FIFO tools: @@ -435,6 +446,7 @@ targets: parameters: - PRIM_DEFAULT_IMPL=prim_pkg::your_target_technology - COREV_PULP + - X_EXT - FPU - SYNTHESIS=true - REMOVE_OBI_FIFO @@ -457,6 +469,7 @@ targets: parameters: - COREV_PULP - FPU + - X_EXT - SYNTHESIS=true - VERILATOR=true - REMOVE_OBI_FIFO diff --git a/docs/source/How_to/eXtendingHEEP.md b/docs/source/How_to/eXtendingHEEP.md index c71c6c3af..ad8805b16 100644 --- a/docs/source/How_to/eXtendingHEEP.md +++ b/docs/source/How_to/eXtendingHEEP.md @@ -12,6 +12,18 @@ Here you can find a list of `X-HEEP` based open-source examples. If you want to * [F-HEEP](https://github.com/davidmallasen/F-HEEP): System integrating [fpu_ss](https://github.com/pulp-platform/fpu_ss) into X-HEEP via the eXtension interface and cv32e40x. +In addition, the `X-HEEP` testbench has been extended with a `DMA`, dummy `PERIPHERALs` (including the `FLASH`), and a CORE-V-XIF compatible co-processor +implementing the `RV32F` RISC-V ISA. This has been done to help us maintain and verify the extension interface. 
+ +If you want to try the co-processor with a CORE-V-XIF compatible CPU such as the `cv32e40px`, do as follows: + +``` +make mcu-gen CPU=cv32e40px +make verilator-sim FUSESOC_PARAM="--X_EXT=1" +make app PROJECT=example_matfadd ARCH=rv32imfc +./Vtestharness +firmware=../../../sw/build/main.hex +``` + ## Vendorizing X-HEEP In order to vendorize `X-HEEP` create inside your repository's base directory (`BASE`) a `hw/vendor` directory containing a file named `esl_epfl_x_heep.vendor.hjson`: diff --git a/hw/core-v-mini-mcu/cpu_subsystem.sv b/hw/core-v-mini-mcu/cpu_subsystem.sv index 6e6d5a8b3..bbb62d753 100644 --- a/hw/core-v-mini-mcu/cpu_subsystem.sv +++ b/hw/core-v-mini-mcu/cpu_subsystem.sv @@ -175,7 +175,7 @@ module cpu_subsystem // instantiate the core cv32e40x_core #( .NUM_MHPMCOUNTERS(NUM_MHPMCOUNTERS), - .X_EXT(X_EXT), + .X_EXT(X_EXT[0]), .DBG_NUM_TRIGGERS('0) ) cv32e40x_core_i ( // Clock and reset @@ -307,30 +307,35 @@ module cpu_subsystem // CORE-V-XIF // Compressed interface - .x_compressed_valid_o(), - .x_compressed_ready_i(), - .x_compressed_req_o(), - .x_compressed_resp_i('0), + .x_compressed_valid_o(xif_compressed_if.compressed_valid), + .x_compressed_ready_i(xif_compressed_if.compressed_ready), + .x_compressed_req_o (xif_compressed_if.compressed_req), + .x_compressed_resp_i (xif_compressed_if.compressed_resp), + // Issue Interface - .x_issue_valid_o(), - .x_issue_ready_i(), - .x_issue_req_o(), - .x_issue_resp_i('0), + .x_issue_valid_o(xif_issue_if.issue_valid), + .x_issue_ready_i(xif_issue_if.issue_ready), + .x_issue_req_o (xif_issue_if.issue_req), + .x_issue_resp_i (xif_issue_if.issue_resp), + // Commit Interface - .x_commit_valid_o(), - .x_commit_o(), - // Memory request/response Interface - .x_mem_valid_i(), - .x_mem_ready_o(), - .x_mem_req_i('0), - .x_mem_resp_o(), + .x_commit_valid_o(xif_commit_if.commit_valid), + .x_commit_o(xif_commit_if.commit), + + // Memory Request/Response Interface + .x_mem_valid_i(xif_mem_if.mem_valid), + 
.x_mem_ready_o(xif_mem_if.mem_ready), + .x_mem_req_i (xif_mem_if.mem_req), + .x_mem_resp_o (xif_mem_if.mem_resp), + // Memory Result Interface - .x_mem_result_valid_o(), - .x_mem_result_o(), + .x_mem_result_valid_o(xif_mem_result_if.mem_result_valid), + .x_mem_result_o(xif_mem_result_if.mem_result), + // Result Interface - .x_result_valid_i(), - .x_result_ready_o(), - .x_result_i('0), + .x_result_valid_i(xif_result_if.result_valid), + .x_result_ready_o(xif_result_if.result_ready), + .x_result_i(xif_result_if.result), .irq_i (irq_i), .irq_ack_o(irq_ack_o), diff --git a/hw/ip_examples/fpu_ss_wrapper/fpu_ss_wrapper.sv b/hw/ip_examples/fpu_ss_wrapper/fpu_ss_wrapper.sv new file mode 100644 index 000000000..7c3b7d228 --- /dev/null +++ b/hw/ip_examples/fpu_ss_wrapper/fpu_ss_wrapper.sv @@ -0,0 +1,84 @@ +// Copyright 2023 David Mallasén Quintana +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +// may not use this file except in compliance with the License, or, at your +// option, the Apache License version 2.0. You may obtain a copy of the +// License at https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. 
+// +// Author: David Mallasén +// Description: Wrapper for the fpu_ss, adapting the XIF signals + +module fpu_ss_wrapper #( + parameter PULP_ZFINX = 0, + parameter INPUT_BUFFER_DEPTH = 1, + parameter OUT_OF_ORDER = 0, + parameter FORWARDING = 1, + parameter fpnew_pkg::fpu_features_t FPU_FEATURES = fpu_ss_pkg::FPU_FEATURES, + parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = fpu_ss_pkg::FPU_IMPLEMENTATION +) ( + // Clock and Reset + input logic clk_i, + input logic rst_ni, + + // eXtension interface + if_xif.coproc_compressed xif_compressed_if, + if_xif.coproc_issue xif_issue_if, + if_xif.coproc_commit xif_commit_if, + if_xif.coproc_mem xif_mem_if, + if_xif.coproc_mem_result xif_mem_result_if, + if_xif.coproc_result xif_result_if +); + + fpu_ss #( + .PulpDivsqrt(1'b0), + .PULP_ZFINX(PULP_ZFINX), + .INPUT_BUFFER_DEPTH(INPUT_BUFFER_DEPTH), + .OUT_OF_ORDER(OUT_OF_ORDER), + .FORWARDING(FORWARDING), + .FPU_FEATURES(FPU_FEATURES), + .FPU_IMPLEMENTATION(FPU_IMPLEMENTATION) + ) fpu_ss_i ( + // Clock and reset + .clk_i (clk_i), + .rst_ni(rst_ni), + + // Compressed Interface + .x_compressed_valid_i(xif_compressed_if.compressed_valid), + .x_compressed_ready_o(xif_compressed_if.compressed_ready), + .x_compressed_req_i (xif_compressed_if.compressed_req), + .x_compressed_resp_o (xif_compressed_if.compressed_resp), + + // Issue Interface + .x_issue_valid_i(xif_issue_if.issue_valid), + .x_issue_ready_o(xif_issue_if.issue_ready), + .x_issue_req_i (xif_issue_if.issue_req), + .x_issue_resp_o (xif_issue_if.issue_resp), + + // Commit Interface + .x_commit_valid_i(xif_commit_if.commit_valid), + .x_commit_i(xif_commit_if.commit), + + // Memory Request/Response Interface + .x_mem_valid_o(xif_mem_if.mem_valid), + .x_mem_ready_i(xif_mem_if.mem_ready), + .x_mem_req_o (xif_mem_if.mem_req), + .x_mem_resp_i (xif_mem_if.mem_resp), + + // Memory Result Interface + .x_mem_result_valid_i(xif_mem_result_if.mem_result_valid), + .x_mem_result_i(xif_mem_result_if.mem_result), + + // Result 
Interface + .x_result_valid_o(xif_result_if.result_valid), + .x_result_ready_i(xif_result_if.result_ready), + .x_result_o(xif_result_if.result) + ); + +endmodule diff --git a/hw/vendor/esl_epfl_cv32e40px.core b/hw/vendor/esl_epfl_cv32e40px.core index 1540518e8..0373fed05 100644 --- a/hw/vendor/esl_epfl_cv32e40px.core +++ b/hw/vendor/esl_epfl_cv32e40px.core @@ -38,12 +38,12 @@ filesets: - esl_epfl_cv32e40px/rtl/cv32e40px_sleep_unit.sv - esl_epfl_cv32e40px/rtl/cv32e40px_core.sv - esl_epfl_cv32e40px/rtl/cv32e40px_apu_disp.sv + - esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv - esl_epfl_cv32e40px/rtl/cv32e40px_fifo.sv - esl_epfl_cv32e40px/rtl/cv32e40px_fp_wrapper.sv - esl_epfl_cv32e40px/rtl/cv32e40px_top.sv file_type: systemVerilogSource - files_clk_gate: files: - esl_epfl_cv32e40px/bhv/cv32e40px_sim_clock_gate.sv diff --git a/hw/vendor/esl_epfl_cv32e40px.lock.hjson b/hw/vendor/esl_epfl_cv32e40px.lock.hjson index 30b0a1b2d..39f77e7ab 100644 --- a/hw/vendor/esl_epfl_cv32e40px.lock.hjson +++ b/hw/vendor/esl_epfl_cv32e40px.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/esl-epfl/cv32e40px.git - rev: 326891e200365b2558511e6deb8e444eb8daf977 + rev: acf3442b414725191fb7a2027facd0b5b4123c1c } } diff --git a/hw/vendor/esl_epfl_cv32e40px.vendor.hjson b/hw/vendor/esl_epfl_cv32e40px.vendor.hjson index d99260589..b261db045 100644 --- a/hw/vendor/esl_epfl_cv32e40px.vendor.hjson +++ b/hw/vendor/esl_epfl_cv32e40px.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/esl-epfl/cv32e40px.git", - rev: "326891e200365b2558511e6deb8e444eb8daf977", + rev: "acf3442b414725191fb7a2027facd0b5b4123c1c", }, patch_dir: "patches/esl_epfl_cv32e40px", diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv index f0fc87967..fc301b914 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv @@ -463,6 +463,7 @@ module cv32e40px_core // // 
////////////////////////////////////////////////// cv32e40px_if_stage #( + .COREV_X_IF (COREV_X_IF), .COREV_PULP (COREV_PULP), .PULP_OBI (PULP_OBI), .PULP_SECURE(PULP_SECURE), diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv index 50df28a0b..4c513d272 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv @@ -1015,7 +1015,7 @@ module cv32e40px_id_stage logic [1:0] x_mem_data_type_id; generate - if (COREV_X_IF) begin : gen_x_disp + if (COREV_X_IF != 0) begin : gen_x_disp //////////////////////////////////////// // __ __ ____ ___ ____ ____ // // \ \/ / | _ \_ _/ ___|| _ \ // @@ -1138,13 +1138,13 @@ module cv32e40px_id_stage always_comb begin x_mem_data_type_id = 2'b00; case (x_mem_req_i.size) - 2'b00: x_mem_data_type_id = 2'b10; // SB - 2'b01: x_mem_data_type_id = 2'b01; // SH - 2'b10: x_mem_data_type_id = 2'b00; // SW + 3'd0: x_mem_data_type_id = 2'b10; // SB + 3'd1: x_mem_data_type_id = 2'b01; // SH + 3'd2: x_mem_data_type_id = 2'b00; // SW + default: x_mem_data_type_id = 2'b00; // SW endcase end - end else begin : gen_no_x_disp // default illegal instruction assignment diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_if_stage.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_if_stage.sv index a36f89ae3..2b02e0a79 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_if_stage.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_if_stage.sv @@ -291,9 +291,9 @@ module cv32e40px_if_stage #( generate - if (COREV_X_IF) begin + if (COREV_X_IF != 0) begin assign x_compressed_valid_o = illegal_c_insn_dec; - assign x_compressed_req_o.instr = instr_aligned; + assign x_compressed_req_o.instr = instr_aligned[15:0]; assign x_compressed_req_o.mode = 2'b00; // Machine Mode assign x_compressed_req_o.id = x_compressed_id_i; diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv 
b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv index b2dea3482..71bbf665f 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv @@ -98,7 +98,7 @@ module cv32e40px_x_disp // scoreboard, id and satus signals logic [31:0] scoreboard_q, scoreboard_d; logic [3:0] id_q, id_d; - logic [3:0] instr_offloaded_q, instr_offloaded_d; + logic instr_offloaded_q, instr_offloaded_d; logic [3:0] mem_counter_q, mem_counter_d; logic dep; logic outstanding_mem; @@ -182,7 +182,7 @@ module cv32e40px_x_disp always_comb begin scoreboard_d = scoreboard_q; if (x_issue_resp_writeback_i & x_issue_valid_o & x_issue_ready_i - & ~((waddr_id_i == x_result_rd_i) & x_result_valid_i & x_result_rd_i)) begin + & ~((waddr_id_i == x_result_rd_i) & x_result_valid_i & (x_result_rd_i != '0))) begin scoreboard_d[waddr_id_i] = 1'b1; end if (x_result_valid_i & x_result_we_i) begin diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/include/cv32e40px_core_v_xif_pkg.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/include/cv32e40px_core_v_xif_pkg.sv index ae52789ca..499fec318 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/include/cv32e40px_core_v_xif_pkg.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/include/cv32e40px_core_v_xif_pkg.sv @@ -22,75 +22,81 @@ package cv32e40px_core_v_xif_pkg; parameter logic [31:0] X_MISA = '0; parameter logic [1:0] X_ECS_XS = '0; - // interface structs + localparam int XLEN = 32; + typedef struct packed { - logic [15:0] instr; - logic [1:0] mode; - logic [X_ID_WIDTH-1:0] id; + logic [15:0] instr; // Offloaded compressed instruction + logic [1:0] mode; // Privilege level + logic [X_ID_WIDTH-1:0] id; // Identification number of the offloaded compressed instruction } x_compressed_req_t; typedef struct packed { - logic [31:0] instr; - logic accept; + logic [31:0] instr; // Uncompressed instruction + logic accept; // Is the offloaded compressed instruction (id) accepted by the coprocessor? 
} x_compressed_resp_t; typedef struct packed { - logic [31:0] instr; - logic [1:0] mode; - logic [X_ID_WIDTH-1:0] id; - logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs; - logic [X_NUM_RS-1:0] rs_valid; - logic [5:0] ecs; - logic ecs_valid; + logic [31:0] instr; // Offloaded instruction + logic [1:0] mode; // Privilege level + logic [X_ID_WIDTH-1:0] id; // Identification of the offloaded instruction + logic [X_NUM_RS -1:0][X_RFR_WIDTH-1:0] rs; // Register file source operands for the offloaded instruction + logic [X_NUM_RS -1:0] rs_valid; // Validity of the register file source operand(s) + logic [5:0] ecs; // Extension Context Status ({mstatus.xs, mstatus.fs, mstatus.vs}) + logic ecs_valid; // Validity of the Extension Context Status } x_issue_req_t; typedef struct packed { - logic accept; - logic writeback; - //logic float; - logic dualwrite; - logic dualread; - logic loadstore; - logic exc; + logic accept; // Is the offloaded instruction (id) accepted by the coprocessor? + logic writeback; // Will the coprocessor perform a writeback in the core to rd? + logic dualwrite; // Will the coprocessor perform a dual writeback in the core to rd and rd+1? + logic [2:0] dualread; // Will the coprocessor require dual reads from rs1\rs2\rs3 and rs1+1\rs2+1\rs3+1? + logic loadstore; // Is the offloaded instruction a load/store instruction? + logic ecswrite; // Will the coprocessor write the Extension Context Status in mstatus? + logic exc; // Can the offloaded instruction possibly cause a synchronous exception in the coprocessor itself? } x_issue_resp_t; typedef struct packed { - logic [X_ID_WIDTH-1:0] id; - logic commit_kill; + logic [X_ID_WIDTH-1:0] id; // Identification of the offloaded instruction + logic commit_kill; // Shall an offloaded instruction be killed? 
} x_commit_t; typedef struct packed { - logic [X_ID_WIDTH-1:0] id; - logic [31:0] addr; - logic [1:0] mode; - logic [1:0] size; - logic we; - logic [X_MEM_WIDTH-1:0] wdata; - logic last; - logic spec; + logic [X_ID_WIDTH -1:0] id; // Identification of the offloaded instruction + logic [31:0] addr; // Virtual address of the memory transaction + logic [1:0] mode; // Privilege level + logic we; // Write enable of the memory transaction + logic [2:0] size; // Size of the memory transaction + logic [X_MEM_WIDTH/8-1:0] be; // Byte enables for memory transaction + logic [1:0] attr; // Memory transaction attributes + logic [X_MEM_WIDTH -1:0] wdata; // Write data of a store memory transaction + logic last; // Is this the last memory transaction for the offloaded instruction? + logic spec; // Is the memory transaction speculative? } x_mem_req_t; typedef struct packed { - logic exc; - logic [5:0] exccode; - logic dbg; + logic exc; // Did the memory request cause a synchronous exception? + logic [5:0] exccode; // Exception code + logic dbg; // Did the memory request cause a debug trigger match with ``mcontrol.timing`` = 0? } x_mem_resp_t; typedef struct packed { - logic [X_ID_WIDTH-1:0] id; - logic [X_MEM_WIDTH-1:0] rdata; - logic err; - logic dbg; + logic [X_ID_WIDTH -1:0] id; // Identification of the offloaded instruction + logic [X_MEM_WIDTH-1:0] rdata; // Read data of a read memory transaction + logic err; // Did the instruction cause a bus error? + logic dbg; // Did the read data cause a debug trigger match with ``mcontrol.timing`` = 0? 
} x_mem_result_t; typedef struct packed { - logic [X_ID_WIDTH-1:0] id; - logic [X_RFW_WIDTH-1:0] data; - logic [4:0] rd; - logic we; - logic [2:0] ecswe; - logic [5:0] ecsdata; - logic exc; - logic [5:0] exccode; + logic [X_ID_WIDTH -1:0] id; // Identification of the offloaded instruction + logic [X_RFW_WIDTH -1:0] data; // Register file write data value(s) + logic [4:0] rd; // Register file destination address(es) + logic [X_RFW_WIDTH/XLEN-1:0] we; // Register file write enable(s) + logic [5:0] ecsdata; // Write data value for {mstatus.xs, mstatus.fs, mstatus.vs} + logic [2:0] ecswe; // Write enables for {mstatus.xs, mstatus.fs, mstatus.vs} + logic exc; // Did the instruction cause a synchronous exception? + logic [5:0] exccode; // Exception code + logic err; // Did the instruction cause a bus error? + logic dbg; // Did the instruction cause a debug trigger match with ``mcontrol.timing`` = 0? } x_result_t; + endpackage diff --git a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_core.sv b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_core.sv index 5394f143a..6a6836824 100644 --- a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_core.sv +++ b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_core.sv @@ -400,7 +400,7 @@ module cv32e40x_core import cv32e40x_pkg::*; assign dbg_ack = ctrl_fsm.dbg_ack; // Gate off the internal debug_request signal if debug support is not configured. - assign debug_req_gated = DEBUG ? debug_req_i : 1'b0; + assign debug_req_gated = (DEBUG != 0) ? 
debug_req_i : 1'b0; ////////////////////////////////////////////////////////////////////////////////////////////// // ____ _ _ __ __ _ // diff --git a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_cs_registers.sv b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_cs_registers.sv index 2c1dcd3dd..7f08bcd7d 100644 --- a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_cs_registers.sv +++ b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_cs_registers.sv @@ -271,13 +271,14 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; // Performance Counter Signals - logic [31:0] [63:0] mhpmcounter_q; // Performance counters + logic [63:0] mhpmcounter_q[32]; // Performance counters logic [31:0] [63:0] mhpmcounter_n; // Performance counters next value logic [31:0] [63:0] mhpmcounter_rdata; // Performance counters next value logic [31:0] [1:0] mhpmcounter_we; // Performance counters write enable logic [31:0] [31:0] mhpmevent_q, mhpmevent_n, mhpmevent_rdata; // Event enable logic [31:0] mcountinhibit_q, mcountinhibit_n, mcountinhibit_rdata; // Performance counter inhibit - logic [NUM_HPM_EVENTS-1:0] hpm_events; // Events for performance counters + logic hpm_events[NUM_HPM_EVENTS]; // Events for performance counters + logic [NUM_HPM_EVENTS-1:0] packed_hpm_events; // Packed Events for performance counters logic [31:0] [63:0] mhpmcounter_increment; // Increment of mhpmcounter_q logic [31:0] mhpmcounter_write_lower; // Write 32 lower bits of mhpmcounter_q logic [31:0] mhpmcounter_write_upper; // Write 32 upper bits mhpmcounter_q @@ -535,7 +536,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; end CSR_DCSR: begin - if (DEBUG) begin + if (DEBUG != 0) begin csr_rdata_int = dcsr_rdata; illegal_csr_read = !ctrl_fsm_i.debug_mode; end else begin @@ -545,7 +546,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; end CSR_DPC: begin - if (DEBUG) begin + if (DEBUG != 0) begin csr_rdata_int = dpc_rdata; illegal_csr_read = !ctrl_fsm_i.debug_mode; end else begin @@ -555,7 +556,7 @@ module 
cv32e40x_cs_registers import cv32e40x_pkg::*; end CSR_DSCRATCH0: begin - if (DEBUG) begin + if (DEBUG != 0) begin csr_rdata_int = dscratch0_rdata; illegal_csr_read = !ctrl_fsm_i.debug_mode; end else begin @@ -565,7 +566,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; end CSR_DSCRATCH1: begin - if (DEBUG) begin + if (DEBUG != 0) begin csr_rdata_int = dscratch1_rdata; illegal_csr_read = !ctrl_fsm_i.debug_mode; end else begin @@ -1239,7 +1240,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; ); generate - if (DEBUG) begin : gen_debug_csr + if (DEBUG != 0) begin : gen_debug_csr cv32e40x_csr #( .WIDTH (32), @@ -1533,7 +1534,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; assign priv_lvl_rdata = PRIV_LVL_M; // dcsr_rdata factors in the flop outputs and the nmip bit from the controller - assign dcsr_rdata = DEBUG ? {dcsr_q[31:4], ctrl_fsm_i.pending_nmi, dcsr_q[2:0]} : 32'h0; + assign dcsr_rdata = (DEBUG != 0) ? {dcsr_q[31:4], ctrl_fsm_i.pending_nmi, dcsr_q[2:0]} : 32'h0; assign mcause_rdata = mcause_q; @@ -1682,6 +1683,9 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; genvar hpm_idx; generate for(hpm_idx=0; hpm_idx<16; hpm_idx++) begin + + assign packed_hpm_events[hpm_idx] = hpm_events[hpm_idx]; + if(HPM_EVENT_FLOP[hpm_idx]) begin: hpm_event_flop always_ff @(posedge clk, negedge rst_n) begin @@ -1807,14 +1811,14 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; !mhpmcounter_write_upper[wcnt_gidx] && !mcountinhibit_rdata[wcnt_gidx] && !debug_stopcount && - hpm_events[1]; + packed_hpm_events[1]; end else if( (wcnt_gidx>2) && (wcnt_gidx<(NUM_MHPMCOUNTERS+3))) begin : gen_mhpmcounter // add +1 if any event is enabled and active assign mhpmcounter_write_increment[wcnt_gidx] = !mhpmcounter_write_lower[wcnt_gidx] && !mhpmcounter_write_upper[wcnt_gidx] && !mcountinhibit_rdata[wcnt_gidx] && !debug_stopcount && - |(hpm_events & mhpmevent_rdata[wcnt_gidx][NUM_HPM_EVENTS-1:0]); + |(packed_hpm_events & 
mhpmevent_rdata[wcnt_gidx][NUM_HPM_EVENTS-1:0]); end else begin : gen_mhpmcounter_not_implemented assign mhpmcounter_write_increment[wcnt_gidx] = 1'b0; end diff --git a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_debug_triggers.sv b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_debug_triggers.sv index 25a084d3e..3bd8a324d 100644 --- a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_debug_triggers.sv +++ b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_debug_triggers.sv @@ -108,13 +108,13 @@ import cv32e40x_pkg::*; logic [DBG_NUM_TRIGGERS-1 : 0] tdata2_we_int; // CSR instance outputs - logic [31:0] tdata1_q[DBG_NUM_TRIGGERS]; - logic [31:0] tdata2_q[DBG_NUM_TRIGGERS]; + logic [31:0] tdata1_q[DBG_NUM_TRIGGERS-1:0]; + logic [31:0] tdata2_q[DBG_NUM_TRIGGERS-1:0]; logic [31:0] tselect_q; // CSR read data, possibly WARL resolved - logic [31:0] tdata1_rdata[DBG_NUM_TRIGGERS]; - logic [31:0] tdata2_rdata[DBG_NUM_TRIGGERS]; + logic [31:0] tdata1_rdata[DBG_NUM_TRIGGERS-1:0]; + logic [31:0] tdata2_rdata[DBG_NUM_TRIGGERS-1:0]; // IF, EX and WB stages trigger match logic [DBG_NUM_TRIGGERS-1 : 0] trigger_match_if; @@ -127,7 +127,7 @@ import cv32e40x_pkg::*; // LSU address match signals logic [DBG_NUM_TRIGGERS-1 : 0] lsu_addr_match_en; logic [DBG_NUM_TRIGGERS-1 : 0] lsu_addr_match; - logic [3:0] lsu_byte_addr_match[DBG_NUM_TRIGGERS]; + logic [3:0] lsu_byte_addr_match[DBG_NUM_TRIGGERS-1:0]; // Enable matching based on privilege level per trigger logic [DBG_NUM_TRIGGERS-1 : 0] priv_lvl_match_en_if; @@ -140,7 +140,7 @@ import cv32e40x_pkg::*; logic [31:0] lsu_addr_high; // The highest accessed address of an LSU transaction // Exception trigger code match - logic [31:0] exception_match[DBG_NUM_TRIGGERS]; + logic [31:0] exception_match[DBG_NUM_TRIGGERS-1:0]; // Write data always_comb begin diff --git a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_id_stage.sv b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_id_stage.sv index 1385dfb1e..11bc4570a 100644 --- 
a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_id_stage.sv +++ b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_id_stage.sv @@ -749,7 +749,9 @@ module cv32e40x_id_stage import cv32e40x_pkg::*; // Also attempt to offload any CSR instruction. The validity of such instructions are only // checked in the EX stage. // Instructions with deassert_we set to 1 from the controller bypass logic will not be attempted offloaded. - assign xif_issue_if.issue_valid = instr_valid && (illegal_insn || csr_en) && + // Only offload instructions if the EX stage is ready not to miss data from xif_issue.issue_resp + assign xif_issue_if.issue_valid = instr_valid && ex_ready_i && + (illegal_insn || csr_en) && !(xif_accepted_q || xif_rejected_q || ctrl_byp_i.deassert_we); // Keep xif_offloading_o high after an offloaded instruction was accepted or rejected to get diff --git a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_pma.sv b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_pma.sv index 9e3f6ad20..f63534d9a 100644 --- a/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_pma.sv +++ b/hw/vendor/openhwgroup_cv32e40x/rtl/cv32e40x_pma.sv @@ -98,7 +98,7 @@ module cv32e40x_pma import cv32e40x_pkg::*; // Tie of atomic attribute if A_EXT=0 generate - if (A_EXT) begin: pma_atomic + if (A_EXT != A_NONE) begin: pma_atomic assign pma_cfg_atomic = pma_cfg.atomic; end else begin: pma_no_atomic diff --git a/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_core.sv.patch b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_core.sv.patch new file mode 100644 index 000000000..92de4639f --- /dev/null +++ b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_core.sv.patch @@ -0,0 +1,13 @@ +diff --git a/rtl/cv32e40x_core.sv b/rtl/cv32e40x_core.sv +index faceb96..eddd0ea 100644 +--- a/rtl/cv32e40x_core.sv ++++ b/rtl/cv32e40x_core.sv +@@ -398,7 +398,7 @@ module cv32e40x_core import cv32e40x_pkg::*; + assign dbg_ack = ctrl_fsm.dbg_ack; + + // Gate off the internal debug_request signal if debug support is not configured. 
+- assign debug_req_gated = DEBUG ? debug_req_i : 1'b0; ++ assign debug_req_gated = (DEBUG != 0) ? debug_req_i : 1'b0; + + ////////////////////////////////////////////////////////////////////////////////////////////// + // ____ _ _ __ __ _ // diff --git a/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_cs_registers.sv.patch b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_cs_registers.sv.patch new file mode 100644 index 000000000..96435f0b2 --- /dev/null +++ b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_cs_registers.sv.patch @@ -0,0 +1,102 @@ +diff --git a/rtl/cv32e40x_cs_registers.sv b/rtl/cv32e40x_cs_registers.sv +index 6fe90077..57908723 100644 +--- a/rtl/cv32e40x_cs_registers.sv ++++ b/rtl/cv32e40x_cs_registers.sv +@@ -271,13 +271,14 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + + + // Performance Counter Signals +- logic [31:0] [63:0] mhpmcounter_q; // Performance counters ++ logic [63:0] mhpmcounter_q[32]; // Performance counters + logic [31:0] [63:0] mhpmcounter_n; // Performance counters next value + logic [31:0] [63:0] mhpmcounter_rdata; // Performance counters next value + logic [31:0] [1:0] mhpmcounter_we; // Performance counters write enable + logic [31:0] [31:0] mhpmevent_q, mhpmevent_n, mhpmevent_rdata; // Event enable + logic [31:0] mcountinhibit_q, mcountinhibit_n, mcountinhibit_rdata; // Performance counter inhibit +- logic [NUM_HPM_EVENTS-1:0] hpm_events; // Events for performance counters ++ logic hpm_events[NUM_HPM_EVENTS]; // Events for performance counters ++ logic [NUM_HPM_EVENTS-1:0] packed_hpm_events; // Packed Events for performance counters + logic [31:0] [63:0] mhpmcounter_increment; // Increment of mhpmcounter_q + logic [31:0] mhpmcounter_write_lower; // Write 32 lower bits of mhpmcounter_q + logic [31:0] mhpmcounter_write_upper; // Write 32 upper bits mhpmcounter_q +@@ -535,7 +536,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + end + + CSR_DCSR: begin +- if (DEBUG) begin ++ if (DEBUG != 0) begin + 
csr_rdata_int = dcsr_rdata; + illegal_csr_read = !ctrl_fsm_i.debug_mode; + end else begin +@@ -545,7 +546,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + end + + CSR_DPC: begin +- if (DEBUG) begin ++ if (DEBUG != 0) begin + csr_rdata_int = dpc_rdata; + illegal_csr_read = !ctrl_fsm_i.debug_mode; + end else begin +@@ -555,7 +556,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + end + + CSR_DSCRATCH0: begin +- if (DEBUG) begin ++ if (DEBUG != 0) begin + csr_rdata_int = dscratch0_rdata; + illegal_csr_read = !ctrl_fsm_i.debug_mode; + end else begin +@@ -565,7 +566,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + end + + CSR_DSCRATCH1: begin +- if (DEBUG) begin ++ if (DEBUG != 0) begin + csr_rdata_int = dscratch1_rdata; + illegal_csr_read = !ctrl_fsm_i.debug_mode; + end else begin +@@ -1239,7 +1240,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + ); + + generate +- if (DEBUG) begin : gen_debug_csr ++ if (DEBUG != 0) begin : gen_debug_csr + cv32e40x_csr + #( + .WIDTH (32), +@@ -1533,7 +1534,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + assign priv_lvl_rdata = PRIV_LVL_M; + + // dcsr_rdata factors in the flop outputs and the nmip bit from the controller +- assign dcsr_rdata = DEBUG ? {dcsr_q[31:4], ctrl_fsm_i.pending_nmi, dcsr_q[2:0]} : 32'h0; ++ assign dcsr_rdata = (DEBUG != 0) ? 
{dcsr_q[31:4], ctrl_fsm_i.pending_nmi, dcsr_q[2:0]} : 32'h0; + + + assign mcause_rdata = mcause_q; +@@ -1682,6 +1683,9 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + genvar hpm_idx; + generate + for(hpm_idx=0; hpm_idx<16; hpm_idx++) begin ++ ++ assign packed_hpm_events[hpm_idx] = hpm_events[hpm_idx]; ++ + if(HPM_EVENT_FLOP[hpm_idx]) begin: hpm_event_flop + + always_ff @(posedge clk, negedge rst_n) begin +@@ -1807,14 +1811,14 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*; + !mhpmcounter_write_upper[wcnt_gidx] && + !mcountinhibit_rdata[wcnt_gidx] && + !debug_stopcount && +- hpm_events[1]; ++ packed_hpm_events[1]; + end else if( (wcnt_gidx>2) && (wcnt_gidx<(NUM_MHPMCOUNTERS+3))) begin : gen_mhpmcounter + // add +1 if any event is enabled and active + assign mhpmcounter_write_increment[wcnt_gidx] = !mhpmcounter_write_lower[wcnt_gidx] && + !mhpmcounter_write_upper[wcnt_gidx] && + !mcountinhibit_rdata[wcnt_gidx] && + !debug_stopcount && +- |(hpm_events & mhpmevent_rdata[wcnt_gidx][NUM_HPM_EVENTS-1:0]); ++ |(packed_hpm_events & mhpmevent_rdata[wcnt_gidx][NUM_HPM_EVENTS-1:0]); + end else begin : gen_mhpmcounter_not_implemented + assign mhpmcounter_write_increment[wcnt_gidx] = 1'b0; + end diff --git a/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_debug_triggers.sv.patch b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_debug_triggers.sv.patch new file mode 100644 index 000000000..f9ac4ea09 --- /dev/null +++ b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_debug_triggers.sv.patch @@ -0,0 +1,40 @@ +diff --git a/rtl/cv32e40x_debug_triggers.sv b/rtl/cv32e40x_debug_triggers.sv +index 25a084d..3bd8a32 100644 +--- a/rtl/cv32e40x_debug_triggers.sv ++++ b/rtl/cv32e40x_debug_triggers.sv +@@ -108,13 +108,13 @@ import cv32e40x_pkg::*; + logic [DBG_NUM_TRIGGERS-1 : 0] tdata2_we_int; + + // CSR instance outputs +- logic [31:0] tdata1_q[DBG_NUM_TRIGGERS]; +- logic [31:0] tdata2_q[DBG_NUM_TRIGGERS]; ++ logic [31:0] tdata1_q[DBG_NUM_TRIGGERS-1:0]; ++ logic [31:0] 
tdata2_q[DBG_NUM_TRIGGERS-1:0]; + logic [31:0] tselect_q; + + // CSR read data, possibly WARL resolved +- logic [31:0] tdata1_rdata[DBG_NUM_TRIGGERS]; +- logic [31:0] tdata2_rdata[DBG_NUM_TRIGGERS]; ++ logic [31:0] tdata1_rdata[DBG_NUM_TRIGGERS-1:0]; ++ logic [31:0] tdata2_rdata[DBG_NUM_TRIGGERS-1:0]; + + // IF, EX and WB stages trigger match + logic [DBG_NUM_TRIGGERS-1 : 0] trigger_match_if; +@@ -127,7 +127,7 @@ import cv32e40x_pkg::*; + // LSU address match signals + logic [DBG_NUM_TRIGGERS-1 : 0] lsu_addr_match_en; + logic [DBG_NUM_TRIGGERS-1 : 0] lsu_addr_match; +- logic [3:0] lsu_byte_addr_match[DBG_NUM_TRIGGERS]; ++ logic [3:0] lsu_byte_addr_match[DBG_NUM_TRIGGERS-1:0]; + + // Enable matching based on privilege level per trigger + logic [DBG_NUM_TRIGGERS-1 : 0] priv_lvl_match_en_if; +@@ -140,7 +140,7 @@ import cv32e40x_pkg::*; + logic [31:0] lsu_addr_high; // The highest accessed address of an LSU transaction + + // Exception trigger code match +- logic [31:0] exception_match[DBG_NUM_TRIGGERS]; ++ logic [31:0] exception_match[DBG_NUM_TRIGGERS-1:0]; + + // Write data + always_comb begin diff --git a/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_id_stage.sv.patch b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_id_stage.sv.patch new file mode 100644 index 000000000..c74917ea5 --- /dev/null +++ b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_id_stage.sv.patch @@ -0,0 +1,15 @@ +diff --git a/rtl/cv32e40x_id_stage.sv b/rtl/cv32e40x_id_stage.sv +index 1385dfb..d760e55 100644 +--- a/rtl/cv32e40x_id_stage.sv ++++ b/rtl/cv32e40x_id_stage.sv +@@ -749,7 +749,9 @@ module cv32e40x_id_stage import cv32e40x_pkg::*; + // Also attempt to offload any CSR instruction. The validity of such instructions are only + // checked in the EX stage. + // Instructions with deassert_we set to 1 from the controller bypass logic will not be attempted offloaded. 
+- assign xif_issue_if.issue_valid = instr_valid && (illegal_insn || csr_en) && ++ // Only offload instructions if the EX stage is ready not to miss data from xif_issue.issue_resp ++ assign xif_issue_if.issue_valid = instr_valid && ex_ready_i && ++ (illegal_insn || csr_en) && + !(xif_accepted_q || xif_rejected_q || ctrl_byp_i.deassert_we); + + // Keep xif_offloading_o high after an offloaded instruction was accepted or rejected to get diff --git a/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_pma.sv.patch b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_pma.sv.patch new file mode 100644 index 000000000..0c08682ae --- /dev/null +++ b/hw/vendor/patches/openhwgroup_cv32e40x/cv32e40x_pma.sv.patch @@ -0,0 +1,13 @@ +diff --git a/rtl/cv32e40x_pma.sv b/rtl/cv32e40x_pma.sv +index 9e3f6ad..f63534d 100644 +--- a/rtl/cv32e40x_pma.sv ++++ b/rtl/cv32e40x_pma.sv +@@ -98,7 +98,7 @@ module cv32e40x_pma import cv32e40x_pkg::*; + + // Tie of atomic attribute if A_EXT=0 + generate +- if (A_EXT) begin: pma_atomic ++ if (A_EXT != A_NONE) begin: pma_atomic + assign pma_cfg_atomic = pma_cfg.atomic; + end + else begin: pma_no_atomic diff --git a/hw/vendor/pulp_platform_fpnew.lock.hjson b/hw/vendor/pulp_platform_fpnew.lock.hjson index e150bcb2f..bc1ec5555 100644 --- a/hw/vendor/pulp_platform_fpnew.lock.hjson +++ b/hw/vendor/pulp_platform_fpnew.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/pulp-platform/fpnew.git - rev: 11659d7ff3580ac3226c6d56a90ef717cdc530e3 + rev: d6e581628f3517a1fb1257507d3214e599f7859d } } diff --git a/hw/vendor/pulp_platform_fpnew.vendor.hjson b/hw/vendor/pulp_platform_fpnew.vendor.hjson index e76745d51..7ae0d55cc 100644 --- a/hw/vendor/pulp_platform_fpnew.vendor.hjson +++ b/hw/vendor/pulp_platform_fpnew.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/pulp-platform/fpnew.git", - rev: "11659d7ff3580ac3226c6d56a90ef717cdc530e3", + rev: "d6e581628f3517a1fb1257507d3214e599f7859d", }, exclude_from_upstream: [ diff --git 
a/hw/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv b/hw/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv index 964ef7429..7abe33043 100644 --- a/hw/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv +++ b/hw/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv @@ -443,7 +443,11 @@ module fpnew_cast_multi #( // By default right shift mantissa to be an integer denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); // overflow: when converting to unsigned the range is larger by one - if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + if ((input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) // Exponent larger than max int range, + && !(!op_mod_q2 // unless cast to signed int + && input_sign_q // and input value is larges negative int value + && (input_exp_q == signed'(fpnew_pkg::int_width(int_fmt_q2) - 1)) + && (input_mant_q == {1'b1, {INT_MAN_WIDTH-1{1'b0}}}))) begin denorm_shamt = '0; // prevent shifting of_before_round = 1'b1; // underflow diff --git a/hw/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv b/hw/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv index a8b004952..56a2f5d62 100644 --- a/hw/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv +++ b/hw/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv @@ -207,7 +207,7 @@ module fpnew_divsqrt_multi #( // Valid synch with other lanes // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni) // Tell the other units that this unit has finished now or in the past assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; diff --git a/hw/vendor/pulp_platform_fpnew/src/fpnew_fma.sv b/hw/vendor/pulp_platform_fpnew/src/fpnew_fma.sv 
index 051e6a698..6fdd89056 100644 --- a/hw/vendor/pulp_platform_fpnew/src/fpnew_fma.sv +++ b/hw/vendor/pulp_platform_fpnew/src/fpnew_fma.sv @@ -613,7 +613,9 @@ module fpnew_fma #( ); // Classification after rounding - assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign uf_after_round = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones // ----------------- diff --git a/hw/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv b/hw/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv index e691f6777..471d966f0 100644 --- a/hw/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv +++ b/hw/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv @@ -745,8 +745,10 @@ module fpnew_fma_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : post_process - // detect of / uf - fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + // detect of / uf + fmt_uf_after_round[fmt] = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. // Assemble regular result, nan box short ones. 
diff --git a/hw/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v b/hw/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v index 87139a253..d22e85ba9 100644 --- a/hw/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +++ b/hw/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v @@ -222,7 +222,7 @@ end assign ex4_rst_norm[31:0] = {fdsu_ex4_result_sign, ex4_expnt_rst[7:0], ex4_frac_23[22:0]}; -assign ex4_cor_uf = (fdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) +assign ex4_cor_uf = (fdsu_ex4_uf || ex4_denorm_potnt_norm || ex4_uf_plus) && fdsu_ex4_nx; assign ex4_cor_nx = fdsu_ex4_nx || fdsu_ex4_of diff --git a/hw/vendor/pulp_platform_fpu_ss.core b/hw/vendor/pulp_platform_fpu_ss.core new file mode 100644 index 000000000..a2fa16f04 --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss.core @@ -0,0 +1,30 @@ +CAPI=2: + +# Copyright 2023 David Mallasen Quintana +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +name: "pulp-platform.org:ip:fpu_ss" + +filesets: + rtl: + depend: + - pulp-platform.org::fpnew + files: + - pulp_platform_fpu_ss/src/fpu_ss_pkg.sv + - pulp_platform_fpu_ss/src/fpu_ss_instr_pkg.sv + - pulp_platform_fpu_ss/src/fpu_ss_prd_f_pkg.sv + - pulp_platform_fpu_ss/src/fpu_ss_prd_zfinx_pkg.sv + - pulp_platform_fpu_ss/src/fpu_ss_compressed_predecoder.sv + - pulp_platform_fpu_ss/src/fpu_ss_controller.sv + - pulp_platform_fpu_ss/src/fpu_ss_csr.sv + - pulp_platform_fpu_ss/src/fpu_ss_decoder.sv + - pulp_platform_fpu_ss/src/fpu_ss_predecoder.sv + - pulp_platform_fpu_ss/src/fpu_ss_regfile.sv + - pulp_platform_fpu_ss/src/fpu_ss.sv + file_type: systemVerilogSource + +targets: + default: + filesets: + - rtl \ No newline at end of file diff --git a/hw/vendor/pulp_platform_fpu_ss.lock.hjson b/hw/vendor/pulp_platform_fpu_ss.lock.hjson new file mode 100644 index 000000000..5edbdf883 --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. + +{ + upstream: + { + url: https://github.com/davideschiavone/fpu_ss.git + rev: bb784e028ced4d2d542639dc698e2b620ed20aae + } +} diff --git a/hw/vendor/pulp_platform_fpu_ss.vendor.hjson b/hw/vendor/pulp_platform_fpu_ss.vendor.hjson new file mode 100644 index 000000000..ea62092cb --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss.vendor.hjson @@ -0,0 +1,17 @@ +// Copyright 2023 David Mallasen Quintana +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +{ + name: "pulp_platform_fpu_ss", + target_dir: "pulp_platform_fpu_ss", + + upstream: { + url: "https://github.com/davideschiavone/fpu_ss.git", + rev: "bb784e028ced4d2d542639dc698e2b620ed20aae", + }, + + exclude_from_upstream: [ + ".gitmodules", + "fpnew", + ] +} diff --git a/hw/vendor/pulp_platform_fpu_ss/LICENSE.md b/hw/vendor/pulp_platform_fpu_ss/LICENSE.md new file mode 100644 index 000000000..fb3358e86 --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss/LICENSE.md @@ -0,0 +1,60 @@ +# SolderPad Hardware License v0.51 + +This license is based closely on the Apache License Version 2.0, but is not approved or endorsed by the Apache Foundation. A copy of the non-modified Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0. + +As this license is not currently OSI or FSF approved, the Licensor permits any Work licensed under this License, at the option of the Licensee, to be treated as licensed under the Apache License Version 2.0 (which is so approved). + +This License is licensed under the terms of this License and in particular clause 7 below (Disclaimer of Warranties) applies in relation to its use. + +## TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +*1. Definitions.* + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the Rights owner or entity authorized by the Rights owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
+ +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Rights" means copyright and any similar right including design right (whether registered or unregistered), semiconductor topography (mask) rights and database rights (but excluding Patents and Trademarks). + +"Source" form shall mean the preferred form for making modifications, including but not limited to source code, net lists, board layouts, CAD files, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, the instantiation of a hardware design and conversions to other media types, including intermediate forms such as bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask works). + +"Work" shall mean the work of authorship, whether in Source form or other Object form, made available under the License, as indicated by a Rights notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) or physically connect to or interoperate with the interfaces of, the Work and Derivative Works thereof. 
+ +"Contribution" shall mean any design or work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the Rights owner or by an individual or Legal Entity authorized to submit on behalf of the Rights owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the Rights owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +*2. Grant of License.* Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable license under the Rights to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form and do anything in relation to the Work as if the Rights did not exist. + +*3. 
Grant of Patent License.* Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +*4. Redistribution.* You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file 
distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +*5. Submission of Contributions.* Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +*6. Trademarks.* This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +*7. 
Disclaimer of Warranty.* Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +*8. Limitation of Liability.* In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +*9. Accepting Warranty or Additional Liability.* While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + diff --git a/hw/vendor/pulp_platform_fpu_ss/README.md b/hw/vendor/pulp_platform_fpu_ss/README.md new file mode 100644 index 000000000..a2939f655 --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss/README.md @@ -0,0 +1,116 @@ +# Floating-Point Unit Subsystem +The Floating-Point Unit Subsystem is a 32-bit coprocessor that implements the RISC-V "F" Standard Extension, Version 2.1 and the RISC-V "Zfinx" Standard Extension, Version 1.0.0-rc. The coprocessor implements all channels described by the [Core-V eXtension Interface (cv-x-if)](https://github.com/openhwgroup/core-v-xif) to communicate with a core. The [FPnew](https://github.com/pulp-platform/fpnew) serves as the processing unit inside the coprocessor. +The implementation is tailored to cv32e40p (formerly known as RI5CY). This means exception handling is not implemented, since there are no memory exceptions in cv32e40p. +## Integration +The coprocessor's main module is named `fpu_ss` and can be found in [fpu_ss.sv](src/fpu_ss.sv "fpu_ss.sv"). Below, the instantiation template is given and the parameters are described. 
+### Integration Template + fpu_ss #( + .PULP_ZFINX ( 0 ), + .INPUT_BUFFER_DEPTH ( 0 ), + .OUT_OF_ORDER ( 1 ), + .FORWARDING ( 1 ), + .FPU_FEATURES ( ), + .FPU_IMPLEMENTATION ( ) + ) fpu_ss_i ( + // clock and reset + .clk_i (), + .rst_ni (), + + // Compressed Interface + .x_compressed_valid_i (), + .x_compressed_ready_o (), + .x_compressed_req_i (), + .x_compressed_resp_o (), + + // Issue Interface + .x_issue_valid_i (), + .x_issue_ready_o (), + .x_issue_req_i (), + .x_issue_resp_o (), + + // Commit Interface + .x_commit_valid_i (), + .x_commit_i (), + + // Memory Request/Response Interface + .x_mem_valid_o (), + .x_mem_ready_i (), + .x_mem_req_o (), + .x_mem_resp_i (), + + // Memory Result Interface + .x_mem_result_valid_i (), + .x_mem_result_i (), + + // Result Interface + .x_result_valid_o (), + .x_result_ready_i (), + .x_result_o () + ); + +### Dependencies +The [FPnew](https://github.com/pulp-platform/fpnew) is a submodule of the floating-point unit subsystem. Use + + git submodule update --init --recursive +to load the FPnew or use + + git clone --recurse-submodules https://github.com/moimfeld/fpu_ss +to clone the repository with the Fpnew. +### Parameters + +| Parameter Name | Values | Description | Default | +| -------------------- | ----------------------------------------------------------------------------------------------- | -------------------------------------------- | ------- | +| `PULP_ZFINX` | {0,1} | Use F or zfinx extension | 0 | +| `INPUT_BUFFER_DEPTH` | {0, ... 
, 2^32-1} | Input buffer depth | 0 | +| `OUT_OF_ORDER` | {0, 1} | Enabling out-of-order execution | 1 | +| `FORWARDING` | {0, 1} | Enabling forwarding inside the fpu subsystem | 1 | +| `FPU_FEATURES` | see [FPnew](https://github.com/pulp-platform/fpnew/tree/develop/docs/README.md#parameters) docs | - | - | +| `FPU_IMPLEMENTATION` | see [FPnew](https://github.com/pulp-platform/fpnew/tree/develop/docs/README.md#parameters) docs | - | - | + + +#### Extended parameter descriptions +##### `PULP_ZFINX` +When `PULP_ZFINX` is set to `1`, the coprocessor supports the "Zfinx" extension and no longer supports the "F" extension. No floating-point specific register file will be instantiated, which will reduce the area of the coprocessor. Using the "Zfinx" extension is likely to have a negative impact on IPC. + +**Important Note**: If the coprocessor wants to be simulated/compiled for the "Zfinx" extension then `PULP_ZFINX_DEF` must be defined (besides setting `PULP_ZFINX` to `1`). +##### `INPUT_BUFFER_DEPTH` +Setting `INPUT_BUFFER_DEPTH` to `0` will remove the FIFO that buffers incoming instructions from the core. This can have a negative impact on IPC, but will reduce the area of the coprocessor. +##### `FORWARDING` +Setting `FORWARDING` to `0` removes the possibility of forwarding operands from the output of the FPnew to the input of the FPnew. This will significantly reduce the area of the coprocessor and increase the maximum frequency, but can have a negative effect on the IPC. + + +# Architecture +This section describes the architecture of the coprocessor. The following figure gives a high level overview of the coprocessor. 
+ +![fpu_ss](fig/fpu_ss.png) + +### List of Modules +| Name | Description | SystemVerilog File | +| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| `fpu_ss` | Top level module | [fpu_ss.sv](src/fpu_ss.sv "fpu_ss.sv") | +| `fpu_ss_compressed_predecoder` | Expands valid floating-point compressed instructions and sends them back to the core via the compressed interface | [fpu_ss_compressed_predecoder.sv](src/fpu_ss_compressed_predecoder.sv "fpu_ss_compressed_predecoder.sv") | +| `fpu_ss_predecoder` | Decides when and if an offload attempt from the core is accepted or not | [fpu_ss_predecoder.sv](src/fpu_ss_predecoder.sv "fpu_ss_predecoder.sv") | +| `stream_fifo` | Stream FIFO from [pulp-platform/common_cells](https://github.com/pulp-platform/common_cells) is used in the coprocessor to buffer incoming instructions and metadata of ongoing memory operations | [stream_fifo.sv](https://github.com/pulp-platform/common_cells/tree/master/src/stream_fifo.sv "stream_fifo.sv")| +| `fpu_ss_decoder` | Decodes instructions | [fpu_ss_decoder.sv](src/fpu_ss_decoder.sv "fpu_ss_decoder.sv") | +| `fpu_ss_regfile` | Flip-flop based floating-point specific register file with three read ports and one write port | [fpu_ss_regfile.sv](src/fpu_ss_regfile.sv "fpu_ss_regfile.sv") | +| `fpnew_top` | Main porcessing unit | [fpnew_top.sv](https://github.com/pulp-platform/fpnew/tree/develop/src/fpnew_top.sv "fpnew_top.sv") | +| `fpu_ss_csr` | Contains the floating-point specific CSR registers and executes all floating-point specific CSR instructions | [fpu_ss_csr.sv](src/fpu_ss_csr.sv "fpu_ss_csr.sv") | +| `fpu_ss_controller` | Control unit for the whole subsystem and the cv-x-if | 
[fpu_ss_controller.sv](src/fpu_ss_controller.sv "fpu_ss_controller.sv")                                        | +#### Extended Module Descriptions +##### `fpu_ss_compressed_predecoder` +The compressed predecoder takes any instruction sequence that was offloaded via the compressed interface (see [cv-x-if documentation](https://docs.openhwgroup.org/projects/openhw-group-core-v-xif/x_ext.html#compressed-interface)). If the sequence corresponds to a valid floating-point instruction, it will expand the instruction and send it back to the core via the compressed interface. +##### `fpu_ss_predecoder` +The predecoder decides if the coprocessor can accept the instruction the core is currently trying to offload. It also assigns predetermined values to the issue interface response signals once a valid instruction is encountered. These predetermined values are defined in the predecoder packages (`fpu_ss_prd_f_pkg` and `fpu_ss_prd_zfinx_pkg`). +##### `input_stream_fifo` +The `input_stream_fifo` is used to buffer offloaded instructions. If the parameter `INPUT_BUFFER_DEPTH` is set to 0, no input buffer is instantiated. The buffer stores the instruction, the source operands, the id of the offloaded instruction and the mode of the offloaded instruction. The `input_stream_fifo` operates in fall-through mode, i.e. if it is empty, every data-element that is input into it will be visible at the output in the same clock cycle. It works in this mode to reduce latency. 
+ +### List of Packages +| Name                   | Description                                                                                      | SystemVerilog File                                                               | +| ---------------------- | ------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------- | +| `fpu_ss_pkg`           | Defines coprocessor specific structs, cv-x-if structs and the FPnew configuration/implementation | [fpu_ss_pkg.sv](src/fpu_ss_pkg.sv "fpu_ss_pkg.sv")                               | +| `fpu_ss_instr_pkg`     | Instruction masks for the fpu_ss_decoder                                                         | [fpu_ss_instr_pkg.sv](src/fpu_ss_instr_pkg.sv "fpu_ss_instr_pkg.sv")             | +| `fpu_ss_prd_f_pkg`     | "F" Standard Extension package for the predecoder                                                | [fpu_ss_prd_f_pkg.sv](src/fpu_ss_prd_f_pkg.sv "fpu_ss_prd_f_pkg.sv")             | +| `fpu_ss_prd_zfinx_pkg` | "Zfinx" Standard Extension package for the predecoder                                            | [fpu_ss_prd_zfinx_pkg.sv](src/fpu_ss_prd_zfinx_pkg.sv "fpu_ss_prd_zfinx_pkg.sv") | +#### Extended Package Descriptions +##### `fpu_ss_prd_f_pkg` and `fpu_ss_prd_zfinx_pkg` +These packages contain the parameters for the predecoder and define which instructions will be accepted by the fpu_ss. Each instruction has its predefined issue interface response. If `PULP_ZFINX_DEF` is defined the predecoder is instantiated with the parameters in the `fpu_ss_prd_zfinx_pkg`. If `PULP_ZFINX_DEF` is not defined the predecoder is instantiated with the parameters in the `fpu_ss_prd_f_pkg`. \ No newline at end of file diff --git a/hw/vendor/pulp_platform_fpu_ss/bhv/fpu_ss_tracer.sv b/hw/vendor/pulp_platform_fpu_ss/bhv/fpu_ss_tracer.sv new file mode 100644 index 000000000..dfbf3f06a --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss/bhv/fpu_ss_tracer.sv @@ -0,0 +1,223 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Tracer for fpu subsystem
+// Contributor: Moritz Imfeld
+//
+// Simulation-only helper. After reset is released it opens "fpu_ss_trace.log"
+// and, on every falling clock edge:
+//   - decodes the instruction entering the FPU (fpu_in_valid_i) into a mnemonic
+//     and operand strings,
+//   - decodes the operands of a memory request (x_mem_valid_i),
+//   - writes one line per FPU result handshake or memory result.
+// The mnemonic/operand strings are captured when an instruction is issued and
+// printed when a result shows up; the tracer keeps a single copy of them.
+
+`ifdef FPU_SS_TRACER
+
+module fpu_ss_tracer (
+    input logic        clk_i,
+    input logic        rst_ni,
+    input logic        x_mem_valid_i,         // memory request is being issued
+    input logic        fpu_in_valid_i,        // instruction is being handed to the FPU
+    input logic [ 4:0] rs1_i,
+    input logic [ 4:0] rs2_i,
+    input logic [ 4:0] rs3_i,
+    input logic [31:0] instr_i,
+    input logic        fpu_out_valid_i,
+    input logic        fpu_out_ready_i,
+    input logic [ 4:0] fpu_waddr_i,           // destination register index
+    input logic [31:0] fpu_result_i,
+    input logic        x_mem_result_valid_i,
+    input logic        fpr_we_i,              // 1: destination is an FP register
+    input logic [31:0] x_mem_result_i
+
+);
+
+  int    fpu_ss_trace_file;
+  string fn;
+  string fpu_ss_operation;
+  string rd_s;
+  string result;
+  string rs1_s, rs2_s, rs3_s;
+  string rs1_mem_s, rs2_mem_s, rs3_mem_s;
+  string instruction;
+
+  // open/close output file for writing
+  initial begin
+    wait (rst_ni == 1'b1);
+    $sformat(fn, "fpu_ss_trace.log");
+    fpu_ss_trace_file = $fopen(fn, "w");
+    $fwrite(fpu_ss_trace_file, "time instruction result\n");
+
+    while (1) begin
+
+      @(negedge clk_i);
+      // ----------------------------------------------------------------
+      // Decode the instruction that is entering the FPU
+      // ----------------------------------------------------------------
+      if (fpu_in_valid_i) begin
+        $sformat(rs1_s, "");
+        $sformat(rs2_s, "");
+        $sformat(rs3_s, "");
+        unique casez (instr_i)
+          fpu_ss_instr_pkg::FADD_S: begin
+            $sformat(fpu_ss_operation, "fadd.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FSUB_S: begin
+            $sformat(fpu_ss_operation, "fsub.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FMUL_S: begin
+            $sformat(fpu_ss_operation, "fmul.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FDIV_S: begin
+            $sformat(fpu_ss_operation, "fdiv.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          // Sign-injection instructions take two FP sources (rs1 and rs2).
+          fpu_ss_instr_pkg::FSGNJ_S: begin
+            $sformat(fpu_ss_operation, "fsgnj.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FSGNJN_S: begin
+            $sformat(fpu_ss_operation, "fsgnjn.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FSGNJX_S: begin
+            $sformat(fpu_ss_operation, "fsgnjx.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FMIN_S: begin
+            $sformat(fpu_ss_operation, "fmin.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FMAX_S: begin
+            $sformat(fpu_ss_operation, "fmax.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FSQRT_S: begin
+            $sformat(fpu_ss_operation, "fsqrt.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+          end
+          fpu_ss_instr_pkg::FMADD_S: begin
+            $sformat(fpu_ss_operation, "fmadd.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+            $sformat(rs3_s, ", f%0d", rs3_i);
+          end
+          fpu_ss_instr_pkg::FMSUB_S: begin
+            $sformat(fpu_ss_operation, "fmsub.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+            $sformat(rs3_s, ", f%0d", rs3_i);
+          end
+          fpu_ss_instr_pkg::FNMSUB_S: begin
+            $sformat(fpu_ss_operation, "fnmsub.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+            $sformat(rs3_s, ", f%0d", rs3_i);
+          end
+          fpu_ss_instr_pkg::FNMADD_S: begin
+            $sformat(fpu_ss_operation, "fnmadd.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+            $sformat(rs3_s, ", f%0d", rs3_i);
+          end
+          fpu_ss_instr_pkg::FLE_S: begin
+            $sformat(fpu_ss_operation, "fle.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FLT_S: begin
+            $sformat(fpu_ss_operation, "flt.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FEQ_S: begin
+            $sformat(fpu_ss_operation, "feq.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+            $sformat(rs2_s, ", f%0d", rs2_i);
+          end
+          fpu_ss_instr_pkg::FCLASS_S: begin
+            $sformat(fpu_ss_operation, "fclass.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+          end
+          // int -> float conversion: the source is an integer register
+          fpu_ss_instr_pkg::FCVT_S_W: begin
+            $sformat(fpu_ss_operation, "fcvt.s.w");
+            $sformat(rs1_s, ", x%0d", rs1_i);
+          end
+          fpu_ss_instr_pkg::FCVT_W_S: begin
+            $sformat(fpu_ss_operation, "fcvt.w.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+          end
+          fpu_ss_instr_pkg::FCVT_WU_S: begin
+            $sformat(fpu_ss_operation, "fcvt.wu.s");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+          end
+          // fmv.x.w moves an FP register into an integer register, so the
+          // source operand is an FP register
+          fpu_ss_instr_pkg::FMV_X_W: begin
+            $sformat(fpu_ss_operation, "fmv.x.w");
+            $sformat(rs1_s, ", f%0d", rs1_i);
+          end
+          default: begin
+            $sformat(fpu_ss_operation, "operation unknown");
+            $display("fpu_ss_operation was set to operation unknown");
+          end
+        endcase  // instr_i
+      end
+      // ----------------------------------------------------------------
+      // Decode the operands of a memory request (FLW / FSW)
+      // ----------------------------------------------------------------
+      if (x_mem_valid_i) begin
+        $sformat(rs1_mem_s, "");
+        $sformat(rs2_mem_s, "");
+        $sformat(rs3_mem_s, "");
+        unique casez (instr_i)
+          fpu_ss_instr_pkg::FLW: begin
+            $sformat(rs1_mem_s, ", x%0d", rs1_i);
+          end
+          fpu_ss_instr_pkg::FSW: begin
+            // base address comes from x[rs1], store data from f[rs2]
+            $sformat(rs1_mem_s, ", x%0d", rs1_i);
+            $sformat(rs2_mem_s, ", f%0d", rs2_i);
+          end
+          // keep the operand strings empty for anything else and avoid a
+          // `unique` no-match violation
+          default: ;
+        endcase
+      end
+
+      // ----------------------------------------------------------------
+      // Emit one trace line per completed instruction
+      // ----------------------------------------------------------------
+      if (fpu_out_valid_i & fpu_out_ready_i) begin
+        if (fpr_we_i) begin
+          // FP destination: print the result as a float
+          $sformat(rd_s, "f%0d", fpu_waddr_i);
+          $sformat(result, "%.4f", $bitstoshortreal(fpu_result_i));
+        end else begin
+          // integer destination: print the raw bits
+          $sformat(rd_s, "x%0d", fpu_waddr_i);
+          $sformat(result, "%h", fpu_result_i);
+        end
+        $sformat(instruction, "%s %s%s%s%s", fpu_ss_operation, rd_s, rs1_s, rs2_s, rs3_s);
+        $sformat(instruction, "%-30s", instruction);
+        $fwrite(fpu_ss_trace_file, "%t\t\t %s \t\t%s", $time, instruction, result);
+        $fwrite(fpu_ss_trace_file, "\n");
+      end else if (x_mem_result_valid_i) begin
+        if (fpr_we_i) begin
+          // memory result that writes the FP register file is reported as a load
+          $sformat(rd_s, "f%0d", fpu_waddr_i);
+          $sformat(result, "%.4f", $bitstoshortreal(x_mem_result_i));
+          $sformat(instruction, "flw %s%s%s%s", rd_s, rs1_mem_s, rs2_mem_s, rs3_mem_s);
+          $sformat(instruction, "%-30s", instruction);
+          $fwrite(fpu_ss_trace_file, "%t\t\t %s \t\t%s", $time, instruction, result);
+          $fwrite(fpu_ss_trace_file, "\n");
+        end else begin
+          // memory result without register write is reported as a store
+          $sformat(result, "-----");
+          $sformat(instruction, "fsw %s%s%s", rs1_mem_s, rs2_mem_s, rs3_mem_s);
+          $sformat(instruction, "%-30s", instruction);
+          $fwrite(fpu_ss_trace_file, "%t\t\t %s \t\t%s", $time, instruction, result);
+          $fwrite(fpu_ss_trace_file, "\n");
+        end
+      end
+    end
+
+  end
+
+  final begin
+    $fclose(fpu_ss_trace_file);
+  end
+
+endmodule  // fpu_ss_tracer
+
+`endif  // FPU_SS_TRACER
diff --git a/hw/vendor/pulp_platform_fpu_ss/bhv/fpu_ss_wrapper.sv b/hw/vendor/pulp_platform_fpu_ss/bhv/fpu_ss_wrapper.sv
new file mode 100644
index 000000000..035a85b06
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/bhv/fpu_ss_wrapper.sv
@@ -0,0 +1,94 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Wrapper for a fpu_ss, containing fpu_ss, and tracer
+// Contributor: Moritz Imfeld
+//
+// Instantiates fpu_ss with all ports connected by name (.*) and, when
+// FPU_SS_TRACER is defined, a fpu_ss_tracer that observes internal fpu_ss
+// signals through hierarchical references.
+
+`ifdef FPU_SS_TRACER
+`include "fpu_ss_tracer.sv"
+`endif
+module fpu_ss_wrapper
+  import fpu_ss_pkg::*;
+#(
+    parameter                                 PULP_ZFINX         = 0,
+    parameter                                 INPUT_BUFFER_DEPTH = 1,
+    parameter                                 OUT_OF_ORDER       = 1,
+    parameter                                 FORWARDING         = 1,
+    parameter fpnew_pkg::fpu_features_t       FPU_FEATURES       = fpu_ss_pkg::FPU_FEATURES,
+    parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = fpu_ss_pkg::FPU_IMPLEMENTATION,
+    // Forwarded to fpu_ss. Appended last and defaulted to the fpu_ss default
+    // so existing instantiations of the wrapper behave as before.
+    parameter logic                           PulpDivsqrt        = 1'b1
+) (
+    // clock and reset
+    input logic clk_i,
+    input logic rst_ni,
+
+    // Compressed Interface
+    input  logic               x_compressed_valid_i,
+    output logic               x_compressed_ready_o,
+    input  x_compressed_req_t  x_compressed_req_i,
+    output x_compressed_resp_t x_compressed_resp_o,
+
+    // Issue Interface
+    input  logic          x_issue_valid_i,
+    output logic          x_issue_ready_o,
+    input  x_issue_req_t  x_issue_req_i,
+    output x_issue_resp_t x_issue_resp_o,
+
+    // Commit Interface
+    input logic      x_commit_valid_i,
+    input x_commit_t x_commit_i,
+
+    // Memory request/response Interface
+    output logic        x_mem_valid_o,
+    input  logic        x_mem_ready_i,
+    output x_mem_req_t  x_mem_req_o,
+    input  x_mem_resp_t x_mem_resp_i,
+
+    // Memory Result Interface
+    input logic          x_mem_result_valid_i,
+    input x_mem_result_t x_mem_result_i,
+
+    // Result Interface
+    output logic      x_result_valid_o,
+    input  logic      x_result_ready_i,
+    output x_result_t x_result_o
+);
+
+`ifdef FPU_SS_TRACER
+  // The tracer taps fpu_ss internals; the names on the right-hand side are
+  // signals declared inside fpu_ss.
+  fpu_ss_tracer fpu_ss_tracer_i (
+      .clk_i               (fpu_ss_i.clk_i),
+      .rst_ni              (fpu_ss_i.rst_ni),
+      .x_mem_valid_i       (fpu_ss_i.x_mem_valid_o),
+      .fpu_in_valid_i      (fpu_ss_i.fpu_in_valid),
+      .rs1_i               (fpu_ss_i.rs1),
+      .rs2_i               (fpu_ss_i.rs2),
+      .rs3_i               (fpu_ss_i.rs3),
+      .instr_i             (fpu_ss_i.instr),
+      .fpu_out_valid_i     (fpu_ss_i.fpu_out_valid),
+      .fpu_out_ready_i     (fpu_ss_i.fpu_out_ready),
+      .fpu_waddr_i         (fpu_ss_i.fpr_wb_addr),
+      .fpu_result_i        (fpu_ss_i.fpu_result),
+      .x_mem_result_valid_i(fpu_ss_i.x_mem_result_valid_i),
+      .fpr_we_i            (fpu_ss_i.fpr_we),
+      .x_mem_result_i      (fpu_ss_i.fpr_wb_data)
+  );
+`endif
+
+  // instantiate the fpu_ss
+  fpu_ss #(
+      .PULP_ZFINX(PULP_ZFINX),
+      .INPUT_BUFFER_DEPTH(INPUT_BUFFER_DEPTH),
+      .OUT_OF_ORDER(OUT_OF_ORDER),
+      .FORWARDING(FORWARDING),
+      .PulpDivsqrt(PulpDivsqrt),
+      .FPU_FEATURES(FPU_FEATURES),
+      .FPU_IMPLEMENTATION(FPU_IMPLEMENTATION)
+  ) fpu_ss_i (
+      .*
+  );
+endmodule
\ No newline at end of file
diff --git a/hw/vendor/pulp_platform_fpu_ss/fig/fpu_ss.png b/hw/vendor/pulp_platform_fpu_ss/fig/fpu_ss.png
new file mode 100644
index 000000000..35029dd9a
Binary files /dev/null and b/hw/vendor/pulp_platform_fpu_ss/fig/fpu_ss.png differ
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss.sv
new file mode 100644
index 000000000..c3e3be33b
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss.sv
@@ -0,0 +1,688 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Description: Top level Module of the FPU subsystem
+//
+// Parameters: PULP_ZFINX: Enable support for "Zfinx" standard extension (and thereby removing support for
+//                         "F" standard extension)
+//
+//             INPUT_BUFFER_DEPTH: Set depth of the FIFO input buffer. If parameter is set to 0, no buffer will be
+//                                 instantiated
+//
+//             OUT_OF_ORDER: Enable out-of-order execution for instructions that go through
+//                           the FPnew.
+//                           For example with OUT_OF_ORDER = 1
+//                             fdiv.s fa1, fa2, fa3 // suppose takes 3 cycles
+//                             fmul.s fa4, fa5, fa6 // suppose takes 1 cycles
+//                             fmul.s fa2, fa5, fa6 // suppose takes 1 cycles
+//                             fmul.s fa3, fa5, fa6 // suppose takes 1 cycles
+//                           --> This sequence takes 4 clock cycles
+//                           With OUT_OF_ORDER = 0 this instruction sequence would take 5 clock cycles
+//                           Possible values for this parameter are 0 and 1
+//
+//             FORWARDING: Enable forwarding of floating-point results in the subsystem.
+//                         For example take this sequence:
+//                           fmul.s fa4, fa5, fa6 // suppose takes 1 cycles
+//                           fmul.s fa1, fa4, fa6 // suppose takes 1 cycles
+//                         There is a source register dependency for the second instruction on the
+//                         first instruction's result. With FORWARDING = 1 this sequence takes 2 clock cycles
+//                         while with FORWARDING = 0 this sequence takes 3 clock cycles.
+//
+//             FPU_FEATURES: Parameter to configure the FPnew. The subsystem was designed for the configuration found here:
+//                           https://github.com/moimfeld/cv32e40p/blob/x-interface/example_tb/core/fpu_ss/fpu_ss_pkg.sv
+//                           Other configurations might not work
+//
+//             FPU_IMPLEMENTATION: Parameter to configure the FPnew. The subsystem was designed for the configuration found here:
+//                                 https://github.com/moimfeld/cv32e40p/blob/x-interface/example_tb/core/fpu_ss/fpu_ss_pkg.sv
+//                                 Other configurations might not work
+//
+// Contributor: Moritz Imfeld
+//              Davide Schiavone
+
+module fpu_ss
+  import fpu_ss_pkg::*;
+#(
+    parameter                                 PULP_ZFINX         = 0,
+    parameter                                 INPUT_BUFFER_DEPTH = 0,
+    parameter                                 OUT_OF_ORDER       = 1,
+    parameter                                 FORWARDING         = 1,
+    // PulpDivSqrt = 0 enables T-head-based DivSqrt unit. Supported only for FP32-only instances of Fpnew
+    parameter logic                           PulpDivsqrt        = 1'b1,
+    parameter fpnew_pkg::fpu_features_t       FPU_FEATURES       = fpu_ss_pkg::FPU_FEATURES,
+    parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = fpu_ss_pkg::FPU_IMPLEMENTATION
+) (
+    // Clock and Reset
+    input logic clk_i,
+    input logic rst_ni,
+
+    // Compressed Interface
+    input  logic               x_compressed_valid_i,
+    output logic               x_compressed_ready_o,
+    input  x_compressed_req_t  x_compressed_req_i,
+    output x_compressed_resp_t x_compressed_resp_o,
+
+    // Issue Interface
+    input  logic          x_issue_valid_i,
+    output logic          x_issue_ready_o,
+    input  x_issue_req_t  x_issue_req_i,
+    output x_issue_resp_t x_issue_resp_o,
+
+    // Commit Interface
+    input logic      x_commit_valid_i,
+    input x_commit_t x_commit_i,
+
+    // Memory Request/Response Interface
+    output logic        x_mem_valid_o,
+    input  logic        x_mem_ready_i,
+    output x_mem_req_t  x_mem_req_o,
+    input  x_mem_resp_t x_mem_resp_i,
+
+    // Memory Result Interface
+    input logic          x_mem_result_valid_i,
+    input x_mem_result_t x_mem_result_i,
+
+    // Result Interface
+    output logic      x_result_valid_o,
+    input  logic      x_result_ready_i,
+    output x_result_t x_result_o
+);
+
+  // predecoder parameter
+  // NOTE: the predecoder table is selected by the PULP_ZFINX_DEF macro, not by
+  // the PULP_ZFINX parameter.
+`ifdef PULP_ZFINX_DEF
+  localparam int unsigned NUM_INSTR = fpu_ss_prd_zfinx_pkg::NumInstr;
+  localparam offload_instr_t OFFLOAD_INSTR[NUM_INSTR] = fpu_ss_prd_zfinx_pkg::OffloadInstr;
+`else
+  localparam int unsigned NUM_INSTR = fpu_ss_prd_f_pkg::NumInstr;
+  localparam offload_instr_t OFFLOAD_INSTR[NUM_INSTR] = fpu_ss_prd_f_pkg::OffloadInstr;
+`endif
+
+  // compressed predecoder signals
+  comp_prd_req_t comp_prd_req;
+  comp_prd_rsp_t comp_prd_rsp;
+
+  // predecoder signals
+  acc_prd_req_t prd_req;
+  acc_prd_rsp_t prd_rsp;
+  logic in_buf_push_ready;
+
+  // issue_interface
+  logic x_issue_ready;
+
+  // input stream fifo signals
+  offloaded_data_t in_buf_push_data;
+  offloaded_data_t in_buf_pop_data;
+  logic in_buf_push_valid;
+  logic in_buf_pop_valid;
+  logic in_buf_pop_ready;
+
+  // decoder signals
+  fpnew_pkg::operation_e fpu_op;
+  op_select_e [2:0] op_select_dec;
+  op_select_e [2:0] op_select;
+  fpnew_pkg::roundmode_e fpu_rnd_mode;
+  logic set_dyn_rm;
+  fpnew_pkg::fp_format_e src_fmt;
+  fpnew_pkg::fp_format_e dst_fmt;
+  fpnew_pkg::int_format_e int_fmt;
+  logic rd_is_fp;
+  logic csr_instr;
+  logic vectorial_op;
+  logic op_mode;
+  logic use_fpu;
+  logic is_store;
+  logic is_load;
+  ls_size_e ls_size;
+
+  // forwarding and dependency
+  logic [2:0] fpu_fwd;
+  logic [2:0] lsu_fwd;
+  logic dep_rs;
+  logic dep_rd;
+
+  // instruction data, operands and addresses
+  logic [31:0] instr;
+  logic [2:0][31:0] fpu_operands_dec;
+  logic [2:0][31:0] fpu_operands;
+  logic [2:0][31:0] int_operands;
+  logic [2:0][31:0] fpr_operands;
+  logic [4:0] rs1;
+  logic [4:0] rs2;
+  logic [4:0] rs3;
+  logic [4:0] rd;
+  logic [31:0] offset;
+  logic [2:0][4:0] fpr_raddr;
+  logic [4:0] fpr_wb_addr;
+  logic [31:0] fpr_wb_data;
+  logic fpr_we;
+
+  // memory buffer signals
+  logic mem_push_valid;
+  logic mem_push_ready;
+  logic mem_pop_valid;
+  logic mem_pop_ready;
+  mem_metadata_t mem_push_data;
+  mem_metadata_t mem_pop_data;
+
+  // CSR
+  logic csr_wb;
+  logic [31:0] csr_rdata;
+  logic [4:0] csr_wb_addr;
+  logic [3:0] csr_wb_id;
+  logic [2:0] frm;
+
+  // FPnew signals
+  fpu_tag_t fpu_tag_in;
+  fpu_tag_t fpu_tag_out;
+  logic fpu_in_valid;
+  logic fpu_in_ready;
+  logic fpu_out_valid;
+  logic fpu_out_ready;
+  logic [31:0] fpu_result;
+  logic fpu_busy;
+  fpnew_pkg::status_t fpu_status;
+
+  // compressed interface signal assignments
+  assign x_compressed_ready_o = x_compressed_valid_i;
+  assign comp_prd_req.comp_instr = x_compressed_req_i.instr;
+  assign x_compressed_resp_o.instr = comp_prd_rsp.decomp_instr;
+  assign x_compressed_resp_o.accept = comp_prd_rsp.accept;
+
+  // issue interface signal assignment
+  assign prd_req.q_instr_data = x_issue_req_i.instr;
+  assign x_issue_resp_o.accept = prd_rsp.p_accept;
+  assign x_issue_resp_o.writeback = prd_rsp.p_writeback;
+  assign x_issue_resp_o.dualwrite = '0;
+  assign x_issue_resp_o.dualread = '0;
+  assign x_issue_resp_o.loadstore = prd_rsp.p_is_mem_op;
+  assign x_issue_resp_o.ecswrite = '0;
+  assign x_issue_resp_o.exc = '0;
+
+  // input buffer signal assignment
+  // an instruction is pushed only when it is handshaked AND accepted
+  assign in_buf_push_valid = x_issue_valid_i & x_issue_ready_o & x_issue_resp_o.accept;
+  assign in_buf_push_data.rs = x_issue_req_i.rs;
+  assign in_buf_push_data.instr_data = x_issue_req_i.instr;
+  assign in_buf_push_data.id = x_issue_req_i.id;
+  assign in_buf_push_data.mode = x_issue_req_i.mode;
+
+  // instr, operand and address signal assignment
+  assign instr = in_buf_pop_data.instr_data;
+  assign int_operands[0] = in_buf_pop_data.rs[0];
+  assign int_operands[1] = in_buf_pop_data.rs[1];
+  assign int_operands[2] = in_buf_pop_data.rs[2];
+  assign rs1 = instr[19:15];
+  assign rs2 = instr[24:20];
+  assign rs3 = instr[31:27];
+  assign rd = instr[11:7];
+
+  // FPnew tag
+  assign fpu_tag_in.addr = rd;
+  assign fpu_tag_in.rd_is_fp = rd_is_fp;
+  assign fpu_tag_in.id = in_buf_pop_data.id;
+
+  // memory instruction buffer assignment
+  assign mem_push_data.id = in_buf_pop_data.id;
+  assign mem_push_data.rd = rd;
+  assign mem_push_data.we = is_load;
+
+  // memory request signal assignments
+  assign x_mem_req_o.mode = in_buf_pop_data.mode;
+  assign x_mem_req_o.size = instr[14:12];
+  assign x_mem_req_o.id = in_buf_pop_data.id;
+
+  // store data: register-file value of operand 1, overridden by a forwarded
+  // FPU result or a forwarded memory result
+  always_comb begin
+    x_mem_req_o.wdata = fpr_operands[1];
+    if (fpu_fwd[1]) begin
+      x_mem_req_o.wdata = fpu_result;
+    end else if (lsu_fwd[1]) begin
+      x_mem_req_o.wdata = x_mem_result_i.rdata;
+    end
+  end
+
+  // load and store address calculation for memory instructions
+  // (the 12-bit immediate is sign-extended; loads use the I-type field,
+  // stores the S-type fields)
+  always_comb begin
+    if (~x_mem_req_o.we) begin
+      offset = 32'($unsigned(instr[31:20]));
+      if (instr[31]) begin
+        offset = {20'b1111_1111_1111_1111_1111, instr[31:20]};
+      end
+    end else begin
+      offset = 32'($unsigned({instr[31:25], instr[11:7]}));
+      if (instr[31]) begin
+        offset = {20'b1111_1111_1111_1111_1111, instr[31:25], instr[11:7]};
+      end
+    end
+    x_mem_req_o.addr = int_operands[0] + offset;
+  end
+
+  // ---------------------
+  // Compressed Predecoder
+  // ---------------------
+  fpu_ss_compressed_predecoder fpu_ss_compressed_predecoder_i (
+      .prd_req_i(comp_prd_req),
+      .prd_rsp_o(comp_prd_rsp)
+  );
+
+  // ----------
+  // Predecoder
+  // ----------
+  fpu_ss_predecoder #(
+      .NumInstr(NUM_INSTR),
+      .OffloadInstr(OFFLOAD_INSTR)
+  ) fpu_ss_predecoder_i (
+      .prd_req_i(prd_req),
+      .prd_rsp_o(prd_rsp)
+  );
+
+  // -----------------
+  // Input Stream FIFO
+  // -----------------
+  generate
+    if (INPUT_BUFFER_DEPTH > 0) begin : gen_input_stream_fifo
+      stream_fifo #(
+          .FALL_THROUGH(1),
+          .DATA_WIDTH  (32),
+          .DEPTH       (INPUT_BUFFER_DEPTH),
+          .T           (offloaded_data_t)
+      ) input_stream_fifo_i (
+          .clk_i     (clk_i),
+          .rst_ni    (rst_ni),
+          .flush_i   (1'b0),
+          .testmode_i(1'b0),
+          .usage_o   (  /* unused */),
+
+          .data_i (in_buf_push_data),
+          .valid_i(in_buf_push_valid),
+          .ready_o(in_buf_push_ready),
+
+          .data_o (in_buf_pop_data),
+          .valid_o(in_buf_pop_valid),
+          .ready_i(in_buf_pop_ready)
+      );
+      assign x_issue_ready_o = x_issue_ready;
+    end else begin : gen_no_input_stream_fifo
+      // no buffer: the issue request is consumed directly
+      assign in_buf_pop_data = in_buf_push_data;
+      assign x_issue_ready_o = x_issue_ready & ~dep_rs & ~dep_rd; // readiness of FPnew is assumed here
+      assign in_buf_push_ready = 1'b1;
+      assign in_buf_pop_valid = x_issue_valid_i;
+    end
+  endgenerate
+
+  // -------
+  // Decoder
+  // -------
+  fpu_ss_decoder #(
+      .PULP_ZFINX(PULP_ZFINX)
+  ) fpu_ss_decoder_i (
+      .instr_i       (instr),
+      .fpu_rnd_mode_i(fpnew_pkg::roundmode_e'(frm)),
+      .fpu_op_o      (fpu_op),
+      .op_select_o   (op_select_dec),
+      .fpu_rnd_mode_o(fpu_rnd_mode),
+      .set_dyn_rm_o  (set_dyn_rm),
+      .src_fmt_o     (src_fmt),
+      .dst_fmt_o     (dst_fmt),
+      .int_fmt_o     (int_fmt),
+      .rd_is_fp_o    (rd_is_fp),
+      .vectorial_op_o(vectorial_op),
+      .op_mode_o     (op_mode),
+      .use_fpu_o     (use_fpu),
+      .is_store_o    (is_store),
+      .is_load_o     (is_load),
+      .ls_size_o     (ls_size)
+  );
+
+  // ------------------------------
+  // Memory Instruction Stream FIFO
+  // ------------------------------
+  stream_fifo #(
+      .FALL_THROUGH(0),
+      .DATA_WIDTH  (32),
+      .DEPTH       (3),
+      .T           (mem_metadata_t)
+  ) mem_stream_fifo_i (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .usage_o   (  /* unused */),
+
+      .data_i (mem_push_data),
+      .valid_i(mem_push_valid),
+      .ready_o(mem_push_ready),
+
+      .data_o (mem_pop_data),
+      .valid_o(mem_pop_valid),
+      .ready_i(mem_pop_ready)
+  );
+
+  // ------------------
+  // Floating-Point CSR
+  // ------------------
+  fpu_ss_csr fpu_ss_csr_i (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+
+      .instr_i           (instr),
+      .csr_data_i        (int_operands[0]),
+      .fpu_status_i      (fpu_status),
+      .in_buf_pop_valid_i(in_buf_pop_valid),
+      .fpu_out_valid_i   (fpu_out_valid),
+      .csr_id_i          (in_buf_pop_data.id),
+      .csr_rdata_o       (csr_rdata),
+      .frm_o             (frm),
+      .csr_wb_o          (csr_wb),
+      .csr_wb_addr_o     (csr_wb_addr),
+      .csr_wb_id_o       (csr_wb_id),
+      .csr_instr_o       (csr_instr)
+  );
+
+  // ------------------------
+  // FPU Subsystem Controller
+  // ------------------------
+  fpu_ss_controller #(
+      .PULP_ZFINX(PULP_ZFINX),
+      .INPUT_BUFFER_DEPTH(INPUT_BUFFER_DEPTH),
+      .OUT_OF_ORDER(OUT_OF_ORDER),
+      .FORWARDING(FORWARDING)
+  ) fpu_ss_controller_i (
+      // Clock and Reset
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+
+      // Predecoder
+      .in_buf_push_ready_i(in_buf_push_ready),
+      .prd_rsp_use_rs_i   (prd_rsp.p_use_rs),
+
+      // Issue Interface
+      .x_issue_req_rs_valid_i(x_issue_req_i.rs_valid),
+      .x_issue_ready_o       (x_issue_ready),
+
+      // Commit Interface
+      .x_commit_valid_i(x_commit_valid_i),
+      .x_commit_i      (x_commit_i),
+
+      // Input Buffer
+      .in_buf_pop_valid_i(in_buf_pop_valid),
+      .in_buf_pop_ready_o(in_buf_pop_ready),
+
+      // Register
+      .rd_is_fp_i   (fpu_tag_out.rd_is_fp),
+      .fpr_wb_addr_i(fpr_wb_addr),
+      .rd_i         (rd),
+      .fpr_we_o     (fpr_we),
+      .fpu_out_id_i (fpu_tag_out.id),
+
+      // Dependency Check and Forwarding
+      .rd_in_is_fp_i  (rd_is_fp),
+      .rs1_i          (fpr_raddr[0]),
+      .rs2_i          (fpr_raddr[1]),
+      .rs3_i          (fpr_raddr[2]),
+      .fpu_fwd_o      (fpu_fwd),
+      .lsu_fwd_o      (lsu_fwd),
+      .op_select_i    (op_select),
+      .dep_rs_o       (dep_rs),
+      .dep_rd_o       (dep_rd),
+      .x_issue_ready_i(x_issue_ready_o),
+
+      // Memory Instruction
+      .is_load_i (is_load),
+      .is_store_i(is_store),
+
+      // Memory Request/Response Interface
+      .x_mem_valid_o   (x_mem_valid_o),
+      .x_mem_ready_i   (x_mem_ready_i),
+      .x_mem_req_id_i  (x_mem_req_o.id),
+      .x_mem_req_we_o  (x_mem_req_o.we),
+      .x_mem_req_spec_o(x_mem_req_o.spec),
+      .x_mem_req_last_o(x_mem_req_o.last),
+
+      // Memory Buffer
+      .mem_push_valid_o(mem_push_valid),
+      .mem_push_ready_i(mem_push_ready),
+      .mem_pop_ready_o (mem_pop_ready),
+      .mem_pop_data_i  (mem_pop_data),
+
+      // Memory Result Interface
+      .x_mem_result_valid_i(x_mem_result_valid_i),
+
+      // FPnew
+      .fpu_in_valid_o (fpu_in_valid),
+      .fpu_in_ready_i (fpu_in_ready),
+      .use_fpu_i      (use_fpu),
+      .fpu_in_id_i    (in_buf_pop_data.id),
+      .fpu_out_valid_i(fpu_out_valid),
+      .fpu_out_ready_o(fpu_out_ready),
+
+      // Result Interface
+      .x_result_ready_i(x_result_ready_i),
+      .x_result_valid_o(x_result_valid_o),
+      .csr_instr_i     (csr_instr)
+  );
+
+  // -------------------------------------
+  // Floating-Point specific Register File
+  // -------------------------------------
+  generate
+    if (!PULP_ZFINX) begin : gen_fp_register_file
+      // fp register address selection
+      // read port i normally reads rs(i+1); the decoder's operand selection
+      // can redirect ports 1 and 2 to another register index
+      always_comb begin
+        fpr_raddr[0] = rs1;
+        fpr_raddr[1] = rs2;
+        fpr_raddr[2] = rs3;
+
+        unique case (op_select_dec[1])
+          RegA: begin
+            fpr_raddr[1] = rs1;
+          end
+          default: ;
+        endcase
+
+        unique case (op_select_dec[2])
+          RegB, RegBRep: begin
+            fpr_raddr[2] = rs2;
+          end
+          RegDest: begin
+            fpr_raddr[2] = rd;
+          end
+          default: ;
+        endcase
+      end
+
+      // fp register writeback data mux (memory result has priority)
+      always_comb begin
+        fpr_wb_data = fpu_result;
+        if (x_mem_result_valid_i) begin
+          fpr_wb_data = x_mem_result_i.rdata;
+        end
+      end
+
+      // fp register addr writeback mux
+      always_comb begin
+        fpr_wb_addr = fpu_tag_out.addr;
+        if (x_mem_result_valid_i) begin
+          fpr_wb_addr = mem_pop_data.rd;
+        end else if (~use_fpu & ~fpu_out_valid) begin
+          fpr_wb_addr = rd;
+        end
+      end
+
+      fpu_ss_regfile fpu_ss_regfile_i (
+          .clk_i (clk_i),
+          .rst_ni(rst_ni),
+
+          .raddr_i(fpr_raddr),
+          .rdata_o(fpr_operands),
+
+          .waddr_i(fpr_wb_addr),
+          .wdata_i(fpr_wb_data),
+          .we_i   (fpr_we)
+      );
+    end else begin : gen_no_fp_register_file
+      // Zfinx: operands come from the integer operands of the issue request
+      assign fpr_operands = int_operands;
+    end
+  endgenerate
+
+  // ------------------
+  // Operand Selection
+  // ------------------
+  // With PULP_ZFINX every register selection is remapped to AccBus.
+  for (genvar i = 0; i < 3; i++) begin
+    always_comb begin
+      op_select[i] = op_select_dec[i];
+      if (PULP_ZFINX) begin
+        unique case (op_select_dec[i])
+          None, AccBus: begin
+            op_select[i] = op_select_dec[i];
+          end
+          RegA, RegB, RegBRep, RegC, RegDest: begin
+            op_select[i] = AccBus;
+          end
+        endcase
+      end
+    end
+  end
+
+  for (genvar i = 0; i < 3; i++) begin : gen_operand_select
+    always_comb begin
+      unique case (op_select[i])
+        None: begin
+          fpu_operands_dec[i] = '1;
+        end
+        AccBus: begin
+          fpu_operands_dec[i] = int_operands[i];
+          if (fpu_fwd[i]) begin
+            fpu_operands_dec[i] = fpu_result;
+          end
+        end
+        RegA, RegB, RegBRep, RegC, RegDest: begin
+          fpu_operands_dec[i] = fpr_operands[i];
+          // forwarding for fpnew_pkg::ADD is applied in the fpu_operands
+          // block below, so it is excluded here
+          if (fpu_fwd[i] & (fpu_op != fpnew_pkg::ADD)) begin
+            fpu_operands_dec[i] = fpu_result;
+          end else if (lsu_fwd[i] & (fpu_op != fpnew_pkg::ADD)) begin
+            fpu_operands_dec[i] = x_mem_result_i.rdata;
+          end
+          // Replicate if needed
+          if (op_select[i] == RegBRep) begin
+            unique case (src_fmt)
+              fpnew_pkg::FP32: fpu_operands_dec[i] = {(32 / 32) {fpu_operands_dec[i][31:0]}};
+              fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
+              fpu_operands_dec[i] = {(32 / 16) {fpu_operands_dec[i][15:0]}};
+              fpnew_pkg::FP8: fpu_operands_dec[i] = {(32 / 8) {fpu_operands_dec[i][7:0]}};
+              default: fpu_operands_dec[i] = fpu_operands_dec[i][32-1:0];
+            endcase
+          end
+        end
+        default: begin
+          fpu_operands_dec[i] = '0;
+        end
+      endcase
+    end
+  end
+
+  // final operand mux in front of FPnew
+  always_comb begin
+    fpu_operands = fpu_operands_dec;
+    if (PULP_ZFINX) begin
+      if (op_select_dec[1] == RegA) begin
+        fpu_operands[1] = int_operands[0];
+      end
+      if (op_select_dec[2] == RegB) begin
+        fpu_operands[2] = int_operands[1];
+      end
+    end else begin
+      // forwarding for fpnew_pkg::ADD on operands 1 and 2; the FPU result
+      // takes priority over the memory result (assigned last)
+      if (lsu_fwd[1] & (fpu_op == fpnew_pkg::ADD) & use_fpu) begin
+        fpu_operands[1] = x_mem_result_i.rdata;
+      end
+      if (lsu_fwd[2] & (fpu_op == fpnew_pkg::ADD) & use_fpu) begin
+        fpu_operands[2] = x_mem_result_i.rdata;
+      end
+      if (fpu_fwd[1] & (fpu_op == fpnew_pkg::ADD) & use_fpu) begin
+        fpu_operands[1] = fpu_result;
+      end
+      if (fpu_fwd[2] & (fpu_op == fpnew_pkg::ADD) & use_fpu) begin
+        fpu_operands[2] = fpu_result;
+      end
+    end
+  end
+
+  // ------
+  // FPnew
+  // ------
+  fpnew_top #(
+      .PulpDivsqrt   (PulpDivsqrt),
+      .Features      (FPU_FEATURES),
+      .Implementation(FPU_IMPLEMENTATION),
+      .TagType       (fpu_tag_t)
+  ) i_fpnew_bulk (
+      .clk_i         (clk_i),
+      .rst_ni        (rst_ni),
+      .operands_i    (fpu_operands),
+      .rnd_mode_i    (fpnew_pkg::roundmode_e'(fpu_rnd_mode)),
+      .op_i          (fpnew_pkg::operation_e'(fpu_op)),
+      .op_mod_i      (op_mode),
+      .src_fmt_i     (fpnew_pkg::fp_format_e'(src_fmt)),
+      .dst_fmt_i     (fpnew_pkg::fp_format_e'(dst_fmt)),
+      .int_fmt_i     (fpnew_pkg::int_format_e'(int_fmt)),
+      .vectorial_op_i(vectorial_op),
+      .tag_i         (fpu_tag_in),
+      .simd_mask_i   ('0),
+      .in_valid_i    (fpu_in_valid),
+      .in_ready_o    (fpu_in_ready),
+      .flush_i       (1'b0),
+      .result_o      (fpu_result),
+      .status_o      (fpu_status),
+      .tag_o         (fpu_tag_out),
+      .out_valid_o   (fpu_out_valid),
+      .out_ready_i   (fpu_out_ready),
+      .busy_o        (fpu_busy)
+  );
+
+
+  // -------------------------
+  // Result Interface Signals
+  // -------------------------
+  assign x_result_o.exc = 1'b0;  // no errors can occur for now
+  assign x_result_o.exccode = '0;  // no errors can occur for now
+
+  // Result data: the FPnew result by default. A CSR write-back that does not
+  // coincide with an FPnew result returns the CSR read data from fpu_ss_csr
+  // (the destination register index travels separately in x_result_o.rd).
+  always_comb begin
+    x_result_o.data = fpu_result;
+    if (csr_wb & ~fpu_out_valid) begin
+      x_result_o.data = csr_rdata;
+    end
+  end
+
+  // Result destination register and instruction id
+  always_comb begin
+    x_result_o.rd = '0;
+    x_result_o.id = '0;
+    if (fpu_out_valid & x_result_valid_o & x_result_ready_i) begin
+      x_result_o.rd = fpu_tag_out.addr;
+      x_result_o.id = fpu_tag_out.id;
+    end else if (x_result_valid_o & x_result_ready_i & ~fpu_out_valid) begin
+      x_result_o.rd = csr_wb_addr;
+      x_result_o.id = csr_wb_id;
+    end
+  end
+
+  // Integer register write enable: FPnew results with a non-FP destination,
+  // and CSR write-backs
+  always_comb begin
+    x_result_o.we = 1'b0;
+    if ((fpu_out_valid & ~fpu_tag_out.rd_is_fp) | (csr_wb)) begin
+      x_result_o.we = 1'b1;
+    end
+  end
+
+  // Extension context status update, driven when a result with an FP
+  // destination is handshaked
+  always_comb begin
+    x_result_o.ecswe   = '0;
+    x_result_o.ecsdata = '0;
+    if (fpu_out_valid & x_result_valid_o & x_result_ready_i & fpu_tag_out.rd_is_fp) begin
+      x_result_o.ecswe   = 3'b010;
+      x_result_o.ecsdata = 6'b001100;
+    end
+  end
+
+endmodule  // fpu_ss
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_compressed_predecoder.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_compressed_predecoder.sv
new file mode 100644
index 000000000..b692e225d
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_compressed_predecoder.sv
@@ -0,0 +1,48 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Subsystem Compressed Instruction Predecoder
+// Moritz Imfeld
+//
+// Purely combinational. Matches the incoming compressed instruction against
+// C_FLW / C_FLWSP / C_FSW / C_FSWSP and, on a match, asserts `accept` and
+// returns the equivalent 32-bit load/store encoding. Anything else is
+// rejected with an all-zero instruction.
+
+module fpu_ss_compressed_predecoder
+  import fpu_ss_pkg::*;
+(
+    input  fpu_ss_pkg::comp_prd_req_t prd_req_i,
+    output fpu_ss_pkg::comp_prd_rsp_t prd_rsp_o
+);
+
+  // Register index 2, used as base register by the *SP forms
+  localparam logic [4:0] RegSp = 5'h02;
+
+  // Assemble a 32-bit load word: {imm[11:0], rs1, 3'b010, rd, 7'b000_0111}
+  function automatic logic [31:0] enc_load(input logic [11:0] imm, input logic [4:0] base,
+                                           input logic [4:0] dest);
+    return {imm, base, 3'b010, dest, 7'b000_0111};
+  endfunction
+
+  // Assemble a 32-bit store word:
+  // {imm[11:5], rs2, rs1, 3'b010, imm[4:0], 7'b010_0111}
+  function automatic logic [31:0] enc_store(input logic [11:0] imm, input logic [4:0] base,
+                                            input logic [4:0] src);
+    return {imm[11:5], src, base, 3'b010, imm[4:0], 7'b010_0111};
+  endfunction
+
+  always_comb begin
+    // reject by default
+    prd_rsp_o.accept       = 1'b0;
+    prd_rsp_o.decomp_instr = '0;
+
+    unique casez (prd_req_i.comp_instr)
+      fpu_ss_instr_pkg::C_FLW: begin
+        prd_rsp_o.accept = 1'b1;
+        // 3-bit register fields are expanded with a 2'b01 prefix
+        prd_rsp_o.decomp_instr = enc_load(
+            {5'b0, prd_req_i.comp_instr[5], prd_req_i.comp_instr[12:10], prd_req_i.comp_instr[6], 2'b00},
+            {2'b01, prd_req_i.comp_instr[9:7]},
+            {2'b01, prd_req_i.comp_instr[4:2]}
+        );
+      end
+      fpu_ss_instr_pkg::C_FLWSP: begin
+        prd_rsp_o.accept = 1'b1;
+        prd_rsp_o.decomp_instr = enc_load(
+            {4'b0, prd_req_i.comp_instr[3:2], prd_req_i.comp_instr[12], prd_req_i.comp_instr[6:4], 2'b00},
+            RegSp,
+            prd_req_i.comp_instr[11:7]
+        );
+      end
+      fpu_ss_instr_pkg::C_FSW: begin
+        prd_rsp_o.accept = 1'b1;
+        prd_rsp_o.decomp_instr = enc_store(
+            {5'b0, prd_req_i.comp_instr[5], prd_req_i.comp_instr[12], prd_req_i.comp_instr[11:10], prd_req_i.comp_instr[6], 2'b00},
+            {2'b01, prd_req_i.comp_instr[9:7]},
+            {2'b01, prd_req_i.comp_instr[4:2]}
+        );
+      end
+      fpu_ss_instr_pkg::C_FSWSP: begin
+        prd_rsp_o.accept = 1'b1;
+        prd_rsp_o.decomp_instr = enc_store(
+            {4'b0, prd_req_i.comp_instr[8:7], prd_req_i.comp_instr[12], prd_req_i.comp_instr[11:9], 2'b00},
+            RegSp,
+            prd_req_i.comp_instr[6:2]
+        );
+      end
+      // not an offloadable compressed instruction: keep the reject defaults
+      default: ;
+    endcase
+  end
+endmodule  // fpu_ss_compressed_predecoder
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_controller.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_controller.sv
new file mode 100644
index 000000000..40709f9aa
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_controller.sv
@@ -0,0 +1,306 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Subsystem Controller
+// Contributor: Moritz Imfeld
+
+// Generates the handshake signals of the FPU subsystem (issue, input buffer,
+// memory request, FPU and result interfaces) and keeps two scoreboards:
+//   - rd_scoreboard_q: one bit per FP register, set while a write to that
+//     register is outstanding (FPU operation accepted or load requested)
+//   - id_scoreboard_q: one bit per instruction ID, set once the ID has been
+//     committed without kill, cleared on the FPU output handshake
+//
+// Parameters (PULP_ZFINX, OUT_OF_ORDER and FORWARDING are used as 0/1 flags):
+//   PULP_ZFINX         - gates the load write-back term of fpr_we_o and adds
+//                        an x_issue_ready_i requirement to out-of-order issue
+//   INPUT_BUFFER_DEPTH - when 0, memory requests additionally require
+//                        x_issue_ready_i
+//   OUT_OF_ORDER       - selects which of the two FPU issue conditions is used
+//   FORWARDING         - enables the fpu_fwd_o / lsu_fwd_o forwarding selects
+module fpu_ss_controller
+  import fpu_ss_pkg::*;
+#(
+    parameter PULP_ZFINX         = 0,
+    parameter INPUT_BUFFER_DEPTH = 0,
+    parameter OUT_OF_ORDER       = 1,
+    parameter FORWARDING         = 1
+) (
+    // Clock and Reset
+    input logic clk_i,
+    input logic rst_ni,
+
+    // Predecoder
+    input logic       in_buf_push_ready_i,
+    input logic [2:0] prd_rsp_use_rs_i,
+
+    // Issue Interface
+    input  logic [2:0] x_issue_req_rs_valid_i,
+    output logic       x_issue_ready_o,
+
+    // Commit Interface
+    input logic      x_commit_valid_i,
+    input x_commit_t x_commit_i,
+
+    // Input Buffer
+    input  logic in_buf_pop_valid_i,
+    output logic in_buf_pop_ready_o,
+
+    // Register
+    input  logic       rd_is_fp_i,
+    input  logic [4:0] fpr_wb_addr_i,
+    input  logic [4:0] rd_i,
+    output logic       fpr_we_o,
+    input  logic [3:0] fpu_out_id_i,
+
+    // Dependency Check and Forwarding
+    input  logic                         rd_in_is_fp_i,
+    input  logic                   [4:0] rs1_i,
+    input  logic                   [4:0] rs2_i,
+    input  logic                   [4:0] rs3_i,
+    output logic                   [2:0] fpu_fwd_o,
+    output logic                   [2:0] lsu_fwd_o,
+    input  fpu_ss_pkg::op_select_e [2:0] op_select_i,
+    output logic                         dep_rs_o,
+    output logic                         dep_rd_o,
+    input  logic                         x_issue_ready_i,
+
+    // Memory Instruction
+    input logic is_load_i,
+    input logic is_store_i,
+
+    // Memory Request/Response Interface
+    output logic                  x_mem_valid_o,
+    input  logic                  x_mem_ready_i,
+    input  logic [X_ID_WIDTH-1:0] x_mem_req_id_i,
+    output logic                  x_mem_req_we_o,
+    output logic                  x_mem_req_spec_o,
+    output logic                  x_mem_req_last_o,
+
+    // Memory Buffer
+    output logic                      mem_push_valid_o,
+    input  logic                      mem_push_ready_i,
+    output logic                      mem_pop_ready_o,
+    input  fpu_ss_pkg::mem_metadata_t mem_pop_data_i,
+
+    // Memory Result Interface
+    input logic x_mem_result_valid_i,
+
+    // FPnew
+    input  logic       use_fpu_i,
+    output logic       fpu_in_valid_o,
+    input  logic       fpu_in_ready_i,
+    input  logic [3:0] fpu_in_id_i,
+    input  logic       fpu_out_valid_i,
+    output logic       fpu_out_ready_o,
+
+    // Result Interface
+    input  logic x_result_ready_i,
+    output logic x_result_valid_o,
+    input  logic csr_instr_i
+);
+
+  // dependencies and forwarding
+  logic [ 2:0] valid_operands;
+  logic        dep_rs1;
+  logic        dep_rs2;
+  // separate dependencies for the addition and subtraction instructions, since
+  // they have different operand assignments (--> see FPnew documentation)
+  logic        dep_rs1_add;
+  logic        dep_rs2_add;
+  logic        dep_rs3;
+
+  // handshakes
+  logic        x_result_hs;
+  logic        x_mem_req_hs;
+
+  // status signals and scoreboards
+  logic        instr_inflight_d;
+  logic        instr_inflight_q;
+  logic        instr_offloaded_d;
+  logic        instr_offloaded_q;
+  logic [31:0] rd_scoreboard_d;
+  logic [31:0] rd_scoreboard_q;
+  logic [15:0] id_scoreboard_d;
+  logic [15:0] id_scoreboard_q;
+
+  // ---------------
+  // Issue Interface
+  // ---------------
+  // Ready when every source register the predecoder marks as used is flagged
+  // valid by the core and the input buffer can take a new entry.
+  always_comb begin
+    x_issue_ready_o = 1'b0;
+    if (((prd_rsp_use_rs_i[0] & x_issue_req_rs_valid_i[0]) | !prd_rsp_use_rs_i[0])
+      & ((prd_rsp_use_rs_i[1] & x_issue_req_rs_valid_i[1]) | !prd_rsp_use_rs_i[1])
+      & ((prd_rsp_use_rs_i[2] & x_issue_req_rs_valid_i[2]) | !prd_rsp_use_rs_i[2])
+      & in_buf_push_ready_i) begin
+      x_issue_ready_o = 1'b1;
+    end
+  end
+
+  // ------------
+  // Input Buffer
+  // ------------
+  // The head entry is popped when it is handed to the FPU, when a CSR
+  // instruction completes its result handshake, or when a memory request
+  // is accepted.
+  always_comb begin
+    in_buf_pop_ready_o = 1'b0;
+    if ((fpu_in_valid_o & fpu_in_ready_i) | (x_result_hs & csr_instr_i) | x_mem_req_hs) begin
+      in_buf_pop_ready_o = 1'b1;
+    end
+  end
+
+  // ----------------
+  // FP Register File
+  // ----------------
+  // '&' binds tighter than '|': ~PULP_ZFINX only gates the load write-back
+  // term. The parentheses below make that existing precedence explicit.
+  always_comb begin
+    fpr_we_o = 1'b0;
+    if ((fpu_out_valid_i & fpu_out_ready_o & rd_is_fp_i)
+      | ((mem_pop_data_i.we & x_mem_result_valid_i) & ~PULP_ZFINX)) begin
+      fpr_we_o = 1'b1;
+    end
+  end
+
+  // -------------------------------
+  // Dependency Check and Forwarding
+  // -------------------------------
+  // A source operand is dependent when its register has a pending write in
+  // rd_scoreboard_q and the corresponding operand slot selects that register.
+  assign dep_rs1     = rd_scoreboard_q[rs1_i] & in_buf_pop_valid_i & (op_select_i[0] == fpu_ss_pkg::RegA);
+  assign dep_rs1_add = rd_scoreboard_q[rs2_i] & in_buf_pop_valid_i & (op_select_i[1] == fpu_ss_pkg::RegA);
+  assign dep_rs2     = rd_scoreboard_q[rs2_i] & in_buf_pop_valid_i & (op_select_i[1] == fpu_ss_pkg::RegB);
+  assign dep_rs2_add = rd_scoreboard_q[rs3_i] & in_buf_pop_valid_i & (op_select_i[2] == fpu_ss_pkg::RegB);
+  assign dep_rs3     = rd_scoreboard_q[rs3_i] & in_buf_pop_valid_i & (op_select_i[2] == fpu_ss_pkg::RegC);
+
+  // A dependency only stalls when the value is not being forwarded this cycle
+  assign dep_rs_o = (dep_rs1 & ~(fpu_fwd_o[0] | lsu_fwd_o[0]))
+                  | (dep_rs1_add & ~(fpu_fwd_o[1] | lsu_fwd_o[1]))
+                  | (dep_rs2 & ~(fpu_fwd_o[1] | lsu_fwd_o[1]))
+                  | (dep_rs2_add & ~(fpu_fwd_o[2] | lsu_fwd_o[2]))
+                  | (dep_rs3 & ~(fpu_fwd_o[2] | lsu_fwd_o[2]));
+
+  // Destination hazard, released in the cycle the pending write to rd_i lands
+  assign dep_rd_o = rd_scoreboard_q[rd_i] & rd_in_is_fp_i
+                  & ~(((fpu_out_valid_i & fpu_out_ready_o) | x_mem_result_valid_i)
+                  & fpr_we_o & (fpr_wb_addr_i == rd_i));
+
+  always_comb begin
+    // Defaulted so the signal is driven even when FORWARDING is disabled
+    valid_operands = '0;
+    fpu_fwd_o[0]   = 1'b0;
+    fpu_fwd_o[1]   = 1'b0;
+    fpu_fwd_o[2]   = 1'b0;
+    lsu_fwd_o[0]   = 1'b0;
+    lsu_fwd_o[1]   = 1'b0;
+    lsu_fwd_o[2]   = 1'b0;
+    if (FORWARDING) begin
+      valid_operands[0] = (op_select_i[0] == fpu_ss_pkg::RegA);
+      valid_operands[1] = (op_select_i[1] == fpu_ss_pkg::RegA) | (op_select_i[1] == fpu_ss_pkg::RegB);
+      valid_operands[2] = (op_select_i[2] == fpu_ss_pkg::RegB) | (op_select_i[2] == fpu_ss_pkg::RegC);
+      // Forward the FPU result that is written back in this cycle
+      fpu_fwd_o[0] = valid_operands[0] & fpu_out_valid_i & fpu_out_ready_o & rd_is_fp_i & (rs1_i == fpr_wb_addr_i);
+      fpu_fwd_o[1] = valid_operands[1] & fpu_out_valid_i & fpu_out_ready_o & rd_is_fp_i & (rs2_i == fpr_wb_addr_i);
+      fpu_fwd_o[2] = valid_operands[2] & fpu_out_valid_i & fpu_out_ready_o & rd_is_fp_i & (rs3_i == fpr_wb_addr_i);
+      // Forward the load data that returns in this cycle
+      lsu_fwd_o[0] = valid_operands[0] & x_mem_result_valid_i & mem_pop_data_i.we & (rs1_i == mem_pop_data_i.rd);
+      lsu_fwd_o[1] = valid_operands[1] & x_mem_result_valid_i & mem_pop_data_i.we & (rs2_i == mem_pop_data_i.rd);
+      lsu_fwd_o[2] = valid_operands[2] & x_mem_result_valid_i & mem_pop_data_i.we & (rs3_i == mem_pop_data_i.rd);
+    end
+  end
+
+  // ----------------------------------
+  // Memory Interface and Memory Buffer
+  // ----------------------------------
+  assign x_mem_req_hs     = x_mem_valid_o & x_mem_ready_i;
+  assign x_mem_req_spec_o = 1'b0;  // no speculative memory operations -> hardwire to 0
+
+  assign mem_push_valid_o = x_mem_req_hs;
+  assign mem_pop_ready_o  = x_mem_result_valid_i;
+
+  // A memory request is raised for a hazard-free load/store at the head of the
+  // input buffer once its ID is committed (already in the ID scoreboard, or
+  // committed without kill in this very cycle).
+  // The buffer-present term is written as (INPUT_BUFFER_DEPTH != 0): OR-ing the
+  // integer parameter directly into this bitwise expression would only look at
+  // its bit 0 and therefore misbehave for even, non-zero depths.
+  always_comb begin
+    x_mem_valid_o = 1'b0;
+    if ((is_load_i | is_store_i) & ~dep_rs_o & ~dep_rd_o & in_buf_pop_valid_i & mem_push_ready_i
+      & (x_issue_ready_i | (INPUT_BUFFER_DEPTH != 0))
+      & (id_scoreboard_q[x_mem_req_id_i] | (x_commit_valid_i
+      & (x_commit_i.id == x_mem_req_id_i)
+      & ~x_commit_i.commit_kill)) ) begin
+      x_mem_valid_o = 1'b1;
+    end
+  end
+
+  // Stores are the only write requests
+  always_comb begin
+    x_mem_req_we_o = 1'b0;
+    if (is_store_i) begin
+      x_mem_req_we_o = 1'b1;
+    end
+  end
+
+  // Every request is flagged as the last one of its instruction
+  always_comb begin
+    x_mem_req_last_o = 1'b0;
+    if (x_mem_valid_o) begin
+      x_mem_req_last_o = 1'b1;
+    end
+  end
+
+  // -----
+  // FPnew
+  // -----
+  // The FPU output is stalled while a memory result is being returned
+  assign fpu_out_ready_o = ~x_mem_result_valid_i;
+
+  // First branch: out-of-order issue. Second branch: in-order issue, which
+  // additionally waits until no FPU instruction is in flight or the in-flight
+  // one presents its result.
+  always_comb begin
+    fpu_in_valid_o = 1'b0;
+    if (use_fpu_i & in_buf_pop_valid_i & (id_scoreboard_q[fpu_in_id_i] | ((x_commit_i.id == fpu_in_id_i)
+      & ~x_commit_i.commit_kill & x_commit_valid_i)) & ~dep_rs_o & ~dep_rd_o & (x_issue_ready_i | ~PULP_ZFINX) & OUT_OF_ORDER) begin
+      fpu_in_valid_o = 1'b1;
+    end else if (use_fpu_i & in_buf_pop_valid_i & (id_scoreboard_q[fpu_in_id_i] | ((x_commit_i.id == fpu_in_id_i)
+      & ~x_commit_i.commit_kill & x_commit_valid_i)) & ~dep_rs_o & ~dep_rd_o & (fpu_out_valid_i | ~instr_inflight_q) & ~OUT_OF_ORDER) begin
+      fpu_in_valid_o = 1'b1;
+    end
+  end
+
+  // ----------------
+  // Result Interface
+  // ----------------
+  assign x_result_hs = x_result_ready_i & x_result_valid_o;
+  always_comb begin
+    x_result_valid_o = 1'b0;
+    if (fpu_out_valid_i | csr_instr_i | x_mem_result_valid_i) begin
+      x_result_valid_o = 1'b1;
+    end
+  end
+
+
+  // -----------------------------
+  // Status Signals and Scoreboard
+  // -----------------------------
+  // Set while an FPU instruction has been issued and not yet retired
+  always_comb begin
+    instr_inflight_d = instr_inflight_q;
+    if ((fpu_out_valid_i & fpu_out_ready_o) & ~fpu_in_valid_o) begin
+      instr_inflight_d = 1'b0;
+    end else if (fpu_in_valid_o) begin
+      instr_inflight_d = 1'b1;
+    end
+  end
+
+  // Set between a memory request handshake and the matching memory result
+  always_comb begin
+    instr_offloaded_d = instr_offloaded_q;
+    if (in_buf_pop_valid_i & x_mem_req_hs) begin
+      instr_offloaded_d = 1'b1;
+    end else if (x_mem_result_valid_i) begin
+      instr_offloaded_d = 1'b0;
+    end
+  end
+
+  // Destination register scoreboard: set on issue, cleared on write-back
+  // unless the same register is claimed again in the same cycle.
+  always_comb begin
+    rd_scoreboard_d = rd_scoreboard_q;
+    if ((fpu_in_valid_o & fpu_in_ready_i & rd_in_is_fp_i) | (x_mem_req_hs & is_load_i & in_buf_pop_valid_i)) begin
+      rd_scoreboard_d[rd_i] = 1'b1;
+    end
+    if ((fpu_out_ready_o & fpu_out_valid_i) & ~(fpu_in_valid_o & fpu_in_ready_i & (fpr_wb_addr_i == rd_i))) begin
+      rd_scoreboard_d[fpr_wb_addr_i] = 1'b0;
+    end else if (x_mem_result_valid_i & mem_pop_data_i.we & ~(fpu_in_valid_o & fpu_in_ready_i & rd_in_is_fp_i
+      & (mem_pop_data_i.rd == rd_i))) begin
+      rd_scoreboard_d[mem_pop_data_i.rd] = 1'b0;
+    end
+  end
+
+  // Instruction ID scoreboard: set on a non-killing commit, cleared when the
+  // FPU result with that ID is handed over.
+  always_comb begin
+    id_scoreboard_d = id_scoreboard_q;
+    if (x_commit_valid_i & ~x_commit_i.commit_kill) begin
+      id_scoreboard_d[x_commit_i.id] = 1'b1;
+    end
+    if (fpu_out_ready_o & fpu_out_valid_i) begin
+      id_scoreboard_d[fpu_out_id_i] = 1'b0;
+    end
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (~rst_ni) begin
+      instr_inflight_q  <= 1'b0;
+      instr_offloaded_q <= 1'b0;
+      rd_scoreboard_q   <= '0;
+      id_scoreboard_q   <= '0;
+    end else begin
+      instr_inflight_q  <= instr_inflight_d;
+      instr_offloaded_q <= instr_offloaded_d;
+      rd_scoreboard_q   <= rd_scoreboard_d;
+      id_scoreboard_q   <= id_scoreboard_d;
+    end
+  end
+
+endmodule  // fpu_ss_controller
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_csr.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_csr.sv
new file mode 100644
index 000000000..238c17a6d
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_csr.sv
@@ -0,0 +1,119 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Floating-point CSR
+// Contributor: Moritz Imfeld
+
+// Holds the floating-point control and status register (fcsr) and executes
+// the CSR instructions that address fcsr, frm and fflags.
+//
+// The instruction at the head of the input buffer is captured into instr_q
+// while in_buf_pop_valid_i is high and decoded in the following cycle.
+//
+// Outputs:
+//   csr_rdata_o   - previous CSR value to be written back to the integer rd
+//   frm_o         - current rounding mode, fcsr[7:5]
+//   csr_wb_o      - 1 when the decoded CSR instruction returns read data
+//   csr_wb_addr_o - rd field of the captured instruction
+//   csr_wb_id_o   - ID captured together with the instruction
+//   csr_instr_o   - 1 when the captured instruction is one of the handled CSR
+//                   instructions
+module fpu_ss_csr (
+    input logic clk_i,
+    input logic rst_ni,
+
+    input  logic               [31:0] instr_i,
+    input  logic               [31:0] csr_data_i,
+    input  fpnew_pkg::status_t        fpu_status_i,
+    input  logic                      in_buf_pop_valid_i,
+    input  logic                      fpu_out_valid_i,
+    input  logic               [ 3:0] csr_id_i,
+    output logic               [31:0] csr_rdata_o,
+    output logic               [ 2:0] frm_o,
+    output logic                      csr_wb_o,
+    output logic               [ 4:0] csr_wb_addr_o,
+    output logic               [ 3:0] csr_wb_id_o,
+    output logic                      csr_instr_o
+
+);
+
+  logic [31:0] fcsr_d, fcsr_q, instr_q;
+
+  assign frm_o         = fcsr_q[7:5];
+  assign csr_wb_addr_o = instr_q[11:7];
+
+  // Capture the buffered instruction and its ID. instr_q falls back to zero
+  // when nothing valid is presented, which selects the default decode branch.
+  always_ff @(posedge clk_i, negedge rst_ni) begin : proc_instr_q
+    if (~rst_ni) begin
+      instr_q     <= '0;
+      csr_wb_id_o <= '0;
+    end else begin
+      if (in_buf_pop_valid_i) begin
+        csr_wb_id_o <= csr_id_i;
+        instr_q     <= instr_i;
+      end else begin
+        instr_q <= '0;
+      end
+    end
+  end
+
+  always_comb begin
+    fcsr_d      = fcsr_q;
+    csr_wb_o    = 1'b0;
+    csr_rdata_o = '0;
+    csr_instr_o = 1'b1;
+    unique casez (instr_q)
+      fpu_ss_instr_pkg::CSRRW_FSCSR: begin  // Swap value in fcsr with the one in rs1
+        fcsr_d      = csr_data_i;
+        csr_wb_o    = 1'b1;
+        csr_rdata_o = fcsr_q;
+      end
+      fpu_ss_instr_pkg::CSRRS_FRCSR: begin  // Read value from fcsr and copy to int reg
+        csr_wb_o    = 1'b1;
+        csr_rdata_o = fcsr_q;
+      end
+      fpu_ss_instr_pkg::CSRRW_FSRM: begin  // Swap frm value in fcsr with the one in rs1
+        fcsr_d[7:5] = csr_data_i[2:0];
+        csr_wb_o    = 1'b1;
+        csr_rdata_o = {29'b0, fcsr_q[7:5]};
+      end
+      fpu_ss_instr_pkg::CSRRS_FRRM: begin  // Read frm from fcsr and copy to int reg (zero-padded)
+        csr_wb_o    = 1'b1;
+        csr_rdata_o = {29'b0, fcsr_q[7:5]};
+      end
+      // Write frm with the immediate instr_i[17:15] (immediate field is [19:15]).
+      // NOTE(review): no read data is returned here (csr_wb_o stays 0), and the
+      // immediate is taken from instr_i while the decode uses the registered
+      // instr_q - confirm both are intended.
+      fpu_ss_instr_pkg::CSRRWI_FSRMI: begin
+        fcsr_d[7:5] = instr_i[17:15];
+      end
+      fpu_ss_instr_pkg::CSRRW_FSFLAGS: begin  // Swap fflags value in fcsr with the one in rs1
+        fcsr_d[4:0] = csr_data_i[4:0];
+        csr_wb_o    = 1'b1;
+        csr_rdata_o = {27'b0, fcsr_q[4:0]};
+      end
+      fpu_ss_instr_pkg::CSRRS_FRFLAGS: begin  // Read fflags from fcsr and copy to int reg (zero-padded)
+        csr_wb_o    = 1'b1;
+        csr_rdata_o = {27'b0, fcsr_q[4:0]};
+      end
+      // Write fflags with the immediate instr_i[19:15].
+      // NOTE(review): same remarks as for CSRRWI_FSRMI above.
+      fpu_ss_instr_pkg::CSRRWI_FSFLAGSI: begin
+        fcsr_d[4:0] = instr_i[19:15];
+      end
+      default: begin
+        // No CSR instruction: accumulate the exception flags of a completing
+        // FPU operation. fflags are accrued (sticky), so the new status is
+        // OR-ed into the stored flags instead of replacing them; only an
+        // explicit CSR write can clear a flag.
+        // NOTE(review): a result that completes while a CSR instruction is
+        // being decoded does not reach this branch - confirm that cannot occur.
+        if (fpu_out_valid_i) begin
+          fcsr_d = {
+            fcsr_q[31:5],
+            fcsr_q[4:0] | {
+              fpu_status_i.NV,
+              fpu_status_i.DZ,
+              fpu_status_i.OF,
+              fpu_status_i.UF,
+              fpu_status_i.NX
+            }
+          };
+        end else begin
+          fcsr_d = fcsr_q;
+        end
+        csr_wb_o    = 1'b0;
+        csr_rdata_o = '0;
+        csr_instr_o = 1'b0;
+      end
+    endcase
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (~rst_ni) begin
+      fcsr_q <= '0;
+    end else begin
+      fcsr_q <= fcsr_d;
+    end
+  end
+
+endmodule  // fpu_ss_csr
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_decoder.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_decoder.sv
new file mode 100644
index 000000000..448723cea
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_decoder.sv
@@ -0,0 +1,208 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Subsystem Decoder
+// Contributor: Moritz Imfeld
+// Based on: https://github.com/pulp-platform/snitch/blob/master/hw/ip/snitch_cluster/src/snitch_fp_ss.sv
+
+// Combinational decoder for the single-precision instructions handled by the
+// FPU subsystem. For every instruction it selects the FPU operation, the
+// source of each of the three operand slots, the rounding mode, and flags
+// loads, stores and instructions whose destination is an FP register.
+// Instructions that match none of the patterns leave use_fpu_o, is_load_o
+// and is_store_o at 0.
+module fpu_ss_decoder #(
+    parameter PULP_ZFINX = 0
+) (
+    input  logic                   [31:0] instr_i,
+    input  fpnew_pkg::roundmode_e         fpu_rnd_mode_i,
+    output fpnew_pkg::operation_e         fpu_op_o,
+    output fpu_ss_pkg::op_select_e [ 2:0] op_select_o,
+    output fpnew_pkg::roundmode_e         fpu_rnd_mode_o,
+    output logic                          set_dyn_rm_o,
+    output fpnew_pkg::fp_format_e         src_fmt_o,
+    output fpnew_pkg::fp_format_e         dst_fmt_o,
+    output fpnew_pkg::int_format_e        int_fmt_o,
+    output logic                          rd_is_fp_o,
+    output logic                          vectorial_op_o,
+    output logic                          op_mode_o,
+    output logic                          use_fpu_o,
+    output logic                          is_store_o,
+    output logic                          is_load_o,
+    output fpu_ss_pkg::ls_size_e          ls_size_o
+);
+
+  // Decoded "destination is an FP register" flag before the ZFINX override
+  logic rd_is_fp_dec;
+
+  // With PULP_ZFINX set, no instruction targets the FP register file
+  assign rd_is_fp_o = rd_is_fp_dec & (PULP_ZFINX == 0);
+
+  always_comb begin
+    // ------------------------------------------------------------
+    // Defaults: FP32 addition on the FPU, no operands selected,
+    // destination in the FP register file, no memory access
+    // ------------------------------------------------------------
+    use_fpu_o      = 1'b1;
+    fpu_op_o       = fpnew_pkg::ADD;
+    op_mode_o      = 1'b0;
+    vectorial_op_o = 1'b0;
+    set_dyn_rm_o   = 1'b0;
+
+    op_select_o[0] = fpu_ss_pkg::None;
+    op_select_o[1] = fpu_ss_pkg::None;
+    op_select_o[2] = fpu_ss_pkg::None;
+
+    src_fmt_o      = fpnew_pkg::FP32;
+    dst_fmt_o      = fpnew_pkg::FP32;
+    int_fmt_o      = fpnew_pkg::INT32;
+
+    is_load_o      = 1'b0;
+    is_store_o     = 1'b0;
+    ls_size_o      = fpu_ss_pkg::Word;
+
+    rd_is_fp_dec   = 1'b1;
+
+    // Rounding mode comes from the instruction unless it requests the
+    // dynamic mode, in which case the externally supplied mode is used
+    fpu_rnd_mode_o = fpnew_pkg::roundmode_e'(instr_i[14:12]);
+    if (fpu_rnd_mode_o == fpnew_pkg::DYN) begin
+      fpu_rnd_mode_o = fpu_rnd_mode_i;
+    end
+
+    unique casez (instr_i)
+      // ------------------------------
+      // FP -> FP, single precision
+      // ------------------------------
+      // Addition/subtraction feed operand slots 1 and 2
+      fpu_ss_instr_pkg::FADD_S: begin
+        op_select_o[1] = fpu_ss_pkg::RegA;
+        op_select_o[2] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FSUB_S: begin
+        op_mode_o      = 1'b1;
+        op_select_o[1] = fpu_ss_pkg::RegA;
+        op_select_o[2] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FMUL_S: begin
+        fpu_op_o       = fpnew_pkg::MUL;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FDIV_S: begin
+        fpu_op_o       = fpnew_pkg::DIV;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FSGNJ_S, fpu_ss_instr_pkg::FSGNJN_S, fpu_ss_instr_pkg::FSGNJX_S: begin
+        fpu_op_o       = fpnew_pkg::SGNJ;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FMIN_S, fpu_ss_instr_pkg::FMAX_S: begin
+        fpu_op_o       = fpnew_pkg::MINMAX;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FSQRT_S: begin
+        fpu_op_o       = fpnew_pkg::SQRT;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+      end
+      // Fused operations use all three slots; op_mode_o selects the
+      // second variant of each FPU operation
+      fpu_ss_instr_pkg::FMADD_S: begin
+        fpu_op_o       = fpnew_pkg::FMADD;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+        op_select_o[2] = fpu_ss_pkg::RegC;
+      end
+      fpu_ss_instr_pkg::FMSUB_S: begin
+        fpu_op_o       = fpnew_pkg::FMADD;
+        op_mode_o      = 1'b1;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+        op_select_o[2] = fpu_ss_pkg::RegC;
+      end
+      fpu_ss_instr_pkg::FNMSUB_S: begin
+        fpu_op_o       = fpnew_pkg::FNMSUB;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+        op_select_o[2] = fpu_ss_pkg::RegC;
+      end
+      fpu_ss_instr_pkg::FNMADD_S: begin
+        fpu_op_o       = fpnew_pkg::FNMSUB;
+        op_mode_o      = 1'b1;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+        op_select_o[2] = fpu_ss_pkg::RegC;
+      end
+      // ------------------------------
+      // FP -> integer register
+      // ------------------------------
+      fpu_ss_instr_pkg::FLE_S, fpu_ss_instr_pkg::FLT_S, fpu_ss_instr_pkg::FEQ_S: begin
+        fpu_op_o       = fpnew_pkg::CMP;
+        rd_is_fp_dec   = 1'b0;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+      end
+      fpu_ss_instr_pkg::FCLASS_S: begin
+        fpu_op_o       = fpnew_pkg::CLASSIFY;
+        fpu_rnd_mode_o = fpnew_pkg::RNE;
+        rd_is_fp_dec   = 1'b0;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+      end
+      fpu_ss_instr_pkg::FCVT_W_S, fpu_ss_instr_pkg::FCVT_WU_S: begin
+        fpu_op_o       = fpnew_pkg::F2I;
+        rd_is_fp_dec   = 1'b0;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+        // The two patterns differ only in bit 20, which is 1 for the
+        // unsigned conversion
+        op_mode_o      = instr_i[20];
+      end
+      fpu_ss_instr_pkg::FMV_X_W: begin
+        fpu_op_o       = fpnew_pkg::SGNJ;
+        fpu_rnd_mode_o = fpnew_pkg::RUP;  // passthrough without checking nan-box
+        op_mode_o      = 1'b1;  // sign-extend result
+        rd_is_fp_dec   = 1'b0;
+        op_select_o[0] = fpu_ss_pkg::RegA;
+      end
+      // ------------------------------
+      // Integer register -> FP
+      // ------------------------------
+      fpu_ss_instr_pkg::FMV_W_X: begin
+        fpu_op_o       = fpnew_pkg::SGNJ;
+        fpu_rnd_mode_o = fpnew_pkg::RUP;  // passthrough without checking nan-box
+        op_select_o[0] = fpu_ss_pkg::AccBus;
+      end
+      fpu_ss_instr_pkg::FCVT_S_W, fpu_ss_instr_pkg::FCVT_S_WU: begin
+        fpu_op_o       = fpnew_pkg::I2F;
+        op_select_o[0] = fpu_ss_pkg::AccBus;
+        // Bit 20 is 1 for the unsigned conversion
+        op_mode_o      = instr_i[20];
+      end
+      // ------------------------------
+      // Load / store, single precision
+      // ------------------------------
+      fpu_ss_instr_pkg::FLW: begin
+        use_fpu_o = 1'b0;
+        is_load_o = 1'b1;
+      end
+      fpu_ss_instr_pkg::FSW: begin
+        use_fpu_o      = 1'b0;
+        is_store_o     = 1'b1;
+        rd_is_fp_dec   = 1'b0;
+        op_select_o[1] = fpu_ss_pkg::RegB;
+      end
+      // Anything else is not handled by the FPU
+      default: begin
+        use_fpu_o    = 1'b0;
+        rd_is_fp_dec = 1'b0;
+      end
+    endcase
+
+    // fix round mode for vectors and fp16alt
+    if (set_dyn_rm_o) begin
+      fpu_rnd_mode_o = fpu_rnd_mode_i;
+    end
+  end
+endmodule  // fpu_ss_decoder
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_instr_pkg.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_instr_pkg.sv
new file mode 100644
index 000000000..411ea5af9
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_instr_pkg.sv
@@ -0,0 +1,499 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// FPU Subsystem Instruction Package +// Contributor: Moritz Imfeld + +package fpu_ss_instr_pkg; + localparam logic [15:0] C_FLWSP = 16'b011???????????10; + localparam logic [15:0] C_FSWSP = 16'b111???????????10; + localparam logic [15:0] C_FLW = 16'b011???????????00; + localparam logic [15:0] C_FSW = 16'b111???????????00; + + localparam logic [31:0] CSRRW_FSCSR = 32'b000000000011?????001?????1110011; + localparam logic [31:0] CSRRS_FRCSR = 32'b000000000011?????010?????1110011; + localparam logic [31:0] CSRRW_FSRM = 32'b000000000010?????001?????1110011; + localparam logic [31:0] CSRRS_FRRM = 32'b000000000010?????010?????1110011; + localparam logic [31:0] CSRRWI_FSRMI = 32'b000000000010?????101?????1110011; + localparam logic [31:0] CSRRW_FSFLAGS = 32'b000000000001?????001?????1110011; + localparam logic [31:0] CSRRS_FRFLAGS = 32'b000000000001?????010?????1110011; + localparam logic [31:0] CSRRWI_FSFLAGSI = 32'b000000000001?????101?????1110011; + localparam logic [31:0] FADD_S = 32'b0000000??????????????????1010011; + localparam logic [31:0] FSUB_S = 32'b0000100??????????????????1010011; + localparam logic [31:0] FMUL_S = 32'b0001000??????????????????1010011; + localparam logic [31:0] FDIV_S = 32'b0001100??????????????????1010011; + localparam logic [31:0] FSGNJ_S = 32'b0010000??????????000?????1010011; + localparam logic [31:0] FSGNJN_S = 32'b0010000??????????001?????1010011; + localparam logic [31:0] FSGNJX_S = 32'b0010000??????????010?????1010011; + localparam logic [31:0] FMIN_S = 32'b0010100??????????000?????1010011; + localparam logic [31:0] FMAX_S = 32'b0010100??????????001?????1010011; + localparam logic [31:0] FSQRT_S = 32'b010110000000?????????????1010011; + localparam logic [31:0] FADD_D = 32'b0000001??????????????????1010011; + localparam logic [31:0] FSUB_D = 32'b0000101??????????????????1010011; + localparam logic [31:0] FMUL_D = 32'b0001001??????????????????1010011; + localparam logic [31:0] FDIV_D = 32'b0001101??????????????????1010011; + 
localparam logic [31:0] FSGNJ_D = 32'b0010001??????????000?????1010011; + localparam logic [31:0] FSGNJN_D = 32'b0010001??????????001?????1010011; + localparam logic [31:0] FSGNJX_D = 32'b0010001??????????010?????1010011; + localparam logic [31:0] FMIN_D = 32'b0010101??????????000?????1010011; + localparam logic [31:0] FMAX_D = 32'b0010101??????????001?????1010011; + localparam logic [31:0] FCVT_S_D = 32'b010000000001?????????????1010011; + localparam logic [31:0] FCVT_D_S = 32'b010000100000?????????????1010011; + localparam logic [31:0] FSQRT_D = 32'b010110100000?????????????1010011; + localparam logic [31:0] FADD_Q = 32'b0000011??????????????????1010011; + localparam logic [31:0] FSUB_Q = 32'b0000111??????????????????1010011; + localparam logic [31:0] FMUL_Q = 32'b0001011??????????????????1010011; + localparam logic [31:0] FDIV_Q = 32'b0001111??????????????????1010011; + localparam logic [31:0] FSGNJ_Q = 32'b0010011??????????000?????1010011; + localparam logic [31:0] FSGNJN_Q = 32'b0010011??????????001?????1010011; + localparam logic [31:0] FSGNJX_Q = 32'b0010011??????????010?????1010011; + localparam logic [31:0] FMIN_Q = 32'b0010111??????????000?????1010011; + localparam logic [31:0] FMAX_Q = 32'b0010111??????????001?????1010011; + localparam logic [31:0] FCVT_S_Q = 32'b010000000011?????????????1010011; + localparam logic [31:0] FCVT_Q_S = 32'b010001100000?????????????1010011; + localparam logic [31:0] FCVT_D_Q = 32'b010000100011?????????????1010011; + localparam logic [31:0] FCVT_Q_D = 32'b010001100001?????????????1010011; + localparam logic [31:0] FSQRT_Q = 32'b010111100000?????????????1010011; + localparam logic [31:0] FLE_S = 32'b1010000??????????000?????1010011; + localparam logic [31:0] FLT_S = 32'b1010000??????????001?????1010011; + localparam logic [31:0] FEQ_S = 32'b1010000??????????010?????1010011; + localparam logic [31:0] FLE_D = 32'b1010001??????????000?????1010011; + localparam logic [31:0] FLT_D = 32'b1010001??????????001?????1010011; + 
localparam logic [31:0] FEQ_D = 32'b1010001??????????010?????1010011; + localparam logic [31:0] FLE_Q = 32'b1010011??????????000?????1010011; + localparam logic [31:0] FLT_Q = 32'b1010011??????????001?????1010011; + localparam logic [31:0] FEQ_Q = 32'b1010011??????????010?????1010011; + localparam logic [31:0] FCVT_W_S = 32'b110000000000?????????????1010011; + localparam logic [31:0] FCVT_WU_S = 32'b110000000001?????????????1010011; + localparam logic [31:0] FCVT_L_S = 32'b110000000010?????????????1010011; + localparam logic [31:0] FCVT_LU_S = 32'b110000000011?????????????1010011; + localparam logic [31:0] FMV_X_W = 32'b111000000000?????000?????1010011; + localparam logic [31:0] FCLASS_S = 32'b111000000000?????001?????1010011; + localparam logic [31:0] FCVT_W_D = 32'b110000100000?????????????1010011; + localparam logic [31:0] FCVT_WU_D = 32'b110000100001?????????????1010011; + localparam logic [31:0] FCVT_L_D = 32'b110000100010?????????????1010011; + localparam logic [31:0] FCVT_LU_D = 32'b110000100011?????????????1010011; + localparam logic [31:0] FMV_X_D = 32'b111000100000?????000?????1010011; + localparam logic [31:0] FCLASS_D = 32'b111000100000?????001?????1010011; + localparam logic [31:0] FCVT_W_Q = 32'b110001100000?????????????1010011; + localparam logic [31:0] FCVT_WU_Q = 32'b110001100001?????????????1010011; + localparam logic [31:0] FCVT_L_Q = 32'b110001100010?????????????1010011; + localparam logic [31:0] FCVT_LU_Q = 32'b110001100011?????????????1010011; + localparam logic [31:0] FMV_X_Q = 32'b111001100000?????000?????1010011; + localparam logic [31:0] FCLASS_Q = 32'b111001100000?????001?????1010011; + localparam logic [31:0] FCVT_S_W = 32'b110100000000?????????????1010011; + localparam logic [31:0] FCVT_S_WU = 32'b110100000001?????????????1010011; + localparam logic [31:0] FCVT_S_L = 32'b110100000010?????????????1010011; + localparam logic [31:0] FCVT_S_LU = 32'b110100000011?????????????1010011; + localparam logic [31:0] FMV_W_X = 
32'b111100000000?????000?????1010011; + localparam logic [31:0] FCVT_D_W = 32'b110100100000?????????????1010011; + localparam logic [31:0] FCVT_D_WU = 32'b110100100001?????????????1010011; + localparam logic [31:0] FCVT_D_L = 32'b110100100010?????????????1010011; + localparam logic [31:0] FCVT_D_LU = 32'b110100100011?????????????1010011; + localparam logic [31:0] FMV_D_X = 32'b111100100000?????000?????1010011; + localparam logic [31:0] FCVT_Q_W = 32'b110101100000?????????????1010011; + localparam logic [31:0] FCVT_Q_WU = 32'b110101100001?????????????1010011; + localparam logic [31:0] FCVT_Q_L = 32'b110101100010?????????????1010011; + localparam logic [31:0] FCVT_Q_LU = 32'b110101100011?????????????1010011; + localparam logic [31:0] FMV_Q_X = 32'b111101100000?????000?????1010011; + localparam logic [31:0] FLW = 32'b?????????????????010?????0000111; + localparam logic [31:0] FLD = 32'b?????????????????011?????0000111; + localparam logic [31:0] FLQ = 32'b?????????????????100?????0000111; + localparam logic [31:0] FSW = 32'b?????????????????010?????0100111; + localparam logic [31:0] FSD = 32'b?????????????????011?????0100111; + localparam logic [31:0] FSQ = 32'b?????????????????100?????0100111; + localparam logic [31:0] FMADD_S = 32'b?????00??????????????????1000011; + localparam logic [31:0] FMSUB_S = 32'b?????00??????????????????1000111; + localparam logic [31:0] FNMSUB_S = 32'b?????00??????????????????1001011; + localparam logic [31:0] FNMADD_S = 32'b?????00??????????????????1001111; + localparam logic [31:0] FMADD_D = 32'b?????01??????????????????1000011; + localparam logic [31:0] FMSUB_D = 32'b?????01??????????????????1000111; + localparam logic [31:0] FNMSUB_D = 32'b?????01??????????????????1001011; + localparam logic [31:0] FNMADD_D = 32'b?????01??????????????????1001111; + localparam logic [31:0] FMADD_Q = 32'b?????11??????????????????1000011; + localparam logic [31:0] FMSUB_Q = 32'b?????11??????????????????1000111; + localparam logic [31:0] FNMSUB_Q = 
32'b?????11??????????????????1001011; + localparam logic [31:0] FNMADD_Q = 32'b?????11??????????????????1001111; + localparam logic [31:0] DMSRC = 32'b0000000??????????000000000101011; + localparam logic [31:0] DMDST = 32'b0000001??????????000000000101011; + localparam logic [31:0] DMCPYI = 32'b0000010??????????000?????0101011; + localparam logic [31:0] DMCPY = 32'b0000011??????????000?????0101011; + localparam logic [31:0] DMSTATI = 32'b0000100?????00000000?????0101011; + localparam logic [31:0] DMSTAT = 32'b0000101?????00000000?????0101011; + localparam logic [31:0] DMSTR = 32'b0000110??????????000000000101011; + localparam logic [31:0] DMREP = 32'b000011100000?????000000000101011; + localparam logic [31:0] FREP_O = 32'b????????????????????????10001011; + localparam logic [31:0] FREP_I = 32'b????????????????????????00001011; + localparam logic [31:0] IREP = 32'b?????????????????????????0111111; + localparam logic [31:0] SCFGRI = 32'b????????????00000001?????0101011; + localparam logic [31:0] SCFGWI = 32'b?????????????????010000000101011; + localparam logic [31:0] SCFGR = 32'b0000000?????00001001?????0101011; + localparam logic [31:0] SCFGW = 32'b0000000??????????010000010101011; + localparam logic [31:0] FLH = 32'b?????????????????001?????0000111; + localparam logic [31:0] FSH = 32'b?????????????????001?????0100111; + localparam logic [31:0] FMADD_H = 32'b?????10??????????????????1000011; + localparam logic [31:0] FMSUB_H = 32'b?????10??????????????????1000111; + localparam logic [31:0] FNMSUB_H = 32'b?????10??????????????????1001011; + localparam logic [31:0] FNMADD_H = 32'b?????10??????????????????1001111; + localparam logic [31:0] FADD_H = 32'b0000010??????????????????1010011; + localparam logic [31:0] FSUB_H = 32'b0000110??????????????????1010011; + localparam logic [31:0] FMUL_H = 32'b0001010??????????????????1010011; + localparam logic [31:0] FDIV_H = 32'b0001110??????????????????1010011; + localparam logic [31:0] FSQRT_H = 
32'b010111000000?????????????1010011; + localparam logic [31:0] FSGNJ_H = 32'b0010010??????????000?????1010011; + localparam logic [31:0] FSGNJN_H = 32'b0010010??????????001?????1010011; + localparam logic [31:0] FSGNJX_H = 32'b0010010??????????010?????1010011; + localparam logic [31:0] FMIN_H = 32'b0010110??????????000?????1010011; + localparam logic [31:0] FMAX_H = 32'b0010110??????????001?????1010011; + localparam logic [31:0] FEQ_H = 32'b1010010??????????010?????1010011; + localparam logic [31:0] FLT_H = 32'b1010010??????????001?????1010011; + localparam logic [31:0] FLE_H = 32'b1010010??????????000?????1010011; + localparam logic [31:0] FCVT_W_H = 32'b110001000000?????????????1010011; + localparam logic [31:0] FCVT_WU_H = 32'b110001000001?????????????1010011; + localparam logic [31:0] FCVT_H_W = 32'b110101000000?????????????1010011; + localparam logic [31:0] FCVT_H_WU = 32'b110101000001?????????????1010011; + localparam logic [31:0] FMV_X_H = 32'b111001000000?????000?????1010011; + localparam logic [31:0] FCLASS_H = 32'b111001000000?????001?????1010011; + localparam logic [31:0] FMV_H_X = 32'b111101000000?????000?????1010011; + localparam logic [31:0] FCVT_L_H = 32'b110001000010?????????????1010011; + localparam logic [31:0] FCVT_LU_H = 32'b110001000011?????????????1010011; + localparam logic [31:0] FCVT_H_L = 32'b110101000010?????????????1010011; + localparam logic [31:0] FCVT_H_LU = 32'b110101000011?????????????1010011; + localparam logic [31:0] FCVT_S_H = 32'b010000000010?????000?????1010011; + localparam logic [31:0] FCVT_H_S = 32'b010001000000?????????????1010011; + localparam logic [31:0] FCVT_D_H = 32'b010000100010?????000?????1010011; + localparam logic [31:0] FCVT_H_D = 32'b010001000001?????????????1010011; + localparam logic [31:0] FLAH = 32'b?????????????????001?????0000111; + localparam logic [31:0] FSAH = 32'b?????????????????001?????0100111; + localparam logic [31:0] FMADD_AH = 32'b?????10??????????101?????1000011; + localparam logic [31:0] 
FMSUB_AH = 32'b?????10??????????101?????1000111; + localparam logic [31:0] FNMSUB_AH = 32'b?????10??????????101?????1001011; + localparam logic [31:0] FNMADD_AH = 32'b?????10??????????101?????1001111; + localparam logic [31:0] FADD_AH = 32'b0000010??????????101?????1010011; + localparam logic [31:0] FSUB_AH = 32'b0000110??????????101?????1010011; + localparam logic [31:0] FMUL_AH = 32'b0001010??????????101?????1010011; + localparam logic [31:0] FDIV_AH = 32'b0001110??????????101?????1010011; + localparam logic [31:0] FSQRT_AH = 32'b010111000000?????101?????1010011; + localparam logic [31:0] FSGNJ_AH = 32'b0010010??????????100?????1010011; + localparam logic [31:0] FSGNJN_AH = 32'b0010010??????????101?????1010011; + localparam logic [31:0] FSGNJX_AH = 32'b0010010??????????110?????1010011; + localparam logic [31:0] FMIN_AH = 32'b0010110??????????100?????1010011; + localparam logic [31:0] FMAX_AH = 32'b0010110??????????101?????1010011; + localparam logic [31:0] FEQ_AH = 32'b1010010??????????110?????1010011; + localparam logic [31:0] FLT_AH = 32'b1010010??????????101?????1010011; + localparam logic [31:0] FLE_AH = 32'b1010010??????????100?????1010011; + localparam logic [31:0] FCVT_W_AH = 32'b110001000000?????101?????1010011; + localparam logic [31:0] FCVT_WU_AH = 32'b110001000001?????101?????1010011; + localparam logic [31:0] FCVT_AH_W = 32'b110101000000?????101?????1010011; + localparam logic [31:0] FCVT_AH_WU = 32'b110101000001?????101?????1010011; + localparam logic [31:0] FMV_X_AH = 32'b111001000000?????100?????1010011; + localparam logic [31:0] FCLASS_AH = 32'b111001000000?????101?????1010011; + localparam logic [31:0] FMV_AH_X = 32'b111101000000?????100?????1010011; + localparam logic [31:0] FCVT_L_AH = 32'b110001000010?????101?????1010011; + localparam logic [31:0] FCVT_LU_AH = 32'b110001000011?????101?????1010011; + localparam logic [31:0] FCVT_AH_L = 32'b110101000010?????101?????1010011; + localparam logic [31:0] FCVT_AH_LU = 
32'b110101000011?????101?????1010011; + localparam logic [31:0] FCVT_S_AH = 32'b010000000110?????000?????1010011; + localparam logic [31:0] FCVT_AH_S = 32'b010001000000?????101?????1010011; + localparam logic [31:0] FCVT_D_AH = 32'b010000100110?????000?????1010011; + localparam logic [31:0] FCVT_AH_D = 32'b010001000001?????101?????1010011; + localparam logic [31:0] FCVT_H_AH = 32'b010001000110?????????????1010011; + localparam logic [31:0] FCVT_AH_H = 32'b010001000010?????101?????1010011; + localparam logic [31:0] FLB = 32'b?????????????????000?????0000111; + localparam logic [31:0] FSB = 32'b?????????????????000?????0100111; + localparam logic [31:0] FMADD_B = 32'b?????11??????????????????1000011; + localparam logic [31:0] FMSUB_B = 32'b?????11??????????????????1000111; + localparam logic [31:0] FNMSUB_B = 32'b?????11??????????????????1001011; + localparam logic [31:0] FNMADD_B = 32'b?????11??????????????????1001111; + localparam logic [31:0] FADD_B = 32'b0000011??????????????????1010011; + localparam logic [31:0] FSUB_B = 32'b0000111??????????????????1010011; + localparam logic [31:0] FMUL_B = 32'b0001011??????????????????1010011; + localparam logic [31:0] FDIV_B = 32'b0001111??????????????????1010011; + localparam logic [31:0] FSQRT_B = 32'b010111100000?????????????1010011; + localparam logic [31:0] FSGNJ_B = 32'b0010011??????????000?????1010011; + localparam logic [31:0] FSGNJN_B = 32'b0010011??????????001?????1010011; + localparam logic [31:0] FSGNJX_B = 32'b0010011??????????010?????1010011; + localparam logic [31:0] FMIN_B = 32'b0010111??????????000?????1010011; + localparam logic [31:0] FMAX_B = 32'b0010111??????????001?????1010011; + localparam logic [31:0] FEQ_B = 32'b1010011??????????010?????1010011; + localparam logic [31:0] FLT_B = 32'b1010011??????????001?????1010011; + localparam logic [31:0] FLE_B = 32'b1010011??????????000?????1010011; + localparam logic [31:0] FCVT_W_B = 32'b110001100000?????????????1010011; + localparam logic [31:0] FCVT_WU_B = 
32'b110001100001?????????????1010011; + localparam logic [31:0] FCVT_B_W = 32'b110101100000?????????????1010011; + localparam logic [31:0] FCVT_B_WU = 32'b110101100001?????????????1010011; + localparam logic [31:0] FMV_X_B = 32'b111001100000?????000?????1010011; + localparam logic [31:0] FCLASS_B = 32'b111001100000?????001?????1010011; + localparam logic [31:0] FMV_B_X = 32'b111101100000?????000?????1010011; + localparam logic [31:0] FCVT_L_B = 32'b110001100010?????????????1010011; + localparam logic [31:0] FCVT_LU_B = 32'b110001100011?????????????1010011; + localparam logic [31:0] FCVT_B_L = 32'b110101100010?????????????1010011; + localparam logic [31:0] FCVT_B_LU = 32'b110101100011?????????????1010011; + localparam logic [31:0] FCVT_S_B = 32'b010000000011?????000?????1010011; + localparam logic [31:0] FCVT_B_S = 32'b010001100000?????????????1010011; + localparam logic [31:0] FCVT_D_B = 32'b010000100011?????000?????1010011; + localparam logic [31:0] FCVT_B_D = 32'b010001100001?????????????1010011; + localparam logic [31:0] FCVT_H_B = 32'b010001000011?????000?????1010011; + localparam logic [31:0] FCVT_B_H = 32'b010001100010?????????????1010011; + localparam logic [31:0] FCVT_AH_B = 32'b010001000011?????101?????1010011; + localparam logic [31:0] FCVT_B_AH = 32'b010001100110?????????????1010011; + localparam logic [31:0] VFADD_S = 32'b1000001??????????000?????0110011; + localparam logic [31:0] VFADD_R_S = 32'b1000001??????????100?????0110011; + localparam logic [31:0] VFSUB_S = 32'b1000010??????????000?????0110011; + localparam logic [31:0] VFSUB_R_S = 32'b1000010??????????100?????0110011; + localparam logic [31:0] VFMUL_S = 32'b1000011??????????000?????0110011; + localparam logic [31:0] VFMUL_R_S = 32'b1000011??????????100?????0110011; + localparam logic [31:0] VFDIV_S = 32'b1000100??????????000?????0110011; + localparam logic [31:0] VFDIV_R_S = 32'b1000100??????????100?????0110011; + localparam logic [31:0] VFMIN_S = 32'b1000101??????????000?????0110011; + 
localparam logic [31:0] VFMIN_R_S = 32'b1000101??????????100?????0110011; + localparam logic [31:0] VFMAX_S = 32'b1000110??????????000?????0110011; + localparam logic [31:0] VFMAX_R_S = 32'b1000110??????????100?????0110011; + localparam logic [31:0] VFSQRT_S = 32'b100011100000?????000?????0110011; + localparam logic [31:0] VFMAC_S = 32'b1001000??????????000?????0110011; + localparam logic [31:0] VFMAC_R_S = 32'b1001000??????????100?????0110011; + localparam logic [31:0] VFMRE_S = 32'b1001001??????????000?????0110011; + localparam logic [31:0] VFMRE_R_S = 32'b1001001??????????100?????0110011; + localparam logic [31:0] VFCLASS_S = 32'b100110000001?????000?????0110011; + localparam logic [31:0] VFSGNJ_S = 32'b1001101??????????000?????0110011; + localparam logic [31:0] VFSGNJ_R_S = 32'b1001101??????????100?????0110011; + localparam logic [31:0] VFSGNJN_S = 32'b1001110??????????000?????0110011; + localparam logic [31:0] VFSGNJN_R_S = 32'b1001110??????????100?????0110011; + localparam logic [31:0] VFSGNJX_S = 32'b1001111??????????000?????0110011; + localparam logic [31:0] VFSGNJX_R_S = 32'b1001111??????????100?????0110011; + localparam logic [31:0] VFEQ_S = 32'b1010000??????????000?????0110011; + localparam logic [31:0] VFEQ_R_S = 32'b1010000??????????100?????0110011; + localparam logic [31:0] VFNE_S = 32'b1010001??????????000?????0110011; + localparam logic [31:0] VFNE_R_S = 32'b1010001??????????100?????0110011; + localparam logic [31:0] VFLT_S = 32'b1010010??????????000?????0110011; + localparam logic [31:0] VFLT_R_S = 32'b1010010??????????100?????0110011; + localparam logic [31:0] VFGE_S = 32'b1010011??????????000?????0110011; + localparam logic [31:0] VFGE_R_S = 32'b1010011??????????100?????0110011; + localparam logic [31:0] VFLE_S = 32'b1010100??????????000?????0110011; + localparam logic [31:0] VFLE_R_S = 32'b1010100??????????100?????0110011; + localparam logic [31:0] VFGT_S = 32'b1010101??????????000?????0110011; + localparam logic [31:0] VFGT_R_S = 
32'b1010101??????????100?????0110011; + localparam logic [31:0] VFMV_X_S = 32'b100110000000?????000?????0110011; + localparam logic [31:0] VFMV_S_X = 32'b100110000000?????100?????0110011; + localparam logic [31:0] VFCVT_X_S = 32'b100110000010?????000?????0110011; + localparam logic [31:0] VFCVT_XU_S = 32'b100110000010?????100?????0110011; + localparam logic [31:0] VFCVT_S_X = 32'b100110000011?????000?????0110011; + localparam logic [31:0] VFCVT_S_XU = 32'b100110000011?????100?????0110011; + localparam logic [31:0] VFCPKA_S_S = 32'b1011000??????????000?????0110011; + localparam logic [31:0] VFCPKB_S_S = 32'b1011000??????????100?????0110011; + localparam logic [31:0] VFCPKC_S_S = 32'b1011001??????????000?????0110011; + localparam logic [31:0] VFCPKD_S_S = 32'b1011001??????????100?????0110011; + localparam logic [31:0] VFCPKA_S_D = 32'b1011010??????????000?????0110011; + localparam logic [31:0] VFCPKB_S_D = 32'b1011010??????????100?????0110011; + localparam logic [31:0] VFCPKC_S_D = 32'b1011011??????????000?????0110011; + localparam logic [31:0] VFCPKD_S_D = 32'b1011011??????????100?????0110011; + localparam logic [31:0] VFADD_H = 32'b1000001??????????010?????0110011; + localparam logic [31:0] VFADD_R_H = 32'b1000001??????????110?????0110011; + localparam logic [31:0] VFSUB_H = 32'b1000010??????????010?????0110011; + localparam logic [31:0] VFSUB_R_H = 32'b1000010??????????110?????0110011; + localparam logic [31:0] VFMUL_H = 32'b1000011??????????010?????0110011; + localparam logic [31:0] VFMUL_R_H = 32'b1000011??????????110?????0110011; + localparam logic [31:0] VFDIV_H = 32'b1000100??????????010?????0110011; + localparam logic [31:0] VFDIV_R_H = 32'b1000100??????????110?????0110011; + localparam logic [31:0] VFMIN_H = 32'b1000101??????????010?????0110011; + localparam logic [31:0] VFMIN_R_H = 32'b1000101??????????110?????0110011; + localparam logic [31:0] VFMAX_H = 32'b1000110??????????010?????0110011; + localparam logic [31:0] VFMAX_R_H = 
32'b1000110??????????110?????0110011; + localparam logic [31:0] VFSQRT_H = 32'b100011100000?????010?????0110011; + localparam logic [31:0] VFMAC_H = 32'b1001000??????????010?????0110011; + localparam logic [31:0] VFMAC_R_H = 32'b1001000??????????110?????0110011; + localparam logic [31:0] VFMRE_H = 32'b1001001??????????010?????0110011; + localparam logic [31:0] VFMRE_R_H = 32'b1001001??????????110?????0110011; + localparam logic [31:0] VFCLASS_H = 32'b100110000001?????010?????0110011; + localparam logic [31:0] VFSGNJ_H = 32'b1001101??????????010?????0110011; + localparam logic [31:0] VFSGNJ_R_H = 32'b1001101??????????110?????0110011; + localparam logic [31:0] VFSGNJN_H = 32'b1001110??????????010?????0110011; + localparam logic [31:0] VFSGNJN_R_H = 32'b1001110??????????110?????0110011; + localparam logic [31:0] VFSGNJX_H = 32'b1001111??????????010?????0110011; + localparam logic [31:0] VFSGNJX_R_H = 32'b1001111??????????110?????0110011; + localparam logic [31:0] VFEQ_H = 32'b1010000??????????010?????0110011; + localparam logic [31:0] VFEQ_R_H = 32'b1010000??????????110?????0110011; + localparam logic [31:0] VFNE_H = 32'b1010001??????????010?????0110011; + localparam logic [31:0] VFNE_R_H = 32'b1010001??????????110?????0110011; + localparam logic [31:0] VFLT_H = 32'b1010010??????????010?????0110011; + localparam logic [31:0] VFLT_R_H = 32'b1010010??????????110?????0110011; + localparam logic [31:0] VFGE_H = 32'b1010011??????????010?????0110011; + localparam logic [31:0] VFGE_R_H = 32'b1010011??????????110?????0110011; + localparam logic [31:0] VFLE_H = 32'b1010100??????????010?????0110011; + localparam logic [31:0] VFLE_R_H = 32'b1010100??????????110?????0110011; + localparam logic [31:0] VFGT_H = 32'b1010101??????????010?????0110011; + localparam logic [31:0] VFGT_R_H = 32'b1010101??????????110?????0110011; + localparam logic [31:0] VFMV_X_H = 32'b100110000000?????010?????0110011; + localparam logic [31:0] VFMV_H_X = 32'b100110000000?????110?????0110011; + localparam 
logic [31:0] VFCVT_X_H = 32'b100110000010?????010?????0110011; + localparam logic [31:0] VFCVT_XU_H = 32'b100110000010?????110?????0110011; + localparam logic [31:0] VFCVT_H_X = 32'b100110000011?????010?????0110011; + localparam logic [31:0] VFCVT_H_XU = 32'b100110000011?????110?????0110011; + localparam logic [31:0] VFCPKA_H_S = 32'b1011000??????????010?????0110011; + localparam logic [31:0] VFCPKB_H_S = 32'b1011000??????????110?????0110011; + localparam logic [31:0] VFCPKC_H_S = 32'b1011001??????????010?????0110011; + localparam logic [31:0] VFCPKD_H_S = 32'b1011001??????????110?????0110011; + localparam logic [31:0] VFCPKA_H_D = 32'b1011010??????????010?????0110011; + localparam logic [31:0] VFCPKB_H_D = 32'b1011010??????????110?????0110011; + localparam logic [31:0] VFCPKC_H_D = 32'b1011011??????????010?????0110011; + localparam logic [31:0] VFCPKD_H_D = 32'b1011011??????????110?????0110011; + localparam logic [31:0] VFCVT_S_H = 32'b100110000110?????000?????0110011; + localparam logic [31:0] VFCVTU_S_H = 32'b100110000110?????100?????0110011; + localparam logic [31:0] VFCVT_H_S = 32'b100110000100?????010?????0110011; + localparam logic [31:0] VFCVTU_H_S = 32'b100110000100?????110?????0110011; + localparam logic [31:0] VFADD_AH = 32'b1000001??????????001?????0110011; + localparam logic [31:0] VFADD_R_AH = 32'b1000001??????????101?????0110011; + localparam logic [31:0] VFSUB_AH = 32'b1000010??????????001?????0110011; + localparam logic [31:0] VFSUB_R_AH = 32'b1000010??????????101?????0110011; + localparam logic [31:0] VFMUL_AH = 32'b1000011??????????001?????0110011; + localparam logic [31:0] VFMUL_R_AH = 32'b1000011??????????101?????0110011; + localparam logic [31:0] VFDIV_AH = 32'b1000100??????????001?????0110011; + localparam logic [31:0] VFDIV_R_AH = 32'b1000100??????????101?????0110011; + localparam logic [31:0] VFMIN_AH = 32'b1000101??????????001?????0110011; + localparam logic [31:0] VFMIN_R_AH = 32'b1000101??????????101?????0110011; + localparam logic 
[31:0] VFMAX_AH = 32'b1000110??????????001?????0110011; + localparam logic [31:0] VFMAX_R_AH = 32'b1000110??????????101?????0110011; + localparam logic [31:0] VFSQRT_AH = 32'b100011100000?????001?????0110011; + localparam logic [31:0] VFMAC_AH = 32'b1001000??????????001?????0110011; + localparam logic [31:0] VFMAC_R_AH = 32'b1001000??????????101?????0110011; + localparam logic [31:0] VFMRE_AH = 32'b1001001??????????001?????0110011; + localparam logic [31:0] VFMRE_R_AH = 32'b1001001??????????101?????0110011; + localparam logic [31:0] VFCLASS_AH = 32'b100110000001?????001?????0110011; + localparam logic [31:0] VFSGNJ_AH = 32'b1001101??????????001?????0110011; + localparam logic [31:0] VFSGNJ_R_AH = 32'b1001101??????????101?????0110011; + localparam logic [31:0] VFSGNJN_AH = 32'b1001110??????????001?????0110011; + localparam logic [31:0] VFSGNJN_R_AH = 32'b1001110??????????101?????0110011; + localparam logic [31:0] VFSGNJX_AH = 32'b1001111??????????001?????0110011; + localparam logic [31:0] VFSGNJX_R_AH = 32'b1001111??????????101?????0110011; + localparam logic [31:0] VFEQ_AH = 32'b1010000??????????001?????0110011; + localparam logic [31:0] VFEQ_R_AH = 32'b1010000??????????101?????0110011; + localparam logic [31:0] VFNE_AH = 32'b1010001??????????001?????0110011; + localparam logic [31:0] VFNE_R_AH = 32'b1010001??????????101?????0110011; + localparam logic [31:0] VFLT_AH = 32'b1010010??????????001?????0110011; + localparam logic [31:0] VFLT_R_AH = 32'b1010010??????????101?????0110011; + localparam logic [31:0] VFGE_AH = 32'b1010011??????????001?????0110011; + localparam logic [31:0] VFGE_R_AH = 32'b1010011??????????101?????0110011; + localparam logic [31:0] VFLE_AH = 32'b1010100??????????001?????0110011; + localparam logic [31:0] VFLE_R_AH = 32'b1010100??????????101?????0110011; + localparam logic [31:0] VFGT_AH = 32'b1010101??????????001?????0110011; + localparam logic [31:0] VFGT_R_AH = 32'b1010101??????????101?????0110011; + localparam logic [31:0] VFMV_X_AH = 
32'b100110000000?????001?????0110011; + localparam logic [31:0] VFMV_AH_X = 32'b100110000000?????101?????0110011; + localparam logic [31:0] VFCVT_X_AH = 32'b100110000010?????001?????0110011; + localparam logic [31:0] VFCVT_XU_AH = 32'b100110000010?????101?????0110011; + localparam logic [31:0] VFCVT_AH_X = 32'b100110000011?????001?????0110011; + localparam logic [31:0] VFCVT_AH_XU = 32'b100110000011?????101?????0110011; + localparam logic [31:0] VFCPKA_AH_S = 32'b1011000??????????001?????0110011; + localparam logic [31:0] VFCPKB_AH_S = 32'b1011000??????????101?????0110011; + localparam logic [31:0] VFCPKC_AH_S = 32'b1011001??????????001?????0110011; + localparam logic [31:0] VFCPKD_AH_S = 32'b1011001??????????101?????0110011; + localparam logic [31:0] VFCPKA_AH_D = 32'b1011010??????????001?????0110011; + localparam logic [31:0] VFCPKB_AH_D = 32'b1011010??????????101?????0110011; + localparam logic [31:0] VFCPKC_AH_D = 32'b1011011??????????001?????0110011; + localparam logic [31:0] VFCPKD_AH_D = 32'b1011011??????????101?????0110011; + localparam logic [31:0] VFCVT_S_AH = 32'b100110000101?????000?????0110011; + localparam logic [31:0] VFCVTU_S_AH = 32'b100110000101?????100?????0110011; + localparam logic [31:0] VFCVT_AH_S = 32'b100110000100?????001?????0110011; + localparam logic [31:0] VFCVTU_AH_S = 32'b100110000100?????101?????0110011; + localparam logic [31:0] VFCVT_H_AH = 32'b100110000101?????010?????0110011; + localparam logic [31:0] VFCVTU_H_AH = 32'b100110000101?????110?????0110011; + localparam logic [31:0] VFCVT_AH_H = 32'b100110000110?????001?????0110011; + localparam logic [31:0] VFCVTU_AH_H = 32'b100110000110?????101?????0110011; + localparam logic [31:0] VFADD_B = 32'b1000001??????????011?????0110011; + localparam logic [31:0] VFADD_R_B = 32'b1000001??????????111?????0110011; + localparam logic [31:0] VFSUB_B = 32'b1000010??????????011?????0110011; + localparam logic [31:0] VFSUB_R_B = 32'b1000010??????????111?????0110011; + localparam logic [31:0] 
VFMUL_B = 32'b1000011??????????011?????0110011; + localparam logic [31:0] VFMUL_R_B = 32'b1000011??????????111?????0110011; + localparam logic [31:0] VFDIV_B = 32'b1000100??????????011?????0110011; + localparam logic [31:0] VFDIV_R_B = 32'b1000100??????????111?????0110011; + localparam logic [31:0] VFMIN_B = 32'b1000101??????????011?????0110011; + localparam logic [31:0] VFMIN_R_B = 32'b1000101??????????111?????0110011; + localparam logic [31:0] VFMAX_B = 32'b1000110??????????011?????0110011; + localparam logic [31:0] VFMAX_R_B = 32'b1000110??????????111?????0110011; + localparam logic [31:0] VFSQRT_B = 32'b100011100000?????011?????0110011; + localparam logic [31:0] VFMAC_B = 32'b1001000??????????011?????0110011; + localparam logic [31:0] VFMAC_R_B = 32'b1001000??????????111?????0110011; + localparam logic [31:0] VFMRE_B = 32'b1001001??????????011?????0110011; + localparam logic [31:0] VFMRE_R_B = 32'b1001001??????????111?????0110011; + localparam logic [31:0] VFSGNJ_B = 32'b1001101??????????011?????0110011; + localparam logic [31:0] VFSGNJ_R_B = 32'b1001101??????????111?????0110011; + localparam logic [31:0] VFSGNJN_B = 32'b1001110??????????011?????0110011; + localparam logic [31:0] VFSGNJN_R_B = 32'b1001110??????????111?????0110011; + localparam logic [31:0] VFSGNJX_B = 32'b1001111??????????011?????0110011; + localparam logic [31:0] VFSGNJX_R_B = 32'b1001111??????????111?????0110011; + localparam logic [31:0] VFEQ_B = 32'b1010000??????????011?????0110011; + localparam logic [31:0] VFEQ_R_B = 32'b1010000??????????111?????0110011; + localparam logic [31:0] VFNE_B = 32'b1010001??????????011?????0110011; + localparam logic [31:0] VFNE_R_B = 32'b1010001??????????111?????0110011; + localparam logic [31:0] VFLT_B = 32'b1010010??????????011?????0110011; + localparam logic [31:0] VFLT_R_B = 32'b1010010??????????111?????0110011; + localparam logic [31:0] VFGE_B = 32'b1010011??????????011?????0110011; + localparam logic [31:0] VFGE_R_B = 
32'b1010011??????????111?????0110011; + localparam logic [31:0] VFLE_B = 32'b1010100??????????011?????0110011; + localparam logic [31:0] VFLE_R_B = 32'b1010100??????????111?????0110011; + localparam logic [31:0] VFGT_B = 32'b1010101??????????011?????0110011; + localparam logic [31:0] VFGT_R_B = 32'b1010101??????????111?????0110011; + localparam logic [31:0] VFMV_X_B = 32'b100110000000?????011?????0110011; + localparam logic [31:0] VFMV_B_X = 32'b100110000000?????111?????0110011; + localparam logic [31:0] VFCLASS_B = 32'b100110000001?????011?????0110011; + localparam logic [31:0] VFCVT_X_B = 32'b100110000010?????011?????0110011; + localparam logic [31:0] VFCVT_XU_B = 32'b100110000010?????111?????0110011; + localparam logic [31:0] VFCVT_B_X = 32'b100110000011?????011?????0110011; + localparam logic [31:0] VFCVT_B_XU = 32'b100110000011?????111?????0110011; + localparam logic [31:0] VFCPKA_B_S = 32'b1011000??????????011?????0110011; + localparam logic [31:0] VFCPKB_B_S = 32'b1011000??????????111?????0110011; + localparam logic [31:0] VFCPKC_B_S = 32'b1011001??????????011?????0110011; + localparam logic [31:0] VFCPKD_B_S = 32'b1011001??????????111?????0110011; + localparam logic [31:0] VFCPKA_B_D = 32'b1011010??????????011?????0110011; + localparam logic [31:0] VFCPKB_B_D = 32'b1011010??????????111?????0110011; + localparam logic [31:0] VFCPKC_B_D = 32'b1011011??????????011?????0110011; + localparam logic [31:0] VFCPKD_B_D = 32'b1011011??????????111?????0110011; + localparam logic [31:0] VFCVT_S_B = 32'b100110000111?????000?????0110011; + localparam logic [31:0] VFCVTU_S_B = 32'b100110000111?????100?????0110011; + localparam logic [31:0] VFCVT_B_S = 32'b100110000100?????011?????0110011; + localparam logic [31:0] VFCVTU_B_S = 32'b100110000100?????111?????0110011; + localparam logic [31:0] VFCVT_H_B = 32'b100110000111?????010?????0110011; + localparam logic [31:0] VFCVTU_H_B = 32'b100110000111?????110?????0110011; + localparam logic [31:0] VFCVT_B_H = 
32'b100110000110?????011?????0110011; + localparam logic [31:0] VFCVTU_B_H = 32'b100110000110?????111?????0110011; + localparam logic [31:0] VFCVT_AH_B = 32'b100110000111?????001?????0110011; + localparam logic [31:0] VFCVTU_AH_B = 32'b100110000111?????101?????0110011; + localparam logic [31:0] VFCVT_B_AH = 32'b100110000101?????011?????0110011; + localparam logic [31:0] VFCVTU_B_AH = 32'b100110000101?????111?????0110011; + localparam logic [31:0] VFDOTP_S = 32'b1001010??????????000?????0110011; + localparam logic [31:0] VFDOTP_R_S = 32'b1001010??????????100?????0110011; + localparam logic [31:0] VFAVG_S = 32'b1010110??????????000?????0110011; + localparam logic [31:0] VFAVG_R_S = 32'b1010110??????????100?????0110011; + localparam logic [31:0] FMULEX_S_H = 32'b0100110??????????????????1010011; + localparam logic [31:0] FMACEX_S_H = 32'b0101010??????????????????1010011; + localparam logic [31:0] VFDOTP_H = 32'b1001010??????????010?????0110011; + localparam logic [31:0] VFDOTP_R_H = 32'b1001010??????????110?????0110011; + localparam logic [31:0] VFDOTPEX_S_H = 32'b1001011??????????010?????0110011; + localparam logic [31:0] VFDOTPEX_S_R_H = 32'b1001011??????????110?????0110011; + localparam logic [31:0] VFAVG_H = 32'b1010110??????????010?????0110011; + localparam logic [31:0] VFAVG_R_H = 32'b1010110??????????110?????0110011; + localparam logic [31:0] FMULEX_S_AH = 32'b0100110??????????101?????1010011; + localparam logic [31:0] FMACEX_S_AH = 32'b0101010??????????101?????1010011; + localparam logic [31:0] VFDOTP_AH = 32'b1001010??????????001?????0110011; + localparam logic [31:0] VFDOTP_R_AH = 32'b1001010??????????101?????0110011; + localparam logic [31:0] VFDOTPEX_S_AH = 32'b1001011??????????001?????0110011; + localparam logic [31:0] VFDOTPEX_S_R_AH = 32'b1001011??????????101?????0110011; + localparam logic [31:0] VFAVG_AH = 32'b1010110??????????001?????0110011; + localparam logic [31:0] VFAVG_R_AH = 32'b1010110??????????101?????0110011; + localparam logic [31:0] 
FMULEX_S_B = 32'b0100111??????????????????1010011; + localparam logic [31:0] FMACEX_S_B = 32'b0101011??????????????????1010011; + localparam logic [31:0] VFDOTP_B = 32'b1001010??????????011?????0110011; + localparam logic [31:0] VFDOTP_R_B = 32'b1001010??????????111?????0110011; + localparam logic [31:0] VFDOTPEX_S_B = 32'b1001011??????????011?????0110011; + localparam logic [31:0] VFDOTPEX_S_R_B = 32'b1001011??????????111?????0110011; + localparam logic [31:0] VFAVG_B = 32'b1010110??????????011?????0110011; + localparam logic [31:0] VFAVG_R_B = 32'b1010110??????????111?????0110011; +endpackage // fpu_ss_instr_pkg diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_pkg.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_pkg.sv new file mode 100644 index 000000000..c292214ca --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_pkg.sv @@ -0,0 +1,240 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// FPU Subsystem Package: cv-x-if struct typedefs, FPU_SS internal types and fpnew configuration parameters +// Contributor: Moritz Imfeld + +package fpu_ss_pkg; + + // ------------------------------ + // cv-x-if structs and parameters + // ------------------------------ + + parameter int X_NUM_RS = 3; // Number of source operands per issue request (dimension of rs and rs_valid in x_issue_req_t) + parameter int X_ID_WIDTH = 4; // Bit width of the id field in the cv-x-if structs below + parameter int X_MEM_WIDTH = 32; // Bit width of wdata (x_mem_req_t) and rdata (x_mem_result_t); be has X_MEM_WIDTH/8 bits + parameter int X_RFR_WIDTH = 32; // Bit width of each rs entry in x_issue_req_t + parameter int X_RFW_WIDTH = 32; // Bit width of data in x_result_t + parameter logic [31:0] X_MISA = '0; // NOTE(review): not referenced anywhere else in this package + parameter logic [ 1:0] X_ECS_XS = '0; // NOTE(review): not referenced anywhere else in this package + + localparam int XLEN = 32; // Divides X_RFW_WIDTH to size the we field of x_result_t (32/32 = 1 bit) + localparam int FLEN = 32; // NOTE(review): not referenced anywhere else in this package (C_FLEN below is a separate parameter) + + typedef struct packed { + logic [ 15:0] instr; // Offloaded compressed instruction + logic [ 1:0] mode; // Privilege level + logic [X_ID_WIDTH-1:0] id; // Identification number of the offloaded compressed instruction + } x_compressed_req_t; + + typedef struct packed { + logic [31:0] instr; // Uncompressed instruction + logic accept; // Is the offloaded compressed instruction (id) accepted by the coprocessor? 
+ } x_compressed_resp_t; + + typedef struct packed { + logic [ 31:0] instr; // Offloaded instruction + logic [ 1:0] mode; // Privilege level + logic [X_ID_WIDTH-1:0] id; // Identification of the offloaded instruction + logic [X_NUM_RS -1:0][X_RFR_WIDTH-1:0] rs; // Register file source operands for the offloaded instruction + logic [X_NUM_RS -1:0] rs_valid; // Validity of the register file source operand(s) + logic [ 5:0] ecs; // Extension Context Status ({mstatus.xs, mstatus.fs, mstatus.vs}) + logic ecs_valid; // Validity of the Extension Context Status + } x_issue_req_t; + + typedef struct packed { + logic accept; // Is the offloaded instruction (id) accepted by the coprocessor? + logic writeback; // Will the coprocessor perform a writeback in the core to rd? + logic dualwrite; // Will the coprocessor perform a dual writeback in the core to rd and rd+1? + logic [2:0] dualread; // Will the coprocessor require dual reads from rs1\rs2\rs3 and rs1+1\rs2+1\rs3+1? + logic loadstore; // Is the offloaded instruction a load/store instruction? 
+ logic ecswrite ; // Will the coprocessor write the Extension Context Status in mstatus? + logic exc; // Can the offloaded instruction possibly cause a synchronous exception in the coprocessor itself? + } x_issue_resp_t; + + typedef struct packed { + logic [X_ID_WIDTH-1:0] id; // Identification of the offloaded instruction + logic commit_kill; // Shall an offloaded instruction be killed? + } x_commit_t; + + typedef struct packed { + logic [X_ID_WIDTH -1:0] id; // Identification of the offloaded instruction + logic [ 31:0] addr; // Virtual address of the memory transaction + logic [ 1:0] mode; // Privilege level + logic we; // Write enable of the memory transaction + logic [ 2:0] size; // Size of the memory transaction + logic [X_MEM_WIDTH/8-1:0] be; // Byte enables for memory transaction (4 bits when X_MEM_WIDTH is 32) + logic [ 1:0] attr; // Memory transaction attributes + logic [X_MEM_WIDTH -1:0] wdata; // Write data of a store memory transaction + logic last; // Is this the last memory transaction for the offloaded instruction? + logic spec; // Is the memory transaction speculative? + } x_mem_req_t; + + typedef struct packed { + logic exc; // Did the memory request cause a synchronous exception? + logic [5:0] exccode; // Exception code + logic dbg; // Did the memory request cause a debug trigger match with ``mcontrol.timing`` = 0? + } x_mem_resp_t; + + typedef struct packed { + logic [X_ID_WIDTH -1:0] id; // Identification of the offloaded instruction + logic [X_MEM_WIDTH-1:0] rdata; // Read data of a read memory transaction + logic err; // Did the instruction cause a bus error? + logic dbg; // Did the read data cause a debug trigger match with ``mcontrol.timing`` = 0? 
+ } x_mem_result_t; + + typedef struct packed { + logic [X_ID_WIDTH -1:0] id; // Identification of the offloaded instruction + logic [X_RFW_WIDTH -1:0] data; // Register file write data value(s) + logic [ 4:0] rd; // Register file destination address(es) + logic [X_RFW_WIDTH/XLEN-1:0] we; // Register file write enable(s) + logic [ 5:0] ecsdata; // Write data value for {mstatus.xs, mstatus.fs, mstatus.vs} + logic [ 2:0] ecswe; // Write enables for {mstatus.xs, mstatus.fs, mstatus.vs} + logic exc; // Did the instruction cause a synchronous exception? + logic [ 5:0] exccode; // Exception code + logic err; // Did the instruction cause a bus error? + logic dbg; // Did the instruction cause a debug trigger match with ``mcontrol.timing`` = 0? + } x_result_t; + + // -------------- + // FPU_SS structs + // -------------- + + // Compressed predecoder request type + typedef struct packed { + logic [15:0] comp_instr; // presumably the 16-bit compressed instruction to predecode - TODO confirm + } comp_prd_req_t; + + // Compressed predecoder response type + typedef struct packed { + logic accept; + logic [31:0] decomp_instr; // presumably the 32-bit expansion of comp_instr - TODO confirm + } comp_prd_rsp_t; + + // Predecoder request type + typedef struct packed { + logic [31:0] q_instr_data; + } acc_prd_req_t; + + // Predecoder response type + typedef struct packed { + logic p_accept; + logic p_is_mem_op; + logic p_writeback; + logic [2:0] p_use_rs; // 3 bits; presumably one flag per source operand - TODO confirm + } acc_prd_rsp_t; + + // Predecoder internal instruction metadata + typedef struct packed { + logic [31:0] instr_data; // Instruction bit pattern of one entry (see the OffloadInstr table in fpu_ss_prd_f_pkg) + logic [31:0] instr_mask; // presumably selects which instr_data bits are compared - TODO confirm + acc_prd_rsp_t prd_rsp; // Predecoder response stored alongside the pattern + } offload_instr_t; + + typedef enum logic [2:0] { + None, // No explicit values are given, so 
None encodes as 0 and the following enumerators count up + AccBus, + RegA, + RegB, + RegC, + RegBRep, // Replication for vectors + RegDest + } op_select_e; + + typedef enum logic [1:0] { + Byte = 2'b00, // presumably load/store access size encodings - TODO confirm + HalfWord = 2'b01, + Word = 2'b10, + DoubleWord = 2'b11 + } ls_size_e; + + typedef struct packed { + logic [2:0][31:0] rs; // NOTE(review): hard-coded 3 x 32 bits; equals X_NUM_RS x X_RFR_WIDTH with their current values + logic [31:0] instr_data; + logic [3:0] id; // NOTE(review): hard-coded 4 bits; equals X_ID_WIDTH with its current value + logic [1:0] mode; + } offloaded_data_t; + + typedef struct packed { + logic [ 3:0] id; // NOTE(review): hard-coded 4 bits; equals X_ID_WIDTH with its current value + logic [ 4:0] rd; + logic we; + } 
mem_metadata_t; + + typedef struct packed { + logic [ 4:0] addr; + logic rd_is_fp; + logic [ 3:0] id; + } fpu_tag_t; + + // ----------- + // FPU Config + // ----------- + + // Floating-point extensions configuration + parameter bit C_RVF = 1'b1; // Is F extension enabled + parameter bit C_RVD = 1'b0; // Is D extension enabled - NOT SUPPORTED CURRENTLY + + // Transprecision floating-point extensions configuration + parameter bit C_XF16 = 1'b0; // Is half-precision float extension (Xf16) enabled + parameter bit C_XF16ALT = 1'b0; // Is alternative half-precision float extension (Xf16alt) enabled + parameter bit C_XF8 = 1'b0; // Is quarter-precision float extension (Xf8) enabled + parameter bit C_XFVEC = 1'b0; // Is vectorial float extension (Xfvec) enabled + + // Latency of FP operations: 0 = no pipe registers, 1 = 1 pipe register etc. + parameter int unsigned C_LAT_FP64 = 'd1; // set to 1 to mimic cv32e40p core internal + parameter int unsigned C_LAT_FP32 = 'd1; // set to 1 to mimic cv32e40p core internal + parameter int unsigned C_LAT_FP16 = 'd1; // set to 1 to mimic cv32e40p core internal + parameter int unsigned C_LAT_FP16ALT = 'd1; // set to 1 to mimic cv32e40p core internal + parameter int unsigned C_LAT_FP8 = 'd1; // set to 1 to break critical path + parameter int unsigned C_LAT_DIVSQRT = 'd1; // divsqrt post-processing pipe + parameter int unsigned C_LAT_CONV = 'd1; // set to 1 to mimic cv32e40p core internal + parameter int unsigned C_LAT_NONCOMP = 'd1; // set to 1 to mimic cv32e40p core internal + + // General FPU-specific defines + + // Length of widest floating-point format = width of fp regfile; evaluates to 32 with the values set above (C_RVD = 0, C_RVF = 1) + parameter C_FLEN = C_RVD ? 64 : // D ext. + C_RVF ? 32 : // F ext. + C_XF16 ? 16 : // Xf16 ext. + C_XF16ALT ? 16 : // Xf16alt ext. + C_XF8 ? 8 : // Xf8 ext. + 0; // Unused in case of no FP + + // Features (enabled formats, vectors etc.) 
+ parameter fpnew_pkg::fpu_features_t FPU_FEATURES = '{ + Width: fpu_ss_pkg::C_FLEN, + EnableVectors: fpu_ss_pkg::C_XFVEC, + EnableNanBox: 1'b0, + FpFmtMask: { + fpu_ss_pkg::C_RVF, fpu_ss_pkg::C_RVD, fpu_ss_pkg::C_XF16, fpu_ss_pkg::C_XF8, fpu_ss_pkg::C_XF16ALT + }, IntFmtMask: { + fpu_ss_pkg::C_XFVEC && fpu_ss_pkg::C_XF8, fpu_ss_pkg::C_XFVEC && (fpu_ss_pkg::C_XF16 || fpu_ss_pkg::C_XF16ALT), 1'b1, 1'b0 + }}; + + // Implementation (number of registers etc); PipeRegs is filled from the C_LAT_* parameters of this package + parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{ + PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt + '{ + fpu_ss_pkg::C_LAT_FP32, fpu_ss_pkg::C_LAT_FP64, fpu_ss_pkg::C_LAT_FP16, fpu_ss_pkg::C_LAT_FP8, fpu_ss_pkg::C_LAT_FP16ALT + }, // ADDMUL + '{default: fpu_ss_pkg::C_LAT_DIVSQRT}, // DIVSQRT + '{default: fpu_ss_pkg::C_LAT_NONCOMP}, // NONCOMP + '{default: fpu_ss_pkg::C_LAT_CONV} + }, // CONV (label for the C_LAT_CONV entry just before; this brace closes PipeRegs) + UnitTypes: '{ + '{default: fpnew_pkg::MERGED}, // ADDMUL + '{default: fpnew_pkg::MERGED}, // DIVSQRT + '{default: fpnew_pkg::PARALLEL}, // NONCOMP + '{default: fpnew_pkg::MERGED} + }, // CONV (label for the MERGED entry just before; this brace closes UnitTypes) + PipeConfig: fpnew_pkg::BEFORE}; + +endpackage // fpu_ss_pkg diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_prd_f_pkg.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_prd_f_pkg.sv new file mode 100644 index 000000000..9bdf5546b --- /dev/null +++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_prd_f_pkg.sv @@ -0,0 +1,361 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Predecoder Package
+// Contributor: Moritz Imfeld
+// Each entry pairs a reference pattern (instr_data) with the bits that must match it (instr_mask); fpu_ss_predecoder selects an entry when (instruction & instr_mask) == instr_data and then returns that entry's prd_rsp.
+package fpu_ss_prd_f_pkg;
+// In this table: p_is_mem_op is set only for FLW/FSW; p_writeback is set for FCVT.W[U].S, FMV.X.W, FEQ/FLT/FLE, FCLASS and all CSR entries; p_use_rs is 3'b001 for FLW/FSW, FCVT.S.W[U], FMV.W.X and CSRRW/CSRRS, 3'b000 elsewhere. NOTE(review): p_use_rs presumably flags which core source registers must be supplied -- confirm against the X-interface adapter.
+// parameter int unsigned NumInstr="NUMBER OF FP INSTR";
+parameter int unsigned NumInstr=34; // must equal the number of entries in OffloadInstr below
+parameter fpu_ss_pkg::offload_instr_t OffloadInstr[NumInstr] = '{
+  '{
+    instr_data: 32'b 000000000000_00000_010_00000_0000111, // FLW
+    instr_mask: 32'b 000000000000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b1,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 0000000_00000_00000_010_00000_0100111, // FSW
+    instr_mask: 32'b 0000000_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b1,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1000011, // FMADD.S
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1000111, // FMSUB.S
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1001011, // FNMSUB.S
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1001111, // FNMADD.S
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0000000_00000_00000_000_00000_1010011, // FADD.S
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0000100_00000_00000_000_00000_1010011, // FSUB.S
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0001000_00000_00000_000_00000_1010011, // FMUL.S
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0001100_00000_00000_000_00000_1010011, // FDIV.S
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0101100_00000_00000_000_00000_1010011, // FSQRT.S
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0010000_00000_00000_000_00000_1010011, // FSGNJ.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0010000_00000_00000_001_00000_1010011, // FSGNJN.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0010000_00000_00000_010_00000_1010011, // FSGNJX.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0010100_00000_00000_000_00000_1010011, // FMIN.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 0010100_00000_00000_001_00000_1010011, // FMAX.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1100000_00000_00000_000_00000_1010011, // FCVT.W.S
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1100000_00001_00000_000_00000_1010011, // FCVT.WU.S
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1110000_00000_00000_000_00000_1010011, // FMV.X.W
+    instr_mask: 32'b 1111111_11111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1010000_00000_00000_010_00000_1010011, // FEQ.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1010000_00000_00000_001_00000_1010011, // FLT.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1010000_00000_00000_000_00000_1010011, // FLE.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1110000_00000_00000_001_00000_1010011, // FCLASS.S
+    instr_mask: 32'b 1111111_11111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 1101000_00000_00000_000_00000_1010011, // FCVT.S.W
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 1101000_00001_00000_000_00000_1010011, // FCVT.S.WU
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 1111000_00000_00000_000_00000_1010011, // FMV.W.X
+    instr_mask: 32'b 1111111_11111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b0,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000011_00000_001_00000_1110011, // CSRRW (fscsr) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000011_00000_010_00000_1110011, // CSRRS (frcsr)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000010_00000_001_00000_1110011, // CSRRW (fsrm) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000010_00000_010_00000_1110011, // CSRRS (frrm)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000010_00000_101_00000_1110011, // CSRRWI (fsrmi) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 000000000001_00000_001_00000_1110011, // CSRRW (fsflags) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000001_00000_010_00000_1110011, // CSRRS (frflags)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000001_00000_101_00000_1110011, // CSRRWI (fsflagsi) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  }
+};
+
+endpackage // fpu_ss_prd_f_pkg
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_prd_zfinx_pkg.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_prd_zfinx_pkg.sv
new file mode 100644
index 000000000..b3cdd307f
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_prd_zfinx_pkg.sv
@@ -0,0 +1,321 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Predecoder Package
+// Contributor: Moritz Imfeld
+// Each entry pairs a reference pattern (instr_data) with the bits that must match it (instr_mask); fpu_ss_predecoder selects an entry when (instruction & instr_mask) == instr_data and then returns that entry's prd_rsp.
+package fpu_ss_prd_zfinx_pkg;
+// In this table: there are no FLW/FSW or FMV.X.W/FMV.W.X entries and p_is_mem_op is never set; every entry sets p_writeback; p_use_rs is 3'b111 for the fused multiply-add group, 3'b011 for two-operand arithmetic/sign-injection/min-max/compare, 3'b001 for FSQRT, FCLASS, conversions and CSRRW/CSRRS, and 3'b000 for CSRRWI. NOTE(review): p_use_rs presumably flags which core source registers must be supplied -- confirm against the X-interface adapter.
+// parameter int unsigned NumInstr="NUMBER OF FP INSTR";
+parameter int unsigned NumInstr=30; // must equal the number of entries in OffloadInstr below
+parameter fpu_ss_pkg::offload_instr_t OffloadInstr[NumInstr] = '{
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1000011, // FMADD.S ok
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b111
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1000111, // FMSUB.S ok
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b111
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1001011, // FNMSUB.S ok
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b111
+    }
+  },
+  '{
+    instr_data: 32'b 00000_00_00000_00000_000_00000_1001111, // FNMADD.S ok
+    instr_mask: 32'b 00000_11_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b111
+    }
+  },
+  '{
+    instr_data: 32'b 0000000_00000_00000_000_00000_1010011, // FADD.S ok
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0000100_00000_00000_000_00000_1010011, // FSUB.S ok
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0001000_00000_00000_000_00000_1010011, // FMUL.S ok
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0001100_00000_00000_000_00000_1010011, // FDIV.S ok
+    instr_mask: 32'b 1111111_00000_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0101100_00000_00000_000_00000_1010011, // FSQRT.S ok
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 0010000_00000_00000_000_00000_1010011, // FSGNJ.S ok
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0010000_00000_00000_001_00000_1010011, // FSGNJN.S ok
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0010000_00000_00000_010_00000_1010011, // FSGNJX.S ok
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0010100_00000_00000_000_00000_1010011, // FMIN.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 0010100_00000_00000_001_00000_1010011, // FMAX.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 1100000_00000_00000_000_00000_1010011, // FCVT.W.S
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 1100000_00001_00000_000_00000_1010011, // FCVT.WU.S
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 1010000_00000_00000_010_00000_1010011, // FEQ.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 1010000_00000_00000_001_00000_1010011, // FLT.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 1010000_00000_00000_000_00000_1010011, // FLE.S
+    instr_mask: 32'b 1111111_00000_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b011
+    }
+  },
+  '{
+    instr_data: 32'b 1110000_00000_00000_001_00000_1010011, // FCLASS.S
+    instr_mask: 32'b 1111111_11111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 1101000_00000_00000_000_00000_1010011, // FCVT.S.W
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 1101000_00001_00000_000_00000_1010011, // FCVT.S.WU
+    instr_mask: 32'b 1111111_11111_00000_000_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000011_00000_001_00000_1110011, // CSRRW (fscsr) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000011_00000_010_00000_1110011, // CSRRS (frcsr)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000010_00000_001_00000_1110011, // CSRRW (fsrm) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000010_00000_010_00000_1110011, // CSRRS (frrm)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000010_00000_101_00000_1110011, // CSRRWI (fsrmi) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  },
+  '{
+    instr_data: 32'b 000000000001_00000_001_00000_1110011, // CSRRW (fsflags) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000001_00000_010_00000_1110011, // CSRRS (frflags)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b001
+    }
+  },
+  '{
+    instr_data: 32'b 000000000001_00000_101_00000_1110011, // CSRRWI (fsflagsi) NOTE: two instructions (swap and write)
+    instr_mask: 32'b 111111111111_00000_111_00000_1111111,
+    prd_rsp   : '{
+      p_accept    : 1'b1,
+      p_writeback : 1'b1,
+      p_is_mem_op : 1'b0,
+      p_use_rs    : 3'b000
+    }
+  }
+};
+
+endpackage // fpu_ss_prd_zfinx_pkg
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_predecoder.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_predecoder.sv
new file mode 100644
index 000000000..58f5f66d7
--- /dev/null
+++ 
b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_predecoder.sv
@@ -0,0 +1,53 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Subsystem Predecoder
+// Contributor: Noam Gallmann
+//              Moritz Imfeld
+//
+// Purely combinational table lookup. The incoming instruction word is compared
+// against every OffloadInstr entry; the prd_rsp of each matching entry is OR-ed
+// into prd_rsp_o, and prd_rsp_o is all-zero when no entry matches.
+
+module fpu_ss_predecoder #(
+  // Number of entries in OffloadInstr
+  parameter int NumInstr = 1,
+  // Encoding table: instr_data / instr_mask / prd_rsp per entry
+  parameter fpu_ss_pkg::offload_instr_t OffloadInstr[NumInstr] = {0}
+) (
+  input  fpu_ss_pkg::acc_prd_req_t prd_req_i,
+  output fpu_ss_pkg::acc_prd_rsp_t prd_rsp_o
+);
+
+  import fpu_ss_pkg::*;
+
+  logic         [NumInstr-1:0] instr_sel;  // bit idx: table entry idx matches the instruction
+  acc_prd_rsp_t [NumInstr-1:0] instr_rsp;  // entry idx response, all-zero when it did not match
+
+  // Match: the masked instruction bits must equal the entry's reference pattern.
+  for (genvar idx = 0; idx < NumInstr; idx++) begin : gen_predecoder_selector
+    assign instr_sel[idx] =
+        ((prd_req_i.q_instr_data & OffloadInstr[idx].instr_mask) == OffloadInstr[idx].instr_data);
+  end
+
+  // Gate: forward the entry's response (all of its fields at once) only on a match.
+  for (genvar idx = 0; idx < NumInstr; idx++) begin : gen_predecoder_mux
+    assign instr_rsp[idx] = instr_sel[idx] ? OffloadInstr[idx].prd_rsp : '0;
+  end
+
+  // Reduce: bitwise OR of all gated responses.
+  always_comb begin
+    prd_rsp_o = '0;
+    for (int unsigned idx = 0; idx < NumInstr; idx++) begin
+      prd_rsp_o = prd_rsp_o | instr_rsp[idx];
+    end
+  end
+
+endmodule  // fpu_ss_predecoder
diff --git a/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_regfile.sv b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_regfile.sv
new file mode 100644
index 000000000..630cba6dc
--- /dev/null
+++ b/hw/vendor/pulp_platform_fpu_ss/src/fpu_ss_regfile.sv
@@ -0,0 +1,56 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// FPU Subsystem Register File
+// Contributor: Moritz Imfeld
+// Based on: https://github.com/pulp-platform/snitch/blob/master/hw/ip/snitch/src/snitch_regfile_ff.sv
+// Flip-flop register file: NumWords (32) words of 32 bits, three combinational read ports, one clocked write port.
+module fpu_ss_regfile (
+  // clock and reset
+  input  logic                clk_i,
+  input  logic                rst_ni,   // asynchronous, active-low: clears every word to zero
+  // read port
+  input  logic [ 2:0][ 4:0]   raddr_i,  // one 5-bit word index per read port
+  output logic [ 2:0][31:0]   rdata_o,  // rdata_o[p] is the stored word selected by raddr_i[p]
+  // write port
+  input  logic [ 4:0]         waddr_i,
+  input  logic [31:0]         wdata_i,
+  input  logic                we_i      // write enable, takes effect on the rising edge of clk_i
+);
+
+  localparam int unsigned NumWords = 32;
+
+  logic [NumWords-1:0][31:0] mem;     // register storage
+  logic [NumWords-1:0]       we_dec;  // per-word write enable, at most one bit set
+
+  // Decode waddr_i: only the addressed word sees we_i, every other word gets 0.
+  always_comb begin : we_decoder
+    for (int unsigned i = 0; i < NumWords; i++) begin
+      if (waddr_i == i[4:0]) we_dec[i] = we_i;
+      else we_dec[i] = 1'b0;
+    end
+  end
+
+  // Loop over ALL words, index 0 included: word 0 is reset and written like any other word (it is not hard-wired to zero).
+  for (genvar i = 0; i < NumWords; i++) begin : register_write_behavioral
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+      if (~rst_ni) begin
+        mem[i] <= '0;
+      end else if (we_dec[i]) begin
+        mem[i] <= wdata_i;
+      end
+    end
+  end
+
+  for (genvar i = 0; i < 3; i++) begin : gen_read_port
+    assign rdata_o[i] = mem[raddr_i[i]];  // reads the stored value only; no write-to-read bypass here
+  end
+
+endmodule // fpu_ss_regfile
diff --git a/hw/vendor/waiver/lint/common_cells.vlt b/hw/vendor/waiver/lint/common_cells.vlt index 1079fc6c9..f4480f2f6 100644 --- a/hw/vendor/waiver/lint/common_cells.vlt +++ b/hw/vendor/waiver/lint/common_cells.vlt @@ -15,3 +15,4 @@ lint_off -rule DECLFILENAME -file "*/src/clk_mux_glitch_free.sv" -match "Filenam lint_off -rule BLKSEQ -file "*/src/clk_int_div.sv" lint_off -rule WIDTH -file "*/src/clk_int_div.sv" lint_off -rule WIDTH -file "*/src/cdc_fifo_2phase.sv" +lint_off -rule UNOPTFLAT -file "*src/fifo_v3.sv" -match "Signal unoptimizable: Feedback to clock or circular*" diff --git a/hw/vendor/waiver/lint/cv32e40x.vlt b/hw/vendor/waiver/lint/cv32e40x.vlt index 62d7fb109..3e06f22e5 100644 --- a/hw/vendor/waiver/lint/cv32e40x.vlt +++ b/hw/vendor/waiver/lint/cv32e40x.vlt @@ -31,16 +31,15 @@ lint_off -rule WIDTH -file 
"*/rtl/cv32e40x_int_controller.sv" -match "Operator A lint_off -rule WIDTH -file "*/rtl/cv32e40x_int_controller.sv" -match "Operator ASSIGN expects 5 bits on the Assign RHS, but Assign RHS's VARREF 'CSR_MTIX_BIT' generates 32 bits." lint_off -rule WIDTH -file "*/rtl/cv32e40x_if_stage.sv" -match "Operator ASSIGNDLY expects 32 bits on the Assign RHS, but Assign RHS's VARREF 'xif_id' generates 4 bits." lint_off -rule WIDTH -file "*/rtl/cv32e40x_pma.sv" -match "Logical operator GENIF expects 1 bit on the If, but If's VARREF 'A_EXT' generates 2 bits." - lint_off -rule UNUSED -file "*cv32e40x_*.sv" -match "*" lint_off -rule UNUSED -file "*/rtl/if_xif.sv" -match "*" - lint_off -rule LITENDIAN -file "*/rtl/cv32e40x_alignment_buffer.sv" -match "Little bit endian vector: left < right of bit range: [0:2]" - lint_off -rule UNDRIVEN -file "*/rtl/if_xif.sv" -match "*" lint_off -rule UNDRIVEN -file "*/rtl/cv32e40x_id_stage.sv" -match "*" lint_off -rule UNDRIVEN -file "*/rtl/cv32e40x_align_check.sv" -match "Bits of signal are not driven: 'core_resp_o'[2]" - lint_off -rule BLKANDNBLK -file "*/rtl/cv32e40x_csr.sv" -match "Unsupported: Blocked and non-blocking assignments to same variable: *" - lint_off -rule MULTIDRIVEN -file "*/rtl/cv32e40x_cs_registers.sv" -match "Signal has multiple driving blocks with different clocking: '*'*" +lint_off -rule WIDTH -file "*/rtl/cv32e40x_controller_fsm.sv" -match "Operator ASSIGNW expects 4 bits on the Assign RHS, but Assign RHS's SEL generates 32 bits*" +lint_off -rule WIDTH -file "*/rtl/cv32e40x_id_stage.sv" -match "Operator ASSIGNW expects 4 bits on the Assign RHS, but Assign RHS's SEL generates 32 bits.*" +lint_off -rule WIDTH -file "*/rtl/cv32e40x_load_store_unit.sv" -match "Operator ASSIGNW expects 6 bits on the Assign RHS, but Assign RHS's COND generates 11 bits.*" +lint_off -rule WIDTH -file "*/rtl/cv32e40x_if_stage.sv" -match "Operator ASSIGNW expects 4 bits on the Assign RHS, but Assign RHS's COND generates 32 bits.*" diff --git 
a/hw/vendor/waiver/lint/fpu_ss.vlt b/hw/vendor/waiver/lint/fpu_ss.vlt new file mode 100644 index 000000000..9efcb6fbd --- /dev/null +++ b/hw/vendor/waiver/lint/fpu_ss.vlt @@ -0,0 +1,13 @@ +// Copyright 2022 OpenHW Group +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +`verilator_config + +lint_off -rule WIDTHCONCAT -file "*/pulp_platform_fpu_ss/src/*" -match "*" +lint_off -rule VARHIDDEN -file "*/src/fpu_ss.sv" -match "Declaration of signal hides declaration in upper scope: 'FPU_FEATURES'*" +lint_off -rule VARHIDDEN -file "*/src/fpu_ss.sv" -match "Declaration of signal hides declaration in upper scope: 'FPU_IMPLEMENTATION'*" +lint_off -rule UNUSED -file "*/src/fpu_ss.sv" -match "*" +lint_off -rule UNUSED -file "*/src/fpu_ss_controller.sv" -match "*" +lint_off -rule UNDRIVEN -file "*/src/fpu_ss.sv" -match "Bits of signal are not driven:*" +lint_off -rule UNOPTFLAT -file "*/src/fpu_ss.sv" -match "Signal unoptimizable: Feedback to clock or circular logic:*" diff --git a/hw/vendor/waiver/verilator_waiver.core b/hw/vendor/waiver/verilator_waiver.core index e73cbfe00..6be4b4867 100644 --- a/hw/vendor/waiver/verilator_waiver.core +++ b/hw/vendor/waiver/verilator_waiver.core @@ -28,6 +28,7 @@ filesets: - lint/spi_device.vlt - lint/spi_host.vlt - lint/gpio.vlt + - lint/fpu_ss.vlt file_type: vlt diff --git a/tb/tb_top.sv b/tb/tb_top.sv index 75bc9ee43..226e0be8e 100644 --- a/tb/tb_top.sv +++ b/tb/tb_top.sv @@ -7,6 +7,7 @@ module tb_top #( parameter FPU = 0, parameter ZFINX = 0, parameter JTAG_DPI = 0, + parameter X_EXT = 0, parameter USE_EXTERNAL_DEVICE_EXAMPLE = 1 ); @@ -193,6 +194,7 @@ module tb_top #( .COREV_PULP (COREV_PULP), .FPU (FPU), .ZFINX (ZFINX), + .X_EXT (X_EXT), .JTAG_DPI (JTAG_DPI), .USE_EXTERNAL_DEVICE_EXAMPLE(USE_EXTERNAL_DEVICE_EXAMPLE), .CLK_FREQUENCY (CLK_FREQUENCY_KHz) diff --git a/tb/testharness.sv b/tb/testharness.sv index 9abe4180f..fc216e7bc 100644 --- a/tb/testharness.sv 
+++ b/tb/testharness.sv @@ -30,6 +30,7 @@ module testharness #( inout wire exit_valid_o ); + `include "tb_util.svh" import obi_pkg::*; @@ -114,6 +115,16 @@ module testharness #( logic [EXT_DOMAINS_RND-1:0] external_ram_banks_set_retentive_n; logic [EXT_DOMAINS_RND-1:0] external_subsystem_clkgate_en_n; + // eXtension Interface + if_xif #( + .X_NUM_RS(fpu_ss_pkg::X_NUM_RS), + .X_ID_WIDTH(fpu_ss_pkg::X_ID_WIDTH), + .X_MEM_WIDTH(fpu_ss_pkg::X_MEM_WIDTH), + .X_RFR_WIDTH(fpu_ss_pkg::X_RFR_WIDTH), + .X_RFW_WIDTH(fpu_ss_pkg::X_RFW_WIDTH), + .X_MISA(fpu_ss_pkg::X_MISA) + ) ext_if (); + always_comb begin // All interrupt lines set to zero by default for (int i = 0; i < core_v_mini_mcu_pkg::NEXT_INT; i++) begin @@ -123,6 +134,18 @@ module testharness #( intr_vector_ext[0] = memcopy_intr; end + // Log each testbench parameter exactly once at simulation start + initial begin + $display("%t: the parameter COREV_PULP is %x", $time, COREV_PULP); + $display("%t: the parameter FPU is %x", $time, FPU); + $display("%t: the parameter ZFINX is %x", $time, ZFINX); + $display("%t: the parameter X_EXT is %x", $time, X_EXT); + $display("%t: the parameter JTAG_DPI is %x", $time, JTAG_DPI); + $display("%t: the parameter USE_EXTERNAL_DEVICE_EXAMPLE is %x", $time, + USE_EXTERNAL_DEVICE_EXAMPLE); + $display("%t: the parameter CLK_FREQUENCY is %d KHz", $time, CLK_FREQUENCY); + end + `ifdef USE_UPF initial begin $display("%t: All Power Supply ON", $time); @@ -131,9 +154,6 @@ module testharness #( end `endif - // eXtension Interface - if_xif #() ext_if (); - x_heep_system #( .COREV_PULP(COREV_PULP), .FPU(FPU), @@ -498,6 +518,29 @@ module testharness #( ); `endif + if ((core_v_mini_mcu_pkg::CpuType == cv32e40x || core_v_mini_mcu_pkg::CpuType == cv32e40px) && X_EXT != 0) begin: gen_fpu_ss_wrapper + fpu_ss_wrapper #( + .PULP_ZFINX(ZFINX), + .INPUT_BUFFER_DEPTH(1), + .OUT_OF_ORDER(0), + .FORWARDING(1), + .FPU_FEATURES(fpu_ss_pkg::FPU_FEATURES), + 
.FPU_IMPLEMENTATION(fpu_ss_pkg::FPU_IMPLEMENTATION) + ) fpu_ss_wrapper_i ( + // Clock and reset + .clk_i, + .rst_ni, + + // eXtension Interface + .xif_compressed_if(ext_if), + .xif_issue_if(ext_if), + .xif_commit_if(ext_if), + .xif_mem_if(ext_if), + .xif_mem_result_if(ext_if), + .xif_result_if(ext_if) + ); + end + end else begin : gen_DONT_USE_EXTERNAL_DEVICE_EXAMPLE assign slow_ram_slave_resp.gnt = '0; assign slow_ram_slave_resp.rdata = '0; @@ -514,6 +557,7 @@ module testharness #( assign memcopy_intr = '0; assign periph_slave_rsp = '0; + end endgenerate diff --git a/x-heep-tb-fpu-utils.core b/x-heep-tb-fpu-utils.core new file mode 100644 index 000000000..83f6b6cd5 --- /dev/null +++ b/x-heep-tb-fpu-utils.core @@ -0,0 +1,21 @@ +CAPI=2: + +# Copyright 2023 EPFL +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +name: x-heep::tb-fpu-utils +description: x-heep testbench files. + +filesets: + files_examples: + depend: + - pulp-platform.org:ip:fpu_ss + files: + - hw/ip_examples/fpu_ss_wrapper/fpu_ss_wrapper.sv + file_type: systemVerilogSource + +targets: + default: &default_target + filesets: + - files_examples diff --git a/x-heep-tb-utils.core b/x-heep-tb-utils.core index a029e1bb1..05b75ba51 100644 --- a/x-heep-tb-utils.core +++ b/x-heep-tb-utils.core @@ -30,6 +30,8 @@ filesets: - pulp-platform.org::pulpissimo_simjtag tb-harness: + depend: + - pulp-platform.org:ip:fpu_ss files: - tb/tb_util.svh: {is_include_file: true} - tb/testharness_pkg.sv