From f0ff86d2227d228b076d6c0560d22dfd091d49ec Mon Sep 17 00:00:00 2001
From: Attapon-Bunwong
Date: Sun, 5 May 2024 18:19:42 +0000
Subject: [PATCH] Support random traffic simulation

---
 1. COMPUTE_TILE_TESTNODE.md                |  19 +-
 Bender.local                               |   6 +
 Makefile                                   |  57 ++--
 floogen/examples/chiplet_all_dma.yml       | 286 ++++++++++++++++++
 floogen/model/endpoint.py                  |  10 +-
 floogen/model/network.py                   |  14 +-
 .../tb_floo_compute_tile_array.sv.mako     |   4 +-
 hw/chiplet_floo_noc.sv                     |  24 +-
 hw/floo_narrow_wide_pkg.sv                 |  11 +-
 hw/tb/floo_testharness.sv                  |   6 +
 hw/tb/tb_floo_compute_tile_array.sv        |  33 +-
 hw/test/dma_test_node.sv                   |  10 +-
 hw/test/snitch_cluster_test_node.sv        |  12 +-
 util/gen_jobs.py                           | 180 +++++++----
 util/soc_config.py                         |   3 +-
 util/test_random_compute_tile.sh           |   8 +-
 16 files changed, 546 insertions(+), 137 deletions(-)
 create mode 100644 Bender.local
 create mode 100644 floogen/examples/chiplet_all_dma.yml

diff --git a/1. COMPUTE_TILE_TESTNODE.md b/1. COMPUTE_TILE_TESTNODE.md
index bed3f5cd..46d099b4 100644
--- a/1. COMPUTE_TILE_TESTNODE.md
+++ b/1. COMPUTE_TILE_TESTNODE.md
@@ -36,10 +36,10 @@ pip install .
 ```sh
 # Generate system verilog wrapper file using floogen.
 # Overwrite default path for [--outdir] and [--pkg-outdir]
-floogen -c floogen/examples/compute_tile_array.yml --outdir hw --pkg-outdir hw
-floogen -c floogen/examples/compute_tile_array_5x4.yml --outdir hw --pkg-outdir hw
-floogen -c floogen/examples/chiplet.yml --outdir hw --pkg-outdir hw
-# floogen -c ../../floogen/examples/chiplet.yml --outdir ../../hw --pkg-outdir ../../hw --tb-outdir ../../hw/tb --util-outdir ../../util
+# floogen -c floogen/examples/compute_tile_array.yml --outdir hw --pkg-outdir hw
+# floogen -c floogen/examples/compute_tile_array_5x4.yml --outdir hw --pkg-outdir hw
+# floogen -c floogen/examples/chiplet.yml --outdir hw --pkg-outdir hw
+floogen -c floogen/examples/chiplet_all_dma.yml --outdir hw --pkg-outdir hw
 ```
 
-After running the above command, all file used to run the simulation and synthesis of the network configuration that described in .yml file is ready in the path that bender is pointing to. The detail for each file that generated by floogen is described at table 1. in the next section.
+After running the above command, all files used for simulation and synthesis of the network configuration described in the .yml file are ready in the path that Bender points to. The details of each file generated by floogen are given in Table 1 in the next section.
@@ -53,18 +53,22 @@ make jobs TRAFFIC_TB=compute_tile_array TRAFFIC_TYPE=random
-'TRAFFIC_TB' must set to 'compute_tile_array' for working with compute tile array structure. 'TRAFFIC_TYPE' of the above command can be change from 'random' to other type to change the DMA behavior, which is described at table 2. in the next section.
+'TRAFFIC_TB' must be set to 'compute_tile_array' to work with the compute tile array structure. 'TRAFFIC_TYPE' in the above command can be changed from 'random' to another type to change the DMA behavior, as described in Table 2 in the next section.
 
-4. Running simulation in VCS. The simple standalone simulation of FlooNoC is used when `DMA_TESTNODE=TRUE` is declared. In this standalone test mode, snitch cluster will be replace with DMA simulation model, and there is no require for snitch cluster repo to be locate locally. In case select between running simulation in batch mode and GUI mode. Verdi is used for interactive debugging in GUI mode. 'TB_DUT' must set to the name of top level testbench, which can be difference in case of 'name' in .yml file is changing. 'JOB_NAME' must set to 'compute_tile_array' for working with compute tile array structure.
+4. Running simulation in VCS. In this standalone test mode, the snitch cluster is replaced with a DMA simulation model, so the snitch cluster repository is not required locally. Choose between running the simulation in batch mode or GUI mode; Verdi is used for interactive debugging in GUI mode. 'TB_DUT' must be set to the name of the top-level testbench, which may differ if 'name' in the .yml file is changed. 'JOB_NAME' must be set to 'compute_tile_array' to work with the compute tile array structure.
 
 To run the simulation in batch mode :
 ```sh
+# Build simulation binary for batch mode
+make bin/floo_noc_batch.vcs TB_DUT=tb_floo_compute_tile_array
 # Running simulation in batch mode
-make run-vcs-batch TB_DUT=tb_floo_compute_tile_array JOB_NAME=compute_tile_array DMA_TESTNODE=TRUE
+make run-vcs-batch JOB_NAME=compute_tile_array
 ```
 
 To run the simulation in GUI mode :
 ```sh
+# Build simulation binary for GUI mode
+make bin/floo_noc_gui.vcs TB_DUT=tb_floo_compute_tile_array
 # Running simulation in GUI mode
-make run-vcs TB_DUT=tb_floo_compute_tile_array JOB_NAME=compute_tile_array DMA_TESTNODE=TRUE
+make run-vcs JOB_NAME=compute_tile_array
 ```
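[Editor's note] For quick reference while reading the rest of the patch, the TRAFFIC_TYPE values accepted by the updated job generator can be summarized as below. This is an editorial sketch, not part of the patch; the descriptions paraphrase the branches of gen_compute_tile_array_traffic in util/gen_jobs.py further down.

```python
# Illustrative only: maps each TRAFFIC_TYPE to the target-selection
# behavior implemented in util/gen_jobs.py after this patch.
TRAFFIC_TYPES = {
    "memory":      "DMA to/from the memory endpoint reachable over the shortest path",
    "memory_rand": "DMA to/from a randomly picked memory endpoint",
    "random":      "DMA to/from a randomly picked subordinate endpoint of any kind",
    "mgr_rand":    "DMA to/from the subordinate port of another manager endpoint",
    "onehop":      "DMA to/from the neighbouring node one hop to the east (x+1)",
}

if __name__ == "__main__":
    for name, desc in TRAFFIC_TYPES.items():
        print(f"{name:12s} {desc}")
```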
diff --git a/Bender.local b/Bender.local
new file mode 100644
index 00000000..f033db3b
--- /dev/null
+++ b/Bender.local
@@ -0,0 +1,6 @@
+# Copyright 2020 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+overrides:
+  axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.1 }
\ No newline at end of file
diff --git a/Makefile b/Makefile
index d9a58a86..4f70baf4 100644
--- a/Makefile
+++ b/Makefile
@@ -212,31 +212,43 @@ compile-vcs-batch: work-vcs/compile_vcs.sh
 compile-vcs: VLOGAN_ARGS+=-debug_access+all
 compile-vcs: compile-vcs-batch
 
-run-vcs: VCS_FLAGS+=-debug_access+all
-run-vcs: SIMV_FLAGS+=-gui=elite
-run-vcs-common:
-ifdef DMA_TESTNODE
-	$(VCS) $(VCS_FLAGS) $(TB_DUT)
-	./simv $(SIMV_FLAGS)
-else
-# Generate VCS simulation binary
-	mkdir -p bin
-	$(VCS) $(VCS_FLAGS) -o bin/snitch_cluster.vcs -cc cc -cpp g++ $(TB_DUT) \
-		$(SNITCH_PATH)/../../../hw/ip/test/src/rtl_lib.cc \
-		$(SNITCH_PATH)/../../../hw/ip/test/src/common_lib.cc \
-		$(SNITCH_PATH)/generated/bootdata.cc \
-		-CFLAGS "-std=c++14 -I$(SNITCH_PATH)/ \
-		-I$(SNITCH_PATH)/test \
-		-I$(SNITCH_PATH)/work/include \
-		-I$(SNITCH_PATH)/../../../hw/ip/test/src" \
-		-LDFLAGS "-L$(SNITCH_PATH)/work/lib" -lfesvr
-# Run VCS simulation binary
-	./bin/snitch_cluster.vcs $(SNITCH_PATH)/$(SNITCH_SW) $(SIMV_FLAGS)
-endif
+
+# run-vcs-common:
+# ifdef DMA_TESTNODE
+# 	$(VCS) $(VCS_FLAGS) $(TB_DUT)
+# 	./simv $(SIMV_FLAGS)
+# else
+# # Generate VCS simulation binary
+# 	mkdir -p bin
+# 	$(VCS) $(VCS_FLAGS) -o bin/snitch_cluster.vcs -cc cc -cpp g++ $(TB_DUT) \
+# 		$(SNITCH_PATH)/../../../hw/ip/test/src/rtl_lib.cc \
+# 		$(SNITCH_PATH)/../../../hw/ip/test/src/common_lib.cc \
+# 		$(SNITCH_PATH)/generated/bootdata.cc \
+# 		-CFLAGS "-std=c++14 -I$(SNITCH_PATH)/ \
+# 		-I$(SNITCH_PATH)/test \
+# 		-I$(SNITCH_PATH)/work/include \
+# 		-I$(SNITCH_PATH)/../../../hw/ip/test/src" \
+# 		-LDFLAGS "-L$(SNITCH_PATH)/work/lib" -lfesvr
+# # Run VCS simulation binary
+# 	./bin/snitch_cluster.vcs $(SNITCH_PATH)/$(SNITCH_SW) $(SIMV_FLAGS)
+# endif
+
+bin/floo_noc_gui.vcs: VCS_FLAGS+=-debug_access+all
+bin/floo_noc_gui.vcs: compile-vcs
 	mkdir -p bin
+	$(VCS) $(VCS_FLAGS) -o bin/floo_noc_gui.vcs $(TB_DUT)
+
+bin/floo_noc_batch.vcs: compile-vcs-batch
+	mkdir -p bin
+	$(VCS) $(VCS_FLAGS) -o bin/floo_noc_batch.vcs $(TB_DUT)
+
+run-vcs: SIMV_FLAGS+=-gui=elite
+run-vcs:
+	./bin/floo_noc_gui.vcs $(SIMV_FLAGS)
 
-run-vcs: compile-vcs run-vcs-common
-run-vcs-batch: compile-vcs-batch run-vcs-common
+run-vcs-batch:
+	./bin/floo_noc_batch.vcs $(SIMV_FLAGS)
 
 clean-vcs:
 	rm -rf work-vcs/compile_vcs.sh
@@ -256,6 +268,7 @@ clean-vcs:
 	rm -rf verdi_config_file
 	rm -rf verdiLog
 	rm -rf simv.vdb
+	rm -rf sysBusyPLog
 
 #############################################
 # Random testing for Compute tile structure #
diff --git a/floogen/examples/chiplet_all_dma.yml b/floogen/examples/chiplet_all_dma.yml
new file mode 100644
index 00000000..b6aa36d9
--- /dev/null
+++ b/floogen/examples/chiplet_all_dma.yml
@@ -0,0 +1,286 @@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Reusable variables
+# For the current configuration, Y_NUM must be >= 4
+X_NUM: &X_NUM 8
+Y_NUM: &Y_NUM 4
+X_NUM_DEC: &X_NUM_DEC 7 # X_NUM minus 1
+Y_NUM_DEC: &Y_NUM_DEC 3 # Y_NUM minus 1
+
+name: chiplet
+description: "Top Level Chiplet"
+compute_tile_gen: true
+num_snitch_core: 9 # 8 snitch cores + 1 DMA core
+
+routing:
+  route_algo: "XY"
+  use_id_table: true
+  # If true, allow routing from X to Y only
+  xy_route_opt: false
+
+protocols:
+  - name: "narrow"
+    type: "AXI4"
+    direction: "manager"
+    data_width: 64
+    addr_width: 48
+    id_width: 4 # iw_in, equal to iw_out of the snitch cluster
+    user_width: 5 # clog2(total number of clusters)
+  - name: "narrow"
+    type: "AXI4"
+    direction: "subordinate"
+    data_width: 64
+    addr_width: 48
+    # iw_out = iw_in + clog2(number of managers on this interconnect, which equals the number of endpoints)
+    # iw_out = iw_in + clog2(X_NUM*Y_NUM + 2*X_NUM + 2*Y_NUM)
+    # iw_out = 4 + clog2(8*4 + 2*8 + 2*4) = 4 + clog2(56) = 4 + 6 = 10
+    id_width: 10 # iw_out; this ID is remapped before connecting to the snitch cluster
+    user_width: 5 # clog2(total number of clusters)
+  - name: "wide"
+    type: "AXI4"
+    direction: "manager"
+    data_width: 512
+    #data_width: 64
+    addr_width: 48
+    # id_width: 3 # iw_in, equal to iw_out of snitch cluster
+    id_width: 4 # iw_in, equal to iw_out of the snitch cluster + ro_cache_cfg (ro_cache_cfg adds 1 to the ID width)
+    user_width: 0
+  - name: "wide"
+    type: "AXI4"
+    direction: "subordinate"
+    data_width: 512
+    #data_width: 64
+    addr_width: 48
+    # iw_out = iw_in + clog2(number of managers on this interconnect, which equals the number of endpoints)
+    # iw_out = iw_in + clog2(X_NUM*Y_NUM + 2*X_NUM + 2*Y_NUM)
+    # iw_out = 3 + clog2(8*4 + 2*8 + 2*4) = 3 + clog2(56) = 3 + 6 = 9
+    #id_width: 9 # iw_out; this ID is remapped before connecting to the snitch cluster
+    id_width: 10 # iw_out + ro_cache_cfg (ro_cache_cfg adds 1 to the ID width); remapped before connecting to the snitch cluster
+    user_width: 0
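[Editor's note] The iw_out comments in the protocols section above can be checked with a few lines of Python. An editorial sketch, not part of the patch; the clog2 convention matches the one used in util/gen_jobs.py.

```python
from math import ceil, log2

def clog2(value: int) -> int:
    # Ceiling log2, as used for ID-width sizing.
    return ceil(log2(value))

X_NUM, Y_NUM = 8, 4
# Managers on the interconnect: one per endpoint
# (X_NUM*Y_NUM clusters plus 2*X_NUM + 2*Y_NUM border endpoints).
num_endpoints = X_NUM * Y_NUM + 2 * X_NUM + 2 * Y_NUM  # 56
assert clog2(num_endpoints) == 6
assert 4 + clog2(num_endpoints) == 10  # narrow: iw_in=4 -> iw_out=10
assert 3 + clog2(num_endpoints) == 9   # wide:   iw_in=3 -> iw_out=9
```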
+# Define endpoints and their subordinate address map
+endpoints:
+# Define compute tile array (snitch cluster) in XY mesh
+  - name: "cluster"
+    array: [*X_NUM, *Y_NUM]
+    addr_range:
+      base: 0x0000_1000_0000
+      size: 0x0000_0004_0000 # size per single snitch cluster
+    mgr_port_protocol:
+      - "narrow"
+      - "wide"
+    sbr_port_protocol:
+      - "narrow"
+      - "wide"
+    id_offset:
+      x: 0 # 0 to X_NUM-1
+      y: 0 # 0 to Y_NUM-1
+# Define endpoints for each border side (North, East, South, and West)
+  ##### North border
+  # HBM memory
+  - name: "hbm_north"
+    array: [*X_NUM, 1]
+    addr_range:
+      base: 0x0010_0000_0000
+      size: 0x0000_4000_0000 # 1 GiB
+    sbr_port_protocol:
+      - "narrow"
+      - "wide"
+    id_offset:
+      x: 0 # 0 to X_NUM-1
+      y: *Y_NUM # Y_NUM means the north-side border
+  ##### South border
+  # HBM memory
+  - name: "hbm_south"
+    array: [*X_NUM, 1]
+    addr_range:
+      base: 0x0002_0000_0000
+      size: 0x0000_4000_0000 # 1 GiB
+    sbr_port_protocol:
+      - "narrow"
+      - "wide"
+    id_offset:
+      x: 0 # 0 to X_NUM-1
+      y: -1 # -1 means the south-side border
+  ##### West border
+  # PCIe interface
+  # address_io: 536870912,  // 0x2000_0000
+  # address_mm: 1207959552, // 0x4800_0000
+  # length: 671088640,      // 640 MiB 0x2800_0000
+  - name: "pcie"
+    addr_range:
+      base: 0x0000_2000_0000
+      size: 0x0000_5000_0000
+    mgr_port_protocol:
+      - "narrow"
+    sbr_port_protocol:
+      - "narrow"
+    id_offset:
+      x: -1 # -1 means the west-side border
+      y: 3
+  # Peripherals AXI xbar
+  # This xbar connects to rom, soc_ctrl, fll_system, fll_periph, fll_hbm2e,
+  # uart, gpio, i2c, chip_ctrl, timer, spim, clint, pcie_cfg, hbm_cfg, plic
+  - name: "peripherals"
+    addr_range:
+      base: 0x0000_0100_0000
+      size: 0x0000_0E00_0000
+    sbr_port_protocol:
+      - "narrow"
+    id_offset:
+      x: -1 # -1 means the west-side border
+      y: 2
+  # JTAG
+  - name: "jtag"
+    addr_range:
+      base: 0x0000_0000_0000
+      size: 0x0000_0000_1000 # 4 KiB
+    mgr_port_protocol:
+      - "narrow"
+    sbr_port_protocol:
+      - "narrow"
+    id_offset:
+      x: -1 # -1 means the west-side border
+      y: 1
+  # CVA6
+  - name: "cva6"
+    addr_range:
+      base: 0x8000_0000_0000 # Added for simulation purposes
+      size: 0x0000_0010_0000 # Added for simulation purposes
+    # Originally no subordinate port, so no endpoint address was assigned to it
+    mgr_port_protocol:
+      - "narrow"
+    sbr_port_protocol:
+      - "narrow" # Added for simulation purposes
+    id_offset:
+      x: -1 # -1 means the west-side border
+      y: 0
+  ##### East border
+  # SPM Wide
+  - name: "spm_wide"
+    addr_range:
+      base: 0x0000_7100_0000
+      size: 0x0000_0010_0000
+    sbr_port_protocol:
+      - "wide"
+    id_offset:
+      x: *X_NUM # East-side border
+      y: 3
+  # SPM Narrow
+  - name: "spm_narrow"
+    addr_range:
+      base: 0x0000_7000_0000
+      size: 0x0000_0008_0000 # 512 KiB
+    sbr_port_protocol:
+      - "narrow"
+    id_offset:
+      x: *X_NUM # East-side border
+      y: 2
+  # iDMA, narrow configuration and wide data transfer
+  - name: "idma"
+    addr_range:
+      base: 0x0000_1100_0000
+      size: 0x0000_0001_0000 # 64 KiB
+    mgr_port_protocol:
+      - "narrow" # Added for simulation purposes
+      - "wide"
+    sbr_port_protocol:
+      - "narrow" # iDMA frontend configuration
+      - "wide" # Added for simulation purposes
+    id_offset:
+      x: *X_NUM # East-side border
+      y: 1
+  # AXI Zero-Memory
+  - name: "zero_mem"
+    addr_range:
+      base: 0x0080_0000_0000
+      size: 0x0002_0000_0000 # 8 GiB
+    sbr_port_protocol:
+      - "wide"
+    id_offset:
+      x: *X_NUM # East-side border
+      y: 0
+
+routers:
+  # One router per cluster
+  - name: "router"
+    array: [*X_NUM, *Y_NUM]
+
+connections:
+  # Each compute tile (snitch cluster) connects to its router
+  - src: "cluster"
+    dst: "router"
+    src_range:
+    - [0, *X_NUM_DEC]
+    - [0, *Y_NUM_DEC]
+    dst_range:
+    - [0, *X_NUM_DEC]
+    - [0, *Y_NUM_DEC]
+    bidirectional: true
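[Editor's note] The src_range/dst_range pair above describes an array-to-array connection. A small editorial sketch (not part of the patch; the helper name is hypothetical) of how such a range expands into individual cluster-to-router links:

```python
from itertools import product

def expand_range(x_range, y_range):
    # Expand [[x_lo, x_hi], [y_lo, y_hi]] into explicit (x, y) indices.
    return list(product(range(x_range[0], x_range[1] + 1),
                        range(y_range[0], y_range[1] + 1)))

X_NUM_DEC, Y_NUM_DEC = 7, 3
links = expand_range([0, X_NUM_DEC], [0, Y_NUM_DEC])
assert len(links) == 32  # 8x4 clusters, one link per cluster/router pair
print(links[:3])  # [(0, 0), (0, 1), (0, 2)]
```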
+  ##### North border
+  # HBM memory
+  - src: "hbm_north"
+    dst: "router"
+    src_range:
+    - [0, *X_NUM_DEC]
+    - [0, 0]
+    dst_range: # router indices for the north border
+    - [0, *X_NUM_DEC]
+    - [*Y_NUM_DEC, *Y_NUM_DEC]
+    bidirectional: true
+  ##### South border
+  # HBM memory
+  - src: "hbm_south"
+    dst: "router"
+    src_range:
+    - [0, *X_NUM_DEC]
+    - [0, 0]
+    dst_range: # router indices for the south border
+    - [0, *X_NUM_DEC]
+    - [0, 0]
+    bidirectional: true
+  ##### West border
+  # PCIe interface
+  - src: "pcie"
+    dst: "router"
+    dst_idx: [0, 3]
+    bidirectional: true
+  # Peripherals
+  - src: "peripherals"
+    dst: "router"
+    dst_idx: [0, 2]
+    bidirectional: true
+  # JTAG
+  - src: "jtag"
+    dst: "router"
+    dst_idx: [0, 1]
+    bidirectional: true
+  # CVA6
+  - src: "cva6"
+    dst: "router"
+    dst_idx: [0, 0]
+    bidirectional: true
+  ##### East border
+  # SPM Wide
+  - src: "spm_wide"
+    dst: "router"
+    dst_idx: [*X_NUM_DEC, 3]
+    bidirectional: true
+  # SPM Narrow
+  - src: "spm_narrow"
+    dst: "router"
+    dst_idx: [*X_NUM_DEC, 2]
+    bidirectional: true
+  # iDMA
+  - src: "idma"
+    dst: "router"
+    dst_idx: [*X_NUM_DEC, 1]
+    bidirectional: true
+  # AXI Zero-Memory
+  - src: "zero_mem"
+    dst: "router"
+    dst_idx: [*X_NUM_DEC, 0]
+    bidirectional: true
+
\ No newline at end of file
diff --git a/floogen/model/endpoint.py b/floogen/model/endpoint.py
index d7a4fd37..f984b71c 100644
--- a/floogen/model/endpoint.py
+++ b/floogen/model/endpoint.py
@@ -141,12 +141,18 @@ def render_tb_ports(self):
         if (self.is_memory_tb()):
             for port in self.sbr_ports:
                 ports += port.render_tb_mem_connect_port()
-        # Render as trimmed port (unsupport simulation model endpoint)
-        else:
+        # Render as connected ports (endpoints with a simulation model)
+        elif self.sbr_port_protocol == self.mgr_port_protocol:
             for port in self.mgr_ports:
                 ports += port.render_tb_dma_connect_port()
             for port in self.sbr_ports:
                 ports += port.render_tb_dma_connect_port()
+        # Render as trimmed ports (endpoints without a simulation model)
+        else:
+            for port in self.mgr_ports:
+                ports += port.render_tb_trim_port()
+            for port in self.sbr_ports:
+                ports += port.render_tb_trim_port()
         return ports
 
     def render_tb_mem(self) -> str:
diff --git a/floogen/model/network.py b/floogen/model/network.py
index 0df4819f..90aa9267 100644
--- a/floogen/model/network.py
+++ b/floogen/model/network.py
@@ -822,7 +822,8 @@ def render_tb_endpoint(self):
                 continue
             if ep.is_memory_tb():
                 endpoints += ep.render_tb_mem() + "\n"
-            else:
+            # The DMA test node requires each endpoint to be active on both mgr and sbr ports
+            elif ep.sbr_port_protocol == ep.mgr_port_protocol:
                 if ep.array is not None:
                     raise ValueError(
                         "The current version is not support for generating testbench for an array of non-memory endpoint"
@@ -883,12 +884,13 @@ def render_tb(self):
         # Remove node that connect to Eject from the top level interface port for compute tile array structure
         ep_eject_nodes = self.graph.get_ep_eject_nodes()
         ep_nodes = [ep for ep in ep_nodes if ep not in ep_eject_nodes]
-        endpoint_mgr = [ep for ep in ep_nodes if ep.mgr_port_protocol is not None]
-        endpoint_mgr_num = 0;
-        for ep in endpoint_mgr:
-            endpoint_mgr_num += len(ep.mgr_port_protocol)
+        # A DMA node needs both manager and subordinate ports
+        endpoint_dma = [ep for ep in ep_nodes if ep.sbr_port_protocol == ep.mgr_port_protocol]
+        endpoint_dma_num = 0
+        for ep in endpoint_dma:
+            endpoint_dma_num += len(ep.mgr_port_protocol)
         return self.tpl_tb.render(noc=self, cp_tiles=routers, \
-            endpoint_mgr=endpoint_mgr, endpoint_mgr_num=endpoint_mgr_num)
+            endpoint_dma=endpoint_dma, endpoint_dma_num=endpoint_dma_num)
 
     def render_testharness(self):
         """Render the testbench of the generated network."""
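[Editor's note] The selection rule used in both hunks above — an endpoint gets a DMA simulation model only when its manager and subordinate protocol lists match — can be illustrated standalone. An editorial sketch with hypothetical endpoint data, not part of the patch:

```python
# Hypothetical, simplified endpoint records mirroring the floogen model.
endpoints = [
    {"name": "cva6",     "mgr_port_protocol": ["narrow"],         "sbr_port_protocol": ["narrow"]},
    {"name": "idma",     "mgr_port_protocol": ["narrow", "wide"], "sbr_port_protocol": ["narrow", "wide"]},
    {"name": "zero_mem", "mgr_port_protocol": None,               "sbr_port_protocol": ["wide"]},
]

# Same rule as render_tb(): only endpoints whose mgr and sbr protocol
# lists match get a DMA simulation model in the testbench.
endpoint_dma = [ep for ep in endpoints
                if ep["sbr_port_protocol"] == ep["mgr_port_protocol"]]
# One end_of_sim bit per manager port of each DMA endpoint.
endpoint_dma_num = sum(len(ep["mgr_port_protocol"]) for ep in endpoint_dma)
assert [ep["name"] for ep in endpoint_dma] == ["cva6", "idma"]
assert endpoint_dma_num == 3
```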
diff --git a/floogen/templates/tb_floo_compute_tile_array.sv.mako b/floogen/templates/tb_floo_compute_tile_array.sv.mako
index cc7aca8b..df8e9398 100644
--- a/floogen/templates/tb_floo_compute_tile_array.sv.mako
+++ b/floogen/templates/tb_floo_compute_tile_array.sv.mako
@@ -59,13 +59,13 @@ module tb_floo_compute_tile_array;
     ${noc.render_tb_dut_ports()}
   );
 
-  logic [${len(cp_tiles)+endpoint_mgr_num-1}:0] endsim_cluster;
+  logic [${len(cp_tiles)+endpoint_dma_num-1}:0] endsim_cluster;
   // Get end_of_sim signal inside DUT
 % for i in range(0,len(cp_tiles)):
   assign endsim_cluster[${i}] = &tb_floo_compute_tile_array.i_${noc.name}_floo_noc.compute_tile_${str(cp_tiles[i].id.x)}_${str(cp_tiles[i].id.y)}.i_snitch_cluster_test_node.end_of_sim;
 % endfor
 <% i = 0 %>\
-% for ep in endpoint_mgr:
+% for ep in endpoint_dma:
   % for ep_mgr_prot in ep.mgr_port_protocol:
   assign endsim_cluster[${i+len(cp_tiles)}] = &tb_floo_compute_tile_array.i_floo_${ep_mgr_prot}_${ep.name}_model.end_of_sim;
 <% i += 1 %>\
diff --git a/hw/chiplet_floo_noc.sv b/hw/chiplet_floo_noc.sv
index 9e8ed129..b0d79db7 100644
--- a/hw/chiplet_floo_noc.sv
+++ b/hw/chiplet_floo_noc.sv
@@ -37,14 +37,20 @@ module chiplet_floo_noc
   input axi_narrow_out_rsp_t jtag_narrow_rsp_i,
   input axi_narrow_in_req_t cva6_narrow_req_i,
   output axi_narrow_in_rsp_t cva6_narrow_rsp_o,
+  output axi_narrow_out_req_t cva6_narrow_req_o,
+  input axi_narrow_out_rsp_t cva6_narrow_rsp_i,
   output axi_wide_out_req_t spm_wide_wide_req_o,
   input axi_wide_out_rsp_t spm_wide_wide_rsp_i,
   output axi_narrow_out_req_t spm_narrow_narrow_req_o,
   input axi_narrow_out_rsp_t spm_narrow_narrow_rsp_i,
+  input axi_narrow_in_req_t idma_narrow_req_i,
+  output axi_narrow_in_rsp_t idma_narrow_rsp_o,
   input axi_wide_in_req_t idma_wide_req_i,
   output axi_wide_in_rsp_t idma_wide_rsp_o,
   output axi_narrow_out_req_t idma_narrow_req_o,
   input axi_narrow_out_rsp_t idma_narrow_rsp_i,
+  output axi_wide_out_req_t idma_wide_req_o,
+  input axi_wide_out_rsp_t idma_wide_rsp_i,
   output axi_wide_out_req_t zero_mem_wide_req_o,
   input axi_wide_out_rsp_t zero_mem_wide_rsp_i
 );
@@ -1253,7 +1259,7 @@ localparam id_t cva6_ni_id = '{x: 0, y: 1};
 
 floo_narrow_wide_chimney #(
-  .EnNarrowSbrPort(1'b0),
+  .EnNarrowSbrPort(1'b1),
   .EnNarrowMgrPort(1'b1),
   .EnWideSbrPort(1'b0),
   .EnWideMgrPort(1'b0)
@@ -1264,8 +1270,8 @@ floo_narrow_wide_chimney #(
   .sram_cfg_i ( '0 ),
   .axi_narrow_in_req_i ( cva6_narrow_req_i ),
   .axi_narrow_in_rsp_o ( cva6_narrow_rsp_o ),
-  .axi_narrow_out_req_o ( ),
-  .axi_narrow_out_rsp_i ( '0 ),
+  .axi_narrow_out_req_o ( cva6_narrow_req_o ),
+  .axi_narrow_out_rsp_i ( cva6_narrow_rsp_i ),
   .axi_wide_in_req_i ( '0 ),
   .axi_wide_in_rsp_o ( ),
   .axi_wide_out_req_o ( ),
@@ -1347,22 +1353,22 @@ localparam id_t idma_ni_id = '{x: 9, y: 2};
 
 floo_narrow_wide_chimney #(
   .EnNarrowSbrPort(1'b1),
-  .EnNarrowMgrPort(1'b0),
-  .EnWideSbrPort(1'b0),
+  .EnNarrowMgrPort(1'b1),
+  .EnWideSbrPort(1'b1),
   .EnWideMgrPort(1'b1)
 ) idma_ni (
   .clk_i,
   .rst_ni,
   .test_enable_i,
   .sram_cfg_i ( '0 ),
-  .axi_narrow_in_req_i ( '0 ),
-  .axi_narrow_in_rsp_o ( ),
+  .axi_narrow_in_req_i ( idma_narrow_req_i ),
+  .axi_narrow_in_rsp_o ( idma_narrow_rsp_o ),
   .axi_narrow_out_req_o ( idma_narrow_req_o ),
   .axi_narrow_out_rsp_i ( idma_narrow_rsp_i ),
   .axi_wide_in_req_i ( idma_wide_req_i ),
   .axi_wide_in_rsp_o ( idma_wide_rsp_o ),
-  .axi_wide_out_req_o ( ),
-  .axi_wide_out_rsp_i ( '0 ),
+  .axi_wide_out_req_o ( idma_wide_req_o ),
+  .axi_wide_out_rsp_i ( idma_wide_rsp_i ),
   .id_i ( idma_ni_id ),
   .route_table_i ( '0 ),
   .floo_req_o ( idma_ni_to_router_7_1_req ),
diff --git a/hw/floo_narrow_wide_pkg.sv b/hw/floo_narrow_wide_pkg.sv
index 37ab5c2a..9ec478c3 100644
--- a/hw/floo_narrow_wide_pkg.sv
+++ b/hw/floo_narrow_wide_pkg.sv
@@ -154,8 +154,8 @@ package floo_narrow_wide_pkg;
   localparam bit UseIdTable = 1'b1;
   localparam int unsigned NumXBits = 4;
   localparam int unsigned NumYBits = 3;
-  localparam int unsigned XYAddrOffsetX = 40;
-  localparam int unsigned XYAddrOffsetY = 44;
+  localparam int unsigned XYAddrOffsetX = 48;
+  localparam int unsigned XYAddrOffsetY = 52;
   localparam border_id_t BorderId = '{north: 5, west: 0, south: 0, east: 9};
   localparam int unsigned IdAddrOffset = 0;
 
@@ -176,7 +176,7 @@ package floo_narrow_wide_pkg;
   /////////////////////
   //  Address Map   //
   /////////////////////
 
-  localparam int unsigned SamNumRules = 55;
+  localparam int unsigned SamNumRules = 56;
 
   typedef struct packed {
    id_t idx;
@@ -440,6 +440,11 @@ package floo_narrow_wide_pkg;
      start_addr: 48'h000000000000,
      end_addr: 48'h000000001000
    }, // jtag_ni
+    '{
+      idx: '{x: 0, y: 1},
+      start_addr: 48'h800000000000,
+      end_addr: 48'h800000100000
+    }, // cva6_ni
    '{
      idx: '{x: 9, y: 4},
      start_addr: 48'h000071000000,
diff --git a/hw/tb/floo_testharness.sv b/hw/tb/floo_testharness.sv
index 1be4fe05..a301318b 100644
--- a/hw/tb/floo_testharness.sv
+++ b/hw/tb/floo_testharness.sv
@@ -199,14 +199,20 @@ module floo_testharness
     .jtag_narrow_rsp_i(jtag_narrow_out_rsp),
     .cva6_narrow_req_i(cva6_narrow_in_req),
     .cva6_narrow_rsp_o(cva6_narrow_in_rsp),
+    .cva6_narrow_req_o(cva6_narrow_out_req),
+    .cva6_narrow_rsp_i(cva6_narrow_out_rsp),
     .spm_wide_wide_req_o(spm_wide_wide_req),
     .spm_wide_wide_rsp_i(spm_wide_wide_rsp),
     .spm_narrow_narrow_req_o(spm_narrow_narrow_req),
     .spm_narrow_narrow_rsp_i(spm_narrow_narrow_rsp),
+    .idma_narrow_req_i(idma_narrow_in_req),
+    .idma_narrow_rsp_o(idma_narrow_in_rsp),
     .idma_wide_req_i(idma_wide_in_req),
     .idma_wide_rsp_o(idma_wide_in_rsp),
     .idma_narrow_req_o(idma_narrow_out_req),
     .idma_narrow_rsp_i(idma_narrow_out_rsp),
+    .idma_wide_req_o(idma_wide_out_req),
+    .idma_wide_rsp_i(idma_wide_out_rsp),
     .zero_mem_wide_req_o(zero_mem_wide_req),
     .zero_mem_wide_rsp_i(zero_mem_wide_rsp)
   );
diff --git a/hw/tb/tb_floo_compute_tile_array.sv b/hw/tb/tb_floo_compute_tile_array.sv
index b6cbb3d3..f8726a22 100644
--- a/hw/tb/tb_floo_compute_tile_array.sv
+++ b/hw/tb/tb_floo_compute_tile_array.sv
@@ -209,8 +209,10 @@ module tb_floo_compute_tile_array;
     .axi_out_resp_i(jtag_narrow_in_rsp)
   );
 
-  axi_narrow_in_req_t cva6_narrow_in_req;
-  axi_narrow_in_rsp_t cva6_narrow_in_rsp;
+  axi_narrow_out_req_t cva6_narrow_out_req;
+  axi_narrow_out_rsp_t cva6_narrow_out_rsp;
+  axi_narrow_in_req_t cva6_narrow_in_req;
+  axi_narrow_in_rsp_t cva6_narrow_in_rsp;
 
   dma_test_node #(
     .id_x (0),
@@ -229,8 +231,8 @@ module tb_floo_compute_tile_array;
   ) i_floo_narrow_cva6_model (
     .clk_i (clk),
     .rst_ni (rst_n),
-    .axi_in_req_i ('0),
-    .axi_in_resp_o (),
+    .axi_in_req_i (cva6_narrow_out_req),
+    .axi_in_resp_o (cva6_narrow_out_rsp),
     .axi_out_req_o (cva6_narrow_in_req),
     .axi_out_resp_i(cva6_narrow_in_rsp)
   );
@@ -289,6 +291,10 @@ module tb_floo_compute_tile_array;
 
   axi_narrow_out_req_t idma_narrow_out_req;
   axi_narrow_out_rsp_t idma_narrow_out_rsp;
+  axi_wide_out_req_t idma_wide_out_req;
+  axi_wide_out_rsp_t idma_wide_out_rsp;
+  axi_narrow_in_req_t idma_narrow_in_req;
+  axi_narrow_in_rsp_t idma_narrow_in_rsp;
   axi_wide_in_req_t idma_wide_in_req;
   axi_wide_in_rsp_t idma_wide_in_rsp;
 
@@ -309,8 +315,8 @@ module tb_floo_compute_tile_array;
   ) i_floo_wide_idma_model (
     .clk_i (clk),
     .rst_ni (rst_n),
-    .axi_in_req_i ('0),
-    .axi_in_resp_o (),
+    .axi_in_req_i (idma_wide_out_req),
+    .axi_in_resp_o (idma_wide_out_rsp),
     .axi_out_req_o (idma_wide_in_req),
     .axi_out_resp_i(idma_wide_in_rsp)
   );
@@ -333,8 +339,8 @@ module tb_floo_compute_tile_array;
     .rst_ni (rst_n),
     .axi_in_req_i (idma_narrow_out_req),
     .axi_in_resp_o (idma_narrow_out_rsp),
-    .axi_out_req_o (),
-    .axi_out_resp_i('0)
+    .axi_out_req_o (idma_narrow_in_req),
+    .axi_out_resp_i(idma_narrow_in_rsp)
   );
 
   axi_wide_out_req_t zero_mem_wide_req;
@@ -405,20 +411,26 @@ module tb_floo_compute_tile_array;
     .jtag_narrow_rsp_i(jtag_narrow_out_rsp),
     .cva6_narrow_req_i(cva6_narrow_in_req),
     .cva6_narrow_rsp_o(cva6_narrow_in_rsp),
+    .cva6_narrow_req_o(cva6_narrow_out_req),
+    .cva6_narrow_rsp_i(cva6_narrow_out_rsp),
     .spm_wide_wide_req_o(spm_wide_wide_req),
    .spm_wide_wide_rsp_i(spm_wide_wide_rsp),
     .spm_narrow_narrow_req_o(spm_narrow_narrow_req),
     .spm_narrow_narrow_rsp_i(spm_narrow_narrow_rsp),
+    .idma_narrow_req_i(idma_narrow_in_req),
+    .idma_narrow_rsp_o(idma_narrow_in_rsp),
     .idma_wide_req_i(idma_wide_in_req),
     .idma_wide_rsp_o(idma_wide_in_rsp),
     .idma_narrow_req_o(idma_narrow_out_req),
     .idma_narrow_rsp_i(idma_narrow_out_rsp),
+    .idma_wide_req_o(idma_wide_out_req),
+    .idma_wide_rsp_i(idma_wide_out_rsp),
     .zero_mem_wide_req_o(zero_mem_wide_req),
     .zero_mem_wide_rsp_i(zero_mem_wide_rsp)
   );
 
-  logic [35:0] endsim_cluster;
+  logic [36:0] endsim_cluster;
   // Get end_of_sim signal inside DUT
   assign endsim_cluster[0] = &tb_floo_compute_tile_array.i_chiplet_floo_noc.compute_tile_0_0.i_snitch_cluster_test_node.end_of_sim;
   assign endsim_cluster[1] = &tb_floo_compute_tile_array.i_chiplet_floo_noc.compute_tile_0_1.i_snitch_cluster_test_node.end_of_sim;
@@ -455,7 +467,8 @@ module tb_floo_compute_tile_array;
   assign endsim_cluster[32] = &tb_floo_compute_tile_array.i_floo_narrow_pcie_model.end_of_sim;
   assign endsim_cluster[33] = &tb_floo_compute_tile_array.i_floo_narrow_jtag_model.end_of_sim;
   assign endsim_cluster[34] = &tb_floo_compute_tile_array.i_floo_narrow_cva6_model.end_of_sim;
-  assign endsim_cluster[35] = &tb_floo_compute_tile_array.i_floo_wide_idma_model.end_of_sim;
+  assign endsim_cluster[35] = &tb_floo_compute_tile_array.i_floo_narrow_idma_model.end_of_sim;
+  assign endsim_cluster[36] = &tb_floo_compute_tile_array.i_floo_wide_idma_model.end_of_sim;
 
   initial begin
     wait (&endsim_cluster);
diff --git a/hw/test/dma_test_node.sv b/hw/test/dma_test_node.sv
index a7f4af94..09c6a69c 100644
--- a/hw/test/dma_test_node.sv
+++ b/hw/test/dma_test_node.sv
@@ -27,17 +27,11 @@ module dma_test_node
   output axi_in_req_t axi_out_req_o,
   input axi_in_rsp_t axi_out_resp_i
 );
-  // Make its switch able between DMA and normal AXI package
-
-  // Convert between id_i of base 1 to base 0 index
-  localparam int unsigned x = id_x - 1;
-  localparam int unsigned y = id_y - 1;
-
   localparam string TypeName = is_narrow ? "narrow" : "wide";
-  localparam string DmaName = $sformatf("%s_dma_%0d_%0d", TypeName, x, y);
+  localparam string DmaName = $sformatf("%s_dma_%0d_%0d", TypeName, id_x, id_y);
 
   // Assign unique job ID for each DMA test node
-  localparam int unsigned Index = y * NumX + x + 1;
+  localparam int unsigned Index = id_y * NumX + id_x;
   localparam int unsigned JobId = is_narrow ? Index + 1000 : Index;
 
   localparam sam_rule_t local_addrmap = find_addrmap_by_xy_id(id_x, id_y);
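[Editor's note] The job-ID convention used above (and in snitch_cluster_test_node.sv below) mirrors the one in util/gen_jobs.py: wide jobs use index id_y * NumX + id_x, and narrow jobs add an offset of 1000. A small editorial cross-check, not part of the patch:

```python
NUM_X = 8  # clusters per row, as in the 8x4 chiplet config

def job_id(x: int, y: int, is_narrow: bool) -> int:
    # Same formula as dma_test_node.sv / snitch_cluster_test_node.sv:
    # Index = id_y * NumX + id_x; narrow jobs are offset by 1000.
    index = y * NUM_X + x
    return index + 1000 if is_narrow else index

# Cluster (2, 1): wide job file 10, narrow job file 1010 — matching the
# emit_jobs() calls in gen_compute_tile_array_traffic().
assert job_id(2, 1, is_narrow=False) == 10
assert job_id(2, 1, is_narrow=True) == 1010
```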
diff --git a/hw/test/snitch_cluster_test_node.sv b/hw/test/snitch_cluster_test_node.sv
index e2ecbdeb..f58fa44c 100644
--- a/hw/test/snitch_cluster_test_node.sv
+++ b/hw/test/snitch_cluster_test_node.sv
@@ -20,17 +20,11 @@ module snitch_cluster_test_node
   input axi_wide_out_req_t wide_in_req_i,
   output axi_wide_out_rsp_t wide_in_resp_o
 );
-  // Make its switch able between DMA and normal AXI package
-
-  // Convert between id_i of base 1 to base 0 index
-  localparam int unsigned x = id_x - 1;
-  localparam int unsigned y = id_y - 1;
-
-  localparam string NarrowDmaName = $sformatf("narrow_dma_%0d_%0d", x, y);
-  localparam string WideDmaName = $sformatf("wide_dma_%0d_%0d", x, y);
+  localparam string NarrowDmaName = $sformatf("narrow_dma_%0d_%0d", id_x, id_y);
+  localparam string WideDmaName = $sformatf("wide_dma_%0d_%0d", id_x, id_y);
 
   // Assign unique job ID for each DMA test node
-  localparam int unsigned Index = y * NumX + x + 1;
+  localparam int unsigned Index = id_y * NumX + id_x;
 
   localparam sam_rule_t local_addrmap = find_addrmap_by_xy_id(id_x, id_y);
   localparam logic [AxiNarrowInAddrWidth-1:0] DMAMemBaseAddr = local_addrmap.start_addr; // byte unit
diff --git a/util/gen_jobs.py b/util/gen_jobs.py
index 5ca3f599..e9413ee3 100755
--- a/util/gen_jobs.py
+++ b/util/gen_jobs.py
@@ -25,13 +25,55 @@
 def get_xy_base_addr(x: int, y: int):
     assert x <= NUM_X+1 and y <= NUM_Y+1
     return (x + 2 ** clog2(NUM_X + 2) * y) * MEM_SIZE
 
-def find_soc_type(AddrMap, target_soc_type):
+# def find_soc_type(AddrMap, target_soc_type):
+#     addrmap_soc = list()
+#     for node_addr in AddrMap:
+#         if (node_addr["soc_type"]==target_soc_type):
+#             addrmap_soc.append(node_addr)
+#     return addrmap_soc
+
+def find_sbr_module(AddrMap):
+    # Nodes with an active subordinate (narrow or wide) port
+    addrmap_soc = list()
+    for node_addr in AddrMap:
+        if node_addr["sbr_port"]["narrow"] or node_addr["sbr_port"]["wide"]:
+            addrmap_soc.append(node_addr)
+    return addrmap_soc
+
+def find_mgr_module(AddrMap):
+    # Nodes with an active manager (narrow or wide) port
+    addrmap_soc = list()
+    for node_addr in AddrMap:
+        if node_addr["mgr_port"]["narrow"] or node_addr["mgr_port"]["wide"]:
+            addrmap_soc.append(node_addr)
+    return addrmap_soc
+
+def find_memory(AddrMap):
+    # Memory-like nodes, i.e. nodes without any manager port
     addrmap_soc = list()
     for node_addr in AddrMap:
-        if (node_addr["soc_type"]==target_soc_type):
+        if not (node_addr["mgr_port"]["narrow"] or node_addr["mgr_port"]["wide"]):
             addrmap_soc.append(node_addr)
     return addrmap_soc
 
+def filter_sbr_type(node_list, node_type):
+    # Keep only nodes whose subordinate port matches node_type;
+    # "narrow_wide" performs no filtering
+    if node_type == "narrow_wide":
+        return node_list
+    node_out = list()
+    for node in node_list:
+        if node["sbr_port"][node_type]:
+            node_out.append(node)
+    return node_out
+
+def filter_exclude_idx(node_list, idx):
+    # Generate a node list that excludes the node at the given index (self)
+    node_out = node_list.copy()
+    rm_node = None
+    for node in node_list:
+        if (node["idx"]["x"]==idx["x"] and node["idx"]["y"]==idx["y"]):
+            rm_node = node
+            break
+    if rm_node is not None:
+        node_out.remove(rm_node)
+    return node_out
+
 def find_shortest_path(node_list, idx):
     shortest_node = dict()
     shortest_path = 100000 # init to more than every possible path
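[Editor's note] Taken together, these helpers turn the flat AddrMap into candidate target lists. An editorial sketch (not part of the patch; the stub entries and import path are assumptions) of how the "random" traffic branch composes them:

```python
import random
# Assumes the repo root is on PYTHONPATH so the helpers above are importable.
from util.gen_jobs import find_sbr_module, filter_sbr_type, filter_exclude_idx

# Stub address map with the same fields gen_jobs.py expects from
# util/soc_config.py (entries abbreviated, values hypothetical).
ADDR_MAP = [
    {"idx": {"x": 1, "y": 1}, "name": "cluster_0_0",
     "mgr_port": {"narrow": True, "wide": True},
     "sbr_port": {"narrow": True, "wide": True},
     "start_addr": 0x1000_0000, "end_addr": 0x1004_0000},
    {"idx": {"x": 9, "y": 1}, "name": "zero_mem",
     "mgr_port": {"narrow": False, "wide": False},
     "sbr_port": {"narrow": False, "wide": True},
     "start_addr": 0x80_0000_0000, "end_addr": 0x82_0000_0000},
]

mgr = ADDR_MAP[0]
# As in the "random" branch: all subordinate nodes, filtered down to the
# manager's port type, excluding the manager itself.
candidates = filter_exclude_idx(
    filter_sbr_type(find_sbr_module(ADDR_MAP), "wide"), mgr["idx"])
ext = random.choice(candidates)  # equivalent to the rand_idx indexing
print(mgr["name"], "-> DMA ->", ext["name"])
```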
@@ -213,87 +255,111 @@ def gen_compute_tile_array_traffic(
     # pylint: disable=too-many-arguments, too-many-locals
     """Generate compute tile array traffic."""
 
-    cluster_list = find_soc_type(soc_config.AddrMap, "cluster")
-    hbm_list = find_soc_type(soc_config.AddrMap, "memory")
+    # cluster_list = find_soc_type(soc_config.AddrMap, "cluster")
+    # hbm_list = find_soc_type(soc_config.AddrMap, "memory")
+
+    mgr_module_list = find_mgr_module(soc_config.AddrMap)
+    sbr_module_list = find_sbr_module(soc_config.AddrMap)
+    memory_list = find_memory(soc_config.AddrMap)
+    # sbr_narrow_list = find_sbr_module(soc_config.AddrMap, "narrow")
+    # sbr_wide_list = find_sbr_module(soc_config.AddrMap, "wide")
+    # sbr_narrow_wide_list = find_sbr_module(soc_config.AddrMap, ["narrow_wide"])
 
     # ID table based DMA jobs generation
-    for cluster in cluster_list:
+    for mgr_module in mgr_module_list:
         wide_jobs = ""
         narrow_jobs = ""
+        mgr_port_type = "narrow_wide"
         wide_length = wide_burst_length * soc_config.data_widths["wide"] / 8 # total data to be transfer over wide DMA interface in byte unit
         narrow_length = narrow_burst_length * soc_config.data_widths["narrow"] / 8 # total data to be transfer over narrow DMA interface in byte unit
+        if not mgr_module["mgr_port"]["narrow"]:
+            mgr_port_type = "wide"
+        elif not mgr_module["mgr_port"]["wide"]:
+            mgr_port_type = "narrow"
         # assert wide_length <= MEM_SIZE and narrow_length <= MEM_SIZE
-        # 1) cluster DMA from shortest path hbm
-        if traffic_type == "hbm":
-            ext = find_shortest_path(hbm_list, cluster["idx"])
-            src_addr = ext["start_addr"] if rw == "read" else cluster["start_addr"]
-            dst_addr = cluster["start_addr"] if rw == "read" else ext["start_addr"]
-        # 2) cluster DMA from random hbm
-        elif traffic_type == "hbm_rand":
+        # 1) manager node DMA to/from the shortest-path memory
+        if traffic_type == "memory":
+            sel_sbr_module_list = filter_sbr_type(memory_list, mgr_port_type) # No filter for mgr_port_type="narrow_wide"
+            sel_sbr_module_list = filter_exclude_idx(sel_sbr_module_list, mgr_module["idx"])
+            ext = find_shortest_path(sel_sbr_module_list, mgr_module["idx"])
+            src_addr = ext["start_addr"] if rw == "read" else mgr_module["start_addr"]
+            dst_addr = mgr_module["start_addr"] if rw == "read" else ext["start_addr"]
+        # 2) manager node DMA to/from a random memory
+        elif traffic_type == "memory_rand":
+            sel_sbr_module_list = filter_sbr_type(memory_list, mgr_port_type) # No filter for mgr_port_type="narrow_wide"
+            sel_sbr_module_list = filter_exclude_idx(sel_sbr_module_list, mgr_module["idx"])
-            # Find possible number of hbm target node
-            hbm_node_num = len(hbm_list)
-            # Ramdom picking target node
-            rand_idx = random.randint(0, hbm_node_num-1)
-            ext = hbm_list[rand_idx]
-            src_addr = ext["start_addr"] if rw == "read" else cluster["start_addr"]
-            dst_addr = cluster["start_addr"] if rw == "read" else ext["start_addr"]
+            # Find the number of possible target nodes
+            sbr_node_num = len(sel_sbr_module_list)
+            # Random picking target node
+            rand_idx = random.randint(0, sbr_node_num-1)
+            ext = sel_sbr_module_list[rand_idx]
+            src_addr = ext["start_addr"] if rw == "read" else mgr_module["start_addr"]
+            dst_addr = mgr_module["start_addr"] if rw == "read" else ext["start_addr"]
-        # 3) cluster DMA from random hbm and random cluster
+        # 3) manager node DMA to/from a random subordinate node
         elif traffic_type == "random":
-            # Generate cluster list that self cluster
-            cluster_noself_list = cluster_list.copy()
-            cluster_noself_list.remove(cluster)
-            # Find possible number of target node for both cluster and hbm
-            cluster_node_num = len(cluster_noself_list)
-            hbm_node_num = len(hbm_list)
-            # Ramdom picking target node
-            rand_idx = random.randint(0, cluster_node_num+hbm_node_num-1)
-            if (rand_idx < cluster_node_num):
-                ext = cluster_noself_list[rand_idx]
-            else:
-                ext = hbm_list[rand_idx-cluster_node_num]
-            src_addr = ext["start_addr"] if rw == "read" else cluster["start_addr"]
-            dst_addr = cluster["start_addr"] if rw == "read" else ext["start_addr"]
-        # 4) cluster DMA from random cluster
-        elif traffic_type == "cluster_rand":
-            # Generate cluster list that self cluster
-            cluster_noself_list = cluster_list.copy()
-            cluster_noself_list.remove(cluster)
-            # Find possible number of cluster target node
-            cluster_node_num = len(cluster_noself_list)
-            # Ramdom picking target node
-            rand_idx = random.randint(0, cluster_node_num-1)
-            ext = cluster_noself_list[rand_idx]
-            src_addr = ext["start_addr"] if rw == "read" else cluster["start_addr"]
-            dst_addr = cluster["start_addr"] if rw == "read" else ext["start_addr"]
-        # 5) cluster DMA from the next upper node
+            sel_sbr_module_list = filter_sbr_type(sbr_module_list, mgr_port_type) # No filter for mgr_port_type="narrow_wide"
+            sel_sbr_module_list = filter_exclude_idx(sel_sbr_module_list, mgr_module["idx"])
+            # Find the number of possible target nodes
+            sbr_node_num = len(sel_sbr_module_list)
+            # Random picking target node
+            rand_idx = random.randint(0, sbr_node_num-1)
+            ext = sel_sbr_module_list[rand_idx]
+            src_addr = ext["start_addr"] if rw == "read" else mgr_module["start_addr"]
+            dst_addr = mgr_module["start_addr"] if rw == "read" else ext["start_addr"]
+        # 4) manager node DMA to/from another manager node
+        elif traffic_type == "mgr_rand":
+            sel_sbr_module_list = find_sbr_module(mgr_module_list)
+            sel_sbr_module_list = filter_sbr_type(sel_sbr_module_list, mgr_port_type) # No filter for mgr_port_type="narrow_wide"
+            sel_sbr_module_list = filter_exclude_idx(sel_sbr_module_list, mgr_module["idx"])
+            # Find the number of possible target nodes
+            sbr_node_num = len(sel_sbr_module_list)
+            # Random picking target node
+            rand_idx = random.randint(0, sbr_node_num-1)
+            ext = sel_sbr_module_list[rand_idx]
+            src_addr = ext["start_addr"] if rw == "read" else mgr_module["start_addr"]
+            dst_addr = mgr_module["start_addr"] if rw == "read" else ext["start_addr"]
+        # 5) manager node DMA to/from the next node to the right (east)
         elif traffic_type == "onehop":
-            # Filter only hbm memory and cluster
-            all_node_list = cluster_list + hbm_list
+            sel_sbr_module_list = filter_sbr_type(sbr_module_list, mgr_port_type) # No filter for mgr_port_type="narrow_wide"
+            sel_sbr_module_list = filter_exclude_idx(sel_sbr_module_list, mgr_module["idx"])
-            # Access to next upper hop
-            ext_idx = {"x": cluster["idx"]["x"], "y": cluster["idx"]["y"]+1};
+            # Access the next hop to the east (x+1)
+            ext_idx = {"x": mgr_module["idx"]["x"]+1, "y": mgr_module["idx"]["y"]}
             # Find matching target idx
-            ext = find_target_node(all_node_list, ext_idx);
+            ext = find_target_node(sel_sbr_module_list, ext_idx)
             if (ext==-1):
                 wide_length = 0
                 narrow_length = 0
                 src_addr = 0
                 dst_addr = 0
             else:
-                src_addr = ext["start_addr"] if rw == "read" else cluster["start_addr"]
-                dst_addr = cluster["start_addr"] if rw == "read" else ext["start_addr"]
+                src_addr = ext["start_addr"] if rw == "read" else mgr_module["start_addr"]
+                dst_addr = mgr_module["start_addr"] if rw == "read" else ext["start_addr"]
         else:
             raise ValueError(f"Unknown traffic type: {traffic_type}")
-        ext_mem_size = ext["end_addr"] - ext["start_addr"]
-        assert wide_length <= ext_mem_size and narrow_length <= ext_mem_size
-        # Print DMA sumary
-        print(" [jobs] " + cluster["name"] + str(cluster["idx"]) + " DMA " + rw + " to " + ext["name"] + str(ext["idx"]))
-        # Write DMA jobs to file
+        if ext != -1:
+            ext_mem_size = ext["end_addr"] - ext["start_addr"]
+            assert wide_length <= ext_mem_size and narrow_length <= ext_mem_size
+            sbr_port_type = "narrow_wide"
+            if not ext["sbr_port"]["narrow"]:
+                sbr_port_type = "wide"
+            elif not ext["sbr_port"]["wide"]:
+                sbr_port_type = "narrow"
+            if sbr_port_type == "narrow_wide" and mgr_port_type != "narrow_wide":
+                sbr_port_type = mgr_port_type
+            # Print DMA summary
+            print(" [jobs] " + mgr_module["name"] + str(mgr_module["idx"]) +
+                  " " + sbr_port_type + " DMA " + rw + " to " + ext["name"] + str(ext["idx"]))
+            # Write DMA jobs to file, separately for the narrow and wide interfaces
+            if "wide" not in sbr_port_type:
+                wide_length = 0
+            if "narrow" not in sbr_port_type:
+                narrow_length = 0
         for _ in range(num_wide_bursts):
             wide_jobs += gen_job_str(wide_length, src_addr, dst_addr)
+        emit_jobs(wide_jobs, out_dir, "compute_tile_array", mgr_module["idx"]["x"] + mgr_module["idx"]["y"] * soc_config.NUM_X)
         for _ in range(num_narrow_bursts):
             narrow_jobs += gen_job_str(narrow_length, src_addr, dst_addr)
-        emit_jobs(wide_jobs, out_dir, "compute_tile_array", cluster["idx"]["x"] + (cluster["idx"]["y"] - 1) * soc_config.NUM_X)
-        emit_jobs(narrow_jobs, out_dir, "compute_tile_array", cluster["idx"]["x"] + (cluster["idx"]["y"] - 1) * soc_config.NUM_X + 1000)
+        emit_jobs(narrow_jobs, out_dir, "compute_tile_array", mgr_module["idx"]["x"] + mgr_module["idx"]["y"] * soc_config.NUM_X + 1000)
 
 def main():
     """Main function."""
diff --git a/util/soc_config.py b/util/soc_config.py
index ef5d8116..cb29558b 100644
--- a/util/soc_config.py
+++ b/util/soc_config.py
@@ -59,9 +59,10 @@
     {"idx": {"x": 0, "y": 4}, "name": "pcie", "mgr_port": {"narrow": True, "wide": False}, "sbr_port": {"narrow": True, "wide": False}, "start_addr": int("0x000020000000",16), "end_addr": int("0x000070000000",16)},
     {"idx": {"x": 0, "y": 3}, "name": "peripherals", "mgr_port": {"narrow": False, "wide": False}, "sbr_port": {"narrow": True, "wide": False}, "start_addr": int("0x000001000000",16), "end_addr": int("0x00000f000000",16)},
     {"idx": {"x": 0, "y": 2}, "name": "jtag", "mgr_port": {"narrow": True, "wide": False}, "sbr_port": {"narrow": True, "wide": False}, "start_addr": int("0x000000000000",16), "end_addr": int("0x000000001000",16)},
+    {"idx": {"x": 0, "y": 1}, "name": "cva6", "mgr_port": {"narrow": True, "wide": False}, "sbr_port": {"narrow": True, "wide": False}, "start_addr": int("0x800000000000",16), "end_addr": int("0x800000100000",16)},
     {"idx": {"x": 9, "y": 4}, "name": "spm_wide", "mgr_port": {"narrow": False, "wide": False}, "sbr_port": {"narrow": False, "wide": True}, "start_addr": int("0x000071000000",16), "end_addr": int("0x000071100000",16)},
     {"idx": {"x": 9, "y": 3}, "name": "spm_narrow", "mgr_port": {"narrow": False, "wide": False}, "sbr_port": {"narrow": True, "wide": False}, "start_addr": int("0x000070000000",16), "end_addr": int("0x000070080000",16)},
-    {"idx": {"x": 9, "y": 2}, "name": "idma", "mgr_port": {"narrow": False, "wide": True}, "sbr_port": {"narrow": True, "wide": False}, "start_addr": int("0x000011000000",16), "end_addr": int("0x000011010000",16)},
+    {"idx": {"x": 9, "y": 2}, "name": "idma", "mgr_port": {"narrow": True, "wide": True}, "sbr_port": {"narrow": True, "wide": True}, "start_addr": int("0x000011000000",16), "end_addr": int("0x000011010000",16)},
     {"idx": {"x": 9, "y": 1}, "name": "zero_mem", "mgr_port": {"narrow": False, "wide": False}, "sbr_port": {"narrow": False, "wide": True}, "start_addr": int("0x008000000000",16), "end_addr": int("0x008200000000",16)}
 ]
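[Editor's note] The AddrMap entries above drive both job generation and the SAM rules in hw/floo_narrow_wide_pkg.sv, so they are worth sanity-checking. An editorial sketch (not part of the patch; it assumes the repo root is on PYTHONPATH):

```python
from util import soc_config  # assumption: repo root on PYTHONPATH

def check_addrmap(addr_map):
    # Print each node's index and address-range size; fail on empty ranges.
    for node in addr_map:
        size = node["end_addr"] - node["start_addr"]
        assert size > 0, f"empty range for {node['name']}"
        print(f"{node['name']:12s} idx=({node['idx']['x']},{node['idx']['y']}) "
              f"size=0x{size:x}")

check_addrmap(soc_config.AddrMap)
# Expected for the entries added/changed in this patch:
#   cva6  idx=(0,1) size=0x100000  (1 MiB, matches the new SAM rule)
#   idma  idx=(9,2) size=0x10000   (64 KiB, now mgr+sbr on narrow and wide)
```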
"mgr_rand" "onehop") repeat_num_test=(10 50 50 50 10) # match with traffic_type #repeat_num_test=(1 1 1 1 1) # match with traffic_type out_dir="test/jobs" @@ -28,6 +28,9 @@ LEN_WD_BURST_RANGE=$((${wide_burst_length_range[0]}-${wide_burst_length_range[1] RANDOM=$$ make clean-test-random +mkdir -p ${out_dir} +# One time build of VCS simulation binary +make bin/floo_noc_batch.vcs TB_DUT=$tb_dut 2>&1 | tee ${out_dir}/vcs_build_bin.log # Running the simulation for rw in ${rw_type[@]} @@ -65,7 +68,8 @@ do TRAFFIC_NR_BURST_NUM=$num_narrow_bursts TRAFFIC_NR_BURST_LEN=$narrow_burst_length \ TRAFFIC_WD_BURST_NUM=$num_wide_bursts TRAFFIC_WD_BURST_LEN=$wide_burst_length 2>&1 | tee $job_dir/job_stats.log # Run the simulation - make run-vcs-batch TB_DUT=$tb_dut JOB_NAME=$job_type JOB_DIR=$job_dir DMA_TESTNODE=TRUE 2>&1 | tee ${out_dir}/${job_name}_sim_stats.log + #make run-vcs-batch TB_DUT=$tb_dut JOB_NAME=$job_type JOB_DIR=$job_dir DMA_TESTNODE=TRUE 2>&1 | tee ${out_dir}/${job_name}_sim_stats.log + make run-vcs-batch JOB_NAME=$job_type JOB_DIR=$job_dir 2>&1 | tee ${out_dir}/${job_name}_sim_stats.log done done echo ""