Skip to content

Commit

Permalink
rtlsim multibanks
Browse files: browse the repository at this point in the history
  • Loading branch information
tinebp committed Dec 17, 2024
1 parent bae24e5 commit a98d2e2
Show file tree
Hide file tree
Showing 25 changed files with 883 additions and 396 deletions.
6 changes: 3 additions & 3 deletions hw/rtl/Vortex_axi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(

// Adjust memory data width to match AXI interface
for (genvar i = 0; i < `VX_MEM_PORTS; i++) begin : g_mem_adapter
VX_mem_adapter #(
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
Expand All @@ -153,7 +153,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_adapter (
) mem_data_adapter (
.clk (clk),
.reset (reset),

Expand Down Expand Up @@ -192,7 +192,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_PORTS_OUT (AXI_NUM_BANKS),
.NUM_BANKS_OUT (AXI_NUM_BANKS),
.INTERLEAVE (0),
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)
Expand Down
70 changes: 36 additions & 34 deletions hw/rtl/afu/opae/vortex_afu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [`VX_MEM_PORTS-1:0] vx_mem_req_ready_qual;

for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_vx_mem_adapter
VX_mem_adapter #(
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
Expand All @@ -526,7 +526,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DST_TAG_WIDTH (CCI_VX_TAG_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (2)
) vx_mem_adapter (
) vx_mem_data_adapter (
.clk (clk),
.reset (reset),

Expand Down Expand Up @@ -567,7 +567,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.TAG_WIDTH (CCI_VX_TAG_WIDTH)
) cci_vx_mem_arb_in_if[2]();

VX_mem_adapter #(
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
Expand All @@ -576,7 +576,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DST_TAG_WIDTH (CCI_VX_TAG_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) cci_mem_adapter (
) cci_mem_data_adapter (
.clk (clk),
.reset (reset),

Expand Down Expand Up @@ -632,6 +632,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.bus_in_if (cci_vx_mem_arb_in_if),
.bus_out_if (cci_vx_mem_arb_out_if)
);
`UNUSED_VAR (cci_vx_mem_arb_out_if[0].req_data.flags)

// final merged memory interface
wire mem_req_valid [`VX_MEM_PORTS];
Expand All @@ -647,35 +648,36 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [AVS_TAG_WIDTH-1:0] mem_rsp_tag [`VX_MEM_PORTS];
wire mem_rsp_ready [`VX_MEM_PORTS];

// assign port0 to CCI/VX arbiter
assign mem_req_valid[0] = cci_vx_mem_arb_out_if[0].req_valid;
assign mem_req_rw[0] = cci_vx_mem_arb_out_if[0].req_data.rw;
assign mem_req_addr[0] = cci_vx_mem_arb_out_if[0].req_data.addr;
assign mem_req_byteen[0]= cci_vx_mem_arb_out_if[0].req_data.byteen;
assign mem_req_data[0] = cci_vx_mem_arb_out_if[0].req_data.data;
assign mem_req_tag[0] = cci_vx_mem_arb_out_if[0].req_data.tag;
assign cci_vx_mem_arb_out_if[0].req_ready = mem_req_ready[0];

assign cci_vx_mem_arb_out_if[0].rsp_valid = mem_rsp_valid[0];
assign cci_vx_mem_arb_out_if[0].rsp_data.data = mem_rsp_data[0];
assign cci_vx_mem_arb_out_if[0].rsp_data.tag = mem_rsp_tag[0];
assign mem_rsp_ready[0] = cci_vx_mem_arb_out_if[0].rsp_ready;
`UNUSED_VAR (cci_vx_mem_arb_out_if[0].req_data.flags)

// assign other ports to VX memory bus
for (genvar i = 1; i < `VX_MEM_PORTS; ++i) begin : g_mem_bus_if
assign mem_req_valid[i] = vx_mem_bus_if[i].req_valid;
assign mem_req_rw[i] = vx_mem_bus_if[i].req_data.rw;
assign mem_req_addr[i] = vx_mem_bus_if[i].req_data.addr;
assign mem_req_byteen[i]= vx_mem_bus_if[i].req_data.byteen;
assign mem_req_data[i] = vx_mem_bus_if[i].req_data.data;
assign mem_req_tag[i] = AVS_TAG_WIDTH'(vx_mem_bus_if[i].req_data.tag);
assign vx_mem_bus_if[i].req_ready = mem_req_ready[i];

assign vx_mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
assign vx_mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
assign vx_mem_bus_if[i].rsp_data.tag = CCI_VX_TAG_WIDTH'(mem_rsp_tag[i]);
assign mem_rsp_ready[i] = vx_mem_bus_if[i].rsp_ready;
// Drive the merged memory interface arrays (mem_req_* / mem_rsp_*), one slot
// per Vortex memory port. Slot 0 is connected to the CCI/VX arbiter output;
// every other slot is connected directly to the matching vx_mem_bus_if entry.
// The port-0 case is a generate-if inside the loop, so all wiring for the
// merged interface lives under the single g_mem_bus_if scope.
for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_mem_bus_if
if (i == 0) begin : g_i0
// assign port0 to CCI/VX arbiter
// (i is 0 in this branch, so cci_vx_mem_arb_out_if[i] is arbiter output 0)
// request channel: arbiter output -> merged interface
assign mem_req_valid[i] = cci_vx_mem_arb_out_if[i].req_valid;
assign mem_req_rw[i] = cci_vx_mem_arb_out_if[i].req_data.rw;
assign mem_req_addr[i] = cci_vx_mem_arb_out_if[i].req_data.addr;
assign mem_req_byteen[i]= cci_vx_mem_arb_out_if[i].req_data.byteen;
assign mem_req_data[i] = cci_vx_mem_arb_out_if[i].req_data.data;
// NOTE(review): no width cast on the tag here, unlike the other ports below;
// presumably the arbiter output tag is already AVS_TAG_WIDTH bits wide - confirm.
assign mem_req_tag[i] = cci_vx_mem_arb_out_if[i].req_data.tag;
// back-pressure flows the other way: merged interface -> arbiter output
assign cci_vx_mem_arb_out_if[i].req_ready = mem_req_ready[i];

// response channel: merged interface -> arbiter output
assign cci_vx_mem_arb_out_if[i].rsp_valid = mem_rsp_valid[i];
assign cci_vx_mem_arb_out_if[i].rsp_data.data = mem_rsp_data[i];
assign cci_vx_mem_arb_out_if[i].rsp_data.tag = mem_rsp_tag[i];
assign mem_rsp_ready[i] = cci_vx_mem_arb_out_if[i].rsp_ready;
end else begin : g_i
// assign other ports to VX memory bus
// request channel: vx_mem_bus_if[i] -> merged interface
assign mem_req_valid[i] = vx_mem_bus_if[i].req_valid;
assign mem_req_rw[i] = vx_mem_bus_if[i].req_data.rw;
assign mem_req_addr[i] = vx_mem_bus_if[i].req_data.addr;
assign mem_req_byteen[i]= vx_mem_bus_if[i].req_data.byteen;
assign mem_req_data[i] = vx_mem_bus_if[i].req_data.data;
// request tag is size-cast to the merged interface tag width
assign mem_req_tag[i] = AVS_TAG_WIDTH'(vx_mem_bus_if[i].req_data.tag);
assign vx_mem_bus_if[i].req_ready = mem_req_ready[i];

// response channel: merged interface -> vx_mem_bus_if[i]
assign vx_mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
assign vx_mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
// response tag is size-cast back to the bus tag width
assign vx_mem_bus_if[i].rsp_data.tag = CCI_VX_TAG_WIDTH'(mem_rsp_tag[i]);
assign mem_rsp_ready[i] = vx_mem_bus_if[i].rsp_ready;
end
end

// convert merged memory interface to AVS
Expand All @@ -685,7 +687,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.ADDR_WIDTH_OUT(LMEM_ADDR_WIDTH),
.BURST_WIDTH (LMEM_BURST_CTRW),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_PORTS_OUT (NUM_LOCAL_MEM_BANKS),
.NUM_BANKS_OUT (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_TAG_WIDTH),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
Expand Down
135 changes: 53 additions & 82 deletions hw/rtl/cache/VX_cache_data.sv
Original file line number Diff line number Diff line change
Expand Up @@ -55,47 +55,44 @@ module VX_cache_data #(
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (stall)

wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == i);
assign write_mask[i] = write_byteen & {WORD_SIZE{word_en}};
end

if (DIRTY_BYTES != 0) begin : g_dirty_bytes

wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wren;

for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
wire evict = fill || flush;
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign byteen_wdata[i] = {LINE_SIZE{write}}; // only asserted on writes
assign byteen_wren[i] = {LINE_SIZE{init}}
| {LINE_SIZE{evict && evict_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
end

wire byteen_read = fill || flush;
wire byteen_write = init || write || fill || flush;
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_store
wire [LINE_SIZE-1:0] byteen_wdata = {LINE_SIZE{write}}; // only asserted on writes
wire [LINE_SIZE-1:0] byteen_wren = {LINE_SIZE{init || fill || flush}} | write_mask;
wire byteen_write = ((fill || flush) && ((NUM_WAYS == 1) || (evict_way == i)))
|| (write && tag_matches[i])
|| init;
wire byteen_read = fill || flush;

VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.WRENW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
) byteen_store (
.clk (clk),
.reset (reset),
.read (byteen_read),
.write (byteen_write),
.wren (byteen_wren),
.addr (line_idx),
.wdata (byteen_wdata),
.rdata (byteen_rdata)
);
VX_sp_ram #(
.DATAW (LINE_SIZE),
.WRENW (LINE_SIZE),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
) byteen_store (
.clk (clk),
.reset (reset),
.read (byteen_read),
.write (byteen_write),
.wren (byteen_wren),
.addr (line_idx),
.wdata (byteen_wdata),
.rdata (byteen_rdata[i])
);
end

assign evict_byteen = byteen_rdata[way_idx_r];

end else begin : g_no_dirty_bytes
`UNUSED_VAR (init)
`UNUSED_VAR (flush)
Expand All @@ -104,32 +101,32 @@ module VX_cache_data #(

wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;

if (WRITE_ENABLE) begin : g_data_store
// create a single write-enable block ram to reduce area overhead
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
wire line_write;
wire line_read;

for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store

localparam WRENW = WRITE_ENABLE ? LINE_SIZE : 1;

wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [WRENW-1:0] line_wren;

if (WRITE_ENABLE) begin : g_wren
assign line_wdata = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren = {LINE_SIZE{fill}} | write_mask;
end else begin : g_no_wren
`UNUSED_VAR (write_word)
`UNUSED_VAR (write_mask)
assign line_wdata = fill_data;
assign line_wren = 1'b1;
end

assign line_read = read || ((fill || flush) && WRITEBACK);
assign line_write = fill || (write && WRITE_ENABLE);
wire line_write = (fill && ((NUM_WAYS == 1) || (evict_way == i)))
|| (write && tag_matches[i] && WRITE_ENABLE);

wire line_read = read || ((fill || flush) && WRITEBACK);

VX_sp_ram #(
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (NUM_WAYS * LINE_SIZE),
.WRENW (WRENW),
.OUT_REG (1),
.RDW_MODE ("R")
) data_store (
Expand All @@ -140,34 +137,8 @@ module VX_cache_data #(
.wren (line_wren),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata)
.rdata (line_rdata[i])
);
end else begin : g_data_store
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_word)
`UNUSED_VAR (word_idx)
`UNUSED_VAR (tag_matches)

// we don't merge the ways into a single block ram due to WREN overhead
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
) data_store (
.clk (clk),
.reset (reset),
.read (read),
.write (fill && fill_way_en),
.wren (1'b1),
.addr (line_idx),
.wdata (fill_data),
.rdata (line_rdata[i])
);
end
end

assign read_data = line_rdata[way_idx_r];
Expand Down
3 changes: 2 additions & 1 deletion hw/rtl/cache/VX_cache_mshr.sv
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ module VX_cache_mshr #(
VX_dp_ram #(
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RDW_MODE ("R")
.RDW_MODE ("R"),
.RADDR_REG (1)
) mshr_store (
.clk (clk),
.reset (reset),
Expand Down
6 changes: 4 additions & 2 deletions hw/rtl/cache/VX_cache_repl.sv
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ module VX_cache_repl #(
.DATAW (LRU_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (LRU_WIDTH),
.RDW_MODE ("R")
.RDW_MODE ("R"),
.RADDR_REG (1)
) plru_store (
.clk (clk),
.reset (reset),
Expand Down Expand Up @@ -158,7 +159,8 @@ module VX_cache_repl #(
VX_sp_ram #(
.DATAW (WAY_SEL_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.RDW_MODE ("R")
.RDW_MODE ("R"),
.RADDR_REG (1)
) ctr_store (
.clk (clk),
.reset (reset),
Expand Down
3 changes: 2 additions & 1 deletion hw/rtl/cache/VX_cache_tags.sv
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ module VX_cache_tags #(
VX_sp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.RDW_MODE ("W")
.RDW_MODE ("W"),
.RADDR_REG (1)
) tag_store (
.clk (clk),
.reset (reset),
Expand Down
14 changes: 9 additions & 5 deletions hw/rtl/libs/VX_async_ram_patch.sv
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ module VX_async_ram_patch #(
parameter WRENW = 1,
parameter DUAL_PORT = 0,
parameter FORCE_BRAM = 0,
parameter RADDR_REG = 0, // read address registered hint
parameter WRITE_FIRST = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
Expand Down Expand Up @@ -154,7 +155,7 @@ module VX_async_ram_patch #(
.out ({raddr_s, read_s, is_raddr_reg})
);

wire [DATAW-1:0] rdata_s, rdata_a;
wire [DATAW-1:0] rdata_s;

if (1) begin : g_sync_ram
if (WRENW != 1) begin : g_wren
Expand Down Expand Up @@ -204,8 +205,12 @@ module VX_async_ram_patch #(
end
end

if (1) begin : g_async_ram
if (DUAL_PORT != 0) begin : g_dp
if (RADDR_REG) begin : g_raddr_reg
`UNUSED_VAR (is_raddr_reg)
assign rdata = rdata_s;
end else begin : g_async_ram
wire [DATAW-1:0] rdata_a;
if (DUAL_PORT) begin : g_dp
if (WRENW != 1) begin : g_wren
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `RW_RAM_CHECK
Expand Down Expand Up @@ -250,9 +255,8 @@ module VX_async_ram_patch #(
end
end
end
assign rdata = is_raddr_reg ? rdata_s : rdata_a;
end

assign rdata = is_raddr_reg ? rdata_s : rdata_a;

endmodule
`TRACING_ON
Loading

0 comments on commit a98d2e2

Please sign in to comment.