Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

synthesis timing improvements #77

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 61 additions & 51 deletions mtkcpu/cpu/cpu.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@
from mtkcpu.units.shifter import ShifterUnit, match_shifter_unit
from mtkcpu.units.upper import match_auipc, match_lui
from mtkcpu.utils.common import matcher
from mtkcpu.cpu.isa import Funct3, InstrType, Funct7
from mtkcpu.cpu.isa import Funct3, InstrType, Funct7, Funct12
from mtkcpu.units.debug.top import DebugUnit
from mtkcpu.cpu.priv_isa import IrqCause, TrapCause, PrivModeBits
from mtkcpu.units.debug.cpu_dm_if import CpuRunningState, CpuRunningStateExternalInterface
@@ -239,6 +239,9 @@ def elaborate(self, platform):
# at most one active_unit at any time
active_unit = ActiveUnit()

# Precalculated load/store address (rs1 + offset), registered ahead of EXECUTE in order to improve synth timing.
load_addr_precalculated = Signal(32)

# Register file. Contains two read ports (for rs1, rs2) and one write port.
regs = self.regs
reg_read_port1 = self.reg_read_port1 = m.submodules.reg_read_port1 = regs.read_port()
@@ -349,12 +352,12 @@ def prev(sig: Signal) -> Signal:
logic.src1.eq(rs1val),
logic.src2.eq(Mux(opcode == InstrType.OP_IMM, imm, rs2val)),
]
with m.Elif(active_unit.adder):
with m.If(active_unit.adder):
comb += [
adder.src1.eq(rs1val),
adder.src2.eq(Mux(opcode == InstrType.OP_IMM, imm, rs2val)),
]
with m.Elif(active_unit.shifter):
with m.If(active_unit.shifter):
comb += [
shifter.funct3.eq(funct3),
shifter.funct7.eq(funct7),
@@ -365,34 +368,31 @@ def prev(sig: Signal) -> Signal:
)
),
]
with m.Elif(active_unit.mem_unit):
with m.If(active_unit.mem_unit):
comb += [
mem_unit.en.eq(1),
mem_unit.funct3.eq(funct3),
mem_unit.src1.eq(rs1val),
mem_unit.src1.eq(load_addr_precalculated),
mem_unit.src2.eq(rs2val),
mem_unit.store.eq(opcode == InstrType.STORE),
mem_unit.offset.eq(
Mux(opcode == InstrType.LOAD, imm, Cat(rd, imm[5:12]))
),
]
with m.Elif(active_unit.compare):
with m.If(active_unit.compare):
comb += [
compare.funct3.eq(funct3),
# Compare Unit uses Adder for carry and overflow flags.
adder.src1.eq(rs1val),
adder.src2.eq(Mux(opcode == InstrType.OP_IMM, imm, rs2val)),
# adder.sub set somewhere below
]
with m.Elif(active_unit.branch):
with m.If(active_unit.branch):
comb += [
compare.funct3.eq(funct3),
# Compare Unit uses Adder for carry and overflow flags.
adder.src1.eq(rs1val),
adder.src2.eq(rs2val),
# adder.sub set somewhere below
]
with m.Elif(active_unit.csr):
with m.If(active_unit.csr):
comb += [
csr_unit.func3.eq(funct3),
csr_unit.csr_idx.eq(csr_idx),
@@ -511,6 +511,10 @@ def fetch_with_new_pc(pc : Signal):
instr.eq(ibus.read_data),
]
m.next = "DECODE"

# HACK, FIXME: below is only valid for load/store, to pre-calculate the address
comb += reg_read_port1.addr.eq(ibus.read_data[15:20])

with m.State("DECODE"):
m.next = "EXECUTE"
# here, we have registers already fetched into rs1val, rs2val.
@@ -520,116 +524,119 @@ def fetch_with_new_pc(pc : Signal):
sync += [
active_unit.logic.eq(1),
]
with m.Elif(match_adder_unit(opcode, funct3, funct7)):
with m.If(match_adder_unit(opcode, funct3, funct7)):
sync += [
active_unit.adder.eq(1),
adder.sub.eq(
(opcode == InstrType.ALU) & (funct7 == Funct7.SUB)
),
]
with m.Elif(match_shifter_unit(opcode, funct3, funct7)):
with m.If(match_shifter_unit(opcode, funct3, funct7)):
sync += [
active_unit.shifter.eq(1),
]
with m.Elif(match_loadstore_unit(opcode, funct3, funct7)):
]
with m.If(match_loadstore_unit(opcode, funct3, funct7)):
sync += [
active_unit.mem_unit.eq(1),
]
with m.Elif(match_compare_unit(opcode, funct3, funct7)):


# Address precalculation: avoids chaining the 'add' and the interconnect combinational logic in the same cycle.
offset = Signal(signed(12), name="LD_ST_offset")
comb += offset.eq(Mux(opcode == InstrType.LOAD, imm, Cat(rd, imm[5:12])))
sync += load_addr_precalculated.eq(offset + reg_read_port1.data),


with m.If(match_compare_unit(opcode, funct3, funct7)):
sync += [
active_unit.compare.eq(1),
adder.sub.eq(1),
]
with m.Elif(match_lui(opcode, funct3, funct7)):
with m.If(match_lui(opcode, funct3, funct7)):
sync += [
active_unit.lui.eq(1),
]
comb += [
reg_read_port1.addr.eq(rd),
# rd will be available in next cycle in rs1val
]
with m.Elif(match_auipc(opcode, funct3, funct7)):
with m.If(match_auipc(opcode, funct3, funct7)):
sync += [
active_unit.auipc.eq(1),
]
with m.Elif(match_jal(opcode, funct3, funct7)):
with m.If(match_jal(opcode, funct3, funct7)):
sync += [
active_unit.jal.eq(1),
]
with m.Elif(match_jalr(opcode, funct3, funct7)):
with m.If(match_jalr(opcode, funct3, funct7)):
sync += [
active_unit.jalr.eq(1),
]
with m.Elif(match_branch(opcode, funct3, funct7)):
with m.If(match_branch(opcode, funct3, funct7)):
sync += [
active_unit.branch.eq(1),
adder.sub.eq(1),
]
with m.Elif(match_csr(opcode, funct3, funct7)):
with m.If(match_csr(opcode, funct3, funct7)):
sync += [
active_unit.csr.eq(1)
]
with m.Elif(match_mret(opcode, funct3, funct7)):
with m.If(match_mret(opcode, funct3, funct7)):
sync += [
active_unit.mret.eq(1)
]
with m.Elif(match_sfence_vma(opcode, funct3, funct7)):
with m.If(match_sfence_vma(opcode, funct3, funct7)):
pass # sfence.vma
with m.Elif(opcode == 0b0001111):
with m.If(opcode == 0b0001111):
pass # fence - do nothing, as we are a simple implementation.
with m.Elif(opcode == 0b1110011):
with m.If(imm & 0b1):
# ebreak
with m.If(halt_on_ebreak):
# enter Debug Mode.
m.next = "HALTED"
sync += dcsr.as_view().cause.eq(DCSR_DM_Entry_Cause.EBREAK)
with m.Else():
# EBREAK description from Privileged specs:
# It generates a breakpoint exception and performs no other operation.
trap(TrapCause.BREAKPOINT)
with m.If((opcode == InstrType.SYSTEM) & (imm == Funct12.EBREAK)):
with m.If(halt_on_ebreak):
# enter Debug Mode.
m.next = "HALTED"
sync += dcsr.as_view().cause.eq(DCSR_DM_Entry_Cause.EBREAK)
with m.Else():
# ecall
with m.If(exception_unit.current_priv_mode == PrivModeBits.MACHINE):
trap(TrapCause.ECALL_FROM_M)
with m.Else():
trap(TrapCause.ECALL_FROM_U)
with m.Else():
trap(TrapCause.ILLEGAL_INSTRUCTION)
# EBREAK description from Privileged specs:
# It generates a breakpoint exception and performs no other operation.
trap(TrapCause.BREAKPOINT)
with m.If((opcode == InstrType.SYSTEM) & (imm == Funct12.ECALL)):
with m.If(exception_unit.current_priv_mode == PrivModeBits.MACHINE):
trap(TrapCause.ECALL_FROM_M)
with m.Else():
trap(TrapCause.ECALL_FROM_U)
with m.State("EXECUTE"):
with m.If(active_unit.logic):
sync += [
rdval.eq(logic.res),
]
with m.Elif(active_unit.adder):
with m.If(active_unit.adder):
sync += [
rdval.eq(adder.res),
]
with m.Elif(active_unit.shifter):
with m.If(active_unit.shifter):
sync += [
rdval.eq(shifter.res),
]
with m.Elif(active_unit.mem_unit):
with m.If(active_unit.mem_unit):
sync += [
rdval.eq(mem_unit.res),
]
with m.Elif(active_unit.compare):
with m.If(active_unit.compare):
sync += [
rdval.eq(compare.condition_met),
]
with m.Elif(active_unit.lui):
with m.If(active_unit.lui):
sync += [
rdval.eq(Cat(Const(0, 12), uimm)),
]
with m.Elif(active_unit.auipc):
with m.If(active_unit.auipc):
sync += [
rdval.eq(pc + Cat(Const(0, 12), uimm)),
]
with m.Elif(active_unit.jal | active_unit.jalr):
with m.If(active_unit.jal | active_unit.jalr):
sync += [
rdval.eq(pc + 4),
]
with m.Elif(active_unit.csr):
with m.If(active_unit.csr):
sync += [
rdval.eq(csr_unit.rd_val)
]
@@ -661,6 +668,9 @@ def fetch_with_new_pc(pc : Signal):
m.next = "WRITEBACK"
sync += active_unit.eq(0)

with m.If(active_unit == 0):
trap(TrapCause.ILLEGAL_INSTRUCTION)

jal_offset = Signal(signed(21))
comb += jal_offset.eq(
Cat(
2 changes: 1 addition & 1 deletion mtkcpu/units/debug/impl_config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
PROGBUFSIZE = 3
DATASIZE = 2
PROGBUF_MMIO_ADDR = 0xde88
PROGBUF_MMIO_ADDR = 0x1000_0000

TOOLCHAIN = "riscv-none-elf"
GCC_MARCH = "rv32i_zicsr"
11 changes: 8 additions & 3 deletions mtkcpu/units/loadstore.py
Original file line number Diff line number Diff line change
@@ -160,7 +160,13 @@ def elaborate(self, platform):
num_words = addr_scheme.num_words
start_addr = addr_scheme.first_valid_addr_incl
max_legal_addr = start_addr + self.word_size * (num_words - 1)
req_addr = self.bus.adr
req_addr : Signal = self.bus.adr

# TODO: the longest critical path here.
# can't get rid of it yet, as removing comparators fails test_exception.py
#
# with m.If(req_addr.word_select(7, 4) == (start_addr >> 28)):
#
with m.If((req_addr >= start_addr) & (req_addr <= max_legal_addr)):
m.d.comb += [
slv_bus.connect(self.bus, exclude=["adr"]),
@@ -534,7 +540,6 @@ def __init__(self, mem_port : LoadStoreInterface):

# 'src2' is used only for 'store' instructions.
self.src2 = Signal(32, name="LD_ST_src2")
self.offset = Signal(signed(12), name="LD_ST_offset")

self.res = Signal(32, name="LD_ST_res")
self.en = Signal(name="LD_ST_en") # TODO implement 'ready/valid' interface
@@ -560,7 +565,7 @@ def elaborate(self, platform):
addr_lsb = Signal(2)

comb += [
addr.eq(self.offset + self.src1),
addr.eq(self.src1),
addr_lsb.eq(addr[:2]),
]

2 changes: 2 additions & 0 deletions mtkcpu/utils/tests/utils.py
Original file line number Diff line number Diff line change
@@ -210,6 +210,8 @@ def reg_test(
sim.add_sync_process(capture_write_transactions(cpu=cpu, dict_reference=result_mem))
# sim.add_sync_process(print_mem_transactions(cpu=cpu))
sim.add_sync_process(check_addr_translation_errors(cpu=cpu))

sim.add_sync_process(monitor_pc_and_main_fsm(cpu=cpu, wait_for_first_haltreq=False, log_fn=print))

sim.add_sync_process(
get_sim_register_test(