Skip to content

Commit

Permalink
Merge pull request #1495 from ucb-bar/shuttle
Browse files Browse the repository at this point in the history
Add dual-issue in-order "shuttle" core
  • Loading branch information
jerryz123 authored Jun 13, 2023
2 parents fa6d772 + 309bfde commit d287fed
Show file tree
Hide file tree
Showing 12 changed files with 182 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/check-commit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ search () {
done
}

submodules=("cva6" "boom" "ibex" "gemmini" "hwacha" "icenet" "nvdla" "rocket-chip" "sha3" "sifive-blocks" "sifive-cache" "testchipip" "riscv-sodor" "mempress" "bar-fetchers")
submodules=("cva6" "boom" "ibex" "gemmini" "hwacha" "icenet" "nvdla" "rocket-chip" "sha3" "sifive-blocks" "sifive-cache" "testchipip" "riscv-sodor" "mempress" "bar-fetchers" "shuttle")
dir="generators"
branches=("master" "main" "dev")
search
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/defaults.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ REMOTE_COURSIER_CACHE=$REMOTE_WORK_DIR/.coursier-cache

# key value store to get the build groups
declare -A grouping
grouping["group-cores"]="chipyard-cva6 chipyard-ibex chipyard-rocket chipyard-hetero chipyard-boom chipyard-sodor chipyard-digitaltop chipyard-multiclock-rocket chipyard-nomem-scratchpad chipyard-spike chipyard-clone chipyard-prefetchers"
grouping["group-cores"]="chipyard-cva6 chipyard-ibex chipyard-rocket chipyard-hetero chipyard-boom chipyard-sodor chipyard-digitaltop chipyard-multiclock-rocket chipyard-nomem-scratchpad chipyard-spike chipyard-clone chipyard-prefetchers chipyard-shuttle"
grouping["group-peripherals"]="chipyard-dmirocket chipyard-dmiboom chipyard-spiflashwrite chipyard-mmios chipyard-nocores chipyard-manyperipherals chipyard-chiplike"
grouping["group-accels"]="chipyard-mempress chipyard-sha3 chipyard-hwacha chipyard-gemmini chipyard-manymmioaccels chipyard-nvdla"
grouping["group-constellation"]="chipyard-constellation"
Expand Down Expand Up @@ -62,6 +62,7 @@ mapping["chipyard-nocores"]=" CONFIG=NoCoresConfig verilog"
mapping["tracegen"]=" CONFIG=NonBlockingTraceGenL2Config"
mapping["tracegen-boom"]=" CONFIG=BoomTraceGenConfig"
mapping["chipyard-sodor"]=" CONFIG=Sodor5StageConfig"
mapping["chipyard-shuttle"]=" CONFIG=ShuttleConfig"
mapping["chipyard-multiclock-rocket"]=" CONFIG=MulticlockRocketConfig"
mapping["chipyard-nomem-scratchpad"]=" CONFIG=MMIOScratchpadOnlyRocketConfig"
mapping["chipyard-constellation"]=" CONFIG=SharedNoCConfig"
Expand Down
3 changes: 3 additions & 0 deletions .github/scripts/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ case $1 in
chipyard-boom)
run_bmark ${mapping[$1]}
;;
chipyard-shuttle)
run_bmark ${mapping[$1]}
;;
chipyard-dmiboom)
$LOCAL_CHIPYARD_DIR/scripts/generate-ckpt.sh -b $RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/dhrystone.riscv -i 10000
make -C $LOCAL_SIM_DIR $DISABLE_SIM_PREREQ ${mapping[$1]} run-binary LOADARCH=$PWD/dhrystone.riscv.0x80000000.10000.loadarch
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/chipyard-run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,29 @@ jobs:
group-key: "group-cores"
project-key: "chipyard-boom"

chipyard-shuttle-run-tests:
name: chipyard-shuttle-run-tests
needs: prepare-chipyard-cores
runs-on: self-hosted
steps:
- name: Delete old checkout
run: |
ls -alh .
rm -rf ${{ github.workspace }}/* || true
rm -rf ${{ github.workspace }}/.* || true
ls -alh .
- name: Checkout
uses: actions/checkout@v3
- name: Git workaround
uses: ./.github/actions/git-workaround
- name: Create conda env
uses: ./.github/actions/create-conda-env
- name: Run tests
uses: ./.github/actions/run-tests
with:
group-key: "group-cores"
project-key: "chipyard-shuttle"

chipyard-cva6-run-tests:
name: chipyard-cva6-run-tests
needs: prepare-chipyard-cores
Expand Down Expand Up @@ -1049,6 +1072,7 @@ jobs:
chipyard-rocket-run-tests,
chipyard-hetero-run-tests,
chipyard-boom-run-tests,
chipyard-shuttle-run-tests,
chipyard-cva6-run-tests,
chipyard-ibex-run-tests,
chipyard-sodor-run-tests,
Expand Down
5 changes: 4 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@
[submodule "software/embench/embench-iot"]
path = software/embench/embench-iot
url = https://github.com/embench/embench-iot.git
[submodule "shuttle"]
path = generators/shuttle
url = https://github.com/ucb-bar/shuttle.git
[submodule "generators/bar-fetchers"]
path = generators/bar-fetchers
url = https://github.com/ucb-bar/bar-fetchers.git
url = https://github.com/ucb-bar/bar-fetchers.git
7 changes: 6 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ lazy val chipyard = (project in file("generators/chipyard"))
sha3, // On separate line to allow for cleaner tutorial-setup patches
dsptools, `rocket-dsp-utils`,
gemmini, icenet, tracegen, cva6, nvdla, sodor, ibex, fft_generator,
constellation, mempress, barf)
constellation, mempress, barf, shuttle)
.settings(libraryDependencies ++= rocketLibDeps.value)
.settings(
libraryDependencies ++= Seq(
Expand Down Expand Up @@ -203,6 +203,11 @@ lazy val boom = (project in file("generators/boom"))
.settings(libraryDependencies ++= rocketLibDeps.value)
.settings(commonSettings)

lazy val shuttle = (project in file("generators/shuttle"))
.dependsOn(rocketchip)
.settings(libraryDependencies ++= rocketLibDeps.value)
.settings(commonSettings)

lazy val cva6 = (project in file("generators/cva6"))
.dependsOn(rocketchip)
.settings(libraryDependencies ++= rocketLibDeps.value)
Expand Down
8 changes: 8 additions & 0 deletions docs/Generators/Shuttle.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Shuttle RISC-V Core
===================

Shuttle is a Rocket-based superscalar in-order RISC-V core, supporting the base RV64IMAFDC instruction set with supervisor and user-mode. Shuttle is a 6-stage core that can be configured to be dual, three, or quad-issue, although dual-issue is the most sensible design point. Shuttle is not designed to meet any power, performance, or area targets. It exists purely as a demonstrative example of another RISC-V CPU design point.

The superscalar microarchitecture presents the most advantages for 1) floating-point kernels and 2) RoCC accelerator kernels, as scalar control code can execute concurrently with floating point or RoCC instructions, maintaining high utilization of those units.

Shuttle is tape-out proven, and has similar physical design complexity as Rocket.
1 change: 1 addition & 0 deletions docs/Generators/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ so changes to the generators themselves will automatically be used when building
fft
NVDLA
Sodor
Shuttle
Mempress
Prefetchers
122 changes: 99 additions & 23 deletions generators/chipyard/src/main/resources/csrc/cospike.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include <vector>
#include <string>
#include <riscv/sim.h>
#include <riscv/mmu.h>
#include <riscv/encoding.h>
#include <vpi_user.h>
#include <svdpi.h>
#include <sstream>
Expand All @@ -27,7 +29,11 @@ extern std::map<long long int, backing_data_t> backing_mem_data;
#endif

#define CLINT_BASE (0x2000000)
#define CLINT_SIZE (0x1000)
#define CLINT_SIZE (0x10000)
#define UART_BASE (0x54000000)
#define UART_SIZE (0x1000)
#define PLIC_BASE (0xc000000)
#define PLIC_SIZE (0x4000000)

typedef struct system_info_t {
std::string isa;
Expand All @@ -38,13 +44,33 @@ typedef struct system_info_t {
std::vector<char> bootrom;
};

class read_override_device_t : public abstract_device_t {
public:
read_override_device_t(std::string n, reg_t sz) : was_read_from(false), size(size), name(n) { };
bool load(reg_t addr, size_t len, uint8_t* bytes) {
if (addr + len < addr || addr + len > size) return false;
printf("Read from device %s at %lx\n", name.c_str(), addr);
was_read_from = true;
return true;
}
bool store(reg_t addr, size_t len, const uint8_t* bytes) {
return (addr + len >= addr && addr + len <= size);
}
bool was_read_from;
private:
reg_t size;
std::string name;
};

system_info_t* info = NULL;
sim_t* sim = NULL;
bool cospike_debug;
reg_t tohost_addr = 0;
reg_t fromhost_addr = 0;
reg_t cospike_timeout = 0;
std::set<reg_t> magic_addrs;
cfg_t* cfg;
std::vector<read_override_device_t*> read_override_devices;

static std::vector<std::pair<reg_t, mem_t*>> make_mems(const std::vector<mem_cfg_t> &layout)
{
Expand Down Expand Up @@ -90,6 +116,7 @@ extern "C" void cospike_cosim(long long int cycle,
int priv)
{
assert(info);

if (unlikely(!sim)) {
printf("Configuring spike cosim\n");
std::vector<mem_cfg_t> mem_cfg;
Expand All @@ -114,19 +141,31 @@ extern "C" void cospike_cosim(long long int cycle,

std::vector<std::pair<reg_t, mem_t*>> mems = make_mems(cfg->mem_layout());

size_t default_boot_rom_size = 0x10000;
size_t default_boot_rom_addr = 0x10000;
assert(info->bootrom.size() < default_boot_rom_size);
info->bootrom.resize(default_boot_rom_size);

rom_device_t *boot_rom = new rom_device_t(info->bootrom);
mem_t *boot_addr_reg = new mem_t(0x1000);
uint64_t default_boot_addr = 0x80000000;
boot_addr_reg->store(0, 8, (const uint8_t*)(&default_boot_addr));

// Don't actually build a clint
mem_t* clint_mem = new mem_t(CLINT_SIZE);
read_override_device_t* clint = new read_override_device_t("clint", CLINT_SIZE);
read_override_device_t* uart = new read_override_device_t("uart", UART_SIZE);
read_override_device_t* plic = new read_override_device_t("plic", PLIC_SIZE);

read_override_devices.push_back(clint);
read_override_devices.push_back(uart);
read_override_devices.push_back(plic);

std::vector<std::pair<reg_t, abstract_device_t*>> plugin_devices;
// The device map is hardcoded here for now
plugin_devices.push_back(std::pair(0x4000, boot_addr_reg));
plugin_devices.push_back(std::pair(0x10000, boot_rom));
plugin_devices.push_back(std::pair(CLINT_BASE, clint_mem));
plugin_devices.push_back(std::pair(default_boot_rom_addr, boot_rom));
plugin_devices.push_back(std::pair(CLINT_BASE, clint));
plugin_devices.push_back(std::pair(UART_BASE, uart));
plugin_devices.push_back(std::pair(PLIC_BASE, plic));

s_vpi_vlog_info vinfo;
if (!vpi_get_vlog_info(&vinfo))
Expand All @@ -142,6 +181,8 @@ extern "C" void cospike_cosim(long long int cycle,
in_permissive = false;
} else if (arg == "+cospike_debug" || arg == "+cospike-debug") {
cospike_debug = true;
} else if (arg.find("+cospike-timeout=") == 0) {
cospike_timeout = strtoull(arg.substr(17).c_str(), 0, 10);
} else if (!in_permissive) {
htif_args.push_back(arg);
}
Expand All @@ -159,17 +200,19 @@ extern "C" void cospike_cosim(long long int cycle,
.support_impebreak = true
};

printf("isa string is %s\n", info->isa.c_str());
printf("isa string: %s\n", info->isa.c_str());
printf("htif args: ");
for (int i = 0; i < htif_args.size(); i++) {
printf("%s\n", htif_args[i].c_str());
printf("%s", htif_args[i].c_str());
}
printf("\n");

sim = new sim_t(cfg, false,
mems,
plugin_devices,
htif_args,
dm_config,
"cospike.log",
nullptr,
false,
nullptr,
false,
Expand All @@ -193,26 +236,45 @@ extern "C" void cospike_cosim(long long int cycle,
#endif

sim->configure_log(true, true);
// Use our own reset vector
for (int i = 0; i < info->nharts; i++) {
// Use our own reset vector
sim->get_core(hartid)->get_state()->pc = 0x10040;
// Set MMU to support up to sv39, as our normal hw configs do
sim->get_core(hartid)->set_impl(IMPL_MMU_SV48, false);
sim->get_core(hartid)->set_impl(IMPL_MMU_SV57, false);

// HACKS: Our processor's don't implement zicntr fully, they don't provide time
sim->get_core(hartid)->get_state()->csrmap.erase(CSR_TIME);
}
sim->set_debug(cospike_debug);
sim->set_histogram(true);
sim->set_procs_debug(cospike_debug);
printf("Setting up htif for spike cosim\n");
((htif_t*)sim)->start();
printf("Spike cosim started\n");
tohost_addr = ((htif_t*)sim)->get_tohost_addr();
fromhost_addr = ((htif_t*)sim)->get_fromhost_addr();
printf("Tohost : %lx\n", tohost_addr);
printf("Fromhost: %lx\n", fromhost_addr);
printf("Memory base : %lx\n", info->mem0_base);
printf("Memory Size : %lx\n", info->mem0_size);
printf("BootROM base : %lx\n", default_boot_rom_addr);
printf("BootROM size : %lx\n", boot_rom->contents().size());
printf("Memory base : %lx\n", info->mem0_base);
printf("Memory size : %lx\n", info->mem0_size);
}

if (priv & 0x4) { // debug
return;
}

if (cospike_timeout && cycle > cospike_timeout) {
if (sim) {
printf("Cospike reached timeout cycles = %ld, terminating\n", cospike_timeout);
delete sim;
}
exit(0);
}


processor_t* p = sim->get_core(hartid);
state_t* s = p->get_state();
#ifdef COSPIKE_DTM
Expand Down Expand Up @@ -269,14 +331,18 @@ extern "C" void cospike_cosim(long long int cycle,
uint64_t interrupt_cause = cause & 0x7FFFFFFFFFFFFFFF;
bool ssip_interrupt = interrupt_cause == 0x1;
bool msip_interrupt = interrupt_cause == 0x3;
bool stip_interrupt = interrupt_cause == 0x5;
bool mtip_interrupt = interrupt_cause == 0x7;
bool debug_interrupt = interrupt_cause == 0xe;
if (raise_interrupt) {
printf("%d interrupt %lx\n", cycle, cause);

if (ssip_interrupt) {
if (ssip_interrupt || stip_interrupt) {
// do nothing
} else if (msip_interrupt) {
s->mip->backdoor_write_with_mask(MIP_MSIP, MIP_MSIP);
} else if (mtip_interrupt) {
s->mip->backdoor_write_with_mask(MIP_MTIP, MIP_MTIP);
} else if (debug_interrupt) {
return;
} else {
Expand All @@ -295,6 +361,8 @@ extern "C" void cospike_cosim(long long int cycle,
printf("\n");
}
if (valid || raise_interrupt || raise_exception) {
p->clear_waiting_for_interrupt();
for (auto& e : read_override_devices) e->was_read_from = false;
p->step(1);
if (unlikely(cospike_debug)) {
printf("spike pc is %lx\n", s->pc);
Expand Down Expand Up @@ -328,10 +396,8 @@ extern "C" void cospike_cosim(long long int cycle,
if ((waddr == CLINT_BASE + 4*hartid) && w_data == 0) {
s->mip->backdoor_write_with_mask(MIP_MSIP, 0);
}
// Try to remember magic_mem addrs, and ignore these in the future
if ( waddr == tohost_addr && w_data >= info->mem0_base && w_data < (info->mem0_base + info->mem0_size)) {
printf("Probable magic mem %lx\n", w_data);
magic_addrs.insert(w_data);
if ((waddr == CLINT_BASE + 0x4000 + 4*hartid)) {
s->mip->backdoor_write_with_mask(MIP_MTIP, 0);
}
// Try to remember magic_mem addrs, and ignore these in the future
if ( waddr == tohost_addr && w_data >= info->mem0_base && w_data < (info->mem0_base + info->mem0_size)) {
Expand All @@ -357,13 +423,18 @@ extern "C" void cospike_cosim(long long int cycle,
// 2 => vec
// 3 => vec hint
// 4 => csr
bool device_read = false;
for (auto& e : read_override_devices) if (e->was_read_from) device_read = true;

bool ignore_read = (!mem_read.empty() &&
((magic_addrs.count(mem_read_addr) ||
(tohost_addr && mem_read_addr == tohost_addr) ||
(fromhost_addr && mem_read_addr == fromhost_addr) ||
(CLINT_BASE <= mem_read_addr && mem_read_addr < (CLINT_BASE + CLINT_SIZE)))));
bool lr_read = ((insn & MASK_LR_D) == MATCH_LR_D) || ((insn & MASK_LR_W) == MATCH_LR_W);
bool sc_read = ((insn & MASK_SC_D) == MATCH_SC_D) || ((insn & MASK_SC_W) == MATCH_SC_W);

bool ignore_read = sc_read || (!mem_read.empty() &&
(magic_addrs.count(mem_read_addr) ||
device_read ||
lr_read ||
(tohost_addr && mem_read_addr == tohost_addr) ||
(fromhost_addr && mem_read_addr == fromhost_addr)));
// check the type is compliant with writeback first
if ((type == 0 || type == 1))
scalar_wb = true;
Expand All @@ -379,11 +450,16 @@ extern "C" void cospike_cosim(long long int cycle,
bool csr_read = (insn & 0x7f) == 0x73;
if (csr_read)
printf("CSR read %lx\n", csr_addr);
if (csr_read && ((csr_addr == 0xf13) || // mimpid
if (csr_read && ((csr_addr == 0x301) || // misa
(csr_addr == 0x306) || // mcounteren
(csr_addr == 0xf13) || // mimpid
(csr_addr == 0xf12) || // marchid
(csr_addr == 0xf11) || // mvendorid
(csr_addr == 0xb00) || // mcycle
(csr_addr == 0xb02) || // minstret
(csr_addr == 0xc00) || // cycle
(csr_addr == 0xc01) || // time
(csr_addr == 0xc02) || // instret
(csr_addr >= 0x7a0 && csr_addr <= 0x7aa) || // debug trigger registers
(csr_addr >= 0x3b0 && csr_addr <= 0x3ef) // pmpaddr
)) {
Expand All @@ -394,7 +470,7 @@ extern "C" void cospike_cosim(long long int cycle,
// from clint Technically this could be buggy because log_mem_read
// only reports vaddrs, but no software ever should access
// tohost/fromhost/clint with vaddrs anyways
printf("Read override %lx\n", mem_read_addr);
printf("Read override %lx = %lx\n", mem_read_addr, wdata);
s->XPR.write(rd, wdata);
} else if (wdata != regwrite.second.v[0]) {
printf("%d wdata mismatch reg %d %lx != %lx\n", cycle, rd,
Expand Down
Loading

0 comments on commit d287fed

Please sign in to comment.