Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3a90f1a
Update the class diagram
lleoncr Feb 19, 2024
abdeafe
Modify the hardware interface to extend required changes
lleoncr Feb 19, 2024
44ea32a
Update the implementation of Ultrascale
lleon95 Feb 19, 2024
d71e303
Extend the accelerator interface to meet the new requirements
lleon95 Feb 19, 2024
7bd43f5
Add Alveo hardware platform
lleon95 Feb 26, 2024
c3dcd13
Add the accelerator implementation
lleon95 Feb 26, 2024
f64994e
Add the XRT Accelerator to the factory
lleon95 Feb 27, 2024
420cbe5
Adapt the DMA API to support custom memory bank
lleon95 Feb 27, 2024
5f6d946
Add data mover for XRT
lleon95 Feb 27, 2024
6f98b2c
Modify the example and DMA to match the new functionality
lleon95 Feb 27, 2024
382f1ce
Move the basic example to be ZYNQ MPSoC example
lleon95 Feb 27, 2024
c5bf7ff
Add artefacts installer
lleon95 Feb 27, 2024
3873d35
Add the Alveo to HardwareArchitecture
lleon95 Feb 27, 2024
24ddaf0
Reorder the arguments in IDataMover::GetBuffer
lleon95 Feb 27, 2024
710ed1a
Add alveo example for hello xrt
lleon95 Feb 27, 2024
2c4010e
Fix headers
lleon95 Mar 4, 2024
6134b1b
Update readme
lleon95 Mar 4, 2024
983b335
Fix xrt_device errors by disabling pedantic only for headers
lleon95 Mar 4, 2024
4c4c506
Add memory attach method for IMemory
lleon95 Mar 4, 2024
8b9f040
Fix the buffer attachment in example
lleon95 Mar 4, 2024
4589c66
Simplify the hardware declaration
lleon95 Mar 4, 2024
487d3ca
Update CYNQ Readme
lleon95 Mar 4, 2024
7a4bba3
Add exclusive access
lleon95 Mar 4, 2024
92c0900
Update the class diagram
lleon95 Mar 6, 2024
46e9609
Fix documentation
lleon95 Mar 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 51 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ outbuf = allocate(shape=(output_elements,), dtype=np.uint16)
# Run
dma.sendchannel.transfer(inbuf)
accel.write(accel.register_map.CTRL.address, 0x81)
accel.write(accel.register_map.n_elements.address, input_elements)
dma.recvchannel.transfer(outbuf)
dma.recvchannel.wait()

Expand All @@ -53,7 +54,7 @@ del input_hw
del output_hw
```

With CYNQ:
With CYNQ for Xilinx Ultrascale+:

```c++
#include <cynq/cynq.hpp>
Expand All @@ -62,7 +63,7 @@ using namespace cynq;

// Configure the FPGA
auto kArch = HardwareArchitecture::UltraScale;
auto platform = IHardware::Create(kArch, "design.bit", "default.xclbin");
auto platform = IHardware::Create(kArch, "design.bit");

// Extract the accelerator (IP Core) and DMA
// Addresses are given by the design
Expand All @@ -72,17 +73,57 @@ auto accel = platform->GetAccelerator(accel_addr);
auto dma = platform->GetDataMover(dma_addr);

// Allocate buffers and get the pointers
auto inbuf = mover->GetBuffer(input_size);
auto outbuf = mover->GetBuffer(output_size);
auto inbuf = mover->GetBuffer(input_size, accel->GetMemoryBank(0));
auto outbuf = mover->GetBuffer(output_size, accel->GetMemoryBank(1));
uint16_t* input_ptr = inbuf->HostAddress<uint16_t>().get();
uint16_t* output_ptr = outbuf->HostAddress<uint16_t>().get();

// Configure data - Bus: AXI4 Stream is handled by DMA
const uint32_t num_elements = 4096;
const uint64_t addr_num_elements = 0x20;
accel->Write(addr_num_elements, num_elements);

// Run
mover->Upload(in_mem, inbuf->Size(), 0, ExecutionType::Async);
accel->Start(StartMode::Continuous);
inbuf->Sync(SyncType::HostToDevice);
mover->Upload(in_mem, inbuf->Size(), 0, ExecutionType::Sync);
mover->Download(out_mem, outbuf->Size(), 0, ExecutionType::Sync);
outbuf->Sync(SyncType::DeviceToHost);
accel->Stop();

// Dispose? We use RAII
```

With CYNQ for Alveo:

```c++
#include <cynq/cynq.hpp>

using namespace cynq;

// Configure the FPGA
auto kArch = HardwareArchitecture::Alveo;
auto platform = IHardware::Create(kArch, "design.xclbin");

// Extract the accelerator (IP Core) and DMA
// Addresses are given by the design
auto accel = platform->GetAccelerator("vadd");
auto dma = platform->GetDataMover(0);

// Allocate buffers and get the pointers
auto inbuf = mover->GetBuffer(input_size, accel->GetMemoryBank(0));
auto outbuf = mover->GetBuffer(output_size, accel->GetMemoryBank(1));
uint16_t* input_ptr = inbuf->HostAddress<uint16_t>().get();
uint16_t* output_ptr = outbuf->HostAddress<uint16_t>().get();

// Configure the accel - memory mapped
const uint32_t num_elements = 4096;
accel->Attach(0, bo_0);
accel->Attach(1, bo_1);
accel->Attach(2, &num_elements);

// Run
mover->Upload(in_mem, inbuf->Size(), 0, ExecutionType::Async);
accel->Start(StartMode::Once);
mover->Download(out_mem, outbuf->Size(), 0, ExecutionType::Sync);

// Dispose? We use RAII
```
Expand All @@ -92,6 +133,7 @@ outbuf->Sync(SyncType::DeviceToHost);
So far, we have tested CYNQ on:

1. Xilinx KV26-based with Ubuntu 22.04
2. Xilinx Alveo U250 (it should be compatible with other similar Alveo cards) - Shell: xilinx_u250_gen3x16_xdma_4_1_202210_1

## Links & References:

Expand All @@ -106,8 +148,8 @@ Cite Us:
AND Ávila-Torres, Diego
AND Castro-Godínez, Jorge
}},
title = {{CYNQ (v0.1)}},
year = {2023},
title = {{CYNQ (v0.2)}},
year = {2024},
url = {https://github.com/ECASLab/cynq},
}
```
7 changes: 7 additions & 0 deletions docs/About.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,10 @@ The current maintainers are:

* Luis G. Leon Vega <luis.leon@ieee.org>
* Diego Avila Torres <diego.avila@uned.cr>

## Acknowledgements

Thanks to

* AMD HACC programme (ETH Zürich cluster), since it made it possible to add Alveo card support.
* RidgeRun LLC for facilitating access to a Xilinx Kria KV260, since it made it possible to add K26 support.
46 changes: 41 additions & 5 deletions docs/ClassDiagram.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
@startuml
interface IHardware {
+{abstract} Reset() -> Status
+{abstract} GetDataMover(address) -> IDataMover *
+{abstract} GetAccelerator(address) -> IAccelerator *
+{abstract} GetDataMover(address = 0) -> IDataMover *
+{abstract} GetAccelerator(address: uint64) -> IAccelerator *
+{abstract} GetAccelerator(address: string) -> IAccelerator *
+{static} Create(hw: HardwareArchitecture, bitstream: string, xclbin: string) -> IHardware*

+{static} Create(hw: HardwareArchitecture, config: string) -> IHardware*
}

interface IMemory {
Expand All @@ -30,12 +31,16 @@ IMemoryType ..o IMemory
interface IAccelerator {
{abstract} Start(mode: StartMode) -> Status
{abstract} Stop() -> Status
{abstract} Sync() -> Status
{abstract} #WriteRegister(address, data: uint8_t*, size: size_t) -> Status
{abstract} #ReadRegister(address, data: uint8_t*, size: size_t) -> Status
+Write<T>(address, data: T*, elems: size_t) -> Status
+Read<T>(address, data: T*, elems: size_t) -> Status
+Attach<T>(address, data: T*, elems: size_t) -> Status
+Attach(address, mem: std::shared_ptr<IMemory>, elems: size_t) -> Status
{abstract} GetStatus() -> DeviceStatus
+{static} Create(impl: IAcceleratorType, addr: uint64) -> IAccelerator*
+{static} Create(impl: IAcceleratorType, addr: string) -> IAccelerator*
}

enum IAcceleratorType {
Expand Down Expand Up @@ -108,11 +113,19 @@ enum DataMoverType {

class UltraScale {
+Reset() -> Status
+GetDataMover(address, type : DataMoverType) -> IDataMover *
+GetAccelerator(address) -> EmbeddedAccelerator *
+GetDataMover(address, type : DataMoverType) -> DMADataMover *
+GetAccelerator(address: uint64) -> MMIOAccelerator *
+UltraScale(hw, bitstream, xclbin)
}

class Alveo {
+Reset() -> Status
+GetDataMover(address, type : DataMoverType) -> XRTDataMover *
+GetAccelerator(address: string) -> XRTAccelerator *
+Alveo(hw, bitstream, xclbin)
}


class XRTMemory {
#GetHostAddress() -> uint8_t *
#GetDeviceAddress() -> uint8_t *
Expand All @@ -124,12 +137,23 @@ class XRTMemory {
class MMIOAccelerator {
Start(mode: StartMode) -> Status
Stop() -> Status
Sync() -> Status
GetStatus() -> DeviceStatus
#WriteRegister(address, data: uint8_t*, size: size_t) -> Status
#ReadRegister(address, data: uint8_t*, size: size_t) -> Status
+MMIOAccelerator(addr: uint64)
}


class XRTAccelerator {
Start(mode: StartMode) -> Status
Stop() -> Status
Sync() -> Status
GetStatus() -> DeviceStatus
#SetArgument(position, data: T*) -> Status
+XRTAccelerator(name: string)
}

class DMADataMover {
GetBuffer(size: size_t, type: MemoryType) -> XRTMemory *
Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
Expand All @@ -139,8 +163,20 @@ class DMADataMover {
DMADataMover(addr)
}

class XRTDataMover {
GetBuffer(size: size_t, type: MemoryType) -> XRTMemory *
Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
Sync() -> Status
GetStatus() -> DeviceStatus
XRTDataMover(mem_bank)
}

UltraScale ..> IHardware
Alveo ..> IHardware
XRTMemory ..> IMemory
MMIOAccelerator ..> IAccelerator
XRTAccelerator ..> IAccelerator
DMADataMover ..> IDataMover
XRTDataMover ..> IDataMover
@enduml
Loading