118 changes: 112 additions & 6 deletions README.md
@@ -1,7 +1,113 @@
# cynq
PYNQ bindings for C and C++ to avoid requiring Python or Vitis to execute hardware acceleration.
# CYNQ

# Dependencies
1. meson 1.2.3
2. Python 3.8.10
3. C++17
A framework to develop FPGA applications in C++ with the ease of use of PYNQ

## Introduction

CYNQ is a C++ framework for implementing FPGA-accelerated applications with the same ease of use as the PYNQ framework for Python. It lets users write their own applications with better performance than Python while avoiding the long development times of coding applications with Vitis.

## Dependencies

1. Meson >= 1.x
2. Python >= 3.8
3. GCC >= 9.x
4. XRT >= 2.13
5. Linux FPGA Manager

## Index

* [Foundations](docs/Foundations.md)
* [Class Diagram](docs/ClassDiagram.md)
* [Installation](docs/Installation.md)
* [Getting Started](docs/GettingStarted.md)
* [About](docs/About.md)

## What does CYNQ look like?

CYNQ is pretty similar to PYNQ; let's have a look.

PYNQ:

```python
import numpy as np
from pynq import allocate, Overlay

# Configure the FPGA
design = Overlay("design.bit")

# Extract the accelerator (IP Core) and DMA
dma = design.axi_dma_0
accel = design.multiplication_accel_0

# Allocate buffers (element counts are given by the design)
inbuf = allocate(shape=(input_elements,), dtype=np.uint16)
outbuf = allocate(shape=(output_elements,), dtype=np.uint16)

# Run
dma.sendchannel.transfer(inbuf)
accel.write(accel.register_map.CTRL.address, 0x81)
dma.recvchannel.transfer(outbuf)
dma.sendchannel.wait()
dma.recvchannel.wait()

# Dispose the buffers
del inbuf
del outbuf
```

With CYNQ:

```c++
#include <cynq/cynq.hpp>

using namespace cynq;

// Configure the FPGA
auto kArch = HardwareArchitecture::UltraScale;
auto platform = IHardware::Create(kArch, "design.bit", "default.xclbin");

// Extract the accelerator (IP Core) and DMA
// Addresses are given by the design
const uint64_t accel_addr = 0xa0000000;
const uint64_t dma_addr = 0xa0010000;
auto accel = platform->GetAccelerator(accel_addr);
auto dma = platform->GetDataMover(dma_addr);

// Allocate buffers and get the pointers (sizes in bytes are given by the design)
auto inbuf = dma->GetBuffer(input_size);
auto outbuf = dma->GetBuffer(output_size);
uint16_t* input_ptr = inbuf->HostAddress<uint16_t>().get();
uint16_t* output_ptr = outbuf->HostAddress<uint16_t>().get();

// Run
accel->Start(StartMode::Continuous);
inbuf->Sync(SyncType::HostToDevice);
dma->Upload(inbuf, inbuf->Size(), 0, ExecutionType::Sync);
dma->Download(outbuf, outbuf->Size(), 0, ExecutionType::Sync);
outbuf->Sync(SyncType::DeviceToHost);

// No explicit disposal: buffers and handles are released through RAII
```
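
The comment above alludes to RAII: there is no explicit disposal step because resources are released when their owning handles go out of scope. A minimal sketch of the idea, assuming the factory functions return smart-pointer-like handles (which the `->` calls above suggest); `RunOnce` and the buffer size are illustrative, not part of the API:

```c++
#include <cynq/cynq.hpp>

using namespace cynq;

void RunOnce() {
  auto platform = IHardware::Create(HardwareArchitecture::UltraScale,
                                    "design.bit", "default.xclbin");
  auto dma = platform->GetDataMover(0xa0010000);
  auto buffer = dma->GetBuffer(1024);  // illustrative size in bytes
  // ... start the accelerator and move data as shown above ...
}  // buffer, dma, and platform release their resources here; no deletes needed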

## Currently tested

So far, we have tested CYNQ on:

1. Xilinx KV260-based boards with Ubuntu 22.04

## Links & References

* Docs: https://ecaslab.github.io/cynq
* Github: https://github.com/ECASLab/cynq

Cite Us:

```
@misc{cynq,
  author = {León-Vega, Luis G. and Ávila-Torres, Diego and Castro-Godínez, Jorge},
  title = {{CYNQ (v0.1)}},
  year = {2023},
  url = {https://github.com/ECASLab/cynq},
}
```
21 changes: 21 additions & 0 deletions docs/About.md
@@ -0,0 +1,21 @@
# About

CYNQ is an alternative framework for coding applications for Xilinx FPGAs with the ease of use of PYNQ. Currently, most of the flow is quite complex: getting a decent C++ application requires Vitis, and the official PYNQ project provides no C or C++ bindings. Based on these needs, CYNQ proposes to:

* Provide C++ bindings for C++ applications
* Provide high performance
* Keep the simplicity of PYNQ
* Provide a hardware-agnostic interface across several platforms: ZYNQ, Alveo, and PCIe cards

This project is completely Open Source thanks to:

* Ministero dell'Università e della Ricerca
* University of Trieste
* Costa Rica Institute of Technology

Our goal is to keep it Open Source, accessible and powerful. Collaboration and improvements are very welcome.

The current maintainers are:

* Luis G. Leon Vega <luis.leon@ieee.org>
* Diego Avila Torres <diego.avila@uned.cr>
141 changes: 141 additions & 0 deletions docs/ClassDiagram.md
@@ -0,0 +1,141 @@
# Class Diagram

```plantuml
@startuml
interface IHardware {
+{abstract} Reset() -> Status
+{abstract} GetDataMover(address) -> IDataMover *
+{abstract} GetAccelerator(address) -> IAccelerator *
+{static} Create(hw: HardwareArchitecture, bitstream: string, xclbin: string) -> IHardware*
}

interface IMemory {
#{abstract} GetHostAddress() -> uint8_t *
#{abstract} GetDeviceAddress() -> uint8_t *
+HostAddress<T>() -> T *
+DeviceAddress<T>() -> T *
{abstract} Sync(type: SyncType) -> Status
{abstract} Size() -> size_t
+{static} Create(impl: IMemoryType, size, hostptr, devptr) -> IMemory*
}

enum IMemoryType {
XRT
CMA
ALIGNED
}

IMemoryType ..o IMemory

interface IAccelerator {
{abstract} Start(mode: StartMode) -> Status
{abstract} Stop() -> Status
#{abstract} WriteRegister(address, data: uint8_t*, size: size_t) -> Status
#{abstract} ReadRegister(address, data: uint8_t*, size: size_t) -> Status
+Write<T>(address, data: T*, elems: size_t) -> Status
+Read<T>(address, data: T*, elems: size_t) -> Status
{abstract} GetStatus() -> DeviceStatus
+{static} Create(impl: IAcceleratorType, addr: uint64) -> IAccelerator*
}

enum IAcceleratorType {
XRT
MMIO
CHAR
}
IAcceleratorType ..o IAccelerator

interface IDataMover {
{abstract} GetBuffer(size: size_t, type: MemoryType) -> IMemory *
{abstract} Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
{abstract} Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
{abstract} Sync() -> Status
{abstract} GetStatus() -> DeviceStatus
+{static} Create(impl: IDataMoverType, addr: uint64) -> IDataMover*
}

enum IDataMoverType {
XRT
DMA
XDMA
}
IDataMoverType ..o IDataMover

enum HardwareArchitecture {
UltraScale
Zynq
XDMA
Alveo
}

HardwareArchitecture ..o IHardware

enum SyncType {
HostToDevice
DeviceToHost
}

enum StartMode {
Once
Continuous
}

enum MemoryType {
Dual
Cacheable
Host
Device
}

enum DeviceStatus {
Unknown
Done
Idle
Running
Error
}

enum ExecutionType {
Sync
Async
}


class UltraScale {
+Reset() -> Status
+GetDataMover(address) -> XRTDataMover *
+GetAccelerator(address) -> XRTAccelerator *
+UltraScale(hw, bitstream, xclbin)
}

class XRTMemory {
#GetHostAddress() -> uint8_t *
#GetDeviceAddress() -> uint8_t *
Sync(type: SyncType) -> Status
Size() -> size_t
+XRTMemory(hostptr, devptr)
}

class XRTAccelerator {
Start(mode: StartMode) -> Status
Stop() -> Status
GetStatus() -> DeviceStatus
#WriteRegister(address, data: uint8_t*, size: size_t) -> Status
#ReadRegister(address, data: uint8_t*, size: size_t) -> Status
+XRTAccelerator(addr: uint64)
}

class XRTDataMover {
GetBuffer(size: size_t, type: MemoryType) -> XRTMemory *
Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
Sync() -> Status
GetStatus() -> DeviceStatus
XRTDataMover(addr)
}

UltraScale ..|> IHardware
XRTMemory ..|> IMemory
XRTAccelerator ..|> IAccelerator
XRTDataMover ..|> IDataMover
@enduml
```
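
A pattern worth noting in the diagram: the templated public methods (`Write<T>`, `Read<T>`, `HostAddress<T>`) are thin, non-virtual wrappers that compute byte sizes and forward to the protected, type-erased virtuals (`WriteRegister`, `ReadRegister`, `GetHostAddress`), so concrete backends (XRT, MMIO, CHAR) only implement the byte-oriented hooks. A minimal sketch of the idea for `IAccelerator` follows; the signatures mirror the diagram, but the `Status` stand-in and the exact headers are assumptions, not the real API:

```c++
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for cynq::Status
struct Status { int code = 0; };

class IAccelerator {
 public:
  virtual ~IAccelerator() = default;

  // Typed convenience wrapper: computes the byte count and forwards
  // to the type-erased virtual that concrete backends implement
  template <typename T>
  Status Write(uint64_t address, T *data, size_t elems) {
    return WriteRegister(address, reinterpret_cast<uint8_t *>(data),
                         elems * sizeof(T));
  }

  template <typename T>
  Status Read(uint64_t address, T *data, size_t elems) {
    return ReadRegister(address, reinterpret_cast<uint8_t *>(data),
                        elems * sizeof(T));
  }

 protected:
  // Byte-oriented hooks overridden by the XRT, MMIO, and CHAR backends
  virtual Status WriteRegister(uint64_t address, uint8_t *data,
                               size_t size) = 0;
  virtual Status ReadRegister(uint64_t address, uint8_t *data,
                              size_t size) = 0;
};
```

This keeps the virtual interface small while still giving callers a typed, convenient API.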