Skip to content

Commit 1abf3d6

Browse files
committed
Update readme
Now it includes links and citation
1 parent 24b76d6 commit 1abf3d6

File tree

6 files changed

+177
-150
lines changed

6 files changed

+177
-150
lines changed

README.md

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ CYNQ is a C++ framework to implement FPGA-based accelerated applications with th
1717
## Index
1818

1919
* [Foundations](docs/Foundations.md)
20+
* [Class Diagram](docs/ClassDiagram.md)
2021
* [Installation](docs/Installation.md)
2122
* [Getting Started](docs/GettingStarted.md)
2223
* [About](docs/About.md)
@@ -60,8 +61,8 @@ With CYNQ:
6061
using namespace cynq;
6162

6263
// Configure the FPGA
63-
auto kArquitecture = HardwareArchitecture::UltraScale;
64-
auto platform = IHardware::Create(kArquitecture, "design.bit", "default.xclbin");
64+
auto kArch = HardwareArchitecture::UltraScale;
65+
auto platform = IHardware::Create(kArch, "design.bit", "default.xclbin");
6566

6667
// Extract the accelerator (IP Core) and DMA
6768
// Addresses are given by the design
@@ -89,3 +90,22 @@ outbuf->Sync(SyncType::DeviceToHost);
8990
So far, we have tested CYNQ on:
9091
9192
1. Xilinx KV26-based with Ubuntu 22.04
93+
94+
## Links & References:
95+
96+
* Docs: https://ecaslab.github.io/cynq
97+
* GitHub: https://github.com/ECASLab/cynq
98+
99+
Cite Us:
100+
101+
```
102+
@misc{blabla,
103+
author = {{León-Vega, Luis G.
104+
AND Ávila-Torres, Diego
105+
AND Castro-Godínez, Jorge
106+
}},
107+
title = {{CYNQ (v0.1)}},
108+
year = {2023},
109+
url = {https://github.com/ECASLab/cynq},
110+
}
111+
```

docs/ClassDiagram.md

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Class Diagram
2+
3+
@startuml
4+
interface IHardware {
5+
+{abstract} Reset() -> Status
6+
+{abstract} GetDataMover(address) -> IDataMover *
7+
+{abstract} GetAccelerator(address) -> IAccelerator *
8+
+{static} Create(hw: HardwareArchitecture, bitstream: string, xclbin: string) -> IHardware*
9+
10+
}
11+
12+
interface IMemory {
13+
{abstract} #GetHostAddress() -> uint8_t *
14+
{abstract} #GetDeviceAddress() -> uint8_t *
15+
+HostAddress<T>() -> T *
16+
+DeviceAddress<T>() -> T *
17+
{abstract} Sync(type: SyncType) -> Status
18+
{abstract} Size() -> size_t
19+
+{static} Create(impl: IMemoryType, size, hostptr, devptr) -> IMemory*
20+
}
21+
22+
enum IMemoryType {
23+
XRT
24+
CMA
25+
ALIGNED
26+
}
27+
28+
IMemoryType ..o IMemory
29+
30+
interface IAccelerator {
31+
{abstract} Start(mode: StartMode) -> Status
32+
{abstract} Stop() -> Status
33+
{abstract} #WriteRegister(address, data: uint8_t*, size: size_t) -> Status
34+
{abstract} #ReadRegister(address, data: uint8_t*, size: size_t) -> Status
35+
+Write<T>(address, data: T*, elems: size_t) -> Status
36+
+Read<T>(address, data: T*, elems: size_t) -> Status
37+
{abstract} GetStatus() -> DeviceStatus
38+
+{static} Create(impl: IAcceleratorType, addr: uint64) -> IAccelerator*
39+
}
40+
41+
enum IAcceleratorType {
42+
XRT
43+
MMIO
44+
CHAR
45+
}
46+
IAcceleratorType ..o IAccelerator
47+
48+
interface IDataMover {
49+
{abstract} GetBuffer(size: size_t, type: MemoryType) -> IMemory *
50+
{abstract} Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
51+
{abstract} Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
52+
{abstract} Sync() -> Status
53+
{abstract} GetStatus() -> DeviceStatus
54+
+{static} Create(impl: IDataMoverType, addr: uint64) -> IDataMover*
55+
}
56+
57+
enum IDataMoverType {
58+
XRT
59+
DMA
60+
XDMA
61+
}
62+
IDataMoverType ..o IDataMover
63+
64+
enum HardwareArchitecture {
65+
UltraScale
66+
Zynq
67+
XDMA
68+
Alveo
69+
}
70+
71+
HardwareArchitecture ..o IHardware
72+
73+
enum SyncType {
74+
HostToDevice,
75+
DeviceToHost,
76+
}
77+
78+
enum StartMode {
79+
Once,
80+
Continuous
81+
}
82+
83+
enum MemoryType {
84+
Dual,
85+
Cacheable,
86+
Host,
87+
Device
88+
}
89+
90+
enum DeviceStatus {
91+
Unknown,
92+
Done,
93+
Idle,
94+
Running,
95+
Error
96+
}
97+
98+
enum ExecutionType {
99+
Sync,
100+
Async
101+
}
102+
103+
104+
class UltraScale {
105+
+Reset() -> Status
106+
+GetDataMover(address) -> XRTDataMover *
107+
+GetAccelerator(address) -> XRTAccelerator *
108+
+UltraScale(hw, bitstream, xclbin)
109+
}
110+
111+
class XRTMemory {
112+
#GetHostAddress() -> uint8_t *
113+
#GetDeviceAddress() -> uint8_t *
114+
Sync(type: SyncType) -> Status
115+
Size() -> size_t
116+
+XRTMemory(hostptr, devptr)
117+
}
118+
119+
class XRTAccelerator {
120+
Start(mode: StartMode) -> Status
121+
Stop() -> Status
122+
GetStatus() -> DeviceStatus
123+
#WriteRegister(address, data: uint8_t*, size: size_t) -> Status
124+
#ReadRegister(address, data: uint8_t*, size: size_t) -> Status
125+
+XRTAccelerator(addr: uint64)
126+
}
127+
128+
class XRTDataMover {
129+
GetBuffer(size: size_t, type: MemoryType) -> XRTMemory *
130+
Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
131+
Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
132+
Sync() -> Status
133+
GetStatus() -> DeviceStatus
134+
XRTDataMover(addr)
135+
}
136+
137+
UltraScale ..> IHardware
138+
XRTMemory ..> IMemory
139+
XRTAccelerator ..> IAccelerator
140+
XRTDataMover ..> IDataMover
141+
@enduml

docs/Doxyfile.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ OPTIMIZE_OUTPUT_VHDL = NO
291291
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
292292
# the files are not read by doxygen.
293293

294-
EXTENSION_MAPPING =
294+
EXTENSION_MAPPING =
295295

296296
# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
297297
# according to the Markdown format, which allows for more readable

docs/Foundations.md

Lines changed: 1 addition & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -53,144 +53,4 @@ As it is possible to see, there is an equivalence at the functional level.
5353

5454
Going deeper, in its current implementation for the Xilinx Kria, CYNQ uses XRT for the buffers, MMIO for the accelerators and data movers, and the FPGA manager for the hardware configuration.
5555

56-
A rough estimation of the class diagram is the following:
57-
58-
@startuml
59-
interface IHardware {
60-
+{abstract} Reset() -> Status
61-
+{abstract} GetDataMover(address) -> IDataMover *
62-
+{abstract} GetAccelerator(address) -> IAccelerator *
63-
+{static} Create(hw: HardwareArchitecture, bitstream: string, xclbin: string) -> IHardware*
64-
65-
}
66-
67-
interface IMemory {
68-
{abstract} #GetHostAddress() -> uint8_t *
69-
{abstract} #GetDeviceAddress() -> uint8_t *
70-
+HostAddress<T>() -> T *
71-
+DeviceAddress<T>() -> T *
72-
{abstract} Sync(type: SyncType) -> Status
73-
{abstract} Size() -> size_t
74-
+{static} Create(impl: IMemoryType, size, hostptr, devptr) -> IMemory*
75-
}
76-
77-
enum IMemoryType {
78-
XRT
79-
CMA
80-
ALIGNED
81-
}
82-
83-
IMemoryType ..o IMemory
84-
85-
interface IAccelerator {
86-
{abstract} Start(mode: StartMode) -> Status
87-
{abstract} Stop() -> Status
88-
{abstract} #WriteRegister(address, data: uint8_t*, size: size_t) -> Status
89-
{abstract} #ReadRegister(address, data: uint8_t*, size: size_t) -> Status
90-
+Write<T>(address, data: T*, elems: size_t) -> Status
91-
+Read<T>(address, data: T*, elems: size_t) -> Status
92-
{abstract} GetStatus() -> DeviceStatus
93-
+{static} Create(impl: IAcceleratorType, addr: uint64) -> IAccelerator*
94-
}
95-
96-
enum IAcceleratorType {
97-
XRT
98-
MMIO
99-
CHAR
100-
}
101-
IAcceleratorType ..o IAccelerator
102-
103-
interface IDataMover {
104-
{abstract} GetBuffer(size: size_t, type: MemoryType) -> IMemory *
105-
{abstract} Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
106-
{abstract} Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
107-
{abstract} Sync() -> Status
108-
{abstract} GetStatus() -> DeviceStatus
109-
+{static} Create(impl: IDataMoverType, addr: uint64) -> IDataMover*
110-
}
111-
112-
enum IDataMoverType {
113-
XRT
114-
DMA
115-
XDMA
116-
}
117-
IDataMoverType ..o IDataMover
118-
119-
enum HardwareArchitecture {
120-
UltraScale
121-
Zynq
122-
XDMA
123-
Alveo
124-
}
125-
126-
HardwareArchitecture ..o IHardware
127-
128-
enum SyncType {
129-
HostToDevice,
130-
DeviceToHost,
131-
}
132-
133-
enum StartMode {
134-
Once,
135-
Continuous
136-
}
137-
138-
enum MemoryType {
139-
Dual,
140-
Cacheable,
141-
Host,
142-
Device
143-
}
144-
145-
enum DeviceStatus {
146-
Unknown,
147-
Done,
148-
Idle,
149-
Running,
150-
Error
151-
}
152-
153-
enum ExecutionType {
154-
Sync,
155-
Async
156-
}
157-
158-
159-
class UltraScale {
160-
+Reset() -> Status
161-
+GetDataMover(address) -> XRTDataMover *
162-
+GetAccelerator(address) -> AmdAccelerator *
163-
+UltraScale(hw, bitsteam, xclbin)
164-
}
165-
166-
class XRTMemory {
167-
#GetHostAddress() -> uint8_t *
168-
#GetDeviceAddress() -> uint8_t *
169-
Sync(type: SyncType) -> Status
170-
Size() -> size_t
171-
+XRTMemory(hostptr, devptr)
172-
}
173-
174-
class XRTAccelerator {
175-
Start(mode: StartMode) -> Status
176-
Stop() -> Status
177-
GetStatus() -> DeviceStatus
178-
#WriteRegister(address, data: uint8_t*, size: size_t) -> Status
179-
#ReadRegister(address, data: uint8_t*, size: size_t) -> Status
180-
+AmdAccelerator(addr: uint64)
181-
}
182-
183-
class XRTDataMover {
184-
GetBuffer(size: size_t, type: MemoryType) -> XRTMemory *
185-
Upload(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
186-
Download(mem: IMemory, size: size_t, exetype: ExecutionType) -> Status
187-
Sync() -> Status
188-
GetStatus() -> DeviceStatus
189-
XRTDataMover(addr)
190-
}
191-
192-
UltraScale ..> IHardware
193-
XRTMemory ..> IMemory
194-
XRTAccelerator ..> IAccelerator
195-
XRTDataMover ..> IDataMover
196-
@enduml
56+
See more in [Class Diagram](ClassDiagram.md)

docs/html/index.html

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,19 @@ <h2>Index</h2>
116116
<h2>What does CYNQ look like?</h2>
117117
<p>CYNQ is pretty similar to PYNQ, let's have a look.</p>
118118
<p>PYNQ:</p>
119-
<div class="fragment"><div class="line">from pynq <span class="keyword">import</span> allocate, Overlay</div><div class="line"></div><div class="line"><span class="comment"># Configure the FPGA</span></div><div class="line">design = Overlay(<span class="stringliteral">&quot;design.bit&quot;</span>)</div><div class="line"></div><div class="line"><span class="comment"># Extract the accelerator (IP Core) and DMA</span></div><div class="line">dma = design.axi_dma_0</div><div class="line">accel = design.multiplication_accel_0</div><div class="line"></div><div class="line"><span class="comment"># Allocate buffers</span></div><div class="line">inbuf = allocate(shape=(input_elements,), dtype=np.uint16)</div><div class="line">outbuf = allocate(shape=(output_elements,), dtype=np.uint16)</div><div class="line"></div><div class="line"><span class="comment"># Run</span></div><div class="line">dma.sendchannel.transfer(inbuf)</div><div class="line">accel.write(accel.register_map.CTRL.address, 0x81)</div><div class="line">dma.recvchannel.transfer(outbuf)</div><div class="line">dma.recvchannel.wait()</div><div class="line"></div><div class="line"><span class="comment"># Dispose the buffers</span></div><div class="line">del input_hw</div><div class="line">del output_hw</div></div><!-- fragment --><p>With CYNQ:</p>
120-
<div class="fragment"><div class="line"><span class="preprocessor">#include &lt;cynq/cynq.hpp&gt;</span></div><div class="line"></div><div class="line"><span class="keyword">using namespace </span><a class="code" href="namespacecynq.html">cynq</a>;</div><div class="line"></div><div class="line"><span class="comment">// Configure the FPGA</span></div><div class="line"><span class="keyword">auto</span> kArquitecture = HardwareArchitecture::UltraScale;</div><div class="line"><span class="keyword">auto</span> platform = <a class="code" href="classcynq_1_1IHardware.html#ab3cc41fbed5a6da306d8ae51153a3065">IHardware::Create</a>(kArquitecture, <span class="stringliteral">&quot;design.bit&quot;</span>, <span class="stringliteral">&quot;default.xclbin&quot;</span>);</div><div class="line"></div><div class="line"><span class="comment">// Extract the accelerator (IP Core) and DMA</span></div><div class="line"><span class="comment">// Addresses are given by the design</span></div><div class="line"><span class="keyword">auto</span> accel = platform-&gt;GetAccelerator(0xa000000);</div><div class="line"><span class="keyword">auto</span> dma = platform-&gt;GetDataMover(0xa0010000);</div><div class="line"></div><div class="line"><span class="comment">// Allocate buffers and get the pointers</span></div><div class="line"><span class="keyword">auto</span> inbuf = mover-&gt;GetBuffer(input_size);</div><div class="line"><span class="keyword">auto</span> outbuf = mover-&gt;GetBuffer(output_size);</div><div class="line">uint16_t* input_ptr = inbuf-&gt;HostAddress&lt;uint16_t&gt;().<span class="keyword">get</span>();</div><div class="line">uint16_t* output_ptr = outbuf-&gt;HostAddress&lt;uint16_t&gt;().<span class="keyword">get</span>();</div><div class="line"></div><div class="line"><span class="comment">// Run</span></div><div class="line">accel-&gt;Start(StartMode::Continuous);</div><div class="line">inbuf-&gt;Sync(SyncType::HostToDevice);</div><div class="line">mover-&gt;Upload(in_mem, 
infbuf-&gt;Size(), 0, ExecutionType::Sync);</div><div class="line">mover-&gt;Download(out_mem, outbuf-&gt;Size(), 0, ExecutionType::Sync);</div><div class="line">outbuf-&gt;Sync(SyncType::DeviceToHost);</div><div class="line"></div><div class="line"><span class="comment">// Dispose? We use RAII</span></div></div><!-- fragment --><h2>Currently tested</h2>
119+
<div class="fragment"><div class="line">from pynq import allocate, Overlay</div><div class="line"></div><div class="line"># Configure the FPGA</div><div class="line">design = Overlay(&quot;design.bit&quot;)</div><div class="line"></div><div class="line"># Extract the accelerator (IP Core) and DMA</div><div class="line">dma = design.axi_dma_0</div><div class="line">accel = design.multiplication_accel_0</div><div class="line"></div><div class="line"># Allocate buffers</div><div class="line">inbuf = allocate(shape=(input_elements,), dtype=np.uint16)</div><div class="line">outbuf = allocate(shape=(output_elements,), dtype=np.uint16)</div><div class="line"></div><div class="line"># Run</div><div class="line">dma.sendchannel.transfer(inbuf)</div><div class="line">accel.write(accel.register_map.CTRL.address, 0x81)</div><div class="line">dma.recvchannel.transfer(outbuf)</div><div class="line">dma.recvchannel.wait()</div><div class="line"></div><div class="line"># Dispose the buffers</div><div class="line">del input_hw</div><div class="line">del output_hw</div></div><!-- fragment --><p>With CYNQ:</p>
120+
<div class="fragment"><div class="line">{c++}</div><div class="line">#include &lt;cynq/cynq.hpp&gt;</div><div class="line"></div><div class="line">using namespace cynq;</div><div class="line"></div><div class="line">// Configure the FPGA</div><div class="line">auto kArch = HardwareArchitecture::UltraScale;</div><div class="line">auto platform = IHardware::Create(kArch, &quot;design.bit&quot;, &quot;default.xclbin&quot;);</div><div class="line"></div><div class="line">// Extract the accelerator (IP Core) and DMA</div><div class="line">// Addresses are given by the design</div><div class="line">auto accel = platform-&gt;GetAccelerator(0xa000000);</div><div class="line">auto dma = platform-&gt;GetDataMover(0xa0010000);</div><div class="line"></div><div class="line">// Allocate buffers and get the pointers</div><div class="line">auto inbuf = mover-&gt;GetBuffer(input_size);</div><div class="line">auto outbuf = mover-&gt;GetBuffer(output_size);</div><div class="line">uint16_t* input_ptr = inbuf-&gt;HostAddress&lt;uint16_t&gt;().get();</div><div class="line">uint16_t* output_ptr = outbuf-&gt;HostAddress&lt;uint16_t&gt;().get();</div><div class="line"></div><div class="line">// Run</div><div class="line">accel-&gt;Start(StartMode::Continuous);</div><div class="line">inbuf-&gt;Sync(SyncType::HostToDevice);</div><div class="line">mover-&gt;Upload(in_mem, infbuf-&gt;Size(), 0, ExecutionType::Sync);</div><div class="line">mover-&gt;Download(out_mem, outbuf-&gt;Size(), 0, ExecutionType::Sync);</div><div class="line">outbuf-&gt;Sync(SyncType::DeviceToHost);</div><div class="line"></div><div class="line">// Dispose? We use RAII</div></div><!-- fragment --><h2>Currently tested</h2>
121121
<p>So far, we have tested CYNQ on:</p>
122122
<ol type="1">
123-
<li>Xilinx KV26-based with Ubuntu 2022.04 </li>
123+
<li>Xilinx KV26-based with Ubuntu 22.04</li>
124124
</ol>
125-
</div></div><!-- contents -->
125+
<h2>Links &amp; References:</h2>
126+
<ul>
127+
<li>Docs: <a href="https://ecaslab.github.io/cynq">https://ecaslab.github.io/cynq</a></li>
128+
<li>Github: <a href="https://github.com/ECASLab/cynq">https://github.com/ECASLab/cynq</a></li>
129+
</ul>
130+
<p>Cite Us:</p>
131+
<div class="fragment"><div class="line">@misc{blabla,</div><div class="line"> author = {{León-vega, Luis G.</div><div class="line"> AND Ávila-Torres, Diego</div><div class="line"> AND Castro-Godínez, Jorge</div><div class="line"> }},</div><div class="line"> title = {{CYNQ (v0.1)}},</div><div class="line"> year = {2023},</div><div class="line"> url = {https://github.com/ECASLab/cynq},</div><div class="line">} </div></div><!-- fragment --> </div></div><!-- contents -->
126132
</div><!-- doc-content -->
127133
<!-- start footer part -->
128134
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->

docs/html/md__media_lleon95_data_Personal_cynq_docs_GettingStarted.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,8 @@
138138
<p>To synchronise the buffers, it is possible to use <code>Sync()</code> method:</p>
139139
<div class="fragment"><div class="line">{c++}</div><div class="line">in_mem-&gt;Sync(SyncType::HostToDevice);</div><div class="line">out_mem-&gt;Sync(SyncType::DeviceToHost);</div></div><!-- fragment --><p>that takes any of the following values:</p>
140140
<ul>
141-
<li>cynq::SyncType::DeviceToHost: device to host synchronisation</li>
142-
<li>cynq::SyncType::HostToDevice: host to device synchronisation</li>
141+
<li><code>cynq::SyncType::DeviceToHost</code>: device to host synchronisation</li>
142+
<li><code>cynq::SyncType::HostToDevice</code>: host to device synchronisation</li>
143143
</ul>
144144
<p>Once the memory is synchronised, the data mover is used to upload the data to the AXI4-Stream or download from it.</p>
145145
<div class="fragment"><div class="line">{c++}</div><div class="line">// Upload: requires the buffer to be sync in HostToDevice</div><div class="line">dma-&gt;Upload(in_mem, in_mem-&gt;Size(), 0, ExecutionType::Sync);</div><div class="line">// Download: after its completion, the buffer must be sync DeviceToHost</div><div class="line">dma-&gt;Download(out_mem, out_mem-&gt;Size(), 0, ExecutionType::Sync);</div></div><!-- fragment --><p>Both methods take: <code>(memory, size, offset, execution_type)</code>, where <code>size</code> is the amount of data to transfer in bytes, <code>offset</code> moves the starting point of the data and <code>execution_type</code> is the type of execution:</p>

0 commit comments

Comments
 (0)