Skip to content

Commit 38fa9fa

Browse files
committed
[DRIVER][RUNTIME] Make runtime fully device agnostic (apache#23)
1 parent 8eae56a commit 38fa9fa

File tree

6 files changed

+188
-150
lines changed

6 files changed

+188
-150
lines changed

vta/include/vta/driver.h

Lines changed: 47 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
/*!
22
* Copyright (c) 2018 by Contributors
33
* \file driver.h
4-
* \brief General driver interface.
4+
* \brief Driver interface that is used by runtime.
5+
*
6+
* Driver's implementation is device specific.
57
*/
68

79
#ifndef VTA_DRIVER_H_
@@ -11,16 +13,50 @@
1113
extern "C" {
1214
#endif
1315

14-
#include <stdlib.h>
1516
#include <stdint.h>
17+
#include <stdlib.h>
1618

17-
/*! \brief Memory management constants */
19+
/*! \brief Memory management constants for cached memory */
1820
#define VTA_CACHED 1
19-
/*! \brief Memory management constants */
21+
/*! \brief Memory management constants for non-cached memory */
2022
#define VTA_NOT_CACHED 0
2123

22-
/*! \brief VTA command handle */
23-
typedef void * VTAHandle;
24+
/*! \brief Physically contiguous buffer size limit */
25+
#ifndef VTA_MAX_XFER
26+
#define VTA_MAX_XFER (1<<22)
27+
#endif
28+
29+
/*! \brief Device resource context */
30+
typedef void * VTADeviceHandle;
31+
32+
/*! \brief physical address */
33+
typedef uint32_t vta_phy_addr_t;
34+
35+
/*!
36+
* \brief Allocate a device resource handle
37+
* \return The device handle.
38+
*/
39+
VTADeviceHandle VTADeviceAlloc();
40+
41+
/*!
42+
* \brief Free a device handle
43+
* \param handle The device handle to be freed.
44+
*/
45+
void VTADeviceFree(VTADeviceHandle handle);
46+
47+
/*!
48+
* \brief Launch the instructions and block until done.
49+
* \param device The device handle.
50+
* \param insn_phy_addr The physical address of instruction stream.
51+
* \param insn_count Instruction count.
52+
* \param wait_cycles The maximum number of cycles to wait.
53+
*
54+
* \return 0 if running is successful, 1 if timeout.
55+
*/
56+
int VTADeviceRun(VTADeviceHandle device,
57+
vta_phy_addr_t insn_phy_addr,
58+
uint32_t insn_count,
59+
uint32_t wait_cycles);
2460

2561
/*!
2662
* \brief Allocates physically contiguous region in memory (limited by VTA_MAX_XFER).
@@ -41,52 +77,23 @@ void VTAMemFree(void* buf);
4177
* \param buf Pointer to memory region allocated with VTAMemAlloc.
4278
* \return The physical address of the memory region.
4379
*/
44-
uint32_t VTAGetMemPhysAddr(void* buf);
80+
vta_phy_addr_t VTAGetMemPhysAddr(void* buf);
4581

4682
/*!
4783
* \brief Flushes the region of memory out of the CPU cache to DRAM.
4884
* \param buf Pointer to memory region allocated with VTAMemAlloc to be flushed.
85+
* This needs to be the physical address.
4986
* \param size Size of the region to flush in Bytes.
5087
*/
51-
void VTAFlushCache(void* buf, int size);
88+
void VTAFlushCache(vta_phy_addr_t buf, int size);
5289

5390
/*!
5491
* \brief Invalidates the region of memory that is cached.
5592
* \param buf Pointer to memory region allocated with VTAMemAlloc to be invalidated.
93+
* This needs to be the physical address.
5694
* \param size Size of the region to invalidate in Bytes.
5795
*/
58-
void VTAInvalidateCache(void* buf, int size);
59-
60-
/*!
61-
* \brief Returns a memory map to FPGA configuration registers.
62-
* \param addr The base physical address of the configuration registers.
63-
* \param length The size of the memory mapped region in bytes.
64-
* \return A pointer to the memory mapped region.
65-
*/
66-
void *VTAMapRegister(unsigned addr, size_t length);
67-
68-
/*!
69-
* \brief Deletes the configuration register memory map.
70-
* \param vta The memory mapped region.
71-
* \param length The size of the memory mapped region in bytes.
72-
*/
73-
void VTAUnmapRegister(void *vta, size_t length);
74-
75-
/*!
76-
* \brief Writes to a memory mapped configuration register.
77-
* \param vta_base The handle to the memory mapped configuration registers.
78-
* \param offset The offset of the register to write to.
79-
* \param val The value to be written to the memory mapped register.
80-
*/
81-
void VTAWriteMappedReg(VTAHandle vta_base, unsigned offset, unsigned val);
82-
83-
/*!
84-
* \brief Reads from the memory mapped configuration register.
85-
* \param vta_base The handle to the memory mapped configuration registers.
86-
* \param offset The offset of the register to read from.
87-
* \return The value read from the memory mapped register.
88-
*/
89-
unsigned VTAReadMappedReg(VTAHandle vta_base, unsigned offset);
96+
void VTAInvalidateCache(vta_phy_addr_t buf, int size);
9097

9198
/*!
9299
* \brief Programming the bit stream on the FPGA.

vta/src/data_buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct DataBuffer {
3535
*/
3636
void InvalidateCache(size_t offset, size_t size) {
3737
if (!kBufferCoherent) {
38-
VTAInvalidateCache(reinterpret_cast<void*>(phy_addr_ + offset), size);
38+
VTAInvalidateCache(phy_addr_ + offset, size);
3939
}
4040
}
4141
/*!
@@ -45,7 +45,7 @@ struct DataBuffer {
4545
*/
4646
void FlushCache(size_t offset, size_t size) {
4747
if (!kBufferCoherent) {
48-
VTAFlushCache(reinterpret_cast<void*>(phy_addr_ + offset), size);
48+
VTAFlushCache(phy_addr_ + offset, size);
4949
}
5050
}
5151
/*!

vta/src/pynq/pynq_driver.cc

Lines changed: 110 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66

77
#include <vta/driver.h>
8+
#include <thread>
89
#include "./pynq_driver.h"
910

1011

@@ -16,16 +17,16 @@ void VTAMemFree(void* buf) {
1617
cma_free(buf);
1718
}
1819

19-
uint32_t VTAGetMemPhysAddr(void* buf) {
20+
vta_phy_addr_t VTAGetMemPhysAddr(void* buf) {
2021
return cma_get_phy_addr(buf);
2122
}
2223

23-
void VTAFlushCache(void* buf, int size) {
24-
xlnkFlushCache(buf, size);
24+
void VTAFlushCache(vta_phy_addr_t buf, int size) {
25+
xlnkFlushCache(reinterpret_cast<void*>(buf), size);
2526
}
2627

27-
void VTAInvalidateCache(void* buf, int size) {
28-
xlnkInvalidateCache(buf, size);
28+
void VTAInvalidateCache(vta_phy_addr_t buf, int size) {
29+
xlnkInvalidateCache(reinterpret_cast<void*>(buf), size);
2930
}
3031

3132
void *VTAMapRegister(uint32_t addr, size_t length) {
@@ -57,33 +58,112 @@ uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) {
5758
return *((volatile uint32_t *) (reinterpret_cast<char *>(base_addr) + offset));
5859
}
5960

61+
class VTADevice {
62+
public:
63+
VTADevice() {
64+
// VTA stage handles
65+
vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR, VTA_RANGE);
66+
vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR, VTA_RANGE);
67+
vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR, VTA_RANGE);
68+
vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR, VTA_RANGE);
69+
}
70+
71+
~VTADevice() {
72+
// Close VTA stage handle
73+
VTAUnmapRegister(vta_fetch_handle_, VTA_RANGE);
74+
VTAUnmapRegister(vta_load_handle_, VTA_RANGE);
75+
VTAUnmapRegister(vta_compute_handle_, VTA_RANGE);
76+
VTAUnmapRegister(vta_store_handle_, VTA_RANGE);
77+
}
78+
79+
int Run(vta_phy_addr_t insn_phy_addr,
80+
uint32_t insn_count,
81+
uint32_t wait_cycles) {
82+
// NOTE: Register address map is derived from the auto-generated
83+
// driver files available under hardware/build/vivado/<design>/export/driver
84+
// FETCH @ 0x10 : Data signal of insn_count_V
85+
VTAWriteMappedReg(vta_fetch_handle_, 0x10, insn_count);
86+
// FETCH @ 0x18 : Data signal of insns_V
87+
VTAWriteMappedReg(vta_fetch_handle_, 0x18, insn_phy_addr);
88+
// LOAD @ 0x10 : Data signal of inputs_V
89+
VTAWriteMappedReg(vta_load_handle_, 0x10, 0);
90+
// LOAD @ 0x18 : Data signal of weight_V
91+
VTAWriteMappedReg(vta_load_handle_, 0x18, 0);
92+
// COMPUTE @ 0x20 : Data signal of uops_V
93+
VTAWriteMappedReg(vta_compute_handle_, 0x20, 0);
94+
// COMPUTE @ 0x28 : Data signal of biases_V
95+
VTAWriteMappedReg(vta_compute_handle_, 0x28, 0);
96+
// STORE @ 0x10 : Data signal of outputs_V
97+
VTAWriteMappedReg(vta_store_handle_, 0x10, 0);
98+
99+
// VTA start
100+
VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START);
101+
VTAWriteMappedReg(vta_load_handle_, 0x0, VTA_AUTORESTART);
102+
VTAWriteMappedReg(vta_compute_handle_, 0x0, VTA_AUTORESTART);
103+
VTAWriteMappedReg(vta_store_handle_, 0x0, VTA_AUTORESTART);
104+
105+
// Loop until the VTA is done
106+
unsigned t, flag = 0;
107+
for (t = 0; t < wait_cycles; ++t) {
108+
flag = VTAReadMappedReg(vta_compute_handle_, 0x18);
109+
if (flag == VTA_DONE) break;
110+
std::this_thread::yield();
111+
}
112+
// Report error if timeout
113+
return t < wait_cycles ? 0 : 1;
114+
}
115+
116+
private:
117+
// VTA handles (register maps)
118+
void* vta_fetch_handle_{nullptr};
119+
void* vta_load_handle_{nullptr};
120+
void* vta_compute_handle_{nullptr};
121+
void* vta_store_handle_{nullptr};
122+
};
123+
124+
VTADeviceHandle VTADeviceAlloc() {
125+
return new VTADevice();
126+
}
127+
128+
void VTADeviceFree(VTADeviceHandle handle) {
129+
delete static_cast<VTADevice*>(handle);
130+
}
131+
132+
int VTADeviceRun(VTADeviceHandle handle,
133+
vta_phy_addr_t insn_phy_addr,
134+
uint32_t insn_count,
135+
uint32_t wait_cycles) {
136+
return static_cast<VTADevice*>(handle)->Run(
137+
insn_phy_addr, insn_count, wait_cycles);
138+
}
139+
60140
/*!
 * \brief Program the FPGA by copying a bitstream file to the device
 *  configuration file (Pynq only).
 *
 * Writes '0' to VTA_PYNQ_BS_IS_PARTIAL, then copies the file at
 * \p bitstream byte by byte into VTA_PYNQ_BS_XDEVCFG. If any of the three
 * files cannot be opened, a message is printed and the process exits with
 * status 1.
 *
 * \param bitstream Path of the bitstream file to load.
 */
void VTAProgram(const char* bitstream) {
  int elem;
  FILE *src, *dst, *partial;
  partial = fopen(VTA_PYNQ_BS_IS_PARTIAL, "w");
  if (partial == NULL) {
    // Nothing to close here: passing a null stream to fclose is undefined.
    printf("Cannot open partial config file %s\n", VTA_PYNQ_BS_IS_PARTIAL);
    exit(1);
  }
  fputc('0', partial);
  fclose(partial);
  src = fopen(bitstream, "rb");
  if (src == NULL) {
    printf("Cannot open bitstream %s\n", bitstream);
    exit(1);
  }
  dst = fopen(VTA_PYNQ_BS_XDEVCFG, "wb");
  if (dst == NULL) {
    // Nothing to close here: passing a null stream to fclose is undefined.
    printf("Cannot open device file %s\n", VTA_PYNQ_BS_XDEVCFG);
    exit(1);
  }
  // Copy the bitstream one byte at a time until end of file.
  elem = fgetc(src);
  while (elem != EOF) {
    fputc(elem, dst);
    elem = fgetc(src);
  }
  fclose(src);
  fclose(dst);
}

vta/src/pynq/pynq_driver.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ void xlnkFlushCache(void* buf, int size);
3232
void xlnkInvalidateCache(void* buf, int size);
3333
#endif
3434

35+
void *VTAMapRegister(uint32_t addr, size_t length);
36+
void VTAUnmapRegister(void *vta, size_t length);
37+
void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);
38+
uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);
39+
3540
/*! \brief (Pynq only) Partial bitstream status file path */
3641
#define VTA_PYNQ_BS_IS_PARTIAL "/sys/devices/soc0/amba/f8007000.devcfg/is_partial_bitstream"
3742
/*! \brief (Pynq only) Bitstream destination file path */
@@ -44,9 +49,6 @@ void xlnkInvalidateCache(void* buf, int size);
4449
/*! \brief (Pynq only) MMIO driver constant */
4550
#define VTA_PYNQ_MMIO_WORD_MASK (~(MMIO_WORD_LENGTH - 1))
4651

47-
/*! \brief Physically contiguous buffer size limit */
48-
#define VTA_MAX_XFER (1<<22)
49-
5052
/*! \brief VTA configuration register address range */
5153
#define VTA_RANGE 0x100
5254
/*! \brief VTA configuration register start value */

0 commit comments

Comments
 (0)