Skip to content

Commit 6327e70

Browse files
committed
add yolov2 i16 128b support
1 parent 49452bd commit 6327e70

File tree

1,552 files changed

+43464
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,552 files changed

+43464
-0
lines changed

SDK/src_int16_128b/acc_i16c.cpp

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
2+
#include "acc_i16c.h"
3+
4+
void copy_mem2dev(uint8_t *orig,uint32_t byte_num, unsigned long in_buffer)
5+
{
6+
int fd = open("/dev/mem", O_RDWR);
7+
unsigned char *virtual_addr;
8+
uint32_t RequestByteNum;// must page
9+
if(byte_num%(HPAGESIZE)==0)
10+
RequestByteNum = byte_num;
11+
else
12+
{
13+
RequestByteNum = ceil(byte_num/(HPAGESIZE*1.0))*(HPAGESIZE);
14+
}
15+
virtual_addr = (unsigned char *)mmap(NULL, RequestByteNum, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)in_buffer);
16+
if(virtual_addr == MAP_FAILED)
17+
{
18+
perror("Virtual_addr_in mappong for absolute memory access failed!\n");
19+
return;
20+
}
21+
memcpy(virtual_addr,orig,byte_num);
22+
23+
munmap((void *)virtual_addr, byte_num);
24+
close(fd);
25+
}
26+
27+
void copy_dev2mem(uint8_t *dst,uint32_t byte_num, unsigned long in_buffer)
28+
{
29+
int fd = open("/dev/mem", O_RDWR);
30+
unsigned char *virtual_addr;
31+
uint32_t RequestByteNum;// must page
32+
if(byte_num%(HPAGESIZE)==0)
33+
RequestByteNum = byte_num;
34+
else
35+
{
36+
RequestByteNum = ceil(byte_num/(HPAGESIZE*1.0))*(HPAGESIZE);
37+
}
38+
virtual_addr = (unsigned char *)mmap(NULL, RequestByteNum, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)in_buffer);
39+
if(virtual_addr == MAP_FAILED)
40+
{
41+
perror("Virtual_addr_in mappong for absolute memory access failed!\n");
42+
return;
43+
}
44+
printf("copy start-----byte_num=%d\n",byte_num);
45+
memcpy((uint8_t *)dst,virtual_addr,byte_num);
46+
printf("copy ok!\n");
47+
48+
munmap((void *)virtual_addr, byte_num);
49+
close(fd);
50+
}
51+
52+
int copy_file2mem(char *bin_file,uint32_t byte_num,unsigned long in_buffer)
53+
{
54+
unsigned char *buffer = (unsigned char *)malloc(HPAGESIZE);
55+
if(buffer==NULL){
56+
printf("cannot malloc buffer %d byte\n", HPAGESIZE);
57+
return -1;
58+
}
59+
printf("Total Byte Num = %d\n Address 0x%X\n", byte_num, in_buffer);
60+
FILE *fp;
61+
if( (fp = fopen(bin_file, "rb")) == NULL)fprintf(stderr,"CANNOT OPEN bin_file\n");
62+
int rd_num;
63+
unsigned long offset = 0;
64+
while(rd_num = fread(buffer, sizeof(unsigned char), HPAGESIZE, fp))
65+
{
66+
if(rd_num < HPAGESIZE)
67+
rd_num = HPAGESIZE;
68+
copy_mem2dev(buffer,rd_num, in_buffer+offset);
69+
// printf("rd_num=%d, offset=%d\n", rd_num, offset);
70+
offset += rd_num;
71+
}
72+
printf("copy_file2mem offset=%d\n",offset);
73+
fclose(fp);
74+
75+
free(buffer);
76+
77+
78+
return 0;
79+
}
80+
81+
int copy_mem2file(char *bin_file,uint32_t byte_num,unsigned long in_buffer)
82+
{
83+
void *buffer = malloc(HPAGESIZE);
84+
if(buffer==NULL){
85+
printf("cannot malloc buffer %d byte\n", HPAGESIZE);
86+
return -1;
87+
}
88+
89+
FILE *fp;
90+
if( (fp = fopen(bin_file, "wb")) == NULL)fprintf(stderr,"CANNOT OPEN bin_file\n");
91+
92+
int x = byte_num;
93+
int addbyte;
94+
unsigned long offset = 0;
95+
while(addbyte=((x<HPAGESIZE)?x:(HPAGESIZE)))
96+
{
97+
copy_dev2mem((uint8_t *)buffer,addbyte, in_buffer+offset);
98+
fwrite(buffer , sizeof(unsigned char), addbyte, fp);
99+
x -= addbyte;
100+
offset += addbyte;
101+
}
102+
printf("copy_mem2file offset=%d\n",offset);
103+
104+
105+
fclose(fp);
106+
107+
free(buffer);
108+
109+
return 0;
110+
}
111+
112+
/**
 * Current wall-clock time as reported by gettimeofday().
 *
 * @return Seconds plus fractional microseconds as a double, or 0 when
 *         gettimeofday() reports an error.
 */
double what_time_is_it_now()
{
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
        return 0;
    }

    const double seconds = (double)now.tv_sec;
    const double micros  = (double)now.tv_usec;
    return seconds + micros * 1e-6;
}
120+
121+
int FPGA_Acc(unsigned int ifm_addr, unsigned int ofm_addr, unsigned int weight_offset, unsigned int bias_offset, uint32_t k_s_pad_ltype, uint32_t iofm_num, uint32_t ifm_w_h, uint32_t ofm_w_h,
122+
uint32_t TRTC, uint32_t TMTN, int32_t NToy, int32_t NTox, int32_t NTof, int32_t NTcomb, int32_t NTif, uint8_t lmode, int32_t NTcomb_l, int16_t pad_val, int16_t div_kk,
123+
uint32_t IHW, uint32_t OHW, uint32_t KK_INumxKK, uint32_t en_bits, uint32_t weightQ, uint32_t biasQ, uint32_t ifmQ, uint32_t ofmQ, uint32_t avgQ, uint32_t interQ)//enable_bits[2:0]={IsReLU, LoadBias, IsNotConv}
124+
{
125+
unsigned int ap_idle;
126+
unsigned int ap_done;
127+
128+
unsigned long int PhysicalAddress = YOLO2_BASEADDR;
129+
int map_len = 0x120;
130+
int fd = open("/dev/mem", O_RDWR);
131+
132+
unsigned char *xbase_address;
133+
xbase_address = (unsigned char *)mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)PhysicalAddress);
134+
if(xbase_address == MAP_FAILED)
135+
{
136+
perror("1:Init Mapping memory for absolute memory access failed.\n");
137+
return -1;
138+
}
139+
140+
while(1)
141+
{
142+
ap_idle = ((ReadReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_AP_CTRL) >> 2) && 0x1);
143+
if(ap_idle)
144+
break;
145+
}
146+
147+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_IFM_V_DATA, ifm_addr);
148+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_OFM_V_DATA, ofm_addr);
149+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_WEIGHT_V_DATA, WEIGHT_BASE + weight_offset*2);
150+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_BIAS_V_DATA, BETA_BASE + bias_offset*2);
151+
152+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_K_S_PAD_LTYPE_DATA, k_s_pad_ltype);
153+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_IOFM_NUM_DATA, iofm_num);
154+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_IFM_W_H_DATA, ifm_w_h);
155+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_OFM_W_H_DATA, ofm_w_h);
156+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_TRTC_DATA, TRTC);
157+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_TMTN_DATA, TMTN);
158+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_NTOY_DATA, NToy);
159+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_NTOX_DATA, NTox);
160+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_NTOF_DATA, NTof);
161+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_NTCOMB_DATA, NTcomb);
162+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_NTIF_DATA, NTif);
163+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_LMODE_DATA, lmode);
164+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_NTCOMB_L_DATA, NTcomb_l);
165+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_PAD_VAL_DATA, pad_val);
166+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_DIV_KK_DATA, div_kk);
167+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_IHW_DATA, IHW);
168+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_OHW_DATA, OHW);
169+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_KK_INUMXKK_DATA, KK_INumxKK);
170+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_EN_BITS_DATA, en_bits);
171+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_WEIGHTQ_DATA, weightQ);
172+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_BIASQ_DATA, biasQ);
173+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_IFMQ_DATA, ifmQ);
174+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_OFMQ_DATA, ofmQ);
175+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_AVGQ_DATA, avgQ);
176+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_INTERQ_DATA, interQ);
177+
178+
// double time1,time2;
179+
// time1 = what_time_is_it_now();
180+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_GIE, 0x0);
181+
WriteReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_AP_CTRL, 0x1);//Start
182+
while(1)
183+
{
184+
ap_done = ((ReadReg(xbase_address, XFPGA_ACC_CTRL_BUS_ADDR_AP_CTRL) >> 1) && 0x1);
185+
if(ap_done)
186+
break;
187+
}
188+
// time2 = what_time_is_it_now();
189+
// printf("START TO DONE in %f seconds.\n",time2 - time1);
190+
191+
munmap((void *)xbase_address, map_len);
192+
close(fd);
193+
194+
return 0;
195+
}

SDK/src_int16_128b/acc_i16c.h

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
2+
#include <stdio.h>
3+
#include <stdlib.h>
4+
#include <sys/mman.h>
5+
#include <fcntl.h>
6+
#include <sys/ioctl.h>
7+
#include <unistd.h>
8+
#include <linux/fb.h>
9+
#include <string.h>
10+
#include <time.h>
11+
#include <stdint.h>
12+
#include <sys/time.h>
13+
#include <assert.h>
14+
#include <math.h>
15+
16+
/*
 * Control-bus register map of the accelerator.
 * *_ADDR_* values are byte offsets from the mapped register base;
 * *_BITS_* values give the width in bits of the matching data register.
 */

/* Block-level control and interrupt registers */
#define XFPGA_ACC_CTRL_BUS_ADDR_AP_CTRL             0x00
#define XFPGA_ACC_CTRL_BUS_ADDR_GIE                 0x04
#define XFPGA_ACC_CTRL_BUS_ADDR_IER                 0x08
#define XFPGA_ACC_CTRL_BUS_ADDR_ISR                 0x0c

/* Buffer address registers */
#define XFPGA_ACC_CTRL_BUS_ADDR_IFM_V_DATA          0x10
#define XFPGA_ACC_CTRL_BUS_BITS_IFM_V_DATA          32
#define XFPGA_ACC_CTRL_BUS_ADDR_OFM_V_DATA          0x18
#define XFPGA_ACC_CTRL_BUS_BITS_OFM_V_DATA          32
#define XFPGA_ACC_CTRL_BUS_ADDR_WEIGHT_V_DATA       0x20
#define XFPGA_ACC_CTRL_BUS_BITS_WEIGHT_V_DATA       32
#define XFPGA_ACC_CTRL_BUS_ADDR_BIAS_V_DATA         0x28
#define XFPGA_ACC_CTRL_BUS_BITS_BIAS_V_DATA         32

/* Layer and tiling parameter registers */
#define XFPGA_ACC_CTRL_BUS_ADDR_K_S_PAD_LTYPE_DATA  0x30
#define XFPGA_ACC_CTRL_BUS_BITS_K_S_PAD_LTYPE_DATA  32
#define XFPGA_ACC_CTRL_BUS_ADDR_IOFM_NUM_DATA       0x38
#define XFPGA_ACC_CTRL_BUS_BITS_IOFM_NUM_DATA       32
#define XFPGA_ACC_CTRL_BUS_ADDR_IFM_W_H_DATA        0x40
#define XFPGA_ACC_CTRL_BUS_BITS_IFM_W_H_DATA        32
#define XFPGA_ACC_CTRL_BUS_ADDR_OFM_W_H_DATA        0x48
#define XFPGA_ACC_CTRL_BUS_BITS_OFM_W_H_DATA        32
#define XFPGA_ACC_CTRL_BUS_ADDR_TRTC_DATA           0x50
#define XFPGA_ACC_CTRL_BUS_BITS_TRTC_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_TMTN_DATA           0x58
#define XFPGA_ACC_CTRL_BUS_BITS_TMTN_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_NTOY_DATA           0x60
#define XFPGA_ACC_CTRL_BUS_BITS_NTOY_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_NTOX_DATA           0x68
#define XFPGA_ACC_CTRL_BUS_BITS_NTOX_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_NTOF_DATA           0x70
#define XFPGA_ACC_CTRL_BUS_BITS_NTOF_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_NTCOMB_DATA         0x78
#define XFPGA_ACC_CTRL_BUS_BITS_NTCOMB_DATA         32
#define XFPGA_ACC_CTRL_BUS_ADDR_NTIF_DATA           0x80
#define XFPGA_ACC_CTRL_BUS_BITS_NTIF_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_LMODE_DATA          0x88
#define XFPGA_ACC_CTRL_BUS_BITS_LMODE_DATA          8
#define XFPGA_ACC_CTRL_BUS_ADDR_NTCOMB_L_DATA       0x90
#define XFPGA_ACC_CTRL_BUS_BITS_NTCOMB_L_DATA       32
#define XFPGA_ACC_CTRL_BUS_ADDR_PAD_VAL_DATA        0x98
#define XFPGA_ACC_CTRL_BUS_BITS_PAD_VAL_DATA        16
#define XFPGA_ACC_CTRL_BUS_ADDR_DIV_KK_DATA         0xa0
#define XFPGA_ACC_CTRL_BUS_BITS_DIV_KK_DATA         16
#define XFPGA_ACC_CTRL_BUS_ADDR_IHW_DATA            0xa8
#define XFPGA_ACC_CTRL_BUS_BITS_IHW_DATA            32
#define XFPGA_ACC_CTRL_BUS_ADDR_OHW_DATA            0xb0
#define XFPGA_ACC_CTRL_BUS_BITS_OHW_DATA            32
#define XFPGA_ACC_CTRL_BUS_ADDR_KK_INUMXKK_DATA     0xb8
#define XFPGA_ACC_CTRL_BUS_BITS_KK_INUMXKK_DATA     32
#define XFPGA_ACC_CTRL_BUS_ADDR_EN_BITS_DATA        0xc0
#define XFPGA_ACC_CTRL_BUS_BITS_EN_BITS_DATA        32

/* Quantisation parameter registers */
#define XFPGA_ACC_CTRL_BUS_ADDR_WEIGHTQ_DATA        0xc8
#define XFPGA_ACC_CTRL_BUS_BITS_WEIGHTQ_DATA        32
#define XFPGA_ACC_CTRL_BUS_ADDR_BIASQ_DATA          0xd0
#define XFPGA_ACC_CTRL_BUS_BITS_BIASQ_DATA          32
#define XFPGA_ACC_CTRL_BUS_ADDR_IFMQ_DATA           0xd8
#define XFPGA_ACC_CTRL_BUS_BITS_IFMQ_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_OFMQ_DATA           0xe0
#define XFPGA_ACC_CTRL_BUS_BITS_OFMQ_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_AVGQ_DATA           0xe8
#define XFPGA_ACC_CTRL_BUS_BITS_AVGQ_DATA           32
#define XFPGA_ACC_CTRL_BUS_ADDR_INTERQ_DATA         0xf0
#define XFPGA_ACC_CTRL_BUS_BITS_INTERQ_DATA         32
78+
79+
/* Physical address at which the accelerator control registers are mapped. */
#define YOLO2_BASEADDR  0xA0000000

/* Physical base addresses of the data regions (original sizing notes kept). */
#define WEIGHT_BASE     (0x60000000)  /* 203779456 = 0xC256D80 */
#define BETA_BASE       (0x6C25F000)  /* 43056 = 0xA830 */
#define MEM_BASE        (0x6C400000)  /* (416*416 + 208*208) * 128 = 216,320 * 128 = 0x1A68000 */

/* Minimum / maximum of two values; each argument may be evaluated twice. */
#define MIN_diy(x,y)    ((x) < (y) ? (x) : (y))
#define MAX_diy(x,y)    ((x) > (y) ? (x) : (y))

/* Boolean constants */
#define FALSE           0
#define TRUE            1

/* Padding selectors */
#define VALID           0
#define SAME            1

/* Layer type codes */
#define LT_DCONV        0
#define LT_CONV         1
#define LT_AVGPOOL      2
#define LT_MAXPOOL      3

/* Sentinel "very small" values */
#define MIN_NEG         (-1024*1024)
// #define MIN_NEG_INT16 (0x8000)
/* NOTE(review): with a 32-bit int the literal 0x80000000 has an unsigned
 * type, so this constant is not negative until it is converted; confirm
 * that every user converts it as intended. */
#define MIN_NEG_INT32   (0x80000000)
101+
102+
/*
 * Compile-time tiling and datapath parameters.
 * HW_S, K, Tr and Tc determine the on-chip input buffer size through
 * (T-1)*HW_S + K; Tm, Tn and LANE_NUM determine the lane-group counts.
 * NOTE(review): presumably HW_S is the largest stride and K the largest
 * kernel size supported by the hardware; confirm against the HLS source.
 */
#define HW_S            2
#define K               3
#define Tn              8
#define Tm              24
#define Tr              26
#define Tc              26
#define MAX_BETA_LENGTH 1024
#define LANE_NUM        8
#define EXTRA_BIT       10   /* was defined twice; the redundant copy is removed */
#define INTERQ_MAX      16
113+
114+
#define OnChipIB_Width ((Tc-1)*HW_S+K)
115+
#define OnChipIB_Height ((Tr-1)*HW_S+K)
116+
117+
#define PRAGMA_SUB(x) _Pragma (#x)
118+
#define DO_PRAGMA(x) PRAGMA_SUB(x)
119+
120+
const uint32_t IB_W = OnChipIB_Width;
121+
const uint32_t IB_H = OnChipIB_Height;
122+
const uint32_t IB_HxW = IB_H*IB_W;
123+
const uint32_t TnxIB_H = Tn*IB_H;
124+
const uint32_t TnxIB_HxIB_W = Tn*IB_H*IB_W;
125+
const uint32_t TrxTc = Tr*Tc;
126+
const uint32_t Tmax_dx = (Tm+LANE_NUM-1)/LANE_NUM;
127+
const uint32_t Tnax_dx = (Tn+LANE_NUM-1)/LANE_NUM;
128+
129+
/*
 * 32-bit memory-mapped register access.
 * BaseAddress is expected to be a byte pointer, so RegOffset is a byte
 * offset. Both expansions are fully parenthesised so that the macros behave
 * as single expressions inside larger expressions (for example
 * `WriteReg(b, o, v) + 1` stores v, not v + 1).
 */
#define WriteReg(BaseAddress, RegOffset, Data) \
	(*(volatile unsigned int *)((BaseAddress) + (RegOffset)) = (Data))
#define ReadReg(BaseAddress, RegOffset) \
	(*(volatile unsigned int *)((BaseAddress) + (RegOffset)))

/* Page size in bytes used as mapping and transfer granularity (4 KiB). */
#define HPAGESIZE (4*1024)
133+
134+
/**
 * Copy byte_num bytes from the buffer orig to physical address in_buffer,
 * using a temporary mapping of /dev/mem. No status is returned.
 */
void copy_mem2dev(uint8_t *orig,uint32_t byte_num, unsigned long in_buffer);

/**
 * Copy byte_num bytes from physical address in_buffer into the buffer dst,
 * using a temporary mapping of /dev/mem. No status is returned.
 */
void copy_dev2mem(uint8_t *dst,uint32_t byte_num, unsigned long in_buffer);

/**
 * Load the file bin_file into physical memory starting at in_buffer, in
 * HPAGESIZE chunks. Returns 0 on success, -1 when an error is reported.
 */
int copy_file2mem(char *bin_file,uint32_t byte_num,unsigned long in_buffer);

/**
 * Write byte_num bytes starting at physical address in_buffer to the file
 * bin_file. Returns 0 on success, -1 when an error is reported.
 */
int copy_mem2file(char *bin_file,uint32_t byte_num,unsigned long in_buffer);

/**
 * Wall-clock time from gettimeofday() in seconds as a double; 0 if the
 * call fails.
 */
double what_time_is_it_now();
//typedef ap_uint<16*LANE_NUM> DT_IO;

/**
 * Program the accelerator control registers with the given arguments, start
 * it and busy-wait until it reports done. Returns 0 on completion, -1 when
 * the register window cannot be mapped.
 *
 * en_bits: enable_bits[2:0]={IsReLU, LoadBias, IsNotConv}
 */
int FPGA_Acc(unsigned int ifm_addr, unsigned int ofm_addr, unsigned int weight_offset, unsigned int bias_offset, uint32_t k_s_pad_ltype, uint32_t iofm_num, uint32_t ifm_w_h, uint32_t ofm_w_h,
	uint32_t TRTC, uint32_t TMTN, int32_t NToy, int32_t NTox, int32_t NTof, int32_t NTcomb, int32_t NTif, uint8_t lmode, int32_t NTcomb_l, int16_t pad_val, int16_t div_kk,
	uint32_t IHW, uint32_t OHW, uint32_t KK_INumxKK, uint32_t en_bits, uint32_t weightQ, uint32_t biasQ, uint32_t ifmQ, uint32_t ofmQ, uint32_t avgQ, uint32_t interQ);

0 commit comments

Comments
 (0)