forked from dillonhuff/clockwork
-
Notifications
You must be signed in to change notification settings - Fork 0
/
complex_mem_pond_compute.h
142 lines (111 loc) · 9.49 KB
/
complex_mem_pond_compute.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#pragma once
#include "hw_classes.h"
#include "clockwork_standard_compute_units.h"
//store is: input_cgra.stencil(input_cgra_s0_zz, input_cgra_s0_z, input_cgra_s0_x, input_cgra_s0_y) = input_host.stencil(((input_cgra_s0_z*4) + input_cgra_s0_zz), input_cgra_s0_x, input_cgra_s0_y)
// Pass-through compute unit for the input_host -> input_cgra store.
// Reads the extract<0, 15>() field of input_host_stencil, narrows it to
// uint16_t and returns it as the result word; no arithmetic is performed.
// NOTE(review): extract<> is declared in hw_classes.h; presumably it selects
// bits 0 through 15 of the word - confirm against that header.
hw_uint<16> hcompute_input_cgra_stencil(hw_uint<16>& input_host_stencil) {
  const uint16_t host_value = static_cast<uint16_t>(input_host_stencil.extract<0, 15>());
  return host_value;
}
//store is: kernel_cgra.stencil(kernel_cgra_s0_z, (kernel_cgra_s0_w + 45), kernel_cgra_s0_x, kernel_cgra_s0_y) = kernel_host.stencil(kernel_cgra_s0_z, kernel_cgra_s0_w, kernel_cgra_s0_x, kernel_cgra_s0_y)
// Pass-through compute unit for the kernel_host -> kernel_cgra store.
// Reads the extract<0, 15>() field of kernel_host_stencil, narrows it to
// uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_kernel_cgra_stencil(hw_uint<16>& kernel_host_stencil) {
  const uint16_t host_weight = static_cast<uint16_t>(kernel_host_stencil.extract<0, 15>());
  return host_weight;
}
//store is: output_pond.stencil(0, 0, output_pond_s0_w_w_unroll) = (uint16)0
// Initialisation compute unit for output_pond: takes no inputs and always
// returns the 16-bit value zero.
hw_uint<16> hcompute_output_pond_stencil() {
  const uint16_t zero_value = static_cast<uint16_t>(0);
  return zero_value;
}
//store is: input_pond.stencil(input_pond_s0_zz, input_pond_s0_z, ((input_pond_s0_x + output_cgra_s0_x) - output_cgra_s0_x), ((input_pond_s0_y + output_cgra_s0_y) - output_cgra_s0_y)) = input_cgra.stencil(input_pond_s0_zz, input_pond_s0_z, (input_pond_s0_x + output_cgra_s0_x), (input_pond_s0_y + output_cgra_s0_y))
// Pass-through compute unit for the input_cgra -> input_pond store.
// Reads the extract<0, 15>() field of input_cgra_stencil, narrows it to
// uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_input_pond_stencil(hw_uint<16>& input_cgra_stencil) {
  const uint16_t cgra_value = static_cast<uint16_t>(input_cgra_stencil.extract<0, 15>());
  return cgra_value;
}
//store is: kernel_pond.stencil(kernel_pond_s0_zz, kernel_pond_s0_z, kernel_pond_s0_w_w_cgra, kernel_pond_s0_x, kernel_pond_s0_y) = kernel_cgra.stencil(((kernel_pond_s0_z*4) + kernel_pond_s0_zz), (((output_cgra_s0_w_w*3) + kernel_pond_s0_w_w_cgra) + 45), kernel_pond_s0_x, kernel_pond_s0_y)
// Pass-through compute unit for the kernel_cgra -> kernel_pond store.
// Reads the extract<0, 15>() field of kernel_cgra_stencil, narrows it to
// uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_kernel_pond_stencil(hw_uint<16>& kernel_cgra_stencil) {
  const uint16_t cgra_weight = static_cast<uint16_t>(kernel_cgra_stencil.extract<0, 15>());
  return cgra_weight;
}
//store is: output_pond.stencil(0, 0, 0) = ((kernel_pond.stencil(0, output_pond_s1_r_z, 0, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(0, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + ((kernel_pond.stencil(1, output_pond_s1_r_z, 0, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(1, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + ((kernel_pond.stencil(2, output_pond_s1_r_z, 0, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(2, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + (output_pond.stencil(0, 0, 0) + (kernel_pond.stencil(3, output_pond_s1_r_z, 0, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(3, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y))))))
// Multiply-accumulate compute unit for the output_pond.stencil(0, 0, 0) update.
//
// input_pond_stencil and kernel_pond_stencil are each read as four 16-bit
// fields (extract<0,15>, <16,31>, <32,47>, <48,63>). Fields at the same
// position are multiplied together and the four products are added to the
// 16-bit field read from output_pond_stencil. Every intermediate value is
// truncated to 16 bits, so the result is the sum modulo 2^16.
//
// The products are formed in uint32_t on purpose: multiplying two uint16_t
// operands directly promotes both to signed int, and a product larger than
// INT_MAX (for example 0xFFFF * 0xFFFF) is signed overflow, i.e. undefined
// behaviour. Unsigned 32-bit arithmetic yields the same low 16 bits and is
// well-defined for all inputs.
hw_uint<16> hcompute_output_pond_stencil_1(hw_uint<64>& input_pond_stencil, hw_uint<64>& kernel_pond_stencil, hw_uint<16>& output_pond_stencil) {
  // Unpack the four activation fields.
  const uint16_t in0 = static_cast<uint16_t>(input_pond_stencil.extract<0, 15>());
  const uint16_t in1 = static_cast<uint16_t>(input_pond_stencil.extract<16, 31>());
  const uint16_t in2 = static_cast<uint16_t>(input_pond_stencil.extract<32, 47>());
  const uint16_t in3 = static_cast<uint16_t>(input_pond_stencil.extract<48, 63>());

  // Unpack the four weight fields.
  const uint16_t w0 = static_cast<uint16_t>(kernel_pond_stencil.extract<0, 15>());
  const uint16_t w1 = static_cast<uint16_t>(kernel_pond_stencil.extract<16, 31>());
  const uint16_t w2 = static_cast<uint16_t>(kernel_pond_stencil.extract<32, 47>());
  const uint16_t w3 = static_cast<uint16_t>(kernel_pond_stencil.extract<48, 63>());

  // Running accumulator value.
  const uint16_t acc = static_cast<uint16_t>(output_pond_stencil.extract<0, 15>());

  // Field-wise products, computed unsigned and truncated to 16 bits.
  const uint16_t prod0 = static_cast<uint16_t>(static_cast<uint32_t>(w0) * static_cast<uint32_t>(in0));
  const uint16_t prod1 = static_cast<uint16_t>(static_cast<uint32_t>(w1) * static_cast<uint32_t>(in1));
  const uint16_t prod2 = static_cast<uint16_t>(static_cast<uint32_t>(w2) * static_cast<uint32_t>(in2));
  const uint16_t prod3 = static_cast<uint16_t>(static_cast<uint32_t>(w3) * static_cast<uint32_t>(in3));

  // Accumulate in the nesting order of the store expression:
  // prod0 + (prod1 + (prod2 + (acc + prod3))).
  const uint16_t sum3 = static_cast<uint16_t>(acc + prod3);
  const uint16_t sum2 = static_cast<uint16_t>(prod2 + sum3);
  const uint16_t sum1 = static_cast<uint16_t>(prod1 + sum2);
  const uint16_t sum0 = static_cast<uint16_t>(prod0 + sum1);
  return sum0;
}
//store is: output_pond.stencil(0, 0, 1) = ((kernel_pond.stencil(0, output_pond_s1_r_z, 1, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(0, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + ((kernel_pond.stencil(1, output_pond_s1_r_z, 1, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(1, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + ((kernel_pond.stencil(2, output_pond_s1_r_z, 1, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(2, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + (output_pond.stencil(0, 0, 1) + (kernel_pond.stencil(3, output_pond_s1_r_z, 1, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(3, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y))))))
// Multiply-accumulate compute unit for the output_pond.stencil(0, 0, 1) update.
//
// input_pond_stencil and kernel_pond_stencil are each read as four 16-bit
// fields (extract<0,15>, <16,31>, <32,47>, <48,63>). Fields at the same
// position are multiplied together and the four products are added to the
// 16-bit field read from output_pond_stencil. Every intermediate value is
// truncated to 16 bits, so the result is the sum modulo 2^16.
//
// The products are formed in uint32_t on purpose: multiplying two uint16_t
// operands directly promotes both to signed int, and a product larger than
// INT_MAX (for example 0xFFFF * 0xFFFF) is signed overflow, i.e. undefined
// behaviour. Unsigned 32-bit arithmetic yields the same low 16 bits and is
// well-defined for all inputs.
hw_uint<16> hcompute_output_pond_stencil_2(hw_uint<64>& input_pond_stencil, hw_uint<64>& kernel_pond_stencil, hw_uint<16>& output_pond_stencil) {
  // Unpack the four activation fields.
  const uint16_t in0 = static_cast<uint16_t>(input_pond_stencil.extract<0, 15>());
  const uint16_t in1 = static_cast<uint16_t>(input_pond_stencil.extract<16, 31>());
  const uint16_t in2 = static_cast<uint16_t>(input_pond_stencil.extract<32, 47>());
  const uint16_t in3 = static_cast<uint16_t>(input_pond_stencil.extract<48, 63>());

  // Unpack the four weight fields.
  const uint16_t w0 = static_cast<uint16_t>(kernel_pond_stencil.extract<0, 15>());
  const uint16_t w1 = static_cast<uint16_t>(kernel_pond_stencil.extract<16, 31>());
  const uint16_t w2 = static_cast<uint16_t>(kernel_pond_stencil.extract<32, 47>());
  const uint16_t w3 = static_cast<uint16_t>(kernel_pond_stencil.extract<48, 63>());

  // Running accumulator value.
  const uint16_t acc = static_cast<uint16_t>(output_pond_stencil.extract<0, 15>());

  // Field-wise products, computed unsigned and truncated to 16 bits.
  const uint16_t prod0 = static_cast<uint16_t>(static_cast<uint32_t>(w0) * static_cast<uint32_t>(in0));
  const uint16_t prod1 = static_cast<uint16_t>(static_cast<uint32_t>(w1) * static_cast<uint32_t>(in1));
  const uint16_t prod2 = static_cast<uint16_t>(static_cast<uint32_t>(w2) * static_cast<uint32_t>(in2));
  const uint16_t prod3 = static_cast<uint16_t>(static_cast<uint32_t>(w3) * static_cast<uint32_t>(in3));

  // Accumulate in the nesting order of the store expression:
  // prod0 + (prod1 + (prod2 + (acc + prod3))).
  const uint16_t sum3 = static_cast<uint16_t>(acc + prod3);
  const uint16_t sum2 = static_cast<uint16_t>(prod2 + sum3);
  const uint16_t sum1 = static_cast<uint16_t>(prod1 + sum2);
  const uint16_t sum0 = static_cast<uint16_t>(prod0 + sum1);
  return sum0;
}
//store is: output_pond.stencil(0, 0, 2) = ((kernel_pond.stencil(0, output_pond_s1_r_z, 2, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(0, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + ((kernel_pond.stencil(1, output_pond_s1_r_z, 2, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(1, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + ((kernel_pond.stencil(2, output_pond_s1_r_z, 2, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(2, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y)) + (output_pond.stencil(0, 0, 2) + (kernel_pond.stencil(3, output_pond_s1_r_z, 2, output_pond_s1_r_x, output_pond_s1_r_y)*input_pond.stencil(3, output_pond_s1_r_z, output_pond_s1_r_x, output_pond_s1_r_y))))))
// Multiply-accumulate compute unit for the output_pond.stencil(0, 0, 2) update.
//
// input_pond_stencil and kernel_pond_stencil are each read as four 16-bit
// fields (extract<0,15>, <16,31>, <32,47>, <48,63>). Fields at the same
// position are multiplied together and the four products are added to the
// 16-bit field read from output_pond_stencil. Every intermediate value is
// truncated to 16 bits, so the result is the sum modulo 2^16 and does not
// depend on the order in which the products are added.
//
// The products are formed in uint32_t on purpose: multiplying two uint16_t
// operands directly promotes both to signed int, and a product larger than
// INT_MAX (for example 0xFFFF * 0xFFFF) is signed overflow, i.e. undefined
// behaviour. Unsigned 32-bit arithmetic yields the same low 16 bits and is
// well-defined for all inputs.
hw_uint<16> hcompute_output_pond_stencil_3(hw_uint<64>& input_pond_stencil, hw_uint<64>& kernel_pond_stencil, hw_uint<16>& output_pond_stencil) {
  // Unpack the four activation fields.
  const uint16_t in0 = static_cast<uint16_t>(input_pond_stencil.extract<0, 15>());
  const uint16_t in1 = static_cast<uint16_t>(input_pond_stencil.extract<16, 31>());
  const uint16_t in2 = static_cast<uint16_t>(input_pond_stencil.extract<32, 47>());
  const uint16_t in3 = static_cast<uint16_t>(input_pond_stencil.extract<48, 63>());

  // Unpack the four weight fields.
  const uint16_t w0 = static_cast<uint16_t>(kernel_pond_stencil.extract<0, 15>());
  const uint16_t w1 = static_cast<uint16_t>(kernel_pond_stencil.extract<16, 31>());
  const uint16_t w2 = static_cast<uint16_t>(kernel_pond_stencil.extract<32, 47>());
  const uint16_t w3 = static_cast<uint16_t>(kernel_pond_stencil.extract<48, 63>());

  // Running accumulator value.
  const uint16_t acc = static_cast<uint16_t>(output_pond_stencil.extract<0, 15>());

  // Field-wise products, computed unsigned and truncated to 16 bits.
  const uint16_t prod0 = static_cast<uint16_t>(static_cast<uint32_t>(w0) * static_cast<uint32_t>(in0));
  const uint16_t prod1 = static_cast<uint16_t>(static_cast<uint32_t>(w1) * static_cast<uint32_t>(in1));
  const uint16_t prod2 = static_cast<uint16_t>(static_cast<uint32_t>(w2) * static_cast<uint32_t>(in2));
  const uint16_t prod3 = static_cast<uint16_t>(static_cast<uint32_t>(w3) * static_cast<uint32_t>(in3));

  // Accumulate in the nesting order of the store expression:
  // prod0 + (prod1 + (prod2 + (acc + prod3))).
  const uint16_t sum3 = static_cast<uint16_t>(acc + prod3);
  const uint16_t sum2 = static_cast<uint16_t>(prod2 + sum3);
  const uint16_t sum1 = static_cast<uint16_t>(prod1 + sum2);
  const uint16_t sum0 = static_cast<uint16_t>(prod0 + sum1);
  return sum0;
}
//store is: output_cgra.stencil(output_cgra_s0_x, output_cgra_s0_y, (output_cgra_s0_w_w*3)) = output_pond.stencil(0, 0, 0)
// Pass-through compute unit for the output_pond.stencil(0, 0, 0) ->
// output_cgra store. Reads the extract<0, 15>() field of output_pond_stencil,
// narrows it to uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_output_cgra_stencil(hw_uint<16>& output_pond_stencil) {
  const uint16_t pond_result = static_cast<uint16_t>(output_pond_stencil.extract<0, 15>());
  return pond_result;
}
//store is: output_cgra.stencil(output_cgra_s0_x, output_cgra_s0_y, ((output_cgra_s0_w_w*3) + 1)) = output_pond.stencil(0, 0, 1)
// Pass-through compute unit for the output_pond.stencil(0, 0, 1) ->
// output_cgra store. Reads the extract<0, 15>() field of output_pond_stencil,
// narrows it to uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_output_cgra_stencil_1(hw_uint<16>& output_pond_stencil) {
  const uint16_t pond_result = static_cast<uint16_t>(output_pond_stencil.extract<0, 15>());
  return pond_result;
}
//store is: output_cgra.stencil(output_cgra_s0_x, output_cgra_s0_y, ((output_cgra_s0_w_w*3) + 2)) = output_pond.stencil(0, 0, 2)
// Pass-through compute unit for the output_pond.stencil(0, 0, 2) ->
// output_cgra store. Reads the extract<0, 15>() field of output_pond_stencil,
// narrows it to uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_output_cgra_stencil_2(hw_uint<16>& output_pond_stencil) {
  const uint16_t pond_result = static_cast<uint16_t>(output_pond_stencil.extract<0, 15>());
  return pond_result;
}
//store is: hw_output.stencil(hw_output_s0_x_xi, hw_output_s0_y_yi, hw_output_s0_w) = output_cgra.stencil(hw_output_s0_x_xi, hw_output_s0_y_yi, hw_output_s0_w)
// Pass-through compute unit for the output_cgra -> hw_output store.
// Reads the extract<0, 15>() field of output_cgra_stencil, narrows it to
// uint16_t and returns it unchanged.
// NOTE(review): extract<> comes from hw_classes.h; presumably a bit-range
// selection - confirm against that header.
hw_uint<16> hcompute_hw_output_stencil(hw_uint<16>& output_cgra_stencil) {
  const uint16_t cgra_result = static_cast<uint16_t>(output_cgra_stencil.extract<0, 15>());
  return cgra_result;
}