forked from dillonhuff/clockwork
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gaussian_unroll.cpp
256 lines (217 loc) · 29.9 KB
/
gaussian_unroll.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#include "example_progs.h"
// Builds the clockwork program description for the 4x-unrolled gaussian
// pipeline named "gaussian_unroll".
//
// Stages are registered in this order:
//   1. hw_input_stencil                    -> hw_input_global_wrapper_glb_stencil
//   2. blur_unnormalized_stencil: an init op, then four update ops that each
//      load the accumulator plus a 3x3 window of the wrapped input
//   3. blur_unnormalized_stencil           -> blur_stencil
//   4. blur_stencil                        -> hw_output_glb_stencil
//   5. hw_output_glb_stencil               -> hw_output_global_wrapper_stencil
//
// This function only records loops, ops and their buffer accesses. The
// arithmetic is done by the functions named through add_function, presumably
// defined in the compute unit file set below (not visible from here). Per the
// generator's original annotations, the update ops apply the weights
// 24/30/24, 30/37/30, 24/30/24 and the blur stage divides by 256.
prog gaussian_unroll() {
  // Number of side-by-side copies the generator emitted for each unrolled op.
  const int kLanes = 4;

  // Name suffix of the k-th generated op: none for k == 0, "_<k>" otherwise.
  const auto op_tag = [](int k) {
    return k == 0 ? std::string() : "_" + std::to_string(k);
  };

  // Index expression `base` shifted by `delta`, spelled the way the generator
  // spells it: `base` itself for a zero shift, "(base + delta)" otherwise.
  const auto shifted = [](const std::string& base, int delta) {
    return delta == 0 ? base : "(" + base + " + " + std::to_string(delta) + ")";
  };

  prog prg;
  prg.compute_unit_file = "gaussian_unroll_compute.h";
  prg.name = "gaussian_unroll";

  // Generator annotation: Stencil<uint16_t, 5844, 3962> &hw_input_stencil = arg_1;
  prg.add_input("hw_input_stencil");
  prg.buffer_port_widths["hw_input_stencil"] = 16;
  // Generator annotation: Stencil<uint16_t, 5842, 3960> &hw_output_global_wrapper_stencil = arg_3;
  prg.add_output("hw_output_global_wrapper_stencil");
  prg.buffer_port_widths["hw_output_global_wrapper_stencil"] = 16;

  // ---- Stage 1: copy hw_input into hw_input_global_wrapper.glb ------------
  {
    const std::string row = "hw_input_global_wrapper_s0_y";
    auto in_rows = prg.add_loop(row, 0, 200);
    auto in_cols = in_rows->add_loop("hw_input_global_wrapper_s0_x_x", 0, 64);
    for (int lane = 0; lane < kLanes; ++lane) {
      const std::string fn = "hcompute_hw_input_global_wrapper_glb_stencil" + op_tag(lane);
      const std::string col = shifted("(hw_input_global_wrapper_s0_x_x*4)", lane);
      auto copy_in = in_cols->add_op("op_" + fn);
      copy_in->add_function(fn);
      copy_in->add_load("hw_input_stencil", row, col);
      // The destination width is registered right before its first store,
      // exactly where the generator placed it.
      if (lane == 0) prg.buffer_port_widths["hw_input_global_wrapper_glb_stencil"] = 16;
      copy_in->add_store("hw_input_global_wrapper_glb_stencil", row, col);
    }
  }

  // ---- Stage 2a: initialise blur_unnormalized (store-only op) -------------
  {
    auto init_rows = prg.add_loop("blur_unnormalized_s0_y", 0, 198);
    auto init_cols = init_rows->add_loop("blur_unnormalized_s0_x", 0, 254);
    auto init_acc = init_cols->add_op("op_hcompute_blur_unnormalized_stencil");
    init_acc->add_function("hcompute_blur_unnormalized_stencil");
    prg.buffer_port_widths["blur_unnormalized_stencil"] = 16;
    init_acc->add_store("blur_unnormalized_stencil", "blur_unnormalized_s0_y", "blur_unnormalized_s0_x");
  }

  // ---- Stage 2b: update blur_unnormalized from a 3x3 input window ---------
  // NOTE(review): if add_loop bounds are half-open [lo, hi) (not confirmed
  // from this file), this stage touches accumulator columns 0..255 and input
  // columns up to 63*4 + 5 = 257, whereas the init loop above covers columns
  // 0..253 and the input copy stage covers columns 0..255. This is generated
  // code and is reproduced unchanged; confirm against the generator.
  {
    const std::string row = "blur_unnormalized_s1_y";
    const std::string col_base = "(blur_unnormalized_s1_x_x*4)";
    // (dy, dx) offsets in the exact order the generator emitted the loads;
    // note the last row is visited as dx = 0, 2, 1.
    const int taps[9][2] = {
        {0, 0}, {0, 1}, {0, 2},
        {1, 0}, {1, 1}, {1, 2},
        {2, 0}, {2, 2}, {2, 1},
    };
    auto acc_rows = prg.add_loop(row, 0, 198);
    auto acc_cols = acc_rows->add_loop("blur_unnormalized_s1_x_x", 0, 64);
    for (int lane = 0; lane < kLanes; ++lane) {
      // Suffix 0 is taken by the init op, so the update ops are _1 .. _4.
      const std::string fn = "hcompute_blur_unnormalized_stencil" + op_tag(lane + 1);
      const std::string out_col = shifted(col_base, lane);
      auto accumulate = acc_cols->add_op("op_" + fn);
      accumulate->add_function(fn);
      // The accumulator is loaded first, then the nine input taps.
      accumulate->add_load("blur_unnormalized_stencil", row, out_col);
      for (int t = 0; t < 9; ++t) {
        accumulate->add_load("hw_input_global_wrapper_glb_stencil",
                             shifted(row, taps[t][0]),
                             shifted(col_base, lane + taps[t][1]));
      }
      accumulate->add_store("blur_unnormalized_stencil", row, out_col);
    }
  }

  // ---- Stage 3: blur_unnormalized -> blur ---------------------------------
  // Column indices are written as min(x*4, 250) + lane, as generated.
  {
    const std::string row = "blur_s0_y";
    auto norm_rows = prg.add_loop(row, 0, 198);
    auto norm_cols = norm_rows->add_loop("blur_s0_x_x", 0, 64);
    for (int lane = 0; lane < kLanes; ++lane) {
      const std::string fn = "hcompute_blur_stencil" + op_tag(lane);
      const std::string col = shifted("min((blur_s0_x_x*4), 250)", lane);
      auto normalize = norm_cols->add_op("op_" + fn);
      normalize->add_function(fn);
      normalize->add_load("blur_unnormalized_stencil", row, col);
      if (lane == 0) prg.buffer_port_widths["blur_stencil"] = 16;
      normalize->add_store("blur_stencil", row, col);
    }
  }

  // ---- Stages 4 and 5: the two output copy stages -------------------------
  // Both have the same shape: four unrolled copies inside the column loop
  // (63 iterations), followed by four ops attached directly to the row loop
  // that copy the fixed columns 250..253.
  const auto add_output_copy = [&](const std::string& stage, const std::string& src,
                                   const std::string& dst, bool declare_dst_width) {
    const std::string row = stage + "_s0_y_yi";
    const std::string col_var = stage + "_s0_x_xi_xi";
    const std::string fn_base = "hcompute_" + dst;

    auto out_rows = prg.add_loop(row, 0, 198);
    auto out_cols = out_rows->add_loop(col_var, 0, 63);

    for (int lane = 0; lane < kLanes; ++lane) {
      const std::string fn = fn_base + op_tag(lane);
      const std::string col = shifted("(" + col_var + "*4)", lane);
      auto copy_out = out_cols->add_op("op_" + fn);
      copy_out->add_function(fn);
      copy_out->add_load(src, row, col);
      if (declare_dst_width && lane == 0) prg.buffer_port_widths[dst] = 16;
      copy_out->add_store(dst, row, col);
    }

    // Fixed-column ops continue the op numbering after the unrolled copies
    // (_4 .. _7) and are children of the row loop, after the column loop.
    for (int extra = 0; extra < 4; ++extra) {
      const std::string fn = fn_base + op_tag(kLanes + extra);
      const std::string col = std::to_string(250 + extra);
      auto copy_tail = out_rows->add_op("op_" + fn);
      copy_tail->add_function(fn);
      copy_tail->add_load(src, row, col);
      copy_tail->add_store(dst, row, col);
    }
  };

  // blur -> hw_output.glb (destination width registered before first store).
  add_output_copy("hw_output", "blur_stencil", "hw_output_glb_stencil", true);
  // hw_output.glb -> hw_output_global_wrapper (width already set with add_output).
  add_output_copy("hw_output_global_wrapper", "hw_output_glb_stencil",
                  "hw_output_global_wrapper_stencil", false);

  return prg;
}