// gpu_info_struct.h: per-layer GPU bookkeeping (cuBLAS handle, CUDA streams, and CUDA events)
#ifndef GPU_INFO_STRUCT_H
#define GPU_INFO_STRUCT_H

#include <vector>

#include <cuda_runtime.h>
#include <cublas_v2.h>

//CUBLAS_ERROR_WRAPPER is assumed to be defined by the project's utility header
//before this file is included

struct layer_gpu_info {
    int device_number = 0; //the input layer always gets device 0
    cublasHandle_t handle;

    //streams are shared for forward and back prop
    cudaStream_t s0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,
        s12,s13,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23,s24,s25,s26,s27;

    //forward prop events
    cudaEvent_t sparse_forward_start;
    cudaEvent_t i_t_part1,i_t_full;
    cudaEvent_t f_t_part1,f_t_full;
    cudaEvent_t c_prime_t_tanh_part1,c_prime_t_tanh_full;
    cudaEvent_t o_t_part1,o_t_full;

    //backprop events
    cudaEvent_t backprop_init;
    cudaEvent_t err_ot_done;
    cudaEvent_t err_ft_done;
    cudaEvent_t err_tanhcpt_done;
    cudaEvent_t err_it_done;
    cudaEvent_t htm1_p1_done;
    cudaEvent_t htm1_p2_done;
    cudaEvent_t htm1_p3_done;
    cudaEvent_t htm1_p4_done;
    cudaEvent_t W_grad_p1_done;
    cudaEvent_t W_grad_p2_done;
    cudaEvent_t W_grad_p3_done;
    cudaEvent_t W_grad_p4_done;
    cudaEvent_t attention_forward; //obtained from the attention layer when feed input is enabled
    cudaEvent_t error_htild_below; //created here and shared with the attention layer

    //events for synchronizing the backprop
    cudaEvent_t htm1_done;
    cudaEvent_t htm1_done_temp;
    cudaEvent_t ctm1_done;
    cudaEvent_t W_grad_full_done;
    cudaEvent_t W_hi_grad_done;
    cudaEvent_t W_hf_grad_done;
    cudaEvent_t W_ho_grad_done;
    cudaEvent_t W_hc_grad_done;
    cudaEvent_t M_i_grad_done;
    cudaEvent_t M_f_grad_done;
    cudaEvent_t M_o_grad_done;
    cudaEvent_t M_c_grad_done;
    cudaEvent_t b_i_grad_done;
    cudaEvent_t b_f_grad_done;
    cudaEvent_t b_o_grad_done;
    cudaEvent_t b_c_grad_done;
    cudaEvent_t char_cnn_ready;
    cudaEvent_t h_t_below_transfer; //transfer of h_t to the layer above
    cudaEvent_t dropout_done;
    cudaEvent_t d_ERR_ht_done;
    void init(int device_number) {
        this->device_number = device_number;
        cudaSetDevice(device_number);
        CUBLAS_ERROR_WRAPPER(cublasCreate(&handle),"CUBLAS handler initialization failed\n");

        //create the streams
        cudaStreamCreate(&s0);
        cudaStreamCreate(&s1);
        cudaStreamCreate(&s2);
        cudaStreamCreate(&s3);
        cudaStreamCreate(&s4);
        cudaStreamCreate(&s5);
        cudaStreamCreate(&s6);
        cudaStreamCreate(&s7);
        cudaStreamCreate(&s8);
        cudaStreamCreate(&s9);
        cudaStreamCreate(&s10);
        cudaStreamCreate(&s11);
        cudaStreamCreate(&s12);
        cudaStreamCreate(&s13);
        cudaStreamCreate(&s14);
        cudaStreamCreate(&s15);
        cudaStreamCreate(&s16);
        cudaStreamCreate(&s17);
        cudaStreamCreate(&s18);
        cudaStreamCreate(&s19);
        cudaStreamCreate(&s20);
        cudaStreamCreate(&s21);
        cudaStreamCreate(&s22);
        cudaStreamCreate(&s23);
        cudaStreamCreate(&s24);
        cudaStreamCreate(&s25);
        cudaStreamCreate(&s26);
        cudaStreamCreate(&s27);

        //create the events
        cudaEventCreate(&sparse_forward_start);
        cudaEventCreate(&i_t_part1);
        cudaEventCreate(&i_t_full);
        cudaEventCreate(&f_t_part1);
        cudaEventCreate(&f_t_full);
        cudaEventCreate(&c_prime_t_tanh_part1);
        cudaEventCreate(&c_prime_t_tanh_full);
        cudaEventCreate(&o_t_part1);
        cudaEventCreate(&o_t_full);
        cudaEventCreate(&W_grad_full_done);
        cudaEventCreate(&error_htild_below);
        cudaEventCreate(&backprop_init);
        cudaEventCreate(&err_ot_done);
        cudaEventCreate(&err_ft_done);
        cudaEventCreate(&err_tanhcpt_done);
        cudaEventCreate(&err_it_done);
        cudaEventCreate(&htm1_p1_done);
        cudaEventCreate(&htm1_p2_done);
        cudaEventCreate(&htm1_p3_done);
        cudaEventCreate(&htm1_p4_done);
        cudaEventCreate(&W_grad_p1_done);
        cudaEventCreate(&W_grad_p2_done);
        cudaEventCreate(&W_grad_p3_done);
        cudaEventCreate(&W_grad_p4_done);
        cudaEventCreate(&htm1_done);
        cudaEventCreate(&htm1_done_temp);
        cudaEventCreate(&ctm1_done);
        cudaEventCreate(&W_hi_grad_done);
        cudaEventCreate(&W_hf_grad_done);
        cudaEventCreate(&W_ho_grad_done);
        cudaEventCreate(&W_hc_grad_done);
        cudaEventCreate(&M_i_grad_done);
        cudaEventCreate(&M_f_grad_done);
        cudaEventCreate(&M_o_grad_done);
        cudaEventCreate(&M_c_grad_done);
        cudaEventCreate(&b_i_grad_done);
        cudaEventCreate(&b_f_grad_done);
        cudaEventCreate(&b_o_grad_done);
        cudaEventCreate(&b_c_grad_done);
        cudaEventCreate(&char_cnn_ready);
        cudaEventCreate(&h_t_below_transfer);
        cudaEventCreate(&dropout_done);
        cudaEventCreate(&d_ERR_ht_done);
        cudaEventCreate(&attention_forward);

        //reset the current device back to 0
        cudaSetDevice(0);
    }
};
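
// Example (not part of the original file): a minimal sketch of the stream/event
// pattern these members support during forward prop. The device pointers, sizes,
// and the elementwise_i_t kernel are hypothetical placeholders; only the
// layer_gpu_info members and the CUDA/cuBLAS calls are real APIs.
//
// layer_gpu_info gpu_info;
// gpu_info.init(1); //place this layer on device 1
// cudaSetDevice(gpu_info.device_number);
// float alpha = 1.0f, beta = 0.0f;
//
// //stream s0: hidden-state GEMM for the input gate, then record i_t_part1
// cublasSetStream(gpu_info.handle, gpu_info.s0);
// cublasSgemm(gpu_info.handle, CUBLAS_OP_N, CUBLAS_OP_N,
//     LSTM_size, minibatch_size, LSTM_size,
//     &alpha, d_W_hi, LSTM_size, d_h_t_prev, LSTM_size,
//     &beta, d_i_t_part, LSTM_size);
// cudaEventRecord(gpu_info.i_t_part1, gpu_info.s0);
//
// //stream s1: finish the input gate only after the GEMM on s0 has completed
// cudaStreamWaitEvent(gpu_info.s1, gpu_info.i_t_part1, 0);
// elementwise_i_t<<<grid, block, 0, gpu_info.s1>>>(d_i_t, d_i_t_part, d_b_i, LSTM_size*minibatch_size);
// cudaEventRecord(gpu_info.i_t_full, gpu_info.s1);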
struct softmax_layer_gpu_info {
    int device_number = 0; //this is for single GPU at the moment
    cublasHandle_t handle;
    cudaStream_t s0,s1,s2,s3;
    cudaEvent_t outputdist_done;
    cudaEvent_t d_ERR_ht_done;
    cudaEvent_t d_b_d_grad_done;
    cudaEvent_t d_D_grad_done;
    void init(int device_number) {
        this->device_number = device_number;
        cudaSetDevice(device_number);
        CUBLAS_ERROR_WRAPPER(cublasCreate(&handle),"CUBLAS handler initialization failed\n");
        cudaStreamCreate(&s0);
        cudaStreamCreate(&s1);
        cudaStreamCreate(&s2);
        cudaStreamCreate(&s3);
        cudaEventCreate(&outputdist_done);
        cudaEventCreate(&d_ERR_ht_done);
        cudaEventCreate(&d_D_grad_done);
        cudaEventCreate(&d_b_d_grad_done);
        cudaSetDevice(0);
    }
};
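
// Example (illustration only): the kind of cross-struct handoff the softmax events
// enable during backprop. Names like softmax_gpu and top_layer_gpu are hypothetical;
// this is a sketch of the pattern, not the project's exact call sequence.
//
// softmax_layer_gpu_info softmax_gpu;
// layer_gpu_info top_layer_gpu;
// softmax_gpu.init(0);
// top_layer_gpu.init(0);
//
// //softmax stream s0: ...compute d_ERR_ht from the output distribution... then signal
// cudaEventRecord(softmax_gpu.d_ERR_ht_done, softmax_gpu.s0);
//
// //the top LSTM layer does not start backprop until the error is ready
// cudaStreamWaitEvent(top_layer_gpu.s0, softmax_gpu.d_ERR_ht_done, 0);
// cudaEventRecord(top_layer_gpu.backprop_init, top_layer_gpu.s0);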
struct bi_layer_info {
    int device_number;
    cublasHandle_t handle;
    cudaStream_t s0;
    std::vector<int> layer_indicies;
    void init(int device_number) {
        this->device_number = device_number;
        cudaSetDevice(device_number);
        CUBLAS_ERROR_WRAPPER(cublasCreate(&handle),"CUBLAS handler initialization failed\n");
        cudaStreamCreate(&s0);
        //note: unlike the other init() functions, this one does not reset the current device to 0
    }
};
struct attention_layer_gpu_info {
    int device_number = 0;
    cudaStream_t s0;
    // cudaEvent_t ht_mat_done;
    // cudaEvent_t ct_mat_done;
    // cudaEvent_t ct_done;
    cudaEvent_t start_forward;
    cudaEvent_t start_backward;
    cudaEvent_t forward_prop_done;
    cudaEvent_t backward_prop_done;
    cudaEvent_t error_htild_below; //created by the LSTM layer's layer_gpu_info and shared with this layer
    // std::vector<cudaStream_t> alignment_streams; // (2*D+1) streams
    // std::vector<cudaEvent_t> alignment_events; // (2*D+1) events
    void init(int device_number,int D) {
        this->device_number = device_number;
        cudaSetDevice(device_number);
        cudaStreamCreate(&s0);
        // cudaStreamCreate(&s1);
        // cudaStreamCreate(&s2);
        // cudaEventCreate(&ht_mat_done);
        // cudaEventCreate(&ct_mat_done);
        cudaEventCreate(&start_forward);
        cudaEventCreate(&start_backward);
        cudaEventCreate(&forward_prop_done);
        cudaEventCreate(&backward_prop_done);
        // for(int i=0; i<(2*D+1)*3; i++) {
        //     cudaStream_t temp;
        //     alignment_streams.push_back(temp);
        //     cudaStreamCreate(&alignment_streams[alignment_streams.size()-1]);
        //     cudaEvent_t temp_ev;
        //     alignment_events.push_back(temp_ev);
        //     cudaEventCreate(&alignment_events[alignment_events.size()-1]);
        // }
    }
};
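
// Example (illustration only): the forward handshake that start_forward and
// forward_prop_done support. The lstm_gpu variable is hypothetical, D is an
// arbitrary value (it is only referenced by the commented-out alignment
// bookkeeping above), and this is a sketch of the pattern rather than the
// project's exact call sequence.
//
// attention_layer_gpu_info attent_gpu;
// attent_gpu.init(0, 10);
//
// //the LSTM layer signals that h_t is ready for the attention computation
// cudaEventRecord(attent_gpu.start_forward, lstm_gpu.s0);
//
// //the attention stream waits, does its work, then signals completion
// cudaStreamWaitEvent(attent_gpu.s0, attent_gpu.start_forward, 0);
// //...alignments, context vector, and h_tild computed on attent_gpu.s0...
// cudaEventRecord(attent_gpu.forward_prop_done, attent_gpu.s0);
//
// //the LSTM layer waits on forward_prop_done before consuming the attention output
// cudaStreamWaitEvent(lstm_gpu.s0, attent_gpu.forward_prop_done, 0);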
#endif