class data_augmentator():
+    # class for data augmentation
    def __init__(self, f_prefix, num_of_data, seq_length, val_percent):
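+        # f_prefix: base path prefix; num_of_data: number of augmented copies per dataset;
+        # seq_length: processing sequence length; val_percent: share of copies allocated to validation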
        self.base_train_path = 'data/train/'
        self.base_validation_path = 'data/validation/'
+        # list of angles to be used for rotation
        self.angles = list(range(0, 360, 30))
        self.num_of_data = np.clip(num_of_data, 0, len(self.angles) - 1)
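+        # note: num_of_data is capped at len(self.angles) - 1 so random.sample always has enough distinct angles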
-        self.num_validation_data = math.ceil(self.num_of_data * val_percent)
-        self.num_train_data = self.num_of_data - self.num_validation_data
+        self.num_validation_data = math.ceil(self.num_of_data * val_percent)  # number of validation datasets
+        self.num_train_data = self.num_of_data - self.num_validation_data  # number of training datasets

        print("For each dataset -----> Number of additional training datasets: ", self.num_train_data, " Number of validation datasets: ", self.num_validation_data)

        self.num_validation_data += 1  # reserve one extra validation dataset
@@ -35,12 +37,13 @@ def __init__(self,f_prefix, num_of_data, seq_length, val_percent):
        self.dataloader = DataLoader(f_prefix, 1, seq_length, 0, forcePreProcess=True, infer=False, generate=True)
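+        # batch size 1, forced preprocessing, generation mode (no inference)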
-
+        # noise parameter definition
        self.noise_std_min = 0.05
        self.noise_std_max = 0.15
        self.noise_std = random.uniform(self.noise_std_min, self.noise_std_max)
        self.noise_mean = 0.0
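+        # zero-mean Gaussian noise; std is drawn uniformly from [noise_std_min, noise_std_max]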
+        # remove old datasets from the directories before creating new ones
        self.clear_directories(self.base_train_path)
        self.clear_directories(self.base_validation_path, True)
        self.random_dataset_creation()
@@ -67,39 +70,38 @@ def random_dataset_creation(self):
            # Get the sequence
            x_seq, d_seq, numPedsList_seq, PedsList_seq = x[0], d[0], numPedsList[0], PedsList[0]

+            # convert to a dense tensor and build the pedestrian lookup table
            x_seq, lookup_seq = self.dataloader.convert_proper_array(x_seq, numPedsList_seq, PedsList_seq)
            if dataset_pointer_ins != self.dataloader.dataset_pointer:
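+                # moved to a new dataset file: flush the collected instances and resample augmentation parameters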
                if self.dataloader.dataset_pointer != 0:
                    whole_dataset.append(dataset_instances)
                    dataset_instances = {}
-                random_angles = random.sample(self.angles, self.num_of_data)
-                self.noise_std = random.uniform(self.noise_std_min, self.noise_std_max)
+                random_angles = random.sample(self.angles, self.num_of_data)  # sample new angles
+                self.noise_std = random.uniform(self.noise_std_min, self.noise_std_max)  # sample new noise std
                print("Dataset creation for: ", file_name, " angles: ", random_angles)
                dataset_pointer_ins = self.dataloader.dataset_pointer
-
-
-            #self.add_element_to_dict(dataset_instances, (dir_name, file_name, ''), self.submision_seq_preprocess(x_seq, self.seq_length, lookup_seq))
-
            for index, angle in enumerate(random_angles):
                self.noise_std = random.uniform(self.noise_std_min, self.noise_std_max)
+                # modify and preprocess the dataset
                modified_x_seq = self.submision_seq_preprocess(self.handle_seq(x_seq, lookup_seq, PedsList_seq, angle), self.seq_length, lookup_seq)
+                # store the modified data points in the dict
                self.dataloader.add_element_to_dict(dataset_instances, (dir_name, file_name, index), modified_x_seq)
            end = time.time()
            print('Current file : ', file_name, ' Processed trajectory number : ', batch + 1, 'out of', self.dataloader.num_batches, 'trajectories in time', end - start)

-
+        # write modified data points to txt files
        whole_dataset.append(dataset_instances)
        create_directories(os.path.join(self.f_prefix, self.base_validation_path), self.dataloader.get_all_directory_namelist())
        self.write_modified_datasets(whole_dataset)
    def handle_seq(self, x_seq, lookup_seq, PedsList_seq, angle):
+        # add noise and rotate a trajectory
        vectorized_x_seq, first_values_dict = vectorize_seq(x_seq, PedsList_seq, lookup_seq)
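+        # vectorize_seq is assumed to make positions relative to each pedestrian's first
+        # frame; first_values_dict holds the offsets that are rotated and re-added below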
-        #print("x_seq: %s"%vectorized_x_seq)
        modified_x_seq = vectorized_x_seq.clone()
        mean = torch.FloatTensor([self.noise_mean, self.noise_mean])
        stddev = torch.FloatTensor([self.noise_std, self.noise_std])
@@ -108,41 +110,31 @@ def handle_seq(self, x_seq, lookup_seq, PedsList_seq, angle):
        for ind, frame in enumerate(vectorized_x_seq):
            for ped in PedsList_seq[ind]:
                selected_point = frame[lookup_seq[ped], :]
-                #print("selected point : %s"%selected_point)
+                # rotate a frame point
                rotated_point = rotate(origin, selected_point, math.radians(angle))
-                #print("after rotation: %s"%(rotated_point))
                noise = torch.normal(mean, stddev).clone()
-                #print("noise %s"% noise)
+                # add random noise
                modified_x_seq[ind, lookup_seq[ped], 0] = rotated_point[0] + noise[0]
                modified_x_seq[ind, lookup_seq[ped], 1] = rotated_point[1] + noise[1]
-                #print("after rotation and noise: %s"%modified_x_seq[ind, lookup_seq[ped], :])
119
119
modified_x_seq [ind , lookup_seq [ped ], :] = torch .cat (rotate (origin , first_values_dict [ped ], math .radians (angle ))) + modified_x_seq [ind , lookup_seq [ped ], :]
120
120
return modified_x_seq
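+    # Note: `rotate` is imported from the project's helper module; it is assumed to be a
+    # standard 2D rotation of `point` about `origin` by `angle` radians, roughly:
+    #   qx = ox + cos(angle) * (px - ox) - sin(angle) * (py - oy)
+    #   qy = oy + sin(angle) * (px - ox) + cos(angle) * (py - oy)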
    def submision_seq_preprocess(self, x_seq, seq_lenght, lookup_seq):
-
+        # recreate the original txt row structure for the modified data points
        ret_x_seq_c = x_seq.data.numpy()
-        #np.array(frame_number_predicted, copy=False, subok=True, ndmin=2)
        ped_ids = self.dataloader.get_id_sequence(seq_lenght)
-        #print("lookup table: %s"%lookup_seq)
-        #print("ped ids: %s"%ped_ids)
        positions_of_peds = [lookup_seq[ped] for ped in ped_ids]
-        #print(positions_of_peds)
-        #print("input seq: %s"%ret_x_seq_c)
        ret_x_seq_c = ret_x_seq_c[:, positions_of_peds, :]
        ret_x_seq_c_selected = ret_x_seq_c[:, 0, :]
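+        # swap the two coordinate columns back to the order used in the txt files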
        ret_x_seq_c_selected[:, [0, 1]] = ret_x_seq_c_selected[:, [1, 0]]
        frame_numbers = self.dataloader.get_frame_sequence(seq_lenght)
        id_integrated_seq = np.append(np.array(ped_ids)[:, None], ret_x_seq_c_selected, axis=1)
        frame_integrated_seq = np.append(frame_numbers[:, None], id_integrated_seq, axis=1)
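+        # resulting rows: [frame_number, ped_id, coord, coord]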
-        #print("final seq: %s"%frame_integrated_seq)
-        #print(repeated_id.shape)
-        #print(frame_integrated_prediction)
-        #print(result)
-        #print("************************")
+
        return frame_integrated_seq
    def write_modified_datasets(self, dataset_instances_store):
+        # write the constructed txt structures out as txt files
        self.dataloader.reset_batch_pointer()
        for dataset_index in range(self.dataloader.numDatasets):
@@ -166,6 +158,7 @@ def write_dict(self, dict, base_path):
            self.dataloader.write_dataset(value, file_name, path)
    def clear_directories(self, base_path, delete_all=False):
+        # delete all files from a directory
        print("Clearing directories...")
        dir_names = self.dataloader.get_all_directory_namelist()
        base_path = os.path.join(self.f_prefix, base_path)
@@ -188,13 +181,13 @@ def main():
    # number of additional datasets generated for each original dataset
    parser.add_argument('--num_data', type=int, default=5,
                        help='Number of additional datasets for each one')
-
+    # length of sequence
    parser.add_argument('--seq_length', type=int, default=20,
                        help='Processing sequence length')
-
+    # allocation percentage between train and validation datasets
    parser.add_argument('--validation', type=float, default=0.1,
                        help='Percentage of data to be allocated for validation in additional datasets')
-
+    # use of Google Drive
    parser.add_argument('--drive', action="store_true", default=False,
                        help='Use Google drive or not')
@@ -208,8 +201,8 @@ def main():
    prefix = ''
    f_prefix = '.'
    if args.drive:
-        prefix = 'drive/semester_project/new_social_LSTM_pytorch_v2/'
-        f_prefix = 'drive/semester_project/new_social_LSTM_pytorch_v2'
+        prefix = 'drive/semester_project/social_lstm_final/'
+        f_prefix = 'drive/semester_project/social_lstm_final'
206
214
207
augmentator = data_augmentator (f_prefix , args .num_data , args .seq_length , args .validation )
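+    # Example invocation (script file name is hypothetical):
+    #   python augmentation.py --num_data 5 --seq_length 20 --validation 0.1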