66import cv2
77import torch
88import numpy as np
9+ import torch
910import torch .nn as nn
1011import torch .optim as optim
1112import torch .nn .functional as f
1213from functions import dataloader , dataloader_test , dataloader_augment
1314from sklearn .metrics import confusion_matrix , accuracy_score , precision_score , recall_score , f1_score
1415from skimage .metrics import structural_similarity as ssim
1516
16-
# Select the first CUDA GPU when available, otherwise fall back to the CPU;
# used throughout to place models and tensors on the same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
1818
1919
@@ -46,12 +46,12 @@ def compute_motion_index(video):
4646 :param video:
4747 :return:
4848 """
49- T , H , W , C = video .shape # temporal index, height, width, channels
50- motion_index = np .zeros ((T ,)) # init. motion index of zeros
49+ T , H , W , C = video .shape # temporal index, height, width, channels
50+ motion_index = np .zeros ((T ,)) # init. motion index of zeros
5151 # Nested for loop acts as a moving window for each frame Mi
5252 for i in range (T ):
5353 frame_similarities = []
54- for j in [i - 2 , i - 1 , i + 1 , i + 2 ]:
54+ for j in [i - 2 , i - 1 , i + 1 , i + 2 ]:
5555 if 0 <= j < T :
5656 # Calculate SSIM
5757 ssim_value = ssim (video [i ], video [j ], multichannel = True ,
@@ -67,6 +67,16 @@ def compute_motion_index(video):
6767 return motion_index
6868
6969
def motion_index_summary(motion_index, num_segments=8):
    """Summarize a per-frame motion index into per-segment averages.

    The 1-D motion index (one value per frame) is split into
    ``num_segments`` equal-length contiguous segments and each segment is
    replaced by its mean, producing a fixed-length temporal summary
    (e.g. 32 frames -> 8 segment averages).

    :param motion_index: 1-D array-like of per-frame motion-index values.
    :param num_segments: number of equal segments to average over
        (default 8; presumably matches the 8 temporal weights used by the
        model — TODO confirm against vtemp's shape).
    :return: 1-D numpy array of length ``num_segments`` holding the
        per-segment means.
    """
    motion_index = np.asarray(motion_index)
    # Bug fix: the original referenced an undefined global ``T``; derive the
    # frame count from the input itself instead.
    segment_size = len(motion_index) // num_segments
    # Drop any trailing frames that do not fill a complete segment so the
    # reshape below is always valid (reshape requires exact divisibility).
    trimmed = motion_index[: num_segments * segment_size]
    return trimmed.reshape(num_segments, segment_size).mean(axis=1)
78+
79+
7080class SPP (nn .Module ):
7181 """
7282 Spatial Pyramind Pooling
@@ -294,167 +304,163 @@ def train_c3d(self, trainloader=None, epochs=40):
294304 "the feature map is reduced to 1 channel. The final output feature map
295305 size is 1×T×1×1, which could be used as temporal weights."
296306
297- "loss...
307+ The equation they provide is Cross Entropy Loss, from the diagram
308+ we are predicting two classes therefore use nn.CrossEntropyLoss().
309+ "we use Adam optimizer with the learning rate of 1e-3 and weight decay of 1e-8"
298310 """
299- # The equation they provide is Cross Entropy Loss, from the diagram
300- # we are predicting two classes therefore use nn.CrossEntropyLoss().
311+ # Set loss, optimizer, train state to True
301312 classification_criterion = nn .CrossEntropyLoss ()
302- # "we use Adam optimizer with the learning rate of 1e-3 and ...
303- # weight decay of 1e-8"
304313 self .optimizer = optim .Adam (self .parameters (), lr = 1e-3 , weight_decay = 1e-8 )
305- self .train () # Set training mode to True
314+ self .train ()
315+ trainloader = torch .utils .data .DataLoader (list (dataloader_augment ()), batch_size = 64 , shuffle = True )
306316
307317 # Train loop
308318 num_epochs = epochs # 40
309- for self .epoch in range (num_epochs ):
310- # "after 40 epochs we change the LR to 1e-4"
311- if self .epoch > 20 :
312- self .optimizer = optim .Adam (self .parameters (),
313- lr = 1e-4 , weight_decay = 1e-8 )
314- # TODO: plots
315- running_loss = 0.0
316- average_loss = 0.0
317- trainloader = dataloader_augment ()
318- max_iter = len (list (dataloader_augment ()))
319- print (f"max_iter={ max_iter } " )
320-
321- # Loop over data
322- for i , data in enumerate (trainloader ):
323- # Unpack data
324- inputs , labels = data # 32 frames, y_true
325- # >>> labels = tensor([1., 0.])
326- # >>> labels.shape = torch.Size([2])
327-
328- self .optimizer .zero_grad () # zero the parameter gradients
329-
330- outputs , vtemp = self (inputs ) # prediction, temporal weights
331- # >>> outputs.shape = torch.Size([1, 2])
332- # >>> outputs = tensor([[ 4.7026, -8.8772]], grad_fn=<AddmmBackward0>)
333-
334- # Calculate Vmotion from inputs rearrange
335- # dimensions and convert to numpy array
336- inputs_np = inputs .permute (0 , 2 , 3 , 4 , 1 ).numpy ()
337- vmotion_list = []
338- for video in inputs_np :
319+ with torch .set_grad_enabled (True ):
320+ for self .epoch in range (num_epochs ):
321+
322+ # Decrease LR and Batch Size "after 40 epochs we change the LR to 1e-4"
323+ if self .epoch > 20 :
324+ self .optimizer = optim .Adam (
325+ self .parameters (),
326+ lr = 1e-4 , weight_decay = 1e-8
327+ )
328+ trainloader = torch .utils .data .DataLoader (
329+ list (dataloader_augment ()),
330+ batch_size = 16 ,
331+ shuffle = True
332+ )
333+
334+ # Track loss variables during training
335+ running_loss , average_loss = 0.0 , 0.0
336+ max_iter = len (list (dataloader_augment ()))
337+
338+ # Training Loop
339+ for i , data in enumerate (trainloader ):
340+ # Unpack data >>> labels, labels.shape = tensor([1., 0.]), torch.Size([2])
341+ inputs , labels = data
342+ labels = torch .argmax (labels , dim = 1 )
343+
344+ # Predictions, temporal weights, reshape output
345+ outputs , vtemp = self (inputs )
346+ outputs = outputs .view (- 1 , 2 ) # reshape to [batch_size, num_classes]
347+
348+ # Calculate Vmotion from inputs rearrange dimensions and convert to numpy array
349+ inputs_np = inputs .permute (0 , 2 , 3 , 4 , 1 ).numpy ()
350+ vmotion_list = []
339351 # calculate vmotion for each video
340- vmotion_video = compute_motion_index (video )
341- vmotion_list .append (vmotion_video )
342- # convert back to tensor
343- vmotion = torch .tensor (vmotion_list , dtype = torch .float32 ).to (device )
344-
345- # Change vmotion size for COSINE calculation
346- # >>> vmotion.shape = torch.Size([1, 32])
347- vmotion = vmotion .view (- 1 , 8 )
348- # >>> vmotion.shape = torch.Size([1, 8])
349-
350- # Calc. LOSS, classification & vtemp
351- loss_cls = classification_criterion (outputs [0 ], labels )
352- loss_motion = self ._l_motion (vtemp , vmotion )
353- # >>> vtemp.shape = [1, 8], vmotion.shape = [4, 8]
354- # >>> _l_motion = tensor([0.0039, 0.0023, 0.0024, 0.0030], grad_fn=<RsubBackward1>)
355-
356- # Total Loss
357- loss = loss_cls + loss_motion
358-
359- # # Average the loss
360- # loss = loss.mean() # Todo: Average or sum loss?
361- # print(f"loss.mean() = {loss}, "
362- # f"y_pred = {f.softmax(outputs[0], dim=0).detach().numpy()}, "
363- # f"y_actual = {labels}")
364-
365- # Average the loss
366- loss = loss .sum () # Todo: Average or sum loss?
367- y_pred = f .softmax (outputs [0 ], dim = 0 ).detach ().numpy ()
368-
369- ...
370-
371- print (f"loss.sum() = { round (float (loss ), 5 )} , "
372- f"y_pred = { round (float (y_pred [0 ]), 5 )} { round (float (y_pred [1 ]), 5 )} , "
373- f"y_actual = { labels } " )
374-
375- """
376- loss.sum() = 6.183663368225098, y_pred = [0.4722479 0.5277521], y_actual = tensor([1., 0.])
377- loss.sum() = 0.004501700401306152, y_pred = [1.0000000e+00 3.3208177e-14], y_actual = tensor([1., 0.])
378- loss.sum() = 0.007243692874908447, y_pred = [1.000000e+00 9.129069e-23], y_actual = tensor([1., 0.])
379- """
380-
381- # back propagation, optimize weights
382- loss .backward ()
383- self .optimizer .step ()
384-
385- # Sum loss
386- running_loss += loss
387- average_loss = running_loss / max_iter
388-
389- # End of epoch
390- print (f"Epoch: { self .epoch } , Total Loss: { running_loss } , Avg Loss: { average_loss } " )
391- self .save_checkpoint ()
392-
352+ for video in inputs_np :
353+ vmotion_video = compute_motion_index (video )
354+ vmotion_list .append (vmotion_video )
355+ vmotion = torch .tensor (vmotion_list , dtype = torch .float32 ).to (device )
356+
357+ # Divide the motion index into 8 segments and take the average of each
358+ # Change vmotion size for COSINE calculation
359+ num_segments = 8
360+ segment_size = vmotion .shape [1 ] // num_segments # 32 // 8 = 4
361+ vmotion = vmotion .view (
362+ - 1 , num_segments , segment_size
363+ ) # reshape to [batch_size, num_segments, segment_size]
364+ vmotion = vmotion .mean (dim = 2 )
365+ # vmotion.shape = ([batchsize, 32]) -> ([batchsize, 8])
366+
367+ # Calculate LOSS, classification & vtemp
368+ loss_cls = classification_criterion (outputs , labels )
369+ loss_motion = self ._l_motion (vtemp , vmotion )
370+ # >>> vtemp.shape = [1, 8], vmotion.shape = [4, 8]
371+ # >>> _l_motion = tensor([0.0039, 0.0023, 0.0024, 0.0030], grad_fn=<RsubBackward1>)
372+
373+ # Calcualte and Average the loss
374+ loss = loss_cls + loss_motion
375+ loss = loss .mean ()
376+ y_pred = f .softmax (outputs , dim = 0 ).detach ().numpy ()
377+
378+ # Out single batch updates
379+ print (
380+ f"loss.mean() = { round (float (loss ), 5 )} , "
381+ f"y_pred = { y_pred } , "
382+ f"y_actual = { labels } , "
383+ )
384+
385+ # Back propagation AND Optimize weights
386+ loss .backward ()
387+ self .optimizer .step ()
388+
389+ # Sum loss
390+ running_loss += loss
391+ average_loss = running_loss / max_iter
392+
393+ # End of epoch
394+ print (f"Epoch: { self .epoch } , Total Loss: { running_loss } , Avg Loss: { average_loss } " )
395+ self .save_checkpoint ()
396+
397+ # End of Training loop
393398 print ('Finished Training' )
399+
394400 return
395401
396402
397403if __name__ == '__main__' :
398- # Init. lightweight C3D model
399- c3d = C3D (num_classes = 2 )
400-
401- # Set model.training to True
402- # c3d.train()
403- # print(f"c3d.training = {c3d.training}")
404- # >>> True
405-
406- # print(f"\nc3d.train_c3d() = {c3d.train_c3d(1)}")
407- c3d .load_checkpoint ("checkpoints/C3D_at_epoch39.pth" )
408-
409- preds = []
410- labels_list = []
411-
412- # outputs, vtemp = self(inputs) # prediction, temporal weights6
413- for i , data in enumerate (dataloader_test ()):
414- # model input-output
415- inputs , labels = data
416- outputs , vtemp = c3d (inputs )
417-
418- outputs_vals = f .softmax (outputs [0 ], dim = 0 ).detach ().numpy ()
419- outputs_vals_rounded = [round (outputs_vals [0 ]), round (outputs_vals [1 ])]
420- print (f"outputs = { outputs_vals_rounded } , labels = { labels } " )
421-
422- # outputs_vals_binary = np.argmax(outputs_vals)
423- if outputs_vals_rounded == [round (int (labels [0 ])), round (int (labels [1 ]))]:
424- preds .append (1 )
425- else :
426- preds .append (0 )
427-
428- # Save predictions and labels for evaluation
429- # preds.append(round(outputs_vals[1]))
430- labels_list .append (np .argmax (labels .numpy ()))
431-
432- print ("Finished Predicting" )
433- print ()
434-
435- # Convert lists to numpy arrays
436- y_pred = np .array ([1 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ])
437- y_test_single_label = np .array ([1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 ])
438-
439- # Debug output
440- print (f"y_test_single_label = { y_test_single_label } " )
441- print (f"y_pred = { y_pred } " )
442-
443- # Create a confusion matrix
444- tn , fp , fn , tp = confusion_matrix (y_test_single_label , y_pred ).ravel ()
445-
446- # Compute metrics
447- accuracy = accuracy_score (y_test_single_label , y_pred )
448- sensitivity = recall_score (y_test_single_label , y_pred ) # Sensitivity is the same as Recall
449- specificity = tn / (tn + fp )
450- precision = precision_score (y_test_single_label , y_pred )
451- f1 = f1_score (y_test_single_label , y_pred )
452-
453- print (f"Accuracy: { accuracy } " )
454- print (f"Sensitivity: { sensitivity } " )
455- print (f"Specificity: { specificity } " )
456- print (f"Precision: { precision } " )
457- print (f"F1-score: { f1 } " )
404+ # # Init. lightweight C3D model
405+ # c3d = C3D(num_classes=2)
406+ #
407+ # # Set model.training to True
408+ # # c3d.train()
409+ # # print(f"c3d.training = {c3d.training}")
410+ # # >>> True
411+ #
412+ # # print(f"\nc3d.train_c3d() = {c3d.train_c3d(1)}")
413+ # c3d.load_checkpoint("checkpoints/C3D_at_epoch39.pth")
414+ #
415+ # preds = []
416+ # labels_list = []
417+ #
418+ # # outputs, vtemp = self(inputs) # prediction, temporal weights6
419+ # for i, data in enumerate(dataloader_test()):
420+ # # model input-output
421+ # inputs, labels = data
422+ # outputs, vtemp = c3d(inputs)
423+ #
424+ # outputs_vals = f.softmax(outputs[0], dim=0).detach().numpy()
425+ # outputs_vals_rounded = [round(outputs_vals[0]), round(outputs_vals[1])]
426+ # print(f"outputs = {outputs_vals_rounded}, labels = {labels}")
427+ #
428+ # # outputs_vals_binary = np.argmax(outputs_vals)
429+ # if outputs_vals_rounded == [round(int(labels[0])), round(int(labels[1]))]:
430+ # preds.append(1)
431+ # else:
432+ # preds.append(0)
433+ #
434+ # # Save predictions and labels for evaluation
435+ # # preds.append(round(outputs_vals[1]))
436+ # labels_list.append(np.argmax(labels.numpy()))
437+ #
438+ # print("Finished Predicting")
439+ # print()
440+ #
441+ # # Convert lists to numpy arrays
442+ # y_pred = np.array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1])
443+ # y_test_single_label = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
444+ #
445+ # # Debug output
446+ # print(f"y_test_single_label = {y_test_single_label}")
447+ # print(f"y_pred = {y_pred}")
448+ #
449+ # # Create a confusion matrix
450+ # tn, fp, fn, tp = confusion_matrix(y_test_single_label, y_pred).ravel()
451+ #
452+ # # Compute metrics
453+ # accuracy = accuracy_score(y_test_single_label, y_pred)
454+ # sensitivity = recall_score(y_test_single_label, y_pred) # Sensitivity is the same as Recall
455+ # specificity = tn / (tn+fp)
456+ # precision = precision_score(y_test_single_label, y_pred)
457+ # f1 = f1_score(y_test_single_label, y_pred)
458+ #
459+ # print(f"Accuracy: {accuracy}")
460+ # print(f"Sensitivity: {sensitivity}")
461+ # print(f"Specificity: {specificity}")
462+ # print(f"Precision: {precision}")
463+ # print(f"F1-score: {f1}")
458464
459465 # Mark EOF
460466 pass
0 commit comments