Skip to content

Commit a2d46c1

Browse files
committed
Modified C3D.train to support batchsize in model.py
Added a function to calculate a motion index summary. Updated the device printout. Stuck at 42% accuracy — changing and trying a lot of different potential solutions.
1 parent ee0b719 commit a2d46c1

File tree

3 files changed

+164
-157
lines changed

3 files changed

+164
-157
lines changed

functions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,9 @@ def dataloader_augment():
281281
# Apply random modifications and iterate over the same 32 frames 5 times
282282
for _ in range(iters_):
283283
modified_data = data_augment(data)
284-
modified_data = np.expand_dims(modified_data, axis=0)
285-
modified_data = np.transpose(modified_data, (0, 4, 1, 2, 3))
284+
# modified_data = np.expand_dims(modified_data, axis=0)
285+
# modified_data = np.transpose(modified_data, (0, 4, 1, 2, 3))
286+
modified_data = np.transpose(modified_data, (3, 0, 1, 2))
286287
# Pass C3D input and labels
287288
# Normalize data to be between 0 and 1
288289
modified_data = (modified_data - np.min(modified_data)) / (np.max(modified_data) - np.min(modified_data))

model.py

Lines changed: 159 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
import cv2
77
import torch
88
import numpy as np
9+
import torch
910
import torch.nn as nn
1011
import torch.optim as optim
1112
import torch.nn.functional as f
1213
from functions import dataloader, dataloader_test, dataloader_augment
1314
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
1415
from skimage.metrics import structural_similarity as ssim
1516

16-
1717
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
1818

1919

@@ -46,12 +46,12 @@ def compute_motion_index(video):
4646
:param video:
4747
:return:
4848
"""
49-
T, H, W, C = video.shape # temporal index, height, width, channels
50-
motion_index = np.zeros((T,)) # init. motion index of zeros
49+
T, H, W, C = video.shape # temporal index, height, width, channels
50+
motion_index = np.zeros((T,)) # init. motion index of zeros
5151
# Nested for loop acts as a moving window for each frame Mi
5252
for i in range(T):
5353
frame_similarities = []
54-
for j in [i-2, i-1, i+1, i+2]:
54+
for j in [i - 2, i - 1, i + 1, i + 2]:
5555
if 0 <= j < T:
5656
# Calculate SSIM
5757
ssim_value = ssim(video[i], video[j], multichannel=True,
@@ -67,6 +67,16 @@ def compute_motion_index(video):
6767
return motion_index
6868

6969

70+
def motion_index_summary(motion_index):
    """Summarize a per-frame motion index into 8 segment averages.

    Splits the motion index into ``num_segments`` equal consecutive
    segments and reduces each segment to its mean, producing the compact
    temporal summary used alongside the C3D temporal weights.

    :param motion_index: 1-D array-like of per-frame motion values; its
        length must be divisible by 8 (e.g. 32 frames -> 8 segments of 4).
    :return: 1-D numpy array of length 8 with the mean motion per segment.
    """
    num_segments = 8
    # Bug fix: the original computed ``segment_size = T // num_segments``
    # but ``T`` is undefined in this scope (it is local to
    # compute_motion_index), raising NameError on every call. Derive the
    # frame count from the input itself instead.
    motion_index = np.asarray(motion_index)
    segment_size = motion_index.shape[0] // num_segments
    segments = motion_index.reshape(num_segments, segment_size)
    return segments.mean(axis=1)
78+
79+
7080
class SPP(nn.Module):
7181
"""
7282
Spatial Pyramid Pooling
@@ -294,167 +304,163 @@ def train_c3d(self, trainloader=None, epochs=40):
294304
"the feature map is reduced to 1 channel. The final output feature map
295305
size is 1×T×1×1, which could be used as temporal weights."
296306
297-
"loss...
307+
The equation they provide is Cross Entropy Loss, from the diagram
308+
we are predicting two classes therefore use nn.CrossEntropyLoss().
309+
"we use Adam optimizer with the learning rate of 1e-3 and weight decay of 1e-8"
298310
"""
299-
# The equation they provide is Cross Entropy Loss, from the diagram
300-
# we are predicting two classes therefore use nn.CrossEntropyLoss().
311+
# Set loss, optimizer, train state to True
301312
classification_criterion = nn.CrossEntropyLoss()
302-
# "we use Adam optimizer with the learning rate of 1e-3 and ...
303-
# weight decay of 1e-8"
304313
self.optimizer = optim.Adam(self.parameters(), lr=1e-3, weight_decay=1e-8)
305-
self.train() # Set training mode to True
314+
self.train()
315+
trainloader = torch.utils.data.DataLoader(list(dataloader_augment()), batch_size=64, shuffle=True)
306316

307317
# Train loop
308318
num_epochs = epochs # 40
309-
for self.epoch in range(num_epochs):
310-
# "after 40 epochs we change the LR to 1e-4"
311-
if self.epoch > 20:
312-
self.optimizer = optim.Adam(self.parameters(),
313-
lr=1e-4, weight_decay=1e-8)
314-
# TODO: plots
315-
running_loss = 0.0
316-
average_loss = 0.0
317-
trainloader = dataloader_augment()
318-
max_iter = len(list(dataloader_augment()))
319-
print(f"max_iter={max_iter}")
320-
321-
# Loop over data
322-
for i, data in enumerate(trainloader):
323-
# Unpack data
324-
inputs, labels = data # 32 frames, y_true
325-
# >>> labels = tensor([1., 0.])
326-
# >>> labels.shape = torch.Size([2])
327-
328-
self.optimizer.zero_grad() # zero the parameter gradients
329-
330-
outputs, vtemp = self(inputs) # prediction, temporal weights
331-
# >>> outputs.shape = torch.Size([1, 2])
332-
# >>> outputs = tensor([[ 4.7026, -8.8772]], grad_fn=<AddmmBackward0>)
333-
334-
# Calculate Vmotion from inputs rearrange
335-
# dimensions and convert to numpy array
336-
inputs_np = inputs.permute(0, 2, 3, 4, 1).numpy()
337-
vmotion_list = []
338-
for video in inputs_np:
319+
with torch.set_grad_enabled(True):
320+
for self.epoch in range(num_epochs):
321+
322+
# Decrease LR and Batch Size "after 40 epochs we change the LR to 1e-4"
323+
if self.epoch > 20:
324+
self.optimizer = optim.Adam(
325+
self.parameters(),
326+
lr=1e-4, weight_decay=1e-8
327+
)
328+
trainloader = torch.utils.data.DataLoader(
329+
list(dataloader_augment()),
330+
batch_size=16,
331+
shuffle=True
332+
)
333+
334+
# Track loss variables during training
335+
running_loss, average_loss = 0.0, 0.0
336+
max_iter = len(list(dataloader_augment()))
337+
338+
# Training Loop
339+
for i, data in enumerate(trainloader):
340+
# Unpack data >>> labels, labels.shape = tensor([1., 0.]), torch.Size([2])
341+
inputs, labels = data
342+
labels = torch.argmax(labels, dim=1)
343+
344+
# Predictions, temporal weights, reshape output
345+
outputs, vtemp = self(inputs)
346+
outputs = outputs.view(-1, 2) # reshape to [batch_size, num_classes]
347+
348+
# Calculate Vmotion from inputs rearrange dimensions and convert to numpy array
349+
inputs_np = inputs.permute(0, 2, 3, 4, 1).numpy()
350+
vmotion_list = []
339351
# calculate vmotion for each video
340-
vmotion_video = compute_motion_index(video)
341-
vmotion_list.append(vmotion_video)
342-
# convert back to tensor
343-
vmotion = torch.tensor(vmotion_list, dtype=torch.float32).to(device)
344-
345-
# Change vmotion size for COSINE calculation
346-
# >>> vmotion.shape = torch.Size([1, 32])
347-
vmotion = vmotion.view(-1, 8)
348-
# >>> vmotion.shape = torch.Size([1, 8])
349-
350-
# Calc. LOSS, classification & vtemp
351-
loss_cls = classification_criterion(outputs[0], labels)
352-
loss_motion = self._l_motion(vtemp, vmotion)
353-
# >>> vtemp.shape = [1, 8], vmotion.shape = [4, 8]
354-
# >>> _l_motion = tensor([0.0039, 0.0023, 0.0024, 0.0030], grad_fn=<RsubBackward1>)
355-
356-
# Total Loss
357-
loss = loss_cls + loss_motion
358-
359-
# # Average the loss
360-
# loss = loss.mean() # Todo: Average or sum loss?
361-
# print(f"loss.mean() = {loss}, "
362-
# f"y_pred = {f.softmax(outputs[0], dim=0).detach().numpy()}, "
363-
# f"y_actual = {labels}")
364-
365-
# Average the loss
366-
loss = loss.sum() # Todo: Average or sum loss?
367-
y_pred = f.softmax(outputs[0], dim=0).detach().numpy()
368-
369-
...
370-
371-
print(f"loss.sum() = {round(float(loss), 5)}, "
372-
f"y_pred = {round(float(y_pred[0]), 5)} {round(float(y_pred[1]), 5)}, "
373-
f"y_actual = {labels}")
374-
375-
"""
376-
loss.sum() = 6.183663368225098, y_pred = [0.4722479 0.5277521], y_actual = tensor([1., 0.])
377-
loss.sum() = 0.004501700401306152, y_pred = [1.0000000e+00 3.3208177e-14], y_actual = tensor([1., 0.])
378-
loss.sum() = 0.007243692874908447, y_pred = [1.000000e+00 9.129069e-23], y_actual = tensor([1., 0.])
379-
"""
380-
381-
# back propagation, optimize weights
382-
loss.backward()
383-
self.optimizer.step()
384-
385-
# Sum loss
386-
running_loss += loss
387-
average_loss = running_loss / max_iter
388-
389-
# End of epoch
390-
print(f"Epoch: {self.epoch}, Total Loss: {running_loss}, Avg Loss: {average_loss}")
391-
self.save_checkpoint()
392-
352+
for video in inputs_np:
353+
vmotion_video = compute_motion_index(video)
354+
vmotion_list.append(vmotion_video)
355+
vmotion = torch.tensor(vmotion_list, dtype=torch.float32).to(device)
356+
357+
# Divide the motion index into 8 segments and take the average of each
358+
# Change vmotion size for COSINE calculation
359+
num_segments = 8
360+
segment_size = vmotion.shape[1] // num_segments # 32 // 8 = 4
361+
vmotion = vmotion.view(
362+
-1, num_segments, segment_size
363+
) # reshape to [batch_size, num_segments, segment_size]
364+
vmotion = vmotion.mean(dim=2)
365+
# vmotion.shape = ([batchsize, 32]) -> ([batchsize, 8])
366+
367+
# Calculate LOSS, classification & vtemp
368+
loss_cls = classification_criterion(outputs, labels)
369+
loss_motion = self._l_motion(vtemp, vmotion)
370+
# >>> vtemp.shape = [1, 8], vmotion.shape = [4, 8]
371+
# >>> _l_motion = tensor([0.0039, 0.0023, 0.0024, 0.0030], grad_fn=<RsubBackward1>)
372+
373+
# Calculate and average the loss
374+
loss = loss_cls + loss_motion
375+
loss = loss.mean()
376+
y_pred = f.softmax(outputs, dim=0).detach().numpy()
377+
378+
# Output per-batch training updates
379+
print(
380+
f"loss.mean() = {round(float(loss), 5)}, "
381+
f"y_pred = {y_pred}, "
382+
f"y_actual = {labels}, "
383+
)
384+
385+
# Back propagation AND Optimize weights
386+
loss.backward()
387+
self.optimizer.step()
388+
389+
# Sum loss
390+
running_loss += loss
391+
average_loss = running_loss / max_iter
392+
393+
# End of epoch
394+
print(f"Epoch: {self.epoch}, Total Loss: {running_loss}, Avg Loss: {average_loss}")
395+
self.save_checkpoint()
396+
397+
# End of Training loop
393398
print('Finished Training')
399+
394400
return
395401

396402

397403
if __name__ == '__main__':
398-
# Init. lightweight C3D model
399-
c3d = C3D(num_classes=2)
400-
401-
# Set model.training to True
402-
# c3d.train()
403-
# print(f"c3d.training = {c3d.training}")
404-
# >>> True
405-
406-
# print(f"\nc3d.train_c3d() = {c3d.train_c3d(1)}")
407-
c3d.load_checkpoint("checkpoints/C3D_at_epoch39.pth")
408-
409-
preds = []
410-
labels_list = []
411-
412-
# outputs, vtemp = self(inputs) # prediction, temporal weights6
413-
for i, data in enumerate(dataloader_test()):
414-
# model input-output
415-
inputs, labels = data
416-
outputs, vtemp = c3d(inputs)
417-
418-
outputs_vals = f.softmax(outputs[0], dim=0).detach().numpy()
419-
outputs_vals_rounded = [round(outputs_vals[0]), round(outputs_vals[1])]
420-
print(f"outputs = {outputs_vals_rounded}, labels = {labels}")
421-
422-
# outputs_vals_binary = np.argmax(outputs_vals)
423-
if outputs_vals_rounded == [round(int(labels[0])), round(int(labels[1]))]:
424-
preds.append(1)
425-
else:
426-
preds.append(0)
427-
428-
# Save predictions and labels for evaluation
429-
# preds.append(round(outputs_vals[1]))
430-
labels_list.append(np.argmax(labels.numpy()))
431-
432-
print("Finished Predicting")
433-
print()
434-
435-
# Convert lists to numpy arrays
436-
y_pred = np.array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1])
437-
y_test_single_label = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
438-
439-
# Debug output
440-
print(f"y_test_single_label = {y_test_single_label}")
441-
print(f"y_pred = {y_pred}")
442-
443-
# Create a confusion matrix
444-
tn, fp, fn, tp = confusion_matrix(y_test_single_label, y_pred).ravel()
445-
446-
# Compute metrics
447-
accuracy = accuracy_score(y_test_single_label, y_pred)
448-
sensitivity = recall_score(y_test_single_label, y_pred) # Sensitivity is the same as Recall
449-
specificity = tn / (tn+fp)
450-
precision = precision_score(y_test_single_label, y_pred)
451-
f1 = f1_score(y_test_single_label, y_pred)
452-
453-
print(f"Accuracy: {accuracy}")
454-
print(f"Sensitivity: {sensitivity}")
455-
print(f"Specificity: {specificity}")
456-
print(f"Precision: {precision}")
457-
print(f"F1-score: {f1}")
404+
# # Init. lightweight C3D model
405+
# c3d = C3D(num_classes=2)
406+
#
407+
# # Set model.training to True
408+
# # c3d.train()
409+
# # print(f"c3d.training = {c3d.training}")
410+
# # >>> True
411+
#
412+
# # print(f"\nc3d.train_c3d() = {c3d.train_c3d(1)}")
413+
# c3d.load_checkpoint("checkpoints/C3D_at_epoch39.pth")
414+
#
415+
# preds = []
416+
# labels_list = []
417+
#
418+
# # outputs, vtemp = self(inputs) # prediction, temporal weights6
419+
# for i, data in enumerate(dataloader_test()):
420+
# # model input-output
421+
# inputs, labels = data
422+
# outputs, vtemp = c3d(inputs)
423+
#
424+
# outputs_vals = f.softmax(outputs[0], dim=0).detach().numpy()
425+
# outputs_vals_rounded = [round(outputs_vals[0]), round(outputs_vals[1])]
426+
# print(f"outputs = {outputs_vals_rounded}, labels = {labels}")
427+
#
428+
# # outputs_vals_binary = np.argmax(outputs_vals)
429+
# if outputs_vals_rounded == [round(int(labels[0])), round(int(labels[1]))]:
430+
# preds.append(1)
431+
# else:
432+
# preds.append(0)
433+
#
434+
# # Save predictions and labels for evaluation
435+
# # preds.append(round(outputs_vals[1]))
436+
# labels_list.append(np.argmax(labels.numpy()))
437+
#
438+
# print("Finished Predicting")
439+
# print()
440+
#
441+
# # Convert lists to numpy arrays
442+
# y_pred = np.array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1])
443+
# y_test_single_label = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
444+
#
445+
# # Debug output
446+
# print(f"y_test_single_label = {y_test_single_label}")
447+
# print(f"y_pred = {y_pred}")
448+
#
449+
# # Create a confusion matrix
450+
# tn, fp, fn, tp = confusion_matrix(y_test_single_label, y_pred).ravel()
451+
#
452+
# # Compute metrics
453+
# accuracy = accuracy_score(y_test_single_label, y_pred)
454+
# sensitivity = recall_score(y_test_single_label, y_pred) # Sensitivity is the same as Recall
455+
# specificity = tn / (tn+fp)
456+
# precision = precision_score(y_test_single_label, y_pred)
457+
# f1 = f1_score(y_test_single_label, y_pred)
458+
#
459+
# print(f"Accuracy: {accuracy}")
460+
# print(f"Sensitivity: {sensitivity}")
461+
# print(f"Specificity: {specificity}")
462+
# print(f"Precision: {precision}")
463+
# print(f"F1-score: {f1}")
458464

459465
# Mark EOF
460466
pass

train.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
# dataloader()
1515
# quit()
1616

17-
print(f"torch.device = {torch.device}")
17+
print(f"device (used) = {device}")
1818

1919
# Init. model
2020
s2m = stage2model = C3D(num_classes=2)
2121

22-
s2m.checkpoint_path = "checkpoints/augmented_normalized_ratiosampling_loss-sum/"
22+
s2m.checkpoint_path = "checkpoints/augmented_normalized_ratiosampling_batchsize64/"
2323

2424
# Feed video data into lwC3D for training
2525
s2m.train_c3d()

0 commit comments

Comments
 (0)