3 | 3 | import torchvision
4 | 4 | import torchvision.transforms as transforms
5 | 5 | import matplotlib.pyplot as plt
| 6 | +import ssl
6 | 7 |
7 | | -# Device configuration
8 | | -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
| 8 | +ssl._create_default_https_context = ssl._create_unverified_context
| 9 | +
| 10 | +# GPU device configuration
| 11 | +if torch.cuda.is_available():
| 12 | +    device = torch.device('cuda')
| 13 | +    print('Using GPU')
| 14 | +elif torch.backends.mps.is_available():
| 15 | +    device = torch.device('mps')
| 16 | +    print('Using MPS')
| 17 | +else:
| 18 | +    device = torch.device('cpu')
| 19 | +    print('Using CPU')
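Note: torch.backends.mps only exists on PyTorch 1.12+, so the elif branch above raises AttributeError on older builds. A minimal defensive variant (a sketch, not part of this commit):

# Sketch: guard the MPS check so device selection also works on
# PyTorch builds that predate torch.backends.mps.
has_mps = getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available()
device = torch.device('cuda' if torch.cuda.is_available() else ('mps' if has_mps else 'cpu'))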
9 | 20 |
10 | 21 | # Hyper-parameters
11 | 22 | input_size = 784 # 28x28
12 | 23 | hidden_size = 500
13 | 24 | num_classes = 10
14 | | -num_epochs = 2
| 25 | +num_epochs = 10
15 | 26 | batch_size = 100
16 | 27 | learning_rate = 0.001
17 | 28 |
40 | 51 | for i in range(6):
41 | 52 |     plt.subplot(2,3,i+1)
42 | 53 |     plt.imshow(example_data[i][0], cmap='gray')
43 | | -plt.show()
| 54 | +# plt.show()
44 | 55 |
45 | 56 | # Fully connected neural network with one hidden layer
46 | 57 | class NeuralNet(nn.Module):
47 | | -    def __init__(self, input_size, hidden_size, num_classes):
48 | | -        super(NeuralNet, self).__init__()
49 | | -        self.input_size = input_size
50 | | -        self.l1 = nn.Linear(input_size, hidden_size)
51 | | -        self.relu = nn.ReLU()
52 | | -        self.l2 = nn.Linear(hidden_size, num_classes)
53 | | -
54 | | -    def forward(self, x):
55 | | -        out = self.l1(x)
56 | | -        out = self.relu(out)
57 | | -        out = self.l2(out)
58 | | -        # no activation and no softmax at the end
59 | | -        return out
| 58 | +    def __init__(self, input_size, hidden_size, num_classes):
| 59 | +        super(NeuralNet, self).__init__()
| 60 | +        self.input_size = input_size
| 61 | +        self.l1 = nn.Linear(input_size, hidden_size)
| 62 | +        self.relu = nn.ReLU()
| 63 | +        self.l2 = nn.Linear(hidden_size, num_classes)  # output layer must map to num_classes, not hidden_size
| 64 | +    def forward(self, x):
| 65 | +        out = self.l1(x)
| 66 | +        out = self.relu(out)
| 67 | +        out = self.l2(out)
| 68 | +        # no activation and no softmax at the end
| 69 | +        return out
60 | 70 |
61 | 71 | model = NeuralNet(input_size, hidden_size, num_classes).to(device)
| 72 | +print(model)
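A quick shape check (a sketch, not part of this commit): with the output layer mapping to num_classes, a dummy batch through the model should yield one logit per class.

# Sketch: verify the forward pass produces [batch, num_classes] logits.
dummy = torch.randn(4, input_size).to(device)  # 4 fake flattened 28x28 images
print(model(dummy).shape)                      # expected: torch.Size([4, 10])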
62 | 73 |
63 | 74 | # Loss and optimizer
64 | 75 | criterion = nn.CrossEntropyLoss()
65 | | -optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
| 76 | +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
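The "# no activation and no softmax at the end" comment works because nn.CrossEntropyLoss applies log-softmax internally, so the model must output raw logits. A minimal sketch of the equivalence (not part of this commit):

# Sketch: CrossEntropyLoss(logits) equals NLLLoss applied to log_softmax(logits).
logits = torch.randn(3, num_classes)
targets = torch.tensor([1, 0, 9])
a = nn.CrossEntropyLoss()(logits, targets)
b = nn.NLLLoss()(torch.log_softmax(logits, dim=1), targets)
print(torch.allclose(a, b))  # True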
66 | 77 |
67 | 78 | # Train the model
68 | 79 | n_total_steps = len(train_loader)
| 80 | +loss_list = []
| 81 | +
69 | 82 | for epoch in range(num_epochs):
70 | | -    for i, (images, labels) in enumerate(train_loader):
71 | | -        # origin shape: [100, 1, 28, 28]
72 | | -        # resized: [100, 784]
73 | | -        images = images.reshape(-1, 28*28).to(device)
74 | | -        labels = labels.to(device)
75 | | -
76 | | -        # Forward pass
77 | | -        outputs = model(images)
78 | | -        loss = criterion(outputs, labels)
79 | | -
80 | | -        # Backward and optimize
81 | | -        optimizer.zero_grad()
82 | | -        loss.backward()
83 | | -        optimizer.step()
84 | | -
85 | | -        if (i+1) % 100 == 0:
86 | | -            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
| 83 | +    epoch_loss = 0
| 84 | +    for i, (images, labels) in enumerate(train_loader):
| 85 | +        # origin shape: [100, 1, 28, 28]
| 86 | +        # resized: [100, 784]
| 87 | +        images = images.reshape(-1, 28*28).to(device)
| 88 | +        labels = labels.to(device)
| 89 | +
| 90 | +        # Forward pass
| 91 | +        outputs = model(images)
| 92 | +        loss = criterion(outputs, labels)
| 93 | +
| 94 | +        # Backward and optimize
| 95 | +        optimizer.zero_grad()
| 96 | +        loss.backward()
| 97 | +        optimizer.step()
| 98 | +
| 99 | +        epoch_loss += loss.item()
| 100 | +
| 101 | +        if (i+1) % 200 == 0:
| 102 | +            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
| 103 | +
| 104 | +    avg_epoch_loss = epoch_loss / n_total_steps
| 105 | +    loss_list.append(avg_epoch_loss)
| 106 | +
| 107 | +# Plot loss as a function of epoch
| 108 | +plt.figure()
| 109 | +plt.plot(range(1, num_epochs + 1), loss_list, label='Training Loss')
| 110 | +plt.xlabel('Epoch')
| 111 | +plt.ylabel('Loss')
| 112 | +plt.title('Training Loss vs. Epoch')
| 113 | +plt.legend()
| 114 | +plt.show()
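A common follow-up once training finishes (a sketch, not part of this commit; the filename model.ckpt is an arbitrary choice):

# Sketch: persist the trained weights and restore them into a fresh model.
torch.save(model.state_dict(), 'model.ckpt')
restored = NeuralNet(input_size, hidden_size, num_classes).to(device)
restored.load_state_dict(torch.load('model.ckpt', map_location=device))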
87 | 115 |
88 | 116 | # Test the model
89 | 117 | # In test phase, we don't need to compute gradients (for memory efficiency)
90 | 118 | with torch.no_grad():
91 | | -    n_correct = 0
92 | | -    n_samples = 0
93 | | -    for images, labels in test_loader:
94 | | -        images = images.reshape(-1, 28*28).to(device)
95 | | -        labels = labels.to(device)
96 | | -        outputs = model(images)
97 | | -        # max returns (value, index)
98 | | -        _, predicted = torch.max(outputs.data, 1)
99 | | -        n_samples += labels.size(0)
100 | | -        n_correct += (predicted == labels).sum().item()
101 | | -
102 | | -    acc = 100.0 * n_correct / n_samples
103 | | -    print(f'Accuracy of the network on the 10000 test images: {acc} %')
| 119 | +    n_correct = 0
| 120 | +    n_samples = 0
| 121 | +    for images, labels in test_loader:
| 122 | +        images = images.reshape(-1, 28*28).to(device)
| 123 | +        labels = labels.to(device)
| 124 | +        outputs = model(images)
| 125 | +        # max returns (value, index)
| 126 | +        _, predicted = torch.max(outputs.data, 1)
| 127 | +        n_samples += labels.size(0)
| 128 | +        n_correct += (predicted == labels).sum().item()
| 129 | +
| 130 | +    acc = 100.0 * n_correct / n_samples
| 131 | +    print(f'Accuracy of the network on the 10000 test images: {acc} %')
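For a per-class breakdown of the same evaluation (a sketch, not part of this commit; reuses test_loader and the trained model):

# Sketch: per-class accuracy over the MNIST test set.
class_correct = [0] * num_classes
class_total = [0] * num_classes
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        _, predicted = torch.max(model(images), 1)
        for label, pred in zip(labels, predicted):
            class_total[label.item()] += 1
            class_correct[label.item()] += int(label == pred)
for c in range(num_classes):
    print(f'Digit {c}: {100.0 * class_correct[c] / class_total[c]:.1f} %')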