Commit d27bf09

Set default value for batch_size; tests pass in debug mode but segfault in optimized mode
1 parent 642b2f2 commit d27bf09

File tree: 4 files changed, +23 -86 lines

  src/nf/nf_layer.f90
  src/nf/nf_layer_submodule.f90
  src/nf/nf_network_submodule.f90
  src/nf/nf_optimizers.f90


src/nf/nf_layer.f90

Lines changed: 0 additions & 15 deletions
@@ -32,7 +32,6 @@ module nf_layer
     procedure :: set_params
     procedure :: init
     procedure :: print_info
-    !procedure :: update

     ! Specific subroutines for different array ranks
     procedure, private :: backward_1d
@@ -154,20 +153,6 @@ module subroutine set_params(self, params)
         !! Parameters of this layer
     end subroutine set_params

-    !impure elemental module subroutine update(self, optimizer, batch_size)
-    !  !! Update the weights and biases on the layer using the stored
-    !  !! gradients (from backward passes), and flush those same stored
-    !  !! gradients to zero.
-    !  !! This changes the state of the layer.
-    !  !! Typically used only internally from the `network % update` method.
-    !  class(layer), intent(in out) :: self
-    !    !! Layer instance
-    !  class(optimizer_base_type), intent(in) :: optimizer
-    !    !! Optimizer instance to use
-    !  integer, intent(in), optional :: batch_size
-    !    !! Batch size (default 1)
-    !end subroutine update
-
   end interface

 end module nf_layer

src/nf/nf_layer_submodule.f90

Lines changed: 0 additions & 53 deletions
@@ -405,57 +405,4 @@ module subroutine set_params(self, params)

   end subroutine set_params

-
-  !impure elemental module subroutine update(self, optimizer, batch_size)
-  !  class(layer), intent(in out) :: self
-  !  class(optimizer_base_type), intent(in) :: optimizer
-  !  integer, intent(in), optional :: batch_size
-  !  integer :: batch_size_
-  !
-  !  batch_size_ = 1
-  !  if (present(batch_size)) batch_size_ = batch_size
-  !
-  !  select type (this_layer => self % p)
-  !    type is (dense_layer)
-  !
-  !      ! Sum weight and bias gradients across images, if any
-  !      call co_sum(this_layer % dw)
-  !      call co_sum(this_layer % db)
-  !
-  !      call optimizer % minimize( &
-  !        this_layer % weights, &
-  !        this_layer % dw / batch_size_ &
-  !      )
-  !      call optimizer % minimize( &
-  !        this_layer % biases, &
-  !        this_layer % db / batch_size_ &
-  !      )
-  !
-  !      ! Reset gradients.
-  !      this_layer % dw = 0
-  !      this_layer % db = 0
-
-  !    type is (conv2d_layer)
-
-  !      ! Sum weight and bias gradients across images, if any
-  !      call co_sum(this_layer % dw)
-  !      call co_sum(this_layer % db)
-
-  !      call optimizer % minimize( &
-  !        this_layer % kernel, &
-  !        this_layer % dw / batch_size_ &
-  !      )
-  !      call optimizer % minimize( &
-  !        this_layer % biases, &
-  !        this_layer % db / batch_size_ &
-  !      )
-
-  !      ! Reset gradients.
-  !      this_layer % dw = 0
-  !      this_layer % db = 0
-
-  !  end select
-
-  !end subroutine update
-
 end submodule nf_layer_submodule

src/nf/nf_network_submodule.f90

Lines changed: 19 additions & 4 deletions
@@ -607,6 +607,7 @@ module subroutine update(self, optimizer, batch_size)
     class(optimizer_base_type), intent(in), optional :: optimizer
     integer, intent(in), optional :: batch_size
     class(optimizer_base_type), allocatable :: optimizer_
+    integer :: batch_size_
     real, allocatable :: params(:)
     integer :: n

@@ -623,14 +624,28 @@
       optimizer_ = sgd()
     end if

-    call optimizer_ % init(self % get_num_params())
+    if (present(batch_size)) then
+      batch_size_ = batch_size
+    else
+      batch_size_ = 1
+    end if

-    !call self % layers % update(optimizer_, batch_size)
+    call optimizer_ % init(self % get_num_params())

-    ! TODO: Sync gradients across images if running in parallel.
+    ! Sum weight and bias gradients across images, if any
+    do n = 2, size(self % layers)
+      select type(this_layer => self % layers(n) % p)
+        type is(dense_layer)
+          call co_sum(this_layer % dw)
+          call co_sum(this_layer % db)
+        type is(conv2d_layer)
+          call co_sum(this_layer % dw)
+          call co_sum(this_layer % db)
+      end select
+    end do

     params = self % get_params()
-    call optimizer_ % minimize(params, self % get_gradients() / batch_size)
+    call optimizer_ % minimize(params, self % get_gradients() / batch_size_)
     call self % set_params(params)

     ! Flush network gradients to zero.
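
For context, this hunk moves the update logic to the network level: batch_size now defaults to 1 when not supplied, per-layer gradients are summed across coarray images with co_sum, and the optimizer step divides the accumulated gradients by the batch size. Below is a minimal standalone sketch of the same pattern, not code from this commit; the names batch_size_default_sketch, average_gradients, and grad are made up for illustration.

! Standalone sketch only; mirrors the optional-argument default and the
! co_sum gradient reduction used in network % update.
program batch_size_default_sketch
  implicit none
  real :: grad(3)
  grad = [2.0, 4.0, 6.0]
  call average_gradients(grad)                ! batch_size defaults to 1
  call average_gradients(grad, batch_size=32) ! explicit batch size
  print '(3f8.4)', grad
contains
  subroutine average_gradients(grad, batch_size)
    real, intent(in out) :: grad(:)
    integer, intent(in), optional :: batch_size
    integer :: batch_size_
    ! Same default-value pattern as in the hunk above
    if (present(batch_size)) then
      batch_size_ = batch_size
    else
      batch_size_ = 1
    end if
    ! Sum gradient contributions from all coarray images (a no-op when
    ! running on a single image), then average over the batch.
    call co_sum(grad)
    grad = grad / batch_size_
  end subroutine average_gradients
end program batch_size_default_sketch

The sketch assumes compilation with coarray support (for example gfortran -fcoarray=single, or -fcoarray=lib with OpenCoarrays), since co_sum is a Fortran 2018 collective subroutine.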

src/nf/nf_optimizers.f90

Lines changed: 4 additions & 14 deletions
@@ -49,23 +49,11 @@ end subroutine minimize
     procedure :: minimize => minimize_sgd
   end type sgd

-  !type :: gradients_dense
-  !  real, allocatable :: dw(:,:)
-  !  real, allocatable :: db(:)
-  !end type gradients_dense
-
-  !type :: gradients_conv2d
-  !  real, allocatable :: dw(:,:,:,:)
-  !  real, allocatable :: db(:)
-  !end type gradients_conv2d
-
   type, extends(optimizer_base_type) :: rmsprop
     !! RMSProp optimizer
     real :: decay_rate = 0.9
     real :: epsilon = 1e-8
     real, allocatable :: rms_gradient(:)
-    !type(gradients_dense), allocatable :: rms_dense(:)
-    !type(gradients_conv2d), allocatable :: rms_conv2d(:)
   contains
     procedure :: init => init_rmsprop
     procedure :: minimize => minimize_rmsprop
@@ -92,10 +80,12 @@ pure subroutine minimize_sgd(self, param, gradient)

     if (self % momentum > 0) then
       ! Apply momentum update
-      self % velocity = self % momentum * self % velocity - self % learning_rate * gradient
+      self % velocity = self % momentum * self % velocity &
+        - self % learning_rate * gradient
       if (self % nesterov) then
         ! Apply Nesterov update
-        param = param + self % momentum * self % velocity - self % learning_rate * gradient
+        param = param + self % momentum * self % velocity &
+          - self % learning_rate * gradient
       else
         param = param + self % velocity
       end if
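
The rewrapped assignments in the second hunk are the classical momentum and Nesterov updates: the velocity is updated as v = momentum * v - learning_rate * g, plain momentum then steps param = param + v, while the Nesterov branch steps param = param + momentum * v - learning_rate * g. Below is a standalone sketch of one such step, not code from this commit; the toy objective f(x) = x**2 is made up for illustration.

! Standalone sketch only; mirrors the update formulas in minimize_sgd.
program sgd_momentum_sketch
  implicit none
  real, parameter :: learning_rate = 0.1, momentum = 0.9
  logical, parameter :: nesterov = .true.
  real :: param, velocity, gradient
  param = 1.0
  velocity = 0.0
  gradient = 2.0 * param                 ! gradient of f(x) = x**2 at param
  ! Momentum update of the velocity
  velocity = momentum * velocity - learning_rate * gradient
  if (nesterov) then
    ! Nesterov look-ahead step
    param = param + momentum * velocity - learning_rate * gradient
  else
    ! Plain momentum step
    param = param + velocity
  end if
  print '(a, f8.4)', 'param after one step: ', param
end program sgd_momentum_sketch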
