
Commit adaf9bb

GSoC Optimizers: Example program to fit a quadratic function (#134)
* 📦 UPDATE: Added program to fit quadratic function
* 👌 IMPROVE: Added optimizer subroutines
* 🐛 FIX: code refactoring & minor changes
* Fixes to make the code run
* 📦 Feat: Added batch shuffle in SGD
* Updated the changes in Mini-batch GD
* Define the quadratic as its own function
* Cleanup

Co-authored-by: milancurcic <caomaco@gmail.com>
1 parent 8293118 commit adaf9bb

2 files changed: +180 -0 lines changed


β€Žexample/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ foreach(execid
   get_set_network_params
   simple
   sine
+  quadratic
 )
   add_executable(${execid} ${execid}.f90)
   target_link_libraries(${execid} PRIVATE

β€Žexample/quadratic.f90

Lines changed: 179 additions & 0 deletions
@@ -0,0 +1,179 @@
program quadratic_fit
  ! This program fits a quadratic function using a small neural network trained
  ! with stochastic gradient descent, batch gradient descent, and mini-batch
  ! gradient descent.
  use nf, only: dense, input, network

  implicit none
  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd

  ! Training parameters
  integer, parameter :: num_epochs = 1000
  integer, parameter :: train_size = 1000
  integer, parameter :: test_size = 30
  integer, parameter :: batch_size = 10
  real, parameter :: learning_rate = 0.01

  ! Input and output data
  real, allocatable :: x(:), y(:) ! training data
  real, allocatable :: xtest(:), ytest(:) ! testing data
  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:)

  integer :: i, n

  print '("Fitting quadratic function")'
  print '(60("="))'

  allocate(xtest(test_size), ytest(test_size))
  xtest = [(real(i - 1) * 2 / test_size, i = 1, test_size)] ! test_size points in [0, 2)
  ytest = quadratic(xtest)

  ! x and y as 1-D arrays
  allocate(x(train_size), y(train_size))

  ! Generating the dataset
  do i = 1, train_size
    call random_number(x(i))
    x(i) = x(i) * 2
  end do
  y = quadratic(x)

  ! Instantiate a separate network for each optimization method.
  net_sgd = network([input(1), dense(3), dense(1)])
  net_batch_sgd = network([input(1), dense(3), dense(1)])
  net_minibatch_sgd = network([input(1), dense(3), dense(1)])

  ! Print network info to stdout; this will be the same for all three networks.
  call net_sgd % print_info()

  ! SGD optimizer
  call sgd_optimizer(net_sgd, x, y, learning_rate, num_epochs)

  ! Batch SGD optimizer
  call batch_gd_optimizer(net_batch_sgd, x, y, learning_rate, num_epochs)

  ! Mini-batch SGD optimizer
  call minibatch_gd_optimizer(net_minibatch_sgd, x, y, learning_rate, num_epochs, batch_size)

  ! Calculate predictions on the test set
  ypred_sgd = [(net_sgd % predict([xtest(i)]), i = 1, test_size)]
  ypred_batch_sgd = [(net_batch_sgd % predict([xtest(i)]), i = 1, test_size)]
  ypred_minibatch_sgd = [(net_minibatch_sgd % predict([xtest(i)]), i = 1, test_size)]

  ! Print the mean squared error
  print '("Stochastic gradient descent MSE:", f9.6)', sum((ypred_sgd - ytest)**2) / size(ytest)
  print '(" Batch gradient descent MSE: ", f9.6)', sum((ypred_batch_sgd - ytest)**2) / size(ytest)
  print '(" Minibatch gradient descent MSE: ", f9.6)', sum((ypred_minibatch_sgd - ytest)**2) / size(ytest)

contains

  real elemental function quadratic(x) result(y)
    ! Quadratic function
    real, intent(in) :: x
    y = (x**2 / 2 + x / 2 + 1) / 2
  end function quadratic

  subroutine sgd_optimizer(net, x, y, learning_rate, num_epochs)
    ! In the stochastic gradient descent (SGD) optimizer, we run the forward
    ! and backward passes and update the weights for each training sample,
    ! one at a time.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
    integer :: i, n

    print *, "Running SGD optimizer..."

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
        call net % update(learning_rate)
      end do
    end do

  end subroutine sgd_optimizer

  subroutine batch_gd_optimizer(net, x, y, learning_rate, num_epochs)
    ! Like the stochastic gradient descent (SGD) optimizer, except that here we
    ! accumulate the weight gradients for all training samples and update the
    ! weights once per epoch.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
    integer :: i, n

    print *, "Running batch GD optimizer..."

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
      end do
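      ! Because the weight gradients have been accumulated over all size(x)
      ! samples above, dividing the learning rate by size(x) makes this single
      ! per-epoch update equivalent to a step with the average per-sample
      ! gradient, i.e. w <- w - learning_rate * sum(dL_i/dw) / size(x).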
      call net % update(learning_rate / size(x))
    end do

  end subroutine batch_gd_optimizer

  subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_size)
    ! Like the batch gradient descent optimizer, except that here we accumulate
    ! the weight gradients over each mini-batch and update the weights once per
    ! mini-batch.
    !
    ! Note: -O3 on GFortran must be accompanied by -fno-frontend-optimize for
    ! this subroutine to converge to a solution.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs, batch_size
    integer :: i, j, n, num_samples, num_batches, start_index, end_index
    real, allocatable :: batch_x(:), batch_y(:)
    integer, allocatable :: batch_indices(:)

    print *, "Running mini-batch GD optimizer..."

    num_samples = size(x)
    num_batches = num_samples / batch_size

    ! Generate shuffled indices for the mini-batches
    allocate(batch_x(batch_size), batch_y(batch_size))
    allocate(batch_indices(num_batches))

    do j = 1, num_batches
      batch_indices(j) = (j - 1) * batch_size + 1
    end do

    call shuffle(batch_indices)

    do n = 1, num_epochs
      do j = 1, num_batches
        start_index = batch_indices(j)
        end_index = min(start_index + batch_size - 1, num_samples)

        do i = start_index, end_index
          call net % forward([x(i)])
          call net % backward([y(i)])
        end do

        call net % update(learning_rate / batch_size)
      end do
    end do
  end subroutine minibatch_gd_optimizer

  subroutine shuffle(arr)
    ! Shuffle an array using the Fisher-Yates algorithm.
    integer, intent(inout) :: arr(:)
    real :: a
    integer :: i, temp

    do i = size(arr), 2, -1
      call random_number(a)
      a = floor(a * real(i)) + 1
      temp = arr(i)
      arr(i) = arr(int(a))
      arr(int(a)) = temp
    end do
  end subroutine shuffle

end program quadratic_fit
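
A side note on the shuffling step: minibatch_gd_optimizer shuffles the order in which the mini-batches are visited, not the samples within each batch. The standalone sketch below is illustrative only and is not part of this commit; the program name shuffle_demo and the sample counts are made up for the example. It builds the same batch start indices as minibatch_gd_optimizer and applies the same Fisher-Yates shuffle, so the effect is easy to inspect.

program shuffle_demo
  ! Illustrative sketch: build mini-batch start indices for 50 samples with a
  ! batch size of 10 (1, 11, 21, 31, 41) and shuffle them in place, mirroring
  ! what minibatch_gd_optimizer does before its training loop.
  implicit none
  integer, parameter :: num_samples = 50, batch_size = 10
  integer :: batch_indices(num_samples / batch_size)
  integer :: j

  batch_indices = [((j - 1) * batch_size + 1, j = 1, size(batch_indices))]
  print *, 'Before shuffle:', batch_indices
  call shuffle(batch_indices)
  print *, 'After shuffle: ', batch_indices

contains

  subroutine shuffle(arr)
    ! Fisher-Yates shuffle, same as in example/quadratic.f90.
    integer, intent(inout) :: arr(:)
    real :: a
    integer :: i, temp
    do i = size(arr), 2, -1
      call random_number(a)
      a = floor(a * real(i)) + 1
      temp = arr(i)
      arr(i) = arr(int(a))
      arr(int(a)) = temp
    end do
  end subroutine shuffle

end program shuffle_demo

Note that in the commit the shuffle is applied once, before the epoch loop, so every epoch visits the mini-batches in the same randomized order.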
