program quadratic_fit
  ! This program fits a quadratic function with a small neural network, using
  ! stochastic gradient descent, batch gradient descent, and minibatch gradient
  ! descent.
  use nf, only: dense, input, network

  implicit none
  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd

  ! Training parameters
  integer, parameter :: num_epochs = 1000
  integer, parameter :: train_size = 1000
  integer, parameter :: test_size = 30
  integer, parameter :: batch_size = 10
  real, parameter :: learning_rate = 0.01

  ! Input and output data
  real, allocatable :: x(:), y(:) ! training data
  real, allocatable :: xtest(:), ytest(:) ! testing data
  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:)

  integer :: i, n

  print '("Fitting quadratic function")'
  print '(60("="))'

  allocate(xtest(test_size), ytest(test_size))
  xtest = [(real(i - 1) * 2 / test_size, i = 1, test_size)]
  ytest = quadratic(xtest)

  ! x and y as 1-D arrays
  allocate(x(train_size), y(train_size))

  ! Generating the dataset
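  ! x is sampled uniformly from [0, 2); y is the corresponding quadratic value.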
  do i = 1, train_size
    call random_number(x(i))
    x(i) = x(i) * 2
  end do
  y = quadratic(x)

  ! Instantiate a separate network for each optimization method.
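  ! Each network has 1 input, a hidden layer of 3 neurons, and 1 output.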
  net_sgd = network([input(1), dense(3), dense(1)])
  net_batch_sgd = network([input(1), dense(3), dense(1)])
  net_minibatch_sgd = network([input(1), dense(3), dense(1)])

  ! Print network info to stdout; this will be the same for all three networks.
  call net_sgd % print_info()

  ! SGD optimizer
  call sgd_optimizer(net_sgd, x, y, learning_rate, num_epochs)

  ! Batch SGD optimizer
  call batch_gd_optimizer(net_batch_sgd, x, y, learning_rate, num_epochs)

  ! Mini-batch SGD optimizer
  call minibatch_gd_optimizer(net_minibatch_sgd, x, y, learning_rate, num_epochs, batch_size)

  ! Calculate predictions on the test set
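  ! Each predict call runs a forward pass on a single test sample.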
  ypred_sgd = [(net_sgd % predict([xtest(i)]), i = 1, test_size)]
  ypred_batch_sgd = [(net_batch_sgd % predict([xtest(i)]), i = 1, test_size)]
  ypred_minibatch_sgd = [(net_minibatch_sgd % predict([xtest(i)]), i = 1, test_size)]

  ! Print the mean squared error
  print '("Stochastic gradient descent MSE: ", f9.6)', sum((ypred_sgd - ytest)**2) / size(ytest)
  print '("     Batch gradient descent MSE: ", f9.6)', sum((ypred_batch_sgd - ytest)**2) / size(ytest)
  print '(" Minibatch gradient descent MSE: ", f9.6)', sum((ypred_minibatch_sgd - ytest)**2) / size(ytest)

contains

  real elemental function quadratic(x) result(y)
    ! Quadratic function
    real, intent(in) :: x
    y = (x**2 / 2 + x / 2 + 1) / 2
  end function quadratic

  subroutine sgd_optimizer(net, x, y, learning_rate, num_epochs)
    ! In the stochastic gradient descent (SGD) optimizer, we run the forward
    ! and backward passes and update the weights for each training sample,
    ! one at a time.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
    integer :: i, n

    print *, "Running SGD optimizer..."

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
        call net % update(learning_rate)
      end do
    end do

  end subroutine sgd_optimizer

  subroutine batch_gd_optimizer(net, x, y, learning_rate, num_epochs)
    ! Like the stochastic gradient descent (SGD) optimizer, except that here we
    ! accumulate the weight gradients for all training samples and update the
    ! weights once per epoch.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
    integer :: i, n

    print *, "Running batch GD optimizer..."

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
      end do
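      ! Gradients were accumulated over the whole epoch, so scale the learning
      ! rate by 1 / size(x) to apply the mean gradient.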
      call net % update(learning_rate / size(x))
    end do

  end subroutine batch_gd_optimizer

  subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_size)
    ! Like the batch GD optimizer, except that here we accumulate the weight
    ! gradients over each mini-batch and update the weights once per mini-batch.
    !
    ! Note: -O3 on GFortran must be accompanied with -fno-frontend-optimize for
    ! this subroutine to converge to a solution.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs, batch_size
    integer :: i, j, n, num_samples, num_batches, start_index, end_index
    real, allocatable :: batch_x(:), batch_y(:)
    integer, allocatable :: batch_indices(:)

    print *, "Running mini-batch GD optimizer..."

    num_samples = size(x)
    num_batches = num_samples / batch_size

    ! Generate shuffled indices for the mini-batches
    allocate(batch_x(batch_size), batch_y(batch_size))
    allocate(batch_indices(num_batches))

    do j = 1, num_batches
      batch_indices(j) = (j - 1) * batch_size + 1
    end do

    call shuffle(batch_indices)
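    ! The batch start positions are shuffled once before training; samples
    ! within each mini-batch keep their original order.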

    do n = 1, num_epochs
      do j = 1, num_batches
        start_index = batch_indices(j)
        end_index = min(start_index + batch_size - 1, num_samples)

        do i = start_index, end_index
          call net % forward([x(i)])
          call net % backward([y(i)])
        end do

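        ! Scale the learning rate by 1 / batch_size so the update uses the mean
        ! gradient accumulated over this mini-batch.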
        call net % update(learning_rate / batch_size)
      end do
    end do
  end subroutine minibatch_gd_optimizer

  subroutine shuffle(arr)
    ! Shuffle an array using the Fisher-Yates algorithm.
    integer, intent(inout) :: arr(:)
    real :: a
    integer :: i, temp

    do i = size(arr), 2, -1
      call random_number(a)
      a = floor(a * real(i)) + 1
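      ! Swap element i with a randomly chosen element from positions 1 to i.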
      temp = arr(i)
      arr(i) = arr(int(a))
      arr(int(a)) = temp
    end do
  end subroutine shuffle

end program quadratic_fit