46 changes: 37 additions & 9 deletions include/language-support.F90
@@ -1,20 +1,48 @@
! Copyright (c), The Regents of the University of California
! Copyright (c) 2024-2025, The Regents of the University of California
! Terms of use are as specified in LICENSE.txt

#ifndef F2023_LOCALITY
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 202400)
# define F2023_LOCALITY 1
#ifndef _FIATS_LANGUAGE_SUPPORT_H
#define _FIATS_LANGUAGE_SUPPORT_H

#ifdef __GNUC__
# define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#else
# define GCC_VERSION 0
#endif

#ifndef F2023_LOCALITY
# if (__INTEL_COMPILER >= 202400) || (__clang_major__ >= 22) || (GCC_VERSION >= 150100)
# define F2023_LOCALITY 1
# else
# define F2023_LOCALITY 0
# endif
#endif
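! Worked example: for gfortran 15.1.0 the definition above gives
! GCC_VERSION = 15*10000 + 1*100 + 0 = 150100, which satisfies the >= 150100
! test, so F2023_LOCALITY defaults to 1 for that compiler.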

#ifndef F2018_LOCALITY
#if defined(_CRAYFTN)
# define F2018_LOCALITY 1
# if defined(_CRAYFTN)
# define F2018_LOCALITY 1
# endif
#endif

! If not already determined, make a compiler-dependent determination of
! whether to use multi-image features
#ifndef HAVE_MULTI_IMAGE_SUPPORT
# if defined(_CRAYFTN) || defined(__GFORTRAN__) || defined(__INTEL_COMPILER) || defined(NAGFOR) || __flang_major__ >= 22
# define HAVE_MULTI_IMAGE_SUPPORT 1
# else
# define HAVE_MULTI_IMAGE_SUPPORT 0
# endif
#endif
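A minimal usage sketch, not part of this change, of how code that includes this header might guard a multi-image feature with HAVE_MULTI_IMAGE_SUPPORT; the subroutine name and the choice of co_sum are illustrative assumptions:

! Sum a per-image cost across images only when multi-image execution is
! supported; otherwise fall back to a serial print.
subroutine report_total_cost(local_cost)
  implicit none
  real, intent(in) :: local_cost
  real total_cost
  total_cost = local_cost
#if HAVE_MULTI_IMAGE_SUPPORT
  call co_sum(total_cost, result_image=1)        ! Fortran 2018 collective sum across images
  if (this_image() == 1) print *, "total cost: ", total_cost
#else
  print *, "cost (single image): ", total_cost   ! serial fallback
#endif
end subroutine report_total_cost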

#ifndef MULTI_IMAGE_SUPPORT
#if defined(_CRAYFTN) || defined(__GFORTRAN__) || defined(__INTEL_COMPILER) || defined(NAGFOR)
# define MULTI_IMAGE_SUPPORT 1
! If not already determined, make a compiler-dependent determination of whether Julienne may pass
! procedure actual arguments to procedure pointer dummy arguments, a feature introduced in
! Fortran 2008 and described in Fortran 2023 clause 15.5.2.10 paragraph 5.
#ifndef HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY
# if defined(_CRAYFTN) || defined(__INTEL_COMPILER) || defined(NAGFOR) || defined(__flang__) || (GCC_VERSION > 140200)
# define HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY 1
# else
# define HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY 0
# endif
#endif
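A minimal sketch, not part of this change, of the Fortran 2008 feature that HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY guards: associating a non-pointer procedure actual argument with a procedure pointer dummy argument. The module, program, and procedure names are hypothetical, and this header is assumed to have been included:

module map_m
  implicit none
  abstract interface
    function map_i(x) result(y)
      real, intent(in) :: x
      real y
    end function
  end interface
contains
  function apply(f, x) result(y)
    procedure(map_i), pointer, intent(in) :: f   ! procedure pointer dummy argument
    real, intent(in) :: x
    real y
    y = f(x)
  end function
  function square(x) result(y)
    real, intent(in) :: x
    real y
    y = x*x
  end function
end module

program demo
  use map_m, only : apply, square
  implicit none
#if HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY
  print *, apply(square, 3.)    ! 'square' is a plain procedure actual argument, not a pointer
#else
  print *, square(3.)           ! fallback that avoids the feature
#endif
end program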

#endif
130 changes: 76 additions & 54 deletions src/fiats/neural_network_s.F90
@@ -4,6 +4,7 @@
#include "julienne-assert-macros.h"
#include "assert_macros.h"
#include "compound_assertions.h"
#include "language-support.F90"

submodule(neural_network_m) neural_network_s
use assert_m
@@ -74,12 +75,16 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
end block
#endif

feed_forward: &
do l = input_layer+1, output_layer
associate(z => matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l))
a(1:n(l),l) = self%activation_%evaluate(z)
end associate
end do feed_forward
block
integer l

feed_forward: &
do l = input_layer+1, output_layer
associate(z => matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l))
a(1:n(l),l) = self%activation_%evaluate(z)
end associate
end do feed_forward
end block

#ifndef _CRAYFTN
associate(normalized_outputs => tensor_t(a(1:n(output_layer), output_layer)))
@@ -120,12 +125,16 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
end block
#endif

feed_forward: &
do l = input_layer+1, output_layer
associate(z => matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l))
a(1:n(l),l) = self%activation_%evaluate(z)
end associate
end do feed_forward
block
integer l

feed_forward: &
do l = input_layer+1, output_layer
associate(z => matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l))
a(1:n(l),l) = self%activation_%evaluate(z)
end associate
end do feed_forward
end block

#ifdef _CRAYFTN
block
@@ -806,7 +815,7 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
end procedure

module procedure default_real_learn
integer l, batch, mini_batch_size, pair
integer batch, mini_batch_size, pair
type(tensor_t), allocatable :: inputs(:), expected_outputs(:)

call_assert_consistency(self)
@@ -850,11 +859,11 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
real, allocatable :: pair_cost(:)
if (present(cost)) allocate(pair_cost(mini_batch_size))

#if F2023_LOCALITY
#if defined(F2023_LOCALITY)
iterate_through_batch: &
do concurrent (pair = 1:mini_batch_size) default(none) local(a,z,delta) reduce(+: dcdb, dcdw)

#elif F2018_LOCALITY
do concurrent (pair = 1:mini_batch_size) default(none) local(a,z,delta) reduce(+: dcdb, dcdw) &
shared(self, inputs, output_layer, n, w, b, cost, expected_outputs, pair_cost)
#elif defined(F2018_LOCALITY)

reduce_gradients: &
block
@@ -887,11 +896,14 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura

a(1:self%num_inputs(), input_layer) = inputs(pair)%values()

feed_forward: &
do l = 1,output_layer
z(1:n(l),l) = matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l) ! z_j^l = sum_k(w_jk^{l} a_k^{l-1}) + b_j^l
a(1:n(l),l) = self%activation_%evaluate(z(1:n(l),l))
end do feed_forward
block
integer l
feed_forward: &
do l = 1,output_layer
z(1:n(l),l) = matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l) ! z_j^l = sum_k(w_jk^{l} a_k^{l-1}) + b_j^l
a(1:n(l),l) = self%activation_%evaluate(z(1:n(l),l))
end do feed_forward
end block

associate(y => expected_outputs(pair)%values())
if (present(cost)) pair_cost(pair) = sum((y(1:n(output_layer))-a(1:n(output_layer),output_layer))**2)
@@ -901,20 +913,24 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
end associate

associate(n_hidden => self%num_hidden_layers())
back_propagate_error: &
do l = n_hidden,1,-1
delta(1:n(l),l) = matmul(transpose(w(1:n(l+1),1:n(l),l+1)), delta(1:n(l+1),l+1)) &
* self%activation_%differentiate(z(1:n(l),l))
end do back_propagate_error
block
integer l

back_propagate_error: &
do l = n_hidden,1,-1
delta(1:n(l),l) = matmul(transpose(w(1:n(l+1),1:n(l),l+1)), delta(1:n(l+1),l+1)) &
* self%activation_%differentiate(z(1:n(l),l))
end do back_propagate_error
end block
end associate



block
integer j
integer j, l
sum_gradients: &
do l = 1,output_layer
#if F2023_LOCALITY
#if defined(F2023_LOCALITY)
dcdb(1:n(l),l) = dcdb(1:n(l),l) + delta(1:n(l),l)
do concurrent(j = 1:n(l)) reduce(+: dcdw)
dcdw(j,1:n(l-1),l) = dcdw(j,1:n(l-1),l) + a(1:n(l-1),l-1)*delta(j,l)
Expand All @@ -928,7 +944,7 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
end do sum_gradients
end block

#if F2023_LOCALITY
#if defined(F2023_LOCALITY)
end do iterate_through_batch
#elif F2018_LOCALITY

@@ -957,34 +973,40 @@ elemental module subroutine double_precision_assert_conformable_with(self, neura
real, parameter :: epsilon = 1.E-08

associate(alpha => learning_rate)
adam_adjust_weights_and_biases: &
do concurrent(l = 1:output_layer)
dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size)
vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l)
sdw (1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2)
vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1.- beta(1)**num_mini_batches)
sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1.- beta(2)**num_mini_batches)
w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) &
- alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights

dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l)
sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2)
vdbc(1:n(l),l) = vdb(1:n(l),l)/(1. - beta(1)**num_mini_batches)
sdbc(1:n(l),l) = sdb(1:n(l),l)/(1. - beta(2)**num_mini_batches)
b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust weights
end do adam_adjust_weights_and_biases
block
integer l
adam_adjust_weights_and_biases: &
do concurrent(l = 1:output_layer)
dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size)
vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l)
sdw (1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2)
vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1.- beta(1)**num_mini_batches)
sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1.- beta(2)**num_mini_batches)
w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) &
- alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights

dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l)
sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2)
vdbc(1:n(l),l) = vdb(1:n(l),l)/(1. - beta(1)**num_mini_batches)
sdbc(1:n(l),l) = sdb(1:n(l),l)/(1. - beta(2)**num_mini_batches)
b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust weights
end do adam_adjust_weights_and_biases
end block
end associate
end block
else
associate(eta => learning_rate)
adjust_weights_and_biases: &
do concurrent(l = 1:output_layer)
dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
b(1:n(l),l) = b(1:n(l),l) - eta*dcdb(1:n(l),l) ! Adjust biases
dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/mini_batch_size
w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) - eta*dcdw(1:n(l),1:n(l-1),l) ! Adjust weights
end do adjust_weights_and_biases
block
integer l
adjust_weights_and_biases: &
do concurrent(l = 1:output_layer)
dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
b(1:n(l),l) = b(1:n(l),l) - eta*dcdb(1:n(l),l) ! Adjust biases
dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/mini_batch_size
w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) - eta*dcdw(1:n(l),1:n(l-1),l) ! Adjust weights
end do adjust_weights_and_biases
end block
end associate
end if
end do iterate_across_batches