diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml
index a1e032a797..d90b08417c 100644
--- a/.github/workflows/compilation.yml
+++ b/.github/workflows/compilation.yml
@@ -51,14 +51,14 @@ on: push

 env:
-  CUDA_VERSION: 12.6.0
+  CUDA_VERSION: 12.6.2
   GFORTRAN_VERSION: 14.2.0
-  HDF5_VERSION: 1.14.4.3
+  HDF5_VERSION: 1.14.5
   NETCDF_C_VERSION: 4.9.2
   NETCDF_FORTRAN_VERSION: 4.6.1
-  NVFORTRAN_VERSION: 24.7
+  NVFORTRAN_VERSION: 24.9
   OPENMPI_VERSION: 5.0.5
-  PYTHON_VERSION: 3.12.5
+  PYTHON_VERSION: 3.13.0

 jobs:
   run_if_on_mirror:
diff --git a/.github/workflows/lfric_test.yml b/.github/workflows/lfric_test.yml
index 8f6c796f31..8d336e2ead 100644
--- a/.github/workflows/lfric_test.yml
+++ b/.github/workflows/lfric_test.yml
@@ -47,7 +47,7 @@ jobs:
     runs-on: self-hosted
     env:
       LFRIC_APPS_REV: 3269
-      PYTHON_VERSION: 3.12.5
+      PYTHON_VERSION: 3.13.0

     steps:
       - uses: actions/checkout@v3
@@ -75,6 +75,58 @@ jobs:
           pip install .[test]
           pip install jinja2

+      # PSyclone, compile and run MetOffice gungho_model on GPU
+      - name: LFRic GungHo with OpenMP offload
+        run: |
+          # Set up environment
+          source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh
+          spack load lfric-build-environment%nvhpc
+          source .runner_venv/bin/activate
+          export PSYCLONE_LFRIC_DIR=${GITHUB_WORKSPACE}/examples/lfric/scripts
+          export PSYCLONE_CONFIG_FILE=${PSYCLONE_LFRIC_DIR}/KGOs/lfric_psyclone.cfg
+          # The LFRic source must be patched to work around bugs in the NVIDIA
+          # compiler's namelist handling.
+          rm -rf ${HOME}/LFRic/gpu_build
+          mkdir -p ${HOME}/LFRic/gpu_build
+          cp -r ${HOME}/LFRic/lfric_apps_${LFRIC_APPS_REV} ${HOME}/LFRic/gpu_build/lfric_apps
+          cp -r ${HOME}/LFRic/lfric_core_50869 ${HOME}/LFRic/gpu_build/lfric
+          cd ${HOME}/LFRic/gpu_build
+          patch -p1 < ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_${LFRIC_APPS_REV}_nvidia.patch
+          # Update the compiler definitions to build for GPU
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvfortran_acc.mk lfric/infrastructure/build/fortran/nvfortran.mk
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvc++.mk lfric/infrastructure/build/cxx/.
+          # Update the PSyclone commands to ensure transformed kernels are written
+          # to the working directory.
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/psyclone.mk lfric/infrastructure/build/psyclone/.
+          # Update dependencies.sh to point to our patched lfric core.
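+          # (The sed below rewrites the 'export lfric_core_sources=...' line
+          # in dependencies.sh so that the build uses the patched core tree
+          # created above.)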
+          sed -i -e 's/export lfric_core_sources=.*$/export lfric_core_sources\=\/home\/gh_runner\/LFRic\/gpu_build\/lfric/' lfric_apps/dependencies.sh
+          export LFRIC_DIR=${HOME}/LFRic/gpu_build/lfric_apps
+          export OPT_DIR=${LFRIC_DIR}/applications/gungho_model/optimisation/psyclone-test
+          cd ${LFRIC_DIR}
+          # PSyclone scripts must now be under 'optimisation' and be called 'global.py'
+          mkdir -p ${OPT_DIR}
+          cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
+          # Clean previous version and compile again
+          rm -rf applications/gungho_model/working
+          LFRIC_OFFLOAD_DIRECTIVES=omp ./build/local_build.py -a gungho_model -p psyclone-test
+          cd applications/gungho_model/example
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
+          mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
+          python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
+          cat timer.txt
+          export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
+          export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
+          echo $GITHUB_REF_NAME $GITHUB_SHA $VAR_TIME $VAR_HALOS >> ${HOME}/store_results/lfric_omp_performance_history
+          ${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \
+            "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
+            --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
+            --password ${{ secrets.MONGODB_PASSWORD }} \
+            --eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'",
+              github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'",
+              ci_test: "LFRic OpenMP offloading", lfric_apps_version: '"$LFRIC_APPS_REV"', system: "GlaDos",
+              compiler:"spack-nvhpc-24.5", date: new Date(), elapsed_time: '"$VAR_TIME"',
+              num_of_halo_exchanges: '"$VAR_HALOS"'})'
+
       # PSyclone, compile and run MetOffice gungho_model on GPU
       - name: LFRic GungHo with OpenACC offload
         run: |
@@ -105,10 +157,10 @@ jobs:
           cd ${LFRIC_DIR}
           # PSyclone scripts must now be under 'optimisation' and be called 'global.py'
           mkdir -p ${OPT_DIR}
-          cp ${PSYCLONE_LFRIC_DIR}/acc_parallel.py ${OPT_DIR}/global.py
+          cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
           # Clean previous version and compile again
           rm -rf applications/gungho_model/working
-          ./build/local_build.py -a gungho_model -p psyclone-test
+          LFRIC_OFFLOAD_DIRECTIVES=acc ./build/local_build.py -a gungho_model -p psyclone-test
           cd applications/gungho_model/example
           cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
           mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
diff --git a/.github/workflows/nemo_tests.yml b/.github/workflows/nemo_tests.yml
index bbcd2bf468..7dcf064239 100644
--- a/.github/workflows/nemo_tests.yml
+++ b/.github/workflows/nemo_tests.yml
@@ -46,13 +46,13 @@ jobs:
     if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
     runs-on: self-hosted
     env:
-      HDF5_VERSION: 1.14.4.3
+      HDF5_VERSION: 1.14.5
       NETCDF_C_VERSION: 4.9.2
       NETCDF_FORTRAN_VERSION: 4.6.1
       NVFORTRAN_VERSION: 23.7
       ONEAPI_VERSION: 2024.2.1
       PERL_VERSION: 5.40.0
-      PYTHON_VERSION: 3.12.5
+      PYTHON_VERSION: 3.13.0

     steps:
       - uses: actions/checkout@v3
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index eec02a9151..f4e85492b9 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -62,7 +62,7 @@ jobs:
     - uses: actions/checkout@v4
     - uses: actions/setup-python@v5
       with:
-        python-version: '3.12'
+        python-version: '3.13'
    - run: sudo apt-get install -y graphviz doxygen
    - run: python -m pip install --upgrade pip
    - run: pip install .[doc]
@@ -83,7 +83,7 @@ jobs:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
-        python-version: '3.12'
+        python-version: '3.13'
    - run: python -m pip install --upgrade pip
    - run: pip install .[doc]
    # Sphinx since version 7.2 (7.2.0/1/2) aborts with
@@ -99,7 +99,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        python-version: [3.7, 3.8, 3.12]
+        python-version: [3.7, 3.8, 3.13]
    steps:
    - uses: actions/checkout@v4
      with:
diff --git a/changelog b/changelog
index c58fc4be65..886221f860 100644
--- a/changelog
+++ b/changelog
@@ -234,6 +234,20 @@
 	80) PR #2596 for #2592 and #2582. Routine nodes manage their own symbols
 	from their parent scope.

+	81) PR #2725 to close #717. Removes some TODOs and associated utility code
+	from the fparser2 frontend that is now unused.
+
+	82) PR #2733 for #2730. Adds OpenMP offloading support for LFRic plus
+	associated integration test.
+
+	83) PR #2743 for #2742. Ensure generated code for function spaces in
+	LFRic is always in a consistent order.
+
+	84) PR #2746. Update minor versions of CUDA, HDF5, nvfortran and
+	Python used in the CI.
+
+	85) PR #2748. Revert Python used on RTD to 3.12 as 3.13 unsupported.
+
 release 2.5.0 14th of February 2024

 	1) PR #2199 for #2189. Fix bugs with missing maps in enter data
diff --git a/doc/developer_guide/working_practises.rst b/doc/developer_guide/working_practises.rst
index 12403d2fc0..a7d1b78134 100644
--- a/doc/developer_guide/working_practises.rst
+++ b/doc/developer_guide/working_practises.rst
@@ -448,13 +448,13 @@ computational cost (so that we 'fail fast'):
 3. All links within the Sphinx documentation (rst files) are checked
    (see note below);

- 4. All of the examples are tested (for Python versions 3.7, 3.8 and 3.12)
+ 4. All of the examples are tested (for Python versions 3.7, 3.8 and 3.13)
    using the ``Makefile`` in the ``examples`` directory. No compilation is
    performed; only the ``transform`` (performs the PSyclone transformations)
    and ``notebook`` (runs the various Jupyter notebooks) targets are used.
    The ``transform`` target is run 2-way parallel (``-j 2``).

- 5. The full test suite is run for Python versions 3.7, 3.8 and 3.12 but
+ 5. The full test suite is run for Python versions 3.7, 3.8 and 3.13 but
    without the compilation checks. ``pytest`` is passed the ``-n auto`` flag
    so that it will run the tests in parallel on as many cores as are
    available (currently 2 on GHA instances).
diff --git a/doc/user_guide/getting_going.rst b/doc/user_guide/getting_going.rst
index 2729f989b6..5e8e37ffa2 100644
--- a/doc/user_guide/getting_going.rst
+++ b/doc/user_guide/getting_going.rst
@@ -212,7 +212,7 @@ Dependencies
 ------------

 PSyclone is written in Python so needs Python 3 to be installed on the
-target machine. PSyclone is regularly tested with Python 3.7, 3.8 and 3.12
+target machine. PSyclone is regularly tested with Python 3.7, 3.8 and 3.13
 but should work with any version >= 3.6. (The last PSyclone release to
 support Python 2.7 was version 2.1.0.)
diff --git a/examples/gocean/eg5/extract/Makefile b/examples/gocean/eg5/extract/Makefile
index fe6a5d5976..8685dc3290 100644
--- a/examples/gocean/eg5/extract/Makefile
+++ b/examples/gocean/eg5/extract/Makefile
@@ -122,13 +122,13 @@ $(NAME): $(INF_LIB) $(EXTRACT_DIR)/$(LIB_NAME) $(KERNELS) alg.o psy.o
 #TODO #1757: $(INF_LIB) is required because of the meta-data in the
 # kernel - once this is fixed, $(INF_LIB) can be removed.
 $(DRIVER_INIT).$(TYPE): $(KERNELS) $(DRIVER_INIT).o
-	$(F90) $(KERNELS) $(DRIVER_INIT).o -o $(DRIVER_INIT).$(TYPE) \
+	$(F90) $(F90FLAGS) $(KERNELS) $(DRIVER_INIT).o -o $(DRIVER_INIT).$(TYPE) \
		$(INF_LIB) $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)

 #TODO #1757: $(INF_LIB) is required because of the meta-data in the
 # kernel - once this is fixed, $(INF_LIB) can be removed.
 $(DRIVER_UPDATE).$(TYPE): $(KERNELS) $(DRIVER_UPDATE).o
-	$(F90) $(KERNELS) $(DRIVER_UPDATE).o -o $(DRIVER_UPDATE).$(TYPE) \
+	$(F90) $(F90FLAGS) $(KERNELS) $(DRIVER_UPDATE).o -o $(DRIVER_UPDATE).$(TYPE) \
		$(INF_LIB) $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)

 # The dl_esm_inf library
diff --git a/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch b/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
index d3e178318a..db37ced2e3 100644
--- a/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
+++ b/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
@@ -57,65 +57,6 @@ index 19c9cff9..b5cd3014 100644
 	$(call MESSAGE,Compiled,$<)

-diff --git a/lfric/infrastructure/build/cxx/nvc++.mk b/lfric/infrastructure/build/cxx/nvc++.mk
-new file mode 100644
-index 00000000..13b17a10
---- /dev/null
-+++ b/lfric/infrastructure/build/cxx/nvc++.mk
-@@ -0,0 +1,9 @@
-+##############################################################################
-+# (c) Crown copyright 2017 Met Office. All rights reserved.
-+# The file LICENCE, distributed with this code, contains details of the terms
-+# under which the code may be used.
-+##############################################################################
-+
-+$(info ** Chosen NVC++ compiler)
-+
-+CXX_RUNTIME_LIBRARY=stdc++
-diff --git a/lfric/infrastructure/build/fortran/nvfortran.mk b/lfric/infrastructure/build/fortran/nvfortran.mk
-new file mode 100644
-index 00000000..cfed52c1
---- /dev/null
-+++ b/lfric/infrastructure/build/fortran/nvfortran.mk
-@@ -0,0 +1,38 @@
-+##############################################################################
-+# Copyright (c) 2017, Met Office, on behalf of HMSO and Queen's Printer
-+# For further details please refer to the file LICENCE.original which you
-+# should have received as part of this distribution.
-+##############################################################################
-+# Various things specific to the Portland Fortran compiler.
-+##############################################################################
-+#
-+# This macro is evaluated now (:= syntax) so it may be used as many times as
-+# desired without wasting time rerunning it.
-+#
-+F_MOD_DESTINATION_ARG = -module$(SPACE)
-+OPENMP_ARG = -mp
-+
-+FFLAGS_COMPILER =
-+FFLAGS_NO_OPTIMISATION = -O0
-+FFLAGS_SAFE_OPTIMISATION = -O2
-+FFLAGS_RISKY_OPTIMISATION = -O4
-+FFLAGS_DEBUG = -g -traceback
-+FFLAGS_RUNTIME = -Mchkptr -Mchkstk
-+# Option for checking code meets Fortran standard (not available for PGI)
-+FFLAGS_FORTRAN_STANDARD =
-+
-+LDFLAGS_COMPILER = -g
-+
-+FPP = cpp -traditional-cpp
-+FPPFLAGS = -P
-+FC = mpif90
-+
-+# FS#34981 (nvbug 4648082)
-+science/src/um/src/atmosphere/large_scale_precipitation/ls_ppnc.o: private FFLAGS_RUNTIME = -Mchkstk
-+
-+# FS#35751
-+mesh/create_mesh_mod.o: private FFLAGS_RUNTIME = -Mchkstk
-+
-+# 24.3
-+science/src/socrates/src/cosp_github/subsample_and_optics_example/optics/quickbeam_optics/optics_lib.o: private FFLAGS_SAFE_OPTIMISATION = -O1
-+science/src/socrates/src/cosp_github/subsample_and_optics_example/optics/quickbeam_optics/optics_lib.o: private FFLAGS_RISKY_OPTIMISATION = -O1
 diff --git a/lfric/infrastructure/build/tools/DependencyRules
 index 9d4db390..e37384fc 100755
 --- a/lfric/infrastructure/build/tools/DependencyRules
diff --git a/examples/lfric/scripts/KGOs/nvfortran_acc.mk b/examples/lfric/scripts/KGOs/nvfortran_acc.mk
index 79df074ed8..34ead62500 100644
--- a/examples/lfric/scripts/KGOs/nvfortran_acc.mk
+++ b/examples/lfric/scripts/KGOs/nvfortran_acc.mk
@@ -20,9 +20,20 @@ FFLAGS_DEBUG = -g -traceback
 FFLAGS_RUNTIME = -Mchkptr -Mchkstk
 # Option for checking code meets Fortran standard (not available for PGI)
 FFLAGS_FORTRAN_STANDARD =
-OPENMP_ARG = -acc=gpu -gpu=managed -mp=multicore
-LDFLAGS_COMPILER = -g -acc=gpu -gpu=managed -mp=multicore -cuda
+# Flags for OpenMP threading / OpenMP offloading / OpenACC offloading.
+# The LFRIC_OFFLOAD_DIRECTIVES environment variable is also queried in the
+# PSyclone script to generate matching directives
+ifeq ("$(LFRIC_OFFLOAD_DIRECTIVES)", "omp")
+  OPENMP_ARG = -mp=gpu -gpu=managed
+  LDFLAGS_COMPILER = -mp=gpu -gpu=managed -cuda
+else ifeq ("$(LFRIC_OFFLOAD_DIRECTIVES)", "acc")
+  OPENMP_ARG = -acc=gpu -gpu=managed -mp=multicore
+  LDFLAGS_COMPILER = -acc=gpu -gpu=managed -mp=multicore -cuda
+else
+  OPENMP_ARG = -mp
+  LDFLAGS_COMPILER = -mp
+endif

 FPP = cpp -traditional-cpp
 FPPFLAGS = -P
diff --git a/examples/lfric/scripts/Makefile b/examples/lfric/scripts/Makefile
index b2703e6172..e39c1f03c0 100644
--- a/examples/lfric/scripts/Makefile
+++ b/examples/lfric/scripts/Makefile
@@ -45,7 +45,7 @@ transform: ${SCRIPTS}
 .PHONY: ${SCRIPTS}

 ${SCRIPTS}:
-	${PSYCLONE} -api lfric -s ./$@ ../eg3/solver_mod.x90 -oalg /dev/null -opsy /dev/null
+	LFRIC_OFFLOAD_DIRECTIVES=acc ${PSYCLONE} -api lfric -s ./$@ ../eg3/solver_mod.x90 -oalg /dev/null -opsy /dev/null

 compile: transform
	@echo "No compilation supported for lfric/scripts examples"
diff --git a/examples/lfric/scripts/acc_parallel.py b/examples/lfric/scripts/acc_parallel.py
deleted file mode 100644
index 198450e91c..0000000000
--- a/examples/lfric/scripts/acc_parallel.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# -----------------------------------------------------------------------------
-# BSD 3-Clause License
-#
-# Copyright (c) 2018-2024, Science and Technology Facilities Council.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of the copyright holder nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-# -----------------------------------------------------------------------------
-# Authors: A. R. Porter, STFC Daresbury Lab
-#          R. W. Ford, STFC Daresbury Lab
-#          L. Mosimann, NVIDIA.
-
-'''PSyclone transformation script for the lfric API to apply
-colouring, OpenACC, OpenMP. Also adds redundant computation to the level-1
-halo for setval_* generically.
-
-'''
-from psyclone.domain.lfric import LFRicConstants
-from psyclone.psyir.nodes import ACCDirective, Loop
-from psyclone.psyir.transformations import (
-    ACCKernelsTrans, TransformationError)
-from psyclone.transformations import (
-    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
-    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
-    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans)
-
-
-# Names of any routines that we won't add any OpenACC to.
-ACC_EXCLUSIONS = [
-]
-
-
-def trans(psy):
-    '''Applies PSyclone colouring and OpenACC transformations. Any kernels that
-    cannot be offloaded to GPU are parallelised using OpenMP on the CPU. Any
-    setval_* kernels are transformed so as to compute into the L1 halos.
-
-    '''
-    rtrans = Dynamo0p3RedundantComputationTrans()
-    ctrans = Dynamo0p3ColourTrans()
-    otrans = Dynamo0p3OMPLoopTrans()
-    const = LFRicConstants()
-    loop_trans = ACCLoopTrans()
-    ktrans = ACCKernelsTrans()
-    parallel_trans = ACCParallelTrans(default_present=False)
-    artrans = ACCRoutineTrans()
-    oregtrans = OMPParallelTrans()
-
-    print(f"PSy name = '{psy.name}'")
-
-    # Loop over all of the Invokes in the PSy object
-    for invoke in psy.invokes.invoke_list:
-
-        print("Transforming invoke '{0}' ...".format(invoke.name))
-        schedule = invoke.schedule
-
-        # Make setval_* compute redundantly to the level 1 halo if it
-        # is in its own loop
-        for loop in schedule.loops():
-            if loop.iteration_space == "dof":
-                if len(loop.kernels()) == 1:
-                    if loop.kernels()[0].name in ["setval_c", "setval_x"]:
-                        rtrans.apply(loop, options={"depth": 1})
-
-        if psy.name.lower() in ACC_EXCLUSIONS:
-            print(f"Not adding ACC to invoke in '{psy.name}'")
-            apply_acc = False
-        else:
-            apply_acc = True
-
-        # Keep a record of any kernels we fail to module inline as we can't
-        # then add ACC ROUTINE to them.
-        failed_inline = set()
-
-        # Colour loops over cells unless they are on discontinuous
-        # spaces or over dofs
-        for loop in schedule.loops():
-            if loop.iteration_space == "cell_column":
-                if apply_acc:
-                    for kern in loop.kernels():
-                        try:
-                            artrans.apply(kern)
-                        except TransformationError as err:
-                            failed_inline.add(kern.name.lower())
-                            print(f"Adding ACC Routine to kernel '{kern.name}'"
-                                  f" failed:\n{err.value}")
-                if (loop.field_space.orig_name not in
-                        const.VALID_DISCONTINUOUS_NAMES):
-                    ctrans.apply(loop)
-
-        # Add OpenACC to loops unless they are over colours or are null.
-        schedule = invoke.schedule
-        for loop in schedule.walk(Loop):
-            if not apply_acc or any(kern.name.lower() in failed_inline for
-                                    kern in loop.kernels()):
-                print(f"Not adding OpenACC for kernels: "
-                      f"{[kern.name for kern in loop.kernels()]}")
-                continue
-            try:
-                if loop.loop_type == "colours":
-                    pass
-                if loop.loop_type == "colour":
-                    loop_trans.apply(loop, options={"independent": True})
-                    parallel_trans.apply(loop.ancestor(ACCDirective))
-                if loop.loop_type == "":
-                    loop_trans.apply(loop, options={"independent": True})
-                    parallel_trans.apply(loop.ancestor(ACCDirective))
-                if loop.loop_type == "dof":
-                    # We use ACC KERNELS for dof loops since they can contain
-                    # reductions.
-                    ktrans.apply(loop)
-            except TransformationError as err:
-                print(str(err))
-                pass
-
-        # Apply OpenMP thread parallelism for any kernels we've not been able
-        # to offload to GPU.
-        for loop in schedule.walk(Loop):
-            if not apply_acc or any(kern.name.lower() in failed_inline for
-                                    kern in loop.kernels()):
-                if loop.loop_type not in ["colours", "null"]:
-                    oregtrans.apply(loop)
-                    otrans.apply(loop, options={"reprod": True})
-
-    return psy
diff --git a/examples/lfric/scripts/gpu_offloading.py b/examples/lfric/scripts/gpu_offloading.py
new file mode 100644
index 0000000000..2167119fa1
--- /dev/null
+++ b/examples/lfric/scripts/gpu_offloading.py
@@ -0,0 +1,196 @@
+# -----------------------------------------------------------------------------
+# BSD 3-Clause License
+#
+# Copyright (c) 2018-2024, Science and Technology Facilities Council.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+# Authors: A. R. Porter, STFC Daresbury Lab
+#          R. W. Ford, STFC Daresbury Lab
+#          S. Siso, STFC Daresbury Lab
+#          L. Mosimann, NVIDIA.
+
+'''PSyclone transformation script for LFRic to apply colouring and GPU
+offloading. Also adds redundant computation to the level-1 halo for
+setval_* generically.
+
+'''
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans)
+
+
+# Names of any invokes to which we won't add GPU offloading
+INVOKE_EXCLUSIONS = [
+]
+
+OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
+
+
+def trans(psy):
+    '''Applies PSyclone colouring and GPU offloading transformations. Any
+    kernels that cannot be offloaded to GPU are parallelised using OpenMP
+    on the CPU. Any setval_* kernels are transformed so as to compute
+    into the L1 halos.
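+
+    The offloading dialect is selected at transformation time via the
+    LFRIC_OFFLOAD_DIRECTIVES environment variable ('omp' or 'acc'),
+    matching the compiler flags chosen in nvfortran_acc.mk.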
+
+    '''
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    else:
+        print(f"The PSyclone transformation script expects the "
+              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psy.name}'")
+
+    # Loop over all of the Invokes in the PSy object
+    for invoke in psy.invokes.invoke_list:
+
+        print("Transforming invoke '{0}' ...".format(invoke.name))
+        schedule = invoke.schedule
+
+        # Make setval_* compute redundantly to the level 1 halo if it
+        # is in its own loop
+        for loop in schedule.loops():
+            if loop.iteration_space == "dof":
+                if len(loop.kernels()) == 1:
+                    if loop.kernels()[0].name in ["setval_c", "setval_x"]:
+                        rtrans.apply(loop, options={"depth": 1})
+
+        if psy.name.lower() in INVOKE_EXCLUSIONS:
+            print(f"Not adding GPU offloading to invoke '{psy.name}'")
+            offload = False
+        else:
+            offload = True
+
+        # Keep a record of any kernels we fail to offload
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in schedule.loops():
+            if loop.iteration_space == "cell_column":
+                if (loop.field_space.orig_name not in
+                        const.VALID_DISCONTINUOUS_NAMES):
+                    ctrans.apply(loop)
+
+        # Mark Kernels inside the loops over cells as GPU-enabled
+        # (alternatively we could inline them)
+        for loop in schedule.loops():
+            if loop.iteration_space == "cell_column":
+                if offload:
+                    for kern in loop.kernels():
+                        try:
+                            gpu_annotation_trans.apply(kern)
+                        except TransformationError as err:
+                            failed_to_offload.add(kern.name.lower())
+                            print(f"Failed to annotate '{kern.name}' with "
+                                  f"GPU-enabled directive due to:\n"
+                                  f"{err.value}")
+                    # For annotated or inlined kernels we could attempt to
+                    # provide compile-time dimensions for the temporary
+                    # arrays and convert unsupported intrinsics to code.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
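+        # ('colours' loops stay sequential on the host; each 'colour' loop
+        # and each un-coloured loop gets a loop-offloading directive wrapped
+        # in a GPU region, while 'dof' loops are handled separately below
+        # because they may contain reductions.)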
+        schedule = invoke.schedule
+        for loop in schedule.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if offload and all(name not in failed_to_offload for name in
+                               kernel_names):
+                try:
+                    if loop.loop_type == "colours":
+                        pass
+                    if loop.loop_type == "colour":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "dof":
+                        # Loops over dofs can contain reductions
+                        if kernels_trans:
+                            # If kernel offloading is available, it should
+                            # manage them
+                            kernels_trans.apply(loop)
+                        else:
+                            # Otherwise, if reductions exist, they will be
+                            # detected by the dependency analysis and raise
+                            # a TransformationError, caught below
+                            loop_offloading_trans.apply(
+                                loop, options={"independent": True})
+                            gpu_region_trans.apply(loop.ancestor(Directive))
+                            # Alternatively we could use loop parallelism
+                            # with reduction clauses
+                    print(f"Successfully offloaded loop with {kernel_names}")
+                except TransformationError as err:
+                    print(f"Failed to offload loop with {kernel_names} "
+                          f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU.
+        for loop in schedule.walk(Loop):
+            if not offload or any(kern.name.lower() in failed_to_offload for
+                                  kern in loop.kernels()):
+                if loop.loop_type not in ["colours", "null"]:
+                    cpu_parallel.apply(loop)
+                    otrans.apply(loop, options={"reprod": True})
+
+    return psy
diff --git a/examples/nemo/eg5/Makefile b/examples/nemo/eg5/Makefile
index 4f2b991761..29029b34aa 100644
--- a/examples/nemo/eg5/Makefile
+++ b/examples/nemo/eg5/Makefile
@@ -85,7 +85,7 @@ run: compile
	IT=2 JPI=10 JPJ=10 JPK=5 ./traadv-$(TYPE).exe

 traadv-$(TYPE).exe: psy.o $(EXTRACT_DIR)/$(LIB_NAME)
-	$(F90) psy.o -o traadv-$(TYPE).exe $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)
+	$(F90) $(F90FLAGS) psy.o -o traadv-$(TYPE).exe $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)

 transform: kernels
diff --git a/examples/nemo/scripts/acc_kernels_trans.py b/examples/nemo/scripts/acc_kernels_trans.py
index a08439130c..7ae43abdd3 100755
--- a/examples/nemo/scripts/acc_kernels_trans.py
+++ b/examples/nemo/scripts/acc_kernels_trans.py
@@ -58,7 +58,7 @@
 import logging
 from utils import (add_profiling, enhance_tree_information, inline_calls,
-                   NOT_PERFORMANT, NOT_WORKING)
+                   NOT_PERFORMANT)
 from psyclone.errors import InternalError
 from psyclone.psyGen import TransInfo
 from psyclone.psyir.nodes import (
@@ -94,7 +94,7 @@
 ACC_EXPLICIT_MEM_MANAGEMENT = False

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING
+FILES_TO_SKIP = NOT_PERFORMANT

 # Routines we do not attempt to add any OpenACC to (because it breaks with
 # the Nvidia compiler or because it just isn't worth it)
@@ -109,18 +109,6 @@
                   "bdytide_init", "bdy_init", "bdy_segs", "sbc_cpl_init",
                   "asm_inc_init", "dia_obs_init"]  # Str handling, init routine

-# Currently fparser has no way of distinguishing array accesses from
-# function calls if the symbol is imported from some other module.
-# We therefore work-around this by keeping a list of known NEMO
-# functions that must be excluded from within KERNELS regions.
-NEMO_FUNCTIONS = ["alpha_charn", "cd_neutral_10m", "cpl_freq", "cp_air",
-                  "eos_pt_from_ct", "gamma_moist", "l_vap",
-                  "sbc_dcy", "solfrac", "psi_h", "psi_m", "psi_m_coare",
-                  "psi_h_coare", "psi_m_ecmwf", "psi_h_ecmwf", "q_sat",
-                  "rho_air", "visc_air", "sbc_dcy", "glob_sum",
-                  "glob_sum_full", "ptr_sj", "ptr_sjk", "interp1", "interp2",
-                  "interp3", "integ_spline"]
-

 class ExcludeSettings():
     '''
@@ -260,17 +248,6 @@ def valid_acc_kernel(node):
                     "other loops", enode)
            return False

-    # Finally, check that we haven't got any 'array accesses' that are in
-    # fact function calls.
-    refs = node.walk(ArrayReference)
-    for ref in refs:
-        # Check if this reference has the name of a known function and if that
-        # reference appears outside said known function.
-        if ref.name.lower() in NEMO_FUNCTIONS and \
-                ref.name.lower() != routine_name.lower():
-            log_msg(routine_name,
-                    f"Loop contains function call: {ref.name}", ref)
-            return False
     return True
diff --git a/examples/nemo/scripts/acc_loops_trans.py b/examples/nemo/scripts/acc_loops_trans.py
index 6d293d4b63..41896486c9 100755
--- a/examples/nemo/scripts/acc_loops_trans.py
+++ b/examples/nemo/scripts/acc_loops_trans.py
@@ -39,7 +39,7 @@
 from utils import (
     insert_explicit_loop_parallelism, normalise_loops, add_profiling,
-    enhance_tree_information, NOT_PERFORMANT, NOT_WORKING)
+    enhance_tree_information, NOT_PERFORMANT)
 from psyclone.psyir.nodes import Routine
 from psyclone.transformations import (
     ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans)
@@ -47,7 +47,7 @@
 PROFILING_ENABLED = True

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING
+FILES_TO_SKIP = NOT_PERFORMANT


 def trans(psyir):
@@ -81,15 +81,6 @@ def trans(psyir):
             print("Skipping", subroutine.name)
             continue

-        # This are functions with scalar bodies, we don't want to parallelise
-        # them, but we could:
-        # - Inline them
-        # - Annotate them with 'omp declare target' and allow to call from gpus
-        if subroutine.name in ("q_sat", "sbc_dcy", "gamma_moist",
-                               "cd_neutral_10m", "psi_h", "psi_m"):
-            print("Skipping", subroutine.name)
-            continue
-
         # OpenACC fails in the following routines with the Compiler error:
         # Could not find allocated-variable index for symbol - xxx
         # This all happen on characters arrays, e.g. cd_nat
@@ -108,12 +99,12 @@ def trans(psyir):
             hoist_expressions=True
         )

-        # In the lib_fortran file we annotate each routine of the SIGN_*
-        # interface with the OpenACC Routine Directive
-        if psyir.name == "lib_fortran.f90":
-            if subroutine.name.lower().startswith("sign_"):
-                ACCRoutineTrans().apply(subroutine)
-                continue
+        # These are functions that are called from inside parallel regions,
+        # so annotate them with 'acc routine'
+        if subroutine.name.lower().startswith("sign_"):
+            ACCRoutineTrans().apply(subroutine)
+            print(f"Marked {subroutine.name} as GPU-enabled")
+            continue

         insert_explicit_loop_parallelism(
             subroutine,
diff --git a/examples/nemo/scripts/omp_cpu_trans.py b/examples/nemo/scripts/omp_cpu_trans.py
index 35449494b6..ceba2eadf6 100755
--- a/examples/nemo/scripts/omp_cpu_trans.py
+++ b/examples/nemo/scripts/omp_cpu_trans.py
@@ -39,14 +39,14 @@
 from utils import (
     insert_explicit_loop_parallelism, normalise_loops, add_profiling,
-    enhance_tree_information, NOT_PERFORMANT, NOT_WORKING)
+    enhance_tree_information, NOT_PERFORMANT)
 from psyclone.psyir.nodes import Routine
 from psyclone.transformations import OMPLoopTrans

 PROFILING_ENABLED = False

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING + [
+FILES_TO_SKIP = NOT_PERFORMANT + [
     "asminc.f90",
     "trosk.f90",
     "vremap.f90",
diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py
index 916dc98688..88cbe99233 100755
--- a/examples/nemo/scripts/omp_gpu_trans.py
+++ b/examples/nemo/scripts/omp_gpu_trans.py
@@ -39,7 +39,7 @@
 from utils import (
     insert_explicit_loop_parallelism, normalise_loops, add_profiling,
-    enhance_tree_information, NOT_PERFORMANT, NOT_WORKING)
+    enhance_tree_information, NOT_PERFORMANT)
 from psyclone.psyGen import TransInfo
 from psyclone.psyir.nodes import (
     Loop, Routine, Directive, Assignment, OMPAtomicDirective)
@@ -49,7 +49,7 @@
 PROFILING_ENABLED = True

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING
+FILES_TO_SKIP = NOT_PERFORMANT


 def trans(psyir):
@@ -76,15 +76,6 @@ def trans(psyir):
         if PROFILING_ENABLED:
             add_profiling(subroutine.children)

-        # This are functions with scalar bodies, we don't want to parallelise
-        # them, but we could:
-        # - Inine them
-        # - Annotate them with 'omp declare target' and allow to call from gpus
-        if subroutine.name in ("q_sat", "sbc_dcy", "gamma_moist",
-                               "cd_neutral_10m", "psi_h", "psi_m"):
-            print("Skipping", subroutine.name)
-            continue
-
         print(f"Transforming subroutine: {subroutine.name}")

         enhance_tree_information(subroutine)
@@ -98,14 +89,14 @@ def trans(psyir):
             hoist_expressions=True
         )

-        # In the lib_fortran file we annotate each routine of the SIGN_*
-        # interface with the OpenMP Declare Target Directive
-        if psyir.name == "lib_fortran.f90":
-            if subroutine.name.lower().startswith("sign_"):
-                OMPDeclareTargetTrans().apply(subroutine)
-                # We continue parallelising inside the routine, but this could
-                # change if the parallelisation directive are not nestable, in
-                # which case we could add a 'continue' here
+        # These are functions that are called from inside parallel regions,
+        # so annotate them with 'omp declare target'
+        if subroutine.name.lower().startswith("sign_"):
+            OMPDeclareTargetTrans().apply(subroutine)
+            print(f"Marked {subroutine.name} as GPU-enabled")
+            # We continue parallelising inside the routine, but this could
+            # change if the parallelisation directives added below are not
+            # nestable, in which case we could add a 'continue' here

         # For now this is a special case for stpctl.f90 because it forces
         # loops to parallelise without many safety checks
diff --git a/examples/nemo/scripts/passthrough.py b/examples/nemo/scripts/passthrough.py
index 13ad293442..71bdd11fec 100755
--- a/examples/nemo/scripts/passthrough.py
+++ b/examples/nemo/scripts/passthrough.py
@@ -37,10 +37,8 @@
 ''' Process Nemo code with PSyclone but don't do any changes. This file
 is only needed to provide a FILES_TO_SKIP list. '''

-from utils import NOT_WORKING
-
 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_WORKING
+FILES_TO_SKIP = []


 def trans(_):
diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py
index 0a2a239b72..202c7ca59a 100755
--- a/examples/nemo/scripts/utils.py
+++ b/examples/nemo/scripts/utils.py
@@ -61,14 +61,6 @@
     "tide_mod.f90",
     "zdfosm.f90",
 ]

-# Files that we won't touch at all, either because PSyclone actually fails
-# or because it produces incorrect Fortran.
-NOT_WORKING = [
-    # NEMOv4 bugs:
-    # TODO #717 - array accessed inside WHERE does not use array notation
-    "diurnal_bulk.f90",
-]
-
 # If routine names contain these substrings then we do not profile them
 PROFILING_IGNORE = ["_init", "_rst", "alloc", "agrif", "flo_dom",
                     "macho", "mpp_", "nemo_gcm",
@@ -78,6 +70,21 @@
                     "interp1", "interp2", "interp3", "integ_spline",
                     "sbc_dcy", "sum", "sign_", "ddpdd"]

+# Currently fparser has no way of distinguishing array accesses from
+# function calls if the symbol is imported from some other module.
+# We therefore work around this by keeping a list of known NEMO functions.
+NEMO_FUNCTIONS = ["alpha_charn", "cd_neutral_10m", "cpl_freq", "cp_air",
+                  "eos_pt_from_ct", "gamma_moist", "l_vap",
+                  "sbc_dcy", "solfrac", "psi_h", "psi_m", "psi_m_coare",
+                  "psi_h_coare", "psi_m_ecmwf", "psi_h_ecmwf", "q_sat",
+                  "rho_air", "visc_air", "sbc_dcy", "glob_sum",
+                  "glob_sum_full", "ptr_sj", "ptr_sjk", "interp1", "interp2",
+                  "interp3", "integ_spline"]
+
+# Currently fparser has no way of distinguishing array accesses from statement
+# functions; the following subroutines contain known statement functions
+CONTAINS_STMT_FUNCTIONS = ["sbc_dcy"]
+
 VERBOSE = False


@@ -139,15 +146,19 @@ def enhance_tree_information(schedule):
                              ArrayType.Extent.ATTRIBUTE,
                              ArrayType.Extent.ATTRIBUTE,
                              ArrayType.Extent.ATTRIBUTE]))
-        elif reference.symbol.name == "sbc_dcy":
-            # The parser gets this wrong, it is a Call not an Array access
-            if not isinstance(reference.symbol, RoutineSymbol):
-                # We haven't already specialised this Symbol.
-                reference.symbol.specialise(RoutineSymbol)
-            call = Call.create(reference.symbol)
-            for child in reference.children:
-                call.addchild(child.detach())
-            reference.replace_with(call)
+        elif reference.symbol.name in NEMO_FUNCTIONS:
+            if reference.symbol.is_import or reference.symbol.is_unresolved:
+                # The parser gets these wrong: they are Calls, not ArrayRefs
+                if not isinstance(reference.symbol, RoutineSymbol):
+                    # We need to specialise the generic Symbol to a Routine
+                    reference.symbol.specialise(RoutineSymbol)
+                if not (isinstance(reference.parent, Call) and
+                        reference.parent.routine is reference):
+                    # We also need to replace the Reference node with a Call
+                    call = Call.create(reference.symbol)
+                    for child in reference.children[:]:
+                        call.addchild(child.detach())
+                    reference.replace_with(call)


 def inline_calls(schedule):
@@ -226,9 +237,10 @@ def normalise_loops(
         statements out of the loop nest.
     '''
-    # TODO #1902: NEMO4 mpi_ini.f90 has a HoistLocalArraysTrans bug
-    if hoist_local_arrays and schedule.root.name != "mpp_ini.f90":
-        # Apply the HoistLocalArraysTrans when possible
+    if hoist_local_arrays and schedule.name not in CONTAINS_STMT_FUNCTIONS:
+        # Apply the HoistLocalArraysTrans when possible; it cannot be applied
+        # to files with statement functions because it will attempt to put
+        # the allocate above them, which is not valid Fortran.
         try:
             HoistLocalArraysTrans().apply(schedule)
         except TransformationError:
diff --git a/psyclone.pdf b/psyclone.pdf
index 05f320b09d..f857ff38a1 100644
Binary files a/psyclone.pdf and b/psyclone.pdf differ
diff --git a/src/psyclone/domain/lfric/algorithm/lfric_alg.py b/src/psyclone/domain/lfric/algorithm/lfric_alg.py
index 8eba62b28e..c2f53c338d 100644
--- a/src/psyclone/domain/lfric/algorithm/lfric_alg.py
+++ b/src/psyclone/domain/lfric/algorithm/lfric_alg.py
@@ -34,6 +34,7 @@
 # Author: A. R. Porter, STFC Daresbury Laboratory.
 # Modified by: R. W. Ford, STFC Daresbury Laboratory.
 #              L. Turner, Met Office
+#              T. Vockerodt, Met Office

 '''This module contains the LFRicAlg class which encapsulates tools for
 creating standalone LFRic algorithm-layer code.
@@ -270,7 +271,7 @@ def _create_function_spaces(self, prog, fspaces):

         # Initialise the function spaces required by the kernel arguments.
         const = LFRicConstants()

-        for space in fspaces:
+        for space in sorted(fspaces):

             if space.lower() not in const.VALID_FUNCTION_SPACE_NAMES:
                 raise InternalError(
diff --git a/src/psyclone/f2pygen.py b/src/psyclone/f2pygen.py
index 4db3407e44..ac01797ef9 100644
--- a/src/psyclone/f2pygen.py
+++ b/src/psyclone/f2pygen.py
@@ -138,7 +138,8 @@ class OMPDirective(Directive):
     '''
     def __init__(self, root, line, position, dir_type):
         self._types = ["parallel do", "parallel", "do", "master", "single",
-                       "taskloop", "taskwait", "declare"]
+                       "taskloop", "taskwait", "declare", "target", "teams",
+                       "teams distribute parallel do"]
         self._positions = ["begin", "end"]

         super(OMPDirective, self).__init__(root, line, position, dir_type)
diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py
index cfae861001..88fc6cc512 100644
--- a/src/psyclone/psyir/frontend/fparser2.py
+++ b/src/psyclone/psyir/frontend/fparser2.py
@@ -412,179 +412,6 @@ def _find_or_create_psyclone_internal_cmp(node):
             "not have an ancestor container in which to add it.")


-def _check_args(array, dim):
-    '''Utility routine used by the _check_bound_is_full_extent and
-    _check_array_range_literal functions to check common arguments.
-
-    This routine is only in fparser2.py until #717 is complete as it
-    is used to check that array syntax in a where statement is for the
-    full extent of the dimension. Once #717 is complete this routine
-    can be removed.
-
-    :param array: the node to check.
-    :type array: :py:class:`pysclone.psyir.node.array`
-    :param int dim: the dimension index to use.
-
-    :raises TypeError: if the supplied arguments are of the wrong type.
-    :raises ValueError: if the value of the supplied dim argument is \
-        less than 1 or greater than the number of dimensions in the \
-        supplied array argument.
-
-    '''
-    if not isinstance(array, ArrayMixin):
-        raise TypeError(
-            f"method _check_args 'array' argument should be some sort of "
-            f"array access (i.e. a sub-class of ArrayMixin) but found "
-            f"'{type(array).__name__}'.")
-
-    if not isinstance(dim, int):
-        raise TypeError(
-            f"method _check_args 'dim' argument should be an "
-            f"int type but found '{type(dim).__name__}'.")
-    if dim < 1:
-        raise ValueError(
-            f"method _check_args 'dim' argument should be at "
-            f"least 1 but found {dim}.")
-    if dim > len(array.children):
-        raise ValueError(
-            f"method _check_args 'dim' argument should be at most the number "
-            f"of dimensions of the array ({len(array.children)}) but found "
-            f"{dim}.")
-
-    # The first element of the array (index 0) relates to the first
-    # dimension (dim 1), so we need to reduce dim by 1.
-    if not isinstance(array.indices[dim-1], Range):
-        raise TypeError(
-            f"method _check_args 'array' argument index '{dim-1}' should be a "
-            f"Range type but found '{type(array.indices[dim-1]).__name__}'.")
-
-
-def _is_bound_full_extent(array, dim, intrinsic):
-    '''A Fortran array section with a missing lower bound implies the
-    access starts at the first element and a missing upper bound
-    implies the access ends at the last element e.g. a(:,:)
-    accesses all elements of array a and is equivalent to
-    a(lbound(a,1):ubound(a,1),lbound(a,2):ubound(a,2)). The PSyIR
-    does not support the shorthand notation, therefore the lbound
-    and ubound operators are used in the PSyIR.
-
-    This utility function checks that shorthand lower or upper
-    bound Fortran code is captured as longhand lbound and/or
-    ubound functions as expected in the PSyIR.
-
-    This routine is only in fparser2.py until #717 is complete as it
-    is used to check that array syntax in a where statement is for the
-    full extent of the dimension. Once #717 is complete this routine
-    can be moved into fparser2_test.py as it is used there in a
-    different context.
-
-    :param array: the node to check.
-    :type array: :py:class:`pysclone.psyir.nodes.ArrayMixin`
-    :param int dim: the dimension index to use.
-    :param intrinsic: the intrinsic to check.
-    :type intrinsic:
-        :py:class:`psyclone.psyir.nodes.IntrinsicCall.Intrinsic.LBOUND` |
-        :py:class:`psyclone.psyir.nodes.IntrinsicCall.Intrinsic.UBOUND`
-
-    :returns: True if the supplied array has the expected properties,
-        otherwise returns False.
-    :rtype: bool
-
-    :raises TypeError: if the supplied arguments are of the wrong type.
-
-    '''
-    _check_args(array, dim)
-
-    if intrinsic == IntrinsicCall.Intrinsic.LBOUND:
-        index = 0
-    elif intrinsic == IntrinsicCall.Intrinsic.UBOUND:
-        index = 1
-    else:
-        raise TypeError(
-            f"'intrinsic' argument expected to be LBOUND or UBOUND but "
-            f"found '{type(intrinsic).__name__}'.")
-
-    # The first element of the array (index 0) relates to the first
-    # dimension (dim 1), so we need to reduce dim by 1.
-    bound = array.indices[dim-1].children[index]
-
-    if not isinstance(bound, IntrinsicCall):
-        return False
-
-    reference = bound.arguments[0]
-    literal = bound.arguments[1]
-
-    if bound.intrinsic != intrinsic:
-        return False
-
-    if (not isinstance(literal, Literal) or
-            literal.datatype.intrinsic != ScalarType.Intrinsic.INTEGER or
-            literal.value != str(dim)):
-        return False
-
-    return isinstance(reference, Reference) and array.is_same_array(reference)
-
-
-def _is_array_range_literal(array, dim, index, value):
-    '''Utility function to check that the supplied array has an integer
-    literal at dimension index "dim" and range index "index" with
-    value "value".
-
-    The step part of the range node has an integer literal with
-    value 1 by default.
-
-    This routine is only in fparser2.py until #717 is complete as it
-    is used to check that array syntax in a where statement is for the
-    full extent of the dimension. Once #717 is complete this routine
-    can be moved into fparser2_test.py as it is used there in a
-    different context.
-
-    :param array: the node to check.
-    :type array: :py:class:`pysclone.psyir.node.ArrayReference`
-    :param int dim: the dimension index to check.
-    :param int index: the index of the range to check (0 is the \
-        lower bound, 1 is the upper bound and 2 is the step).
-    :param int value: the expected value of the literal.
-
-    :raises NotImplementedError: if the supplied argument does not \
-        have the required properties.
-
-    :returns: True if the supplied array has the expected properties, \
-        otherwise returns False.
-    :rtype: bool
-
-    :raises TypeError: if the supplied arguments are of the wrong type.
-    :raises ValueError: if the index argument has an incorrect value.
-
-    '''
-    _check_args(array, dim)
-
-    if not isinstance(index, int):
-        raise TypeError(
-            f"method _check_array_range_literal 'index' argument should be an "
-            f"int type but found '{type(index).__name__}'.")
-
-    if index < 0 or index > 2:
-        raise ValueError(
-            f"method _check_array_range_literal 'index' argument should be "
-            f"0, 1 or 2 but found {index}.")
-
-    if not isinstance(value, int):
-        raise TypeError(
-            f"method _check_array_range_literal 'value' argument should be an "
-            f"int type but found '{type(value).__name__}'.")
-
-    # The first child of the array (index 0) relates to the first
-    # dimension (dim 1), so we need to reduce dim by 1.
-    literal = array.children[dim-1].children[index]
-
-    if (isinstance(literal, Literal) and
-            literal.datatype.intrinsic == ScalarType.Intrinsic.INTEGER and
-            literal.value == str(value)):
-        return True
-    return False
-
-
 def _copy_full_base_reference(node):
     ''' Given the supplied node, creates a new node with the same access
diff --git a/src/psyclone/psyir/nodes/omp_directives.py b/src/psyclone/psyir/nodes/omp_directives.py
index 00318273ca..8001ae1775 100644
--- a/src/psyclone/psyir/nodes/omp_directives.py
+++ b/src/psyclone/psyir/nodes/omp_directives.py
@@ -1442,7 +1442,10 @@ def gen_code(self, parent):
         for call in reprod_red_call_list:
             call.reduction_sum_loop(parent)

-        self.gen_post_region_code(parent)
+        # If there are nested OMPRegions, the post region code should be after
+        # the top-level one
+        if not self.ancestor(OMPRegionDirective):
+            self.gen_post_region_code(parent)

     def lower_to_language_level(self):
         '''
@@ -2302,7 +2305,7 @@ def gen_code(self, parent):
         # Add directive to the f2pygen tree
         parent.add(
             DirectiveGen(
-                parent, "omp", "begin", "parallel do", ", ".join(
+                parent, "omp", "begin", self._directive_string, ", ".join(
                     text for text in [default_str, private_str, fprivate_str,
                                       schedule_str, self._reduction_string()]
                     if text)))
@@ -2312,10 +2315,23 @@
         # make sure the directive occurs straight after the loop body
         position = parent.previous_loop()
-        parent.add(DirectiveGen(parent, *self.end_string().split()),
+
+        # DirectiveGen only accepts 3 terms, e.g. "omp end loop", so for longer
"omp end teams distribute parallel do", we split them + # between arguments and content (which is an additional string appended + # at the end) + terms = self.end_string().split() + # If its < 3 the array slices still work as expected + arguments = terms[:3] + content = " ".join(terms[3:]) + + parent.add(DirectiveGen(parent, *arguments, content=content), position=["after", position]) - self.gen_post_region_code(parent) + # If there are nested OMPRegions, the post region code should be after + # the top-level one + if not self.ancestor(OMPRegionDirective): + self.gen_post_region_code(parent) def lower_to_language_level(self): ''' @@ -2402,6 +2418,31 @@ def end_string(self): ''' return "omp end target" + def gen_code(self, parent): + '''Generate the OpenMP Target Directive and any associated code. + + :param parent: the parent Node in the Schedule to which to add our + content. + :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` + ''' + # Check the constraints are correct + self.validate_global_constraints() + + # Generate the code for this Directive + parent.add(DirectiveGen(parent, "omp", "begin", "target")) + + # Generate the code for all of this node's children + for child in self.dir_body: + child.gen_code(parent) + + # Generate the end code for this node + parent.add(DirectiveGen(parent, "omp", "end", "target", "")) + + # If there are nested OMPRegions, the post region code should be after + # the top-level one + if not self.ancestor(OMPRegionDirective): + self.gen_post_region_code(parent) + class OMPLoopDirective(OMPRegionDirective): ''' Class for the !$OMP LOOP directive that specifies that the iterations diff --git a/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py b/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py index e1d2103057..193e4fd575 100644 --- a/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py +++ b/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py @@ -69,12 +69,6 @@ def _get_array_bound(vector1, vector2): function makes use of these constraint, e.g. it always returns 1 for the stride. - Issue #717 requires similar functionality to this - function. However, to use this function safely in other situations - we would need to move the tests in validate into this function - first and then potentially add this function to the ArrayMixin - class, or a separate utils module. - :param array: the reference that we are interested in. :type array: :py:class:`psyir.nodes.Reference` :param int index: the (array) reference index that we are \ diff --git a/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py b/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py index b37cbb6fef..727b3ed274 100644 --- a/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py +++ b/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py @@ -34,6 +34,7 @@ # Author: A. R. Porter, STFC Daresbury Lab # Modified by: R. W. Ford, STFC Daresbury Lab # L. Turner, Met Office +# T. Vockerodt, Met Office ''' pytest tests for the LFRic-specific algorithm-generation functionality. ''' @@ -150,7 +151,9 @@ def test_create_function_spaces_invalid_space(prog): def test_create_function_spaces(prog, fortran_writer): ''' Check that a Routine is populated correctly when valid function-space names are supplied. ''' - LFRicAlg()._create_function_spaces(prog, ["w3", "w1"]) + # Using a set randomises ordering of fspaces, but function should + # produce consistent ordering in the algorithm. 
+    LFRicAlg()._create_function_spaces(prog, set(["w3", "w1"]))
     fe_config_mod = prog.symbol_table.lookup("finite_element_config_mod")
     element_order = prog.symbol_table.lookup("element_order")
     assert element_order.interface.container_symbol == fe_config_mod
@@ -159,10 +162,15 @@
     for space in ["w1", "w3"]:
         sym = prog.symbol_table.lookup(space)
         assert sym.interface.container_symbol is fs_mod_sym
-        assert (f"TYPE(function_space_type), POINTER :: "
-                f"vector_space_{space}_ptr" in gen)
-        assert (f"vector_space_{space}_ptr => function_space_collection%"
-                f"get_fs(mesh,element_order,{space})" in gen)
+        # Check that the function space ordering is consistent.
+        assert ("TYPE(function_space_type), POINTER :: "
+                "vector_space_w1_ptr\n  "
+                "TYPE(function_space_type), POINTER :: "
+                "vector_space_w3_ptr" in gen)
+        assert ("vector_space_w1_ptr => function_space_collection%"
+                "get_fs(mesh,element_order,w1)\n  "
+                "vector_space_w3_ptr => function_space_collection%"
+                "get_fs(mesh,element_order,w3)" in gen)


 def test_initialise_field(prog, fortran_writer):
diff --git a/src/psyclone/tests/psyad/main_test.py b/src/psyclone/tests/psyad/main_test.py
index cafd1ea466..8b93b49082 100644
--- a/src/psyclone/tests/psyad/main_test.py
+++ b/src/psyclone/tests/psyad/main_test.py
@@ -121,8 +121,9 @@ def test_main_h_option(capsys):
     assert str(info.value) == "0"
     output, error = capsys.readouterr()
     assert error == ""
-    # The name of the executable is replaced with either pytest or -c
-    # when using pytest, therefore we split this test into sections.
+    # Python usage messages have seen slight tweaks over the years; e.g.,
+    # Python >= 3.13 avoids repeating the args to an option between its
+    # aliases. We therefore split this test into sections.
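+    # (Pre-3.13 argparse repeats the metavar for every alias, e.g.
+    # '-a ACTIVE [ACTIVE ...], --active ACTIVE [ACTIVE ...]', whereas 3.13
+    # prints '-a, --active ACTIVE [ACTIVE ...]'; hence the alternative
+    # assertions below.)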
assert "usage: " in output expected2 = ( "[-h] [-oad OAD] [-v] [-t] [-api API] [-coord-arg COORD_ARG] " @@ -133,9 +134,12 @@ def test_main_h_option(capsys): "positional arguments:\n" " filename tangent-linear kernel source\n\n") assert expected2 in output + assert (" -h, --help show this help message and exit\n" + in output) + assert (" -a ACTIVE [ACTIVE ...], --active ACTIVE [ACTIVE ...]\n" + in output or + " -a, --active ACTIVE [ACTIVE ...]\n" in output) expected3 = ( - " -h, --help show this help message and exit\n" - " -a ACTIVE [ACTIVE ...], --active ACTIVE [ACTIVE ...]\n" " names of active variables\n" " -v, --verbose increase the verbosity of the output\n" " -t, --gen-test generate a standalone unit test for the " @@ -156,9 +160,6 @@ def test_main_h_option(capsys): " -otest TEST_FILENAME filename for the unit test (implies -t)\n" " -oad OAD filename for the transformed code\n") assert expected3 in output - assert ("-otest TEST_FILENAME filename for the unit test (implies -t)" - in output) - assert "-oad OAD filename for the transformed code" in output # no args diff --git a/src/psyclone/tests/psyir/frontend/fparser2_test.py b/src/psyclone/tests/psyir/frontend/fparser2_test.py index 2c7709199b..3135b37a58 100644 --- a/src/psyclone/tests/psyir/frontend/fparser2_test.py +++ b/src/psyclone/tests/psyir/frontend/fparser2_test.py @@ -51,13 +51,11 @@ from psyclone.errors import InternalError, GenerationError from psyclone.psyir.frontend.fparser2 import ( - Fparser2Reader, _is_array_range_literal, _is_bound_full_extent, - _check_args, default_precision, - default_integer_type, default_real_type, _first_type_match, - _get_arg_names) + Fparser2Reader, default_precision, default_integer_type, + default_real_type, _first_type_match, _get_arg_names) from psyclone.psyir.nodes import ( Schedule, CodeBlock, Assignment, Return, UnaryOperation, BinaryOperation, - IfBlock, Reference, ArrayReference, Literal, Range, KernelSchedule, + IfBlock, Reference, ArrayReference, Literal, KernelSchedule, RegionDirective, Routine, StandaloneDirective, Call, IntrinsicCall) from psyclone.psyir.symbols import ( @@ -103,192 +101,6 @@ def test_first_type_match(): _first_type_match(["a", "b"], int) -def test_check_args(): - ''' Test the _check_args function. ''' - - with pytest.raises(TypeError) as excinfo: - _check_args(None, None) - assert ("'array' argument should be some sort of array access (i.e. a " - "sub-class of ArrayMixin) but found 'NoneType'." in - str(excinfo.value)) - - one = Literal("1", INTEGER_TYPE) - array_type = ArrayType(REAL_TYPE, [20]) - symbol = DataSymbol('a', array_type) - array_reference = ArrayReference.create(symbol, [one]) - - with pytest.raises(TypeError) as excinfo: - _check_args(array_reference, None) - assert ("'dim' argument should be an int type but found 'NoneType'." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _check_args(array_reference, 0) - assert ("'dim' argument should be at least 1 but found 0." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _check_args(array_reference, 2) - assert ("'dim' argument should be at most the number of dimensions of " - "the array (1) but found 2." in str(excinfo.value)) - - with pytest.raises(TypeError) as excinfo: - _check_args(array_reference, 1) - assert ("'array' argument index '0' should be a Range type but " - "found 'Literal'." 
in str(excinfo.value)) - - -def test_is_bound_full_extent(): - ''' Test the _is_bound_full_extent function.''' - - # Check that _is_bound_full_extent calls the check_args function. - with pytest.raises(TypeError) as excinfo: - _is_bound_full_extent(None, None, None) - assert ("'array' argument should be some sort of array access (i.e. " - "a sub-class of ArrayMixin) but found 'NoneType'." in - str(excinfo.value)) - - one = Literal("1", INTEGER_TYPE) - array_type = ArrayType(REAL_TYPE, [20]) - symbol = DataSymbol('a', array_type) - my_range = Range.create(one.copy(), one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - with pytest.raises(TypeError) as excinfo: - _is_bound_full_extent(array_reference, 1, None) - assert ("'intrinsic' argument expected to be LBOUND or UBOUND but found " - "'NoneType'" in str(excinfo.value)) - - # Expecting BinaryOperation but found Literal - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.UBOUND, - [one.copy(), ("dim", one.copy())]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting intrinsic to be LBOUND, but found UBOUND - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [one.copy(), ("dim", one.copy())]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting Reference but found Literal - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(DataSymbol("x", INTEGER_TYPE)), ("dim", one.copy())]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting Reference symbol x to be the same as array symbol a - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("1.0", REAL_TYPE))]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting integer but found real - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("2", INTEGER_TYPE))]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting literal value 2 to be the same as the current array - # dimension 1 - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("1", INTEGER_TYPE))]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # valid - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - -def test_is_array_range_literal(): - ''' Test the _is_array_range_literal function.''' - - # Check that _is_array_range_literal calls the _check_args function. 
- with pytest.raises(TypeError) as excinfo: - _is_array_range_literal(None, None, None, None) - assert ("'array' argument should be some sort of array access (i.e. a " - "sub-class of ArrayMixin) but found 'NoneType'." in - str(excinfo.value)) - - one = Literal("1", INTEGER_TYPE) - array_type = ArrayType(REAL_TYPE, [20]) - symbol = DataSymbol('a', array_type) - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("1", INTEGER_TYPE))]) - my_range = Range.create(operator, one) - array_reference = ArrayReference.create(symbol, [my_range]) - - with pytest.raises(TypeError) as excinfo: - _is_array_range_literal(array_reference, 1, None, None) - assert ("'index' argument should be an int type but found 'NoneType'." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _is_array_range_literal(array_reference, 1, -1, None) - assert ("'index' argument should be 0, 1 or 2 but found -1." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _is_array_range_literal(array_reference, 1, 3, None) - assert ("'index' argument should be 0, 1 or 2 but found 3." - in str(excinfo.value)) - - with pytest.raises(TypeError) as excinfo: - _is_array_range_literal(array_reference, 1, 2, None) - assert ("'value' argument should be an int type but found 'NoneType'." - in str(excinfo.value)) - - # 1st dimension, second argument to range is an integer literal - # with value 1 - assert _is_array_range_literal(array_reference, 1, 1, 1) - - # 1st dimension, first argument to range is an operator, not a literal - assert not _is_array_range_literal(array_reference, 1, 0, 1) - - my_range = Range.create(operator.copy(), one.copy()) - - # Range.create checks for valid datatype. Therefore change to - # invalid after creation. - my_range.children[1] = Literal("1.0", REAL_TYPE) - array_reference = ArrayReference.create(symbol, [my_range]) - - # 1st dimension, second argument to range is a real literal, - # not an integer literal. - assert not _is_array_range_literal(array_reference, 1, 1, 1) - - my_range = Range.create(operator.copy(), one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - # 1st dimension, second argument to range has an unexpected - # value. - assert not _is_array_range_literal(array_reference, 1, 1, 2) - - @pytest.mark.parametrize("value", [ScalarType.Intrinsic.REAL, ScalarType.Intrinsic.INTEGER, @@ -1988,190 +1800,6 @@ def test_handling_parenthesis(): assert isinstance(new_node, BinaryOperation) -@pytest.mark.usefixtures("disable_declaration_check", "f2008_parser") -def test_array_section(): - ''' Check that we correctly handle an array section. - - TODO #754 fix test so that 'disable_declaration_check' fixture is not - required. - ''' - - def _array_create(code): - '''Utility function that takes the supplied Fortran code and returns - its PSyIR representation. - - :param str code: the executable code as a string. - - :returns: the executable code as PSyIR nodes. - :rtype: :py:class:`psyclone.psyir.nodes.Node` - - ''' - processor = Fparser2Reader() - fake_parent = Schedule() - reader = FortranStringReader(code) - fp2node = Execution_Part.match(reader)[0][0] - processor.process_nodes(fake_parent, [fp2node]) - return fake_parent.children[0].children[0] - - def _check_array(node, ndims): - '''Utility function that checks that the supplied node is an array and - has the expected number of dimensions. - - :param node: the node to check. 
- :type node: :py:class:`psyclone.psyir.nodes.ArrayReference` - :param int ndims: the number of expected array dimensions. - - ''' - assert isinstance(node, ArrayReference) - assert len(node.children) == ndims - - def _check_range(array, dim): - '''Utility function that checks that the "dim" index of the supplied - array contains a range node. Assumes that the supplied - argument "array" is an array. - - :param array: the node to check. - :type array: :py:class:`psyclone.psyir.nodes.ArrayReference` - :param int dim: the array dimension index to check. - - ''' - # Note, in Fortran the 1st dimension is 1, second is 2 - # etc. Therefore to obtain the correct child index we need to - # subtract 1. - range_node = array.children[dim-1] - assert isinstance(range_node, Range) - - def _check_reference(node, dim, index, name): - '''Utility function to check that the supplied array has a reference - at dimension index "dim" and range index "index" with name - "name". - - Assumes that the node argument is an array and that the - supplied dimension index is a Range node and that the supplied - range index is valid. - - :param array: the node to check. - :type array: :py:class:`pysclone.psyir.node.ArrayReference` - :param int dim: the dimension index to check. - :param int index: the index of the range to check (0 is the \ - lower bound, 1 is the upper bound). - :param str name: the expected name of the reference. - - ''' - # Note, in Fortran the 1st dimension is 1, second is 2 - # etc. Therefore to obtain the correct child index we need to - # subtract 1. - reference = node.children[dim-1].children[index] - assert isinstance(reference, Reference) - assert reference.name == name - - # Simple one-dimensional - for code in ["a(:) = 0.0", "a(::) = 0.0"]: - array_reference = _array_create(code) - _check_array(array_reference, ndims=1) - _check_range(array_reference, dim=1) - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal( - array_reference, dim=1, index=2, value=1) - # Simple multi-dimensional - for code in ["a(:,:,:) = 0.0", "a(::,::,::) = 0.0"]: - array_reference = _array_create(code) - _check_array(array_reference, ndims=3) - for dim in range(1, 4): - # Check each of the 3 dimensions (1, 2, 3) - _check_range(array_reference, dim=dim) - assert _is_bound_full_extent( - array_reference, dim, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_bound_full_extent( - array_reference, dim, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal( - array_reference, dim=dim, index=2, value=1) - # Simple values - code = "a(1:, 1:2, 1:2:3, :2, :2:3, ::3, 1::3) = 0.0" - array_reference = _array_create(code) - _check_array(array_reference, ndims=7) - # dim 1 - _check_range(array_reference, dim=1) - assert _is_array_range_literal(array_reference, dim=1, index=0, value=1) - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=1, index=2, value=1) - # dim 2 - _check_range(array_reference, dim=2) - assert _is_array_range_literal(array_reference, dim=2, index=0, value=1) - assert _is_array_range_literal(array_reference, dim=2, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=2, index=2, value=1) - # dim 3 - _check_range(array_reference, dim=3) - assert _is_array_range_literal(array_reference, dim=3, index=0, value=1) - assert 
_is_array_range_literal(array_reference, dim=3, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=3, index=2, value=3) - # dim 4 - _check_range(array_reference, dim=4) - assert _is_bound_full_extent(array_reference, 4, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_array_range_literal(array_reference, dim=4, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=4, index=2, value=1) - # dim 5 - _check_range(array_reference, dim=5) - assert _is_bound_full_extent(array_reference, 5, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_array_range_literal(array_reference, dim=5, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=5, index=2, value=3) - # dim 6 - _check_range(array_reference, dim=6) - assert _is_bound_full_extent(array_reference, 6, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_bound_full_extent(array_reference, 6, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=6, index=2, value=3) - # dim 7 - _check_range(array_reference, dim=7) - assert _is_array_range_literal(array_reference, dim=7, index=0, value=1) - assert _is_bound_full_extent(array_reference, 7, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=7, index=2, value=3) - - # Simple variables - code = "a(b:, b:c, b:c:d) = 0.0" - array_reference = _array_create(code) - _check_array(array_reference, ndims=3) - # dim 1 - _check_range(array_reference, dim=1) - _check_reference(array_reference, dim=1, index=0, name="b") - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=1, index=2, value=1) - # dim 2 - _check_range(array_reference, dim=2) - _check_reference(array_reference, dim=2, index=0, name="b") - _check_reference(array_reference, dim=2, index=1, name="c") - assert _is_array_range_literal(array_reference, dim=2, index=2, value=1) - # dim 3 - _check_range(array_reference, dim=3) - _check_reference(array_reference, dim=3, index=0, name="b") - _check_reference(array_reference, dim=3, index=1, name="c") - _check_reference(array_reference, dim=3, index=2, name="d") - - # Expressions - code = "a(b*c:b+c:b/c) = 0.0" - array_reference = _array_create(code) - _check_array(array_reference, ndims=1) - _check_range(array_reference, dim=1) - my_range = array_reference.children[0] - assert isinstance(my_range.children[0], BinaryOperation) - assert my_range.children[0].operator == BinaryOperation.Operator.MUL - assert isinstance(my_range.children[1], BinaryOperation) - assert my_range.children[1].operator == BinaryOperation.Operator.ADD - assert isinstance(my_range.children[2], BinaryOperation) - assert my_range.children[2].operator == BinaryOperation.Operator.DIV - - @pytest.mark.usefixtures("disable_declaration_check", "f2008_parser") def test_handling_array_product(): ''' Check that we correctly handle array products. 
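The hunks above drop the tests for the private fparser2 helpers (_check_args, _is_bound_full_extent, _is_array_range_literal) together with test_array_section, which exercised how a full array section is normalised. That normalisation is still observable through the public frontend: a section such as a(:) becomes a Range whose bounds are LBOUND/UBOUND intrinsic calls with a unit-stride step. A minimal sketch, assuming a standard PSyclone installation (the subroutine itself is illustrative):

    from psyclone.psyir.frontend.fortran import FortranReader
    from psyclone.psyir.nodes import IntrinsicCall, Literal, Range

    # Parse a tiny routine containing a full array section.
    code = '''
    subroutine sub()
      real, dimension(20) :: a
      a(:) = 0.0
    end subroutine sub
    '''
    psyir = FortranReader().psyir_from_source(code)

    # 'a(:)' is expanded to LBOUND(a, 1) : UBOUND(a, 1) : 1.
    section = psyir.walk(Range)[0]
    assert section.start.intrinsic is IntrinsicCall.Intrinsic.LBOUND
    assert section.stop.intrinsic is IntrinsicCall.Intrinsic.UBOUND
    assert isinstance(section.step, Literal) and section.step.value == "1"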
diff --git a/src/psyclone/tests/psyir/nodes/omp_directives_test.py b/src/psyclone/tests/psyir/nodes/omp_directives_test.py index 2c8d2dbfd4..068c71896e 100644 --- a/src/psyclone/tests/psyir/nodes/omp_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/omp_directives_test.py @@ -4669,3 +4669,34 @@ def test_omp_serial_check_dependency_valid_pairing(): assert test_dir._check_dependency_pairing_valid( array_reference1, array_reference2, None, None ) + + +def test_omptarget_gen_code(): + ''' Check that the OMPTarget gen_code produces the right code ''' + _, invoke_info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), + api="lfric") + psy = PSyFactory("lfric", distributed_memory=True).create(invoke_info) + schedule = psy.invokes.invoke_list[0].schedule + kern = schedule.children[-1] + + # Add an OMPTarget and move the kernel inside it + target = OMPTargetDirective() + schedule.addchild(target) + target.dir_body.addchild(kern.detach()) + + # Check that the "omp target" is produced, and that the set_dirty is + # generated after it + code = str(psy.gen) + assert """ + !$omp target + DO cell = loop0_start, loop0_stop, 1 + CALL testkern_code(nlayers_f1, a, f1_data, f2_data, m1_data, \ +m2_data, ndf_w1, undf_w1, map_w1(:,cell), ndf_w2, undf_w2, map_w2(:,cell), \ +ndf_w3, undf_w3, map_w3(:,cell)) + END DO + !$omp end target + ! + ! Set halos dirty/clean for fields modified in the above loop(s) + ! + CALL f1_proxy%set_dirty() + """ in code diff --git a/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py b/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py index cf7658d585..abceb2eea8 100644 --- a/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py +++ b/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py @@ -50,7 +50,8 @@ from psyclone.psyir.nodes import Routine, FileContainer, IntrinsicCall, Call from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE from psyclone.psyir.transformations import TransformationError -from psyclone.transformations import ACCRoutineTrans, Dynamo0p3KernelConstTrans +from psyclone.transformations import ( + ACCRoutineTrans, OMPDeclareTargetTrans, Dynamo0p3KernelConstTrans) from psyclone.tests.gocean_build import GOceanBuild from psyclone.tests.lfric_build import LFRicBuild @@ -430,6 +431,24 @@ def test_gpumixin_validate_no_call(): in str(err.value)) +@pytest.mark.parametrize( + "rtrans, expected_directive", + [(ACCRoutineTrans(), "!$acc routine"), + (OMPDeclareTargetTrans(), "!$omp declare target")]) +def test_kernel_gpu_annotation_trans(rtrans, expected_directive, + fortran_writer): + ''' Check that the GPU annotation transformations insert the + proper directive inside PSyKAl kernel code ''' + _, invoke = get_invoke("1_single_invoke.f90", api="lfric", idx=0) + sched = invoke.schedule + kern = sched.coded_kernels()[0] + rtrans.apply(kern) + + # Check that the directive has been added to the kernel code + code = fortran_writer(kern.get_kernel_schedule()) + assert expected_directive in code + + def test_1kern_trans(kernel_outputdir): ''' Check that we generate the correct code when an invoke contains the same kernel more than once but only one of them is transformed. 
''' diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index ca34b76607..0e39523559 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -537,19 +537,35 @@ class OMPDeclareTargetTrans(Transformation, MarkRoutineForGPUMixin): ''' def apply(self, node, options=None): - ''' Insert an OMPDeclareTargetDirective inside the provided routine. + ''' Insert an OMPDeclareTargetDirective inside the provided routine or + associated PSyKAl kernel. - :param node: the PSyIR routine to insert the directive into. - :type node: :py:class:`psyclone.psyir.nodes.Routine` + :param node: the kernel or routine which is the target of this + transformation. + :type node: :py:class:`psyclone.psyir.nodes.Routine` | + :py:class:`psyclone.psyGen.Kern` :param options: a dictionary with options for transformations. :type options: Optional[Dict[str, Any]] + :param bool options["force"]: whether to allow routines with + CodeBlocks to run on the GPU. ''' self.validate(node, options) - for child in node.children: + + if isinstance(node, Kern): + # Flag that the kernel has been modified + node.modified = True + + # Get the schedule representing the kernel subroutine + routine = node.get_kernel_schedule() + else: + routine = node + + for child in routine.children: if isinstance(child, OMPDeclareTargetDirective): return # The routine is already marked with OMPDeclareTarget - node.children.insert(0, OMPDeclareTargetDirective()) + + routine.children.insert(0, OMPDeclareTargetDirective()) def validate(self, node, options=None): ''' Check that an OMPDeclareTargetDirective can be inserted. diff --git a/tutorial/practicals/nemo/2_nemo_profiling/Makefile b/tutorial/practicals/nemo/2_nemo_profiling/Makefile index ff957684ee..a8a59854e5 100644 --- a/tutorial/practicals/nemo/2_nemo_profiling/Makefile +++ b/tutorial/practicals/nemo/2_nemo_profiling/Makefile @@ -108,7 +108,7 @@ transform: -o output_3.f90 -l output tra_adv_mod.F90 compile: transform $(KERNELS) output.o solutions/runner.o - $(F90) $(KERNELS) output.o solutions/runner.o -o $(NAME) \ + $(F90) $(F90FLAGS) $(KERNELS) output.o solutions/runner.o -o $(NAME) \ $(PROFILE_WRAPPER_LINK) $(PROFILE_LINK) # Only used for the compile CI target to compile the solution file
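With the apply() change above, OMPDeclareTargetTrans accepts either a plain PSyIR Routine or a PSyKAl kernel: for a kernel it flags the node as modified and inserts the directive at the top of the kernel schedule. A minimal usage sketch in the spirit of the new tests (the algorithm file name is assumed to be the LFRic test file used above, reachable from the working directory):

    from psyclone.parse.algorithm import parse
    from psyclone.psyGen import PSyFactory
    from psyclone.transformations import OMPDeclareTargetTrans

    # Build the PSy layer for an LFRic algorithm file (path assumed).
    _, invoke_info = parse("1_single_invoke.f90", api="lfric")
    psy = PSyFactory("lfric", distributed_memory=False).create(invoke_info)

    # Apply the transformation directly to the coded kernel.
    kern = psy.invokes.invoke_list[0].schedule.coded_kernels()[0]
    OMPDeclareTargetTrans().apply(kern)

    # The kernel is flagged as modified and '!$omp declare target' now
    # appears in its regenerated source.
    assert kern.modified

The Makefile fix is related but independent: options such as OpenMP/OpenACC flags generally have to be passed at link time as well as at compile time, so the link line now forwards $(F90FLAGS).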