diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml
index a1e032a797..d90b08417c 100644
--- a/.github/workflows/compilation.yml
+++ b/.github/workflows/compilation.yml
@@ -51,14 +51,14 @@ on: push

 env:
-  CUDA_VERSION: 12.6.0
+  CUDA_VERSION: 12.6.2
   GFORTRAN_VERSION: 14.2.0
-  HDF5_VERSION: 1.14.4.3
+  HDF5_VERSION: 1.14.5
   NETCDF_C_VERSION: 4.9.2
   NETCDF_FORTRAN_VERSION: 4.6.1
-  NVFORTRAN_VERSION: 24.7
+  NVFORTRAN_VERSION: 24.9
   OPENMPI_VERSION: 5.0.5
-  PYTHON_VERSION: 3.12.5
+  PYTHON_VERSION: 3.13.0

 jobs:
   run_if_on_mirror:
diff --git a/.github/workflows/lfric_test.yml b/.github/workflows/lfric_test.yml
index 8f6c796f31..8d336e2ead 100644
--- a/.github/workflows/lfric_test.yml
+++ b/.github/workflows/lfric_test.yml
@@ -47,7 +47,7 @@ jobs:
     runs-on: self-hosted
     env:
       LFRIC_APPS_REV: 3269
-      PYTHON_VERSION: 3.12.5
+      PYTHON_VERSION: 3.13.0

     steps:
       - uses: actions/checkout@v3
@@ -75,6 +75,58 @@ jobs:
           pip install .[test]
           pip install jinja2

+      # PSyclone, compile and run MetOffice gungho_model on GPU
+      - name: LFRic GungHo with OpenMP offload
+        run: |
+          # Set up environment
+          source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh
+          spack load lfric-build-environment%nvhpc
+          source .runner_venv/bin/activate
+          export PSYCLONE_LFRIC_DIR=${GITHUB_WORKSPACE}/examples/lfric/scripts
+          export PSYCLONE_CONFIG_FILE=${PSYCLONE_LFRIC_DIR}/KGOs/lfric_psyclone.cfg
+          # The LFRic source must be patched to work around bugs in the NVIDIA
+          # compiler's namelist handling.
+          rm -rf ${HOME}/LFRic/gpu_build
+          mkdir -p ${HOME}/LFRic/gpu_build
+          cp -r ${HOME}/LFRic/lfric_apps_${LFRIC_APPS_REV} ${HOME}/LFRic/gpu_build/lfric_apps
+          cp -r ${HOME}/LFRic/lfric_core_50869 ${HOME}/LFRic/gpu_build/lfric
+          cd ${HOME}/LFRic/gpu_build
+          patch -p1 < ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_${LFRIC_APPS_REV}_nvidia.patch
+          # Update the compiler definitions to build for GPU
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvfortran_acc.mk lfric/infrastructure/build/fortran/nvfortran.mk
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvc++.mk lfric/infrastructure/build/cxx/.
+          # Update the PSyclone commands to ensure transformed kernels are written
+          # to the working directory.
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/psyclone.mk lfric/infrastructure/build/psyclone/.
+          # Update dependencies.sh to point to our patched lfric core.
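+          # (The sed below rewrites the 'export lfric_core_sources=...' line
+          # in dependencies.sh so that the build uses the patched core tree
+          # created above.)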
+          sed -i -e 's/export lfric_core_sources=.*$/export lfric_core_sources\=\/home\/gh_runner\/LFRic\/gpu_build\/lfric/' lfric_apps/dependencies.sh
+          export LFRIC_DIR=${HOME}/LFRic/gpu_build/lfric_apps
+          export OPT_DIR=${LFRIC_DIR}/applications/gungho_model/optimisation/psyclone-test
+          cd ${LFRIC_DIR}
+          # PSyclone scripts must now be under 'optimisation' and be called 'global.py'
+          mkdir -p ${OPT_DIR}
+          cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
+          # Clean previous version and compile again
+          rm -rf applications/gungho_model/working
+          LFRIC_OFFLOAD_DIRECTIVES=omp ./build/local_build.py -a gungho_model -p psyclone-test
+          cd applications/gungho_model/example
+          cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
+          mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
+          python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
+          cat timer.txt
+          export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
+          export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
+          echo $GITHUB_REF_NAME $GITHUB_SHA $VAR_TIME $VAR_HALOS >> ${HOME}/store_results/lfric_omp_performance_history
+          ${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \
+            "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
+            --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
+            --password ${{ secrets.MONGODB_PASSWORD }} \
+            --eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'",
+              github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'",
+              ci_test: "LFRic OpenMP offloading", lfric_apps_version: '"$LFRIC_APPS_REV"', system: "GlaDos",
+              compiler:"spack-nvhpc-24.5", date: new Date(), elapsed_time: '"$VAR_TIME"',
+              num_of_halo_exchanges: '"$VAR_HALOS"'})'
+
       # PSyclone, compile and run MetOffice gungho_model on GPU
       - name: LFRic GungHo with OpenACC offload
         run: |
@@ -105,10 +157,10 @@ jobs:
           cd ${LFRIC_DIR}
           # PSyclone scripts must now be under 'optimisation' and be called 'global.py'
           mkdir -p ${OPT_DIR}
-          cp ${PSYCLONE_LFRIC_DIR}/acc_parallel.py ${OPT_DIR}/global.py
+          cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
           # Clean previous version and compile again
           rm -rf applications/gungho_model/working
-          ./build/local_build.py -a gungho_model -p psyclone-test
+          LFRIC_OFFLOAD_DIRECTIVES=acc ./build/local_build.py -a gungho_model -p psyclone-test
           cd applications/gungho_model/example
           cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
           mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
diff --git a/.github/workflows/nemo_tests.yml b/.github/workflows/nemo_tests.yml
index bbcd2bf468..7dcf064239 100644
--- a/.github/workflows/nemo_tests.yml
+++ b/.github/workflows/nemo_tests.yml
@@ -46,13 +46,13 @@ jobs:
     if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
     runs-on: self-hosted
     env:
-      HDF5_VERSION: 1.14.4.3
+      HDF5_VERSION: 1.14.5
       NETCDF_C_VERSION: 4.9.2
       NETCDF_FORTRAN_VERSION: 4.6.1
       NVFORTRAN_VERSION: 23.7
       ONEAPI_VERSION: 2024.2.1
       PERL_VERSION: 5.40.0
-      PYTHON_VERSION: 3.12.5
+      PYTHON_VERSION: 3.13.0

     steps:
       - uses: actions/checkout@v3
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index eec02a9151..f4e85492b9 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -62,7 +62,7 @@ jobs:
     - uses: actions/checkout@v4
     - uses: actions/setup-python@v5
       with:
-        python-version: '3.12'
+        python-version: '3.13'
    - run: sudo apt-get install -y graphviz doxygen
    - run: python -m pip install --upgrade pip
    - run: pip install .[doc]
@@ -83,7 +83,7 @@ jobs:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
-        python-version: '3.12'
+        python-version: '3.13'
    - run: python -m pip install --upgrade pip
    - run: pip install .[doc]
    # Sphinx since version 7.2 (7.2.0/1/2) aborts with
@@ -99,7 +99,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        python-version: [3.7, 3.8, 3.12]
+        python-version: [3.7, 3.8, 3.13]
    steps:
    - uses: actions/checkout@v4
      with:
diff --git a/changelog b/changelog
index c58fc4be65..886221f860 100644
--- a/changelog
+++ b/changelog
@@ -234,6 +234,20 @@
 	80) PR #2596 for #2592 and #2582. Routine nodes manage their own symbols
 	from their parent scope.

+	81) PR #2725 to close #717. Removes some TODOs and associated utility code
+	from the fparser2 frontend that is now unused.
+
+	82) PR #2733 for #2730. Adds OpenMP offloading support for LFRic plus
+	associated integration test.
+
+	83) PR #2743 for #2742. Ensure generated code for function spaces in
+	LFRic is always in a consistent order.
+
+	84) PR #2746. Update minor versions of CUDA, HDF5, nvfortran and
+	Python used in the CI.
+
+	85) PR #2748. Revert Python used on RTD to 3.12 as 3.13 unsupported.
+
 release 2.5.0 14th of February 2024

 	1) PR #2199 for #2189. Fix bugs with missing maps in enter data
diff --git a/doc/developer_guide/working_practises.rst b/doc/developer_guide/working_practises.rst
index 12403d2fc0..a7d1b78134 100644
--- a/doc/developer_guide/working_practises.rst
+++ b/doc/developer_guide/working_practises.rst
@@ -448,13 +448,13 @@ computational cost (so that we 'fail fast'):
 3. All links within the Sphinx documentation (rst files) are checked
    (see note below);

- 4. All of the examples are tested (for Python versions 3.7, 3.8 and 3.12)
+ 4. All of the examples are tested (for Python versions 3.7, 3.8 and 3.13)
    using the ``Makefile`` in the ``examples`` directory. No compilation is
    performed; only the ``transform`` (performs the PSyclone transformations)
    and ``notebook`` (runs the various Jupyter notebooks) targets are used.
    The ``transform`` target is run 2-way parallel (``-j 2``).

- 5. The full test suite is run for Python versions 3.7, 3.8 and 3.12 but
+ 5. The full test suite is run for Python versions 3.7, 3.8 and 3.13 but
    without the compilation checks. ``pytest`` is passed the ``-n auto`` flag
    so that it will run the tests in parallel on as many cores as are
    available (currently 2 on GHA instances).
diff --git a/doc/user_guide/getting_going.rst b/doc/user_guide/getting_going.rst
index 2729f989b6..5e8e37ffa2 100644
--- a/doc/user_guide/getting_going.rst
+++ b/doc/user_guide/getting_going.rst
@@ -212,7 +212,7 @@ Dependencies
 ------------

 PSyclone is written in Python so needs Python 3 to be installed on the
-target machine. PSyclone is regularly tested with Python 3.7, 3.8 and 3.12
+target machine. PSyclone is regularly tested with Python 3.7, 3.8 and 3.13
 but should work with any version >= 3.6. (The last PSyclone release to
 support Python 2.7 was version 2.1.0.)
diff --git a/examples/gocean/eg5/extract/Makefile b/examples/gocean/eg5/extract/Makefile
index fe6a5d5976..8685dc3290 100644
--- a/examples/gocean/eg5/extract/Makefile
+++ b/examples/gocean/eg5/extract/Makefile
@@ -122,13 +122,13 @@ $(NAME): $(INF_LIB) $(EXTRACT_DIR)/$(LIB_NAME) $(KERNELS) alg.o psy.o
 #TODO #1757: $(INF_LIB) is required because of the meta-data in the
 # kernel - once this is fixed, $(INF_LIB) can be removed.
 $(DRIVER_INIT).$(TYPE): $(KERNELS) $(DRIVER_INIT).o
-	$(F90) $(KERNELS) $(DRIVER_INIT).o -o $(DRIVER_INIT).$(TYPE) \
+	$(F90) $(F90FLAGS) $(KERNELS) $(DRIVER_INIT).o -o $(DRIVER_INIT).$(TYPE) \
		$(INF_LIB) $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)

 #TODO #1757: $(INF_LIB) is required because of the meta-data in the
 # kernel - once this is fixed, $(INF_LIB) can be removed.
 $(DRIVER_UPDATE).$(TYPE): $(KERNELS) $(DRIVER_UPDATE).o
-	$(F90) $(KERNELS) $(DRIVER_UPDATE).o -o $(DRIVER_UPDATE).$(TYPE) \
+	$(F90) $(F90FLAGS) $(KERNELS) $(DRIVER_UPDATE).o -o $(DRIVER_UPDATE).$(TYPE) \
		$(INF_LIB) $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)

 # The dl_esm_inf library
diff --git a/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch b/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
index d3e178318a..db37ced2e3 100644
--- a/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
+++ b/examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
@@ -57,65 +57,6 @@ index 19c9cff9..b5cd3014 100644
 	$(call MESSAGE,Compiled,$<)

-diff --git a/lfric/infrastructure/build/cxx/nvc++.mk b/lfric/infrastructure/build/cxx/nvc++.mk
-new file mode 100644
-index 00000000..13b17a10
---- /dev/null
-+++ b/lfric/infrastructure/build/cxx/nvc++.mk
-@@ -0,0 +1,9 @@
-+##############################################################################
-+# (c) Crown copyright 2017 Met Office. All rights reserved.
-+# The file LICENCE, distributed with this code, contains details of the terms
-+# under which the code may be used.
-+##############################################################################
-+
-+$(info ** Chosen NVC++ compiler)
-+
-+CXX_RUNTIME_LIBRARY=stdc++
-diff --git a/lfric/infrastructure/build/fortran/nvfortran.mk b/lfric/infrastructure/build/fortran/nvfortran.mk
-new file mode 100644
-index 00000000..cfed52c1
---- /dev/null
-+++ b/lfric/infrastructure/build/fortran/nvfortran.mk
-@@ -0,0 +1,38 @@
-+##############################################################################
-+# Copyright (c) 2017, Met Office, on behalf of HMSO and Queen's Printer
-+# For further details please refer to the file LICENCE.original which you
-+# should have received as part of this distribution.
-+##############################################################################
-+# Various things specific to the Portland Fortran compiler.
-+##############################################################################
-+#
-+# This macro is evaluated now (:= syntax) so it may be used as many times as
-+# desired without wasting time rerunning it.
-+#
-+F_MOD_DESTINATION_ARG = -module$(SPACE)
-+OPENMP_ARG = -mp
-+
-+FFLAGS_COMPILER =
-+FFLAGS_NO_OPTIMISATION = -O0
-+FFLAGS_SAFE_OPTIMISATION = -O2
-+FFLAGS_RISKY_OPTIMISATION = -O4
-+FFLAGS_DEBUG = -g -traceback
-+FFLAGS_RUNTIME = -Mchkptr -Mchkstk
-+# Option for checking code meets Fortran standard (not available for PGI)
-+FFLAGS_FORTRAN_STANDARD =
-+
-+LDFLAGS_COMPILER = -g
-+
-+FPP = cpp -traditional-cpp
-+FPPFLAGS = -P
-+FC = mpif90
-+
-+# FS#34981 (nvbug 4648082)
-+science/src/um/src/atmosphere/large_scale_precipitation/ls_ppnc.o: private FFLAGS_RUNTIME = -Mchkstk
-+
-+# FS#35751
-+mesh/create_mesh_mod.o: private FFLAGS_RUNTIME = -Mchkstk
-+
-+# 24.3
-+science/src/socrates/src/cosp_github/subsample_and_optics_example/optics/quickbeam_optics/optics_lib.o: private FFLAGS_SAFE_OPTIMISATION = -O1
-+science/src/socrates/src/cosp_github/subsample_and_optics_example/optics/quickbeam_optics/optics_lib.o: private FFLAGS_RISKY_OPTIMISATION = -O1
 diff --git a/lfric/infrastructure/build/tools/DependencyRules
 index 9d4db390..e37384fc 100755
 --- a/lfric/infrastructure/build/tools/DependencyRules
diff --git a/examples/lfric/scripts/KGOs/nvfortran_acc.mk b/examples/lfric/scripts/KGOs/nvfortran_acc.mk
index 79df074ed8..34ead62500 100644
--- a/examples/lfric/scripts/KGOs/nvfortran_acc.mk
+++ b/examples/lfric/scripts/KGOs/nvfortran_acc.mk
@@ -20,9 +20,20 @@ FFLAGS_DEBUG = -g -traceback
 FFLAGS_RUNTIME = -Mchkptr -Mchkstk
 # Option for checking code meets Fortran standard (not available for PGI)
 FFLAGS_FORTRAN_STANDARD =
-OPENMP_ARG = -acc=gpu -gpu=managed -mp=multicore
-LDFLAGS_COMPILER = -g -acc=gpu -gpu=managed -mp=multicore -cuda
+# Flags for OpenMP threading / OpenMP offloading / OpenACC offloading.
+# The LFRIC_OFFLOAD_DIRECTIVES environment variable is also queried in the
+# PSyclone script to generate matching directives
+ifeq ("$(LFRIC_OFFLOAD_DIRECTIVES)", "omp")
+  OPENMP_ARG = -mp=gpu -gpu=managed
+  LDFLAGS_COMPILER = -mp=gpu -gpu=managed -cuda
+else ifeq ("$(LFRIC_OFFLOAD_DIRECTIVES)", "acc")
+  OPENMP_ARG = -acc=gpu -gpu=managed -mp=multicore
+  LDFLAGS_COMPILER = -acc=gpu -gpu=managed -mp=multicore -cuda
+else
+  OPENMP_ARG = -mp
+  LDFLAGS_COMPILER = -mp
+endif

 FPP = cpp -traditional-cpp
 FPPFLAGS = -P
diff --git a/examples/lfric/scripts/Makefile b/examples/lfric/scripts/Makefile
index b2703e6172..e39c1f03c0 100644
--- a/examples/lfric/scripts/Makefile
+++ b/examples/lfric/scripts/Makefile
@@ -45,7 +45,7 @@ transform: ${SCRIPTS}
 .PHONY: ${SCRIPTS}

 ${SCRIPTS}:
-	${PSYCLONE} -api lfric -s ./$@ ../eg3/solver_mod.x90 -oalg /dev/null -opsy /dev/null
+	LFRIC_OFFLOAD_DIRECTIVES=acc ${PSYCLONE} -api lfric -s ./$@ ../eg3/solver_mod.x90 -oalg /dev/null -opsy /dev/null

 compile: transform
	@echo "No compilation supported for lfric/scripts examples"
diff --git a/examples/lfric/scripts/acc_parallel.py b/examples/lfric/scripts/acc_parallel.py
deleted file mode 100644
index 198450e91c..0000000000
--- a/examples/lfric/scripts/acc_parallel.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# -----------------------------------------------------------------------------
-# BSD 3-Clause License
-#
-# Copyright (c) 2018-2024, Science and Technology Facilities Council.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of the copyright holder nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-# -----------------------------------------------------------------------------
-# Authors: A. R. Porter, STFC Daresbury Lab
-#          R. W. Ford, STFC Daresbury Lab
-#          L. Mosimann, NVIDIA.
-
-'''PSyclone transformation script for the lfric API to apply
-colouring, OpenACC, OpenMP. Also adds redundant computation to the level-1
-halo for setval_* generically.
-
-'''
-from psyclone.domain.lfric import LFRicConstants
-from psyclone.psyir.nodes import ACCDirective, Loop
-from psyclone.psyir.transformations import (
-    ACCKernelsTrans, TransformationError)
-from psyclone.transformations import (
-    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
-    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
-    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans)
-
-
-# Names of any routines that we won't add any OpenACC to.
-ACC_EXCLUSIONS = [
-]
-
-
-def trans(psy):
-    '''Applies PSyclone colouring and OpenACC transformations. Any kernels that
-    cannot be offloaded to GPU are parallelised using OpenMP on the CPU. Any
-    setval_* kernels are transformed so as to compute into the L1 halos.
-
-    '''
-    rtrans = Dynamo0p3RedundantComputationTrans()
-    ctrans = Dynamo0p3ColourTrans()
-    otrans = Dynamo0p3OMPLoopTrans()
-    const = LFRicConstants()
-    loop_trans = ACCLoopTrans()
-    ktrans = ACCKernelsTrans()
-    parallel_trans = ACCParallelTrans(default_present=False)
-    artrans = ACCRoutineTrans()
-    oregtrans = OMPParallelTrans()
-
-    print(f"PSy name = '{psy.name}'")
-
-    # Loop over all of the Invokes in the PSy object
-    for invoke in psy.invokes.invoke_list:
-
-        print("Transforming invoke '{0}' ...".format(invoke.name))
-        schedule = invoke.schedule
-
-        # Make setval_* compute redundantly to the level 1 halo if it
-        # is in its own loop
-        for loop in schedule.loops():
-            if loop.iteration_space == "dof":
-                if len(loop.kernels()) == 1:
-                    if loop.kernels()[0].name in ["setval_c", "setval_x"]:
-                        rtrans.apply(loop, options={"depth": 1})
-
-        if psy.name.lower() in ACC_EXCLUSIONS:
-            print(f"Not adding ACC to invoke in '{psy.name}'")
-            apply_acc = False
-        else:
-            apply_acc = True
-
-        # Keep a record of any kernels we fail to module inline as we can't
-        # then add ACC ROUTINE to them.
-        failed_inline = set()
-
-        # Colour loops over cells unless they are on discontinuous
-        # spaces or over dofs
-        for loop in schedule.loops():
-            if loop.iteration_space == "cell_column":
-                if apply_acc:
-                    for kern in loop.kernels():
-                        try:
-                            artrans.apply(kern)
-                        except TransformationError as err:
-                            failed_inline.add(kern.name.lower())
-                            print(f"Adding ACC Routine to kernel '{kern.name}'"
-                                  f" failed:\n{err.value}")
-                if (loop.field_space.orig_name not in
-                        const.VALID_DISCONTINUOUS_NAMES):
-                    ctrans.apply(loop)
-
-        # Add OpenACC to loops unless they are over colours or are null.
-        schedule = invoke.schedule
-        for loop in schedule.walk(Loop):
-            if not apply_acc or any(kern.name.lower() in failed_inline for
-                                    kern in loop.kernels()):
-                print(f"Not adding OpenACC for kernels: "
-                      f"{[kern.name for kern in loop.kernels()]}")
-                continue
-            try:
-                if loop.loop_type == "colours":
-                    pass
-                if loop.loop_type == "colour":
-                    loop_trans.apply(loop, options={"independent": True})
-                    parallel_trans.apply(loop.ancestor(ACCDirective))
-                if loop.loop_type == "":
-                    loop_trans.apply(loop, options={"independent": True})
-                    parallel_trans.apply(loop.ancestor(ACCDirective))
-                if loop.loop_type == "dof":
-                    # We use ACC KERNELS for dof loops since they can contain
-                    # reductions.
-                    ktrans.apply(loop)
-            except TransformationError as err:
-                print(str(err))
-                pass
-
-        # Apply OpenMP thread parallelism for any kernels we've not been able
-        # to offload to GPU.
-        for loop in schedule.walk(Loop):
-            if not apply_acc or any(kern.name.lower() in failed_inline for
-                                    kern in loop.kernels()):
-                if loop.loop_type not in ["colours", "null"]:
-                    oregtrans.apply(loop)
-                    otrans.apply(loop, options={"reprod": True})
-
-    return psy
diff --git a/examples/lfric/scripts/gpu_offloading.py b/examples/lfric/scripts/gpu_offloading.py
new file mode 100644
index 0000000000..2167119fa1
--- /dev/null
+++ b/examples/lfric/scripts/gpu_offloading.py
@@ -0,0 +1,196 @@
+# -----------------------------------------------------------------------------
+# BSD 3-Clause License
+#
+# Copyright (c) 2018-2024, Science and Technology Facilities Council.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+# Authors: A. R. Porter, STFC Daresbury Lab
+#          R. W. Ford, STFC Daresbury Lab
+#          S. Siso, STFC Daresbury Lab
+#          L. Mosimann, NVIDIA.
+
+'''PSyclone transformation script for LFRic to apply colouring and GPU
+offloading. Also adds redundant computation to the level-1 halo for
+setval_* generically.
+
+'''
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans)
+
+
+# Names of any invokes to which we won't add GPU offloading
+INVOKE_EXCLUSIONS = [
+]
+
+OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
+
+
+def trans(psy):
+    '''Applies PSyclone colouring and GPU offloading transformations. Any
+    kernels that cannot be offloaded to GPU are parallelised using OpenMP
+    on the CPU. Any setval_* kernels are transformed so as to compute
+    into the L1 halos.
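+
+    The offloading dialect is selected at transformation time via the
+    LFRIC_OFFLOAD_DIRECTIVES environment variable ('omp' or 'acc'),
+    matching the compiler flags chosen in nvfortran_acc.mk.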
+
+    '''
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    else:
+        print(f"The PSyclone transformation script expects the "
+              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psy.name}'")
+
+    # Loop over all of the Invokes in the PSy object
+    for invoke in psy.invokes.invoke_list:
+
+        print("Transforming invoke '{0}' ...".format(invoke.name))
+        schedule = invoke.schedule
+
+        # Make setval_* compute redundantly to the level 1 halo if it
+        # is in its own loop
+        for loop in schedule.loops():
+            if loop.iteration_space == "dof":
+                if len(loop.kernels()) == 1:
+                    if loop.kernels()[0].name in ["setval_c", "setval_x"]:
+                        rtrans.apply(loop, options={"depth": 1})
+
+        if psy.name.lower() in INVOKE_EXCLUSIONS:
+            print(f"Not adding GPU offloading to invoke '{psy.name}'")
+            offload = False
+        else:
+            offload = True
+
+        # Keep a record of any kernels we fail to offload
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in schedule.loops():
+            if loop.iteration_space == "cell_column":
+                if (loop.field_space.orig_name not in
+                        const.VALID_DISCONTINUOUS_NAMES):
+                    ctrans.apply(loop)
+
+        # Mark Kernels inside the loops over cells as GPU-enabled
+        # (alternatively we could inline them)
+        for loop in schedule.loops():
+            if loop.iteration_space == "cell_column":
+                if offload:
+                    for kern in loop.kernels():
+                        try:
+                            gpu_annotation_trans.apply(kern)
+                        except TransformationError as err:
+                            failed_to_offload.add(kern.name.lower())
+                            print(f"Failed to annotate '{kern.name}' with "
+                                  f"GPU-enabled directive due to:\n"
+                                  f"{err.value}")
+                    # For annotated or inlined kernels we could attempt to
+                    # provide compile-time dimensions for the temporary
+                    # arrays and convert unsupported intrinsics to code.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
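+        # ('colours' loops stay sequential on the host; each 'colour' loop
+        # and each un-coloured loop gets a loop-offloading directive wrapped
+        # in a GPU region, while 'dof' loops are handled separately below
+        # because they may contain reductions.)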
+        schedule = invoke.schedule
+        for loop in schedule.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if offload and all(name not in failed_to_offload for name in
+                               kernel_names):
+                try:
+                    if loop.loop_type == "colours":
+                        pass
+                    if loop.loop_type == "colour":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "dof":
+                        # Loops over dofs can contain reductions
+                        if kernels_trans:
+                            # If kernel offloading is available, it should
+                            # manage them
+                            kernels_trans.apply(loop)
+                        else:
+                            # Otherwise, if reductions exist, they will be
+                            # detected by the dependency analysis and raise
+                            # a TransformationError, caught below
+                            loop_offloading_trans.apply(
+                                loop, options={"independent": True})
+                            gpu_region_trans.apply(loop.ancestor(Directive))
+                            # Alternatively we could use loop parallelism
+                            # with reduction clauses
+                    print(f"Successfully offloaded loop with {kernel_names}")
+                except TransformationError as err:
+                    print(f"Failed to offload loop with {kernel_names} "
+                          f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU.
+        for loop in schedule.walk(Loop):
+            if not offload or any(kern.name.lower() in failed_to_offload for
+                                  kern in loop.kernels()):
+                if loop.loop_type not in ["colours", "null"]:
+                    cpu_parallel.apply(loop)
+                    otrans.apply(loop, options={"reprod": True})
+
+    return psy
diff --git a/examples/nemo/eg5/Makefile b/examples/nemo/eg5/Makefile
index 4f2b991761..29029b34aa 100644
--- a/examples/nemo/eg5/Makefile
+++ b/examples/nemo/eg5/Makefile
@@ -85,7 +85,7 @@ run: compile
	IT=2 JPI=10 JPJ=10 JPK=5 ./traadv-$(TYPE).exe

 traadv-$(TYPE).exe: psy.o $(EXTRACT_DIR)/$(LIB_NAME)
-	$(F90) psy.o -o traadv-$(TYPE).exe $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)
+	$(F90) $(F90FLAGS) psy.o -o traadv-$(TYPE).exe $(EXTRACT_DIR)/$(LIB_NAME) $(LDFLAGS)

 transform: kernels
diff --git a/examples/nemo/scripts/acc_kernels_trans.py b/examples/nemo/scripts/acc_kernels_trans.py
index a08439130c..7ae43abdd3 100755
--- a/examples/nemo/scripts/acc_kernels_trans.py
+++ b/examples/nemo/scripts/acc_kernels_trans.py
@@ -58,7 +58,7 @@
 import logging
 from utils import (add_profiling, enhance_tree_information, inline_calls,
-                   NOT_PERFORMANT, NOT_WORKING)
+                   NOT_PERFORMANT)
 from psyclone.errors import InternalError
 from psyclone.psyGen import TransInfo
 from psyclone.psyir.nodes import (
@@ -94,7 +94,7 @@
 ACC_EXPLICIT_MEM_MANAGEMENT = False

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING
+FILES_TO_SKIP = NOT_PERFORMANT

 # Routines we do not attempt to add any OpenACC to (because it breaks with
 # the Nvidia compiler or because it just isn't worth it)
@@ -109,18 +109,6 @@
                   "bdytide_init", "bdy_init", "bdy_segs", "sbc_cpl_init",
                   "asm_inc_init", "dia_obs_init"]  # Str handling, init routine

-# Currently fparser has no way of distinguishing array accesses from
-# function calls if the symbol is imported from some other module.
-# We therefore work-around this by keeping a list of known NEMO
-# functions that must be excluded from within KERNELS regions.
-NEMO_FUNCTIONS = ["alpha_charn", "cd_neutral_10m", "cpl_freq", "cp_air",
-                  "eos_pt_from_ct", "gamma_moist", "l_vap",
-                  "sbc_dcy", "solfrac", "psi_h", "psi_m", "psi_m_coare",
-                  "psi_h_coare", "psi_m_ecmwf", "psi_h_ecmwf", "q_sat",
-                  "rho_air", "visc_air", "sbc_dcy", "glob_sum",
-                  "glob_sum_full", "ptr_sj", "ptr_sjk", "interp1", "interp2",
-                  "interp3", "integ_spline"]
-

 class ExcludeSettings():
     '''
@@ -260,17 +248,6 @@ def valid_acc_kernel(node):
                     "other loops", enode)
            return False

-    # Finally, check that we haven't got any 'array accesses' that are in
-    # fact function calls.
-    refs = node.walk(ArrayReference)
-    for ref in refs:
-        # Check if this reference has the name of a known function and if that
-        # reference appears outside said known function.
-        if ref.name.lower() in NEMO_FUNCTIONS and \
-                ref.name.lower() != routine_name.lower():
-            log_msg(routine_name,
-                    f"Loop contains function call: {ref.name}", ref)
-            return False
     return True
diff --git a/examples/nemo/scripts/acc_loops_trans.py b/examples/nemo/scripts/acc_loops_trans.py
index 6d293d4b63..41896486c9 100755
--- a/examples/nemo/scripts/acc_loops_trans.py
+++ b/examples/nemo/scripts/acc_loops_trans.py
@@ -39,7 +39,7 @@
 from utils import (
     insert_explicit_loop_parallelism, normalise_loops, add_profiling,
-    enhance_tree_information, NOT_PERFORMANT, NOT_WORKING)
+    enhance_tree_information, NOT_PERFORMANT)
 from psyclone.psyir.nodes import Routine
 from psyclone.transformations import (
     ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans)
@@ -47,7 +47,7 @@
 PROFILING_ENABLED = True

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING
+FILES_TO_SKIP = NOT_PERFORMANT


 def trans(psyir):
@@ -81,15 +81,6 @@ def trans(psyir):
             print("Skipping", subroutine.name)
             continue

-        # This are functions with scalar bodies, we don't want to parallelise
-        # them, but we could:
-        # - Inline them
-        # - Annotate them with 'omp declare target' and allow to call from gpus
-        if subroutine.name in ("q_sat", "sbc_dcy", "gamma_moist",
-                               "cd_neutral_10m", "psi_h", "psi_m"):
-            print("Skipping", subroutine.name)
-            continue
-
         # OpenACC fails in the following routines with the Compiler error:
         # Could not find allocated-variable index for symbol - xxx
         # This all happen on characters arrays, e.g. cd_nat
@@ -108,12 +99,12 @@ def trans(psyir):
             hoist_expressions=True
         )

-        # In the lib_fortran file we annotate each routine of the SIGN_*
-        # interface with the OpenACC Routine Directive
-        if psyir.name == "lib_fortran.f90":
-            if subroutine.name.lower().startswith("sign_"):
-                ACCRoutineTrans().apply(subroutine)
-                continue
+        # These are functions that are called from inside parallel regions,
+        # so annotate them with 'acc routine'
+        if subroutine.name.lower().startswith("sign_"):
+            ACCRoutineTrans().apply(subroutine)
+            print(f"Marked {subroutine.name} as GPU-enabled")
+            continue

         insert_explicit_loop_parallelism(
             subroutine,
diff --git a/examples/nemo/scripts/omp_cpu_trans.py b/examples/nemo/scripts/omp_cpu_trans.py
index 35449494b6..ceba2eadf6 100755
--- a/examples/nemo/scripts/omp_cpu_trans.py
+++ b/examples/nemo/scripts/omp_cpu_trans.py
@@ -39,14 +39,14 @@
 from utils import (
     insert_explicit_loop_parallelism, normalise_loops, add_profiling,
-    enhance_tree_information, NOT_PERFORMANT, NOT_WORKING)
+    enhance_tree_information, NOT_PERFORMANT)
 from psyclone.psyir.nodes import Routine
 from psyclone.transformations import OMPLoopTrans

 PROFILING_ENABLED = False

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING + [
+FILES_TO_SKIP = NOT_PERFORMANT + [
     "asminc.f90",
     "trosk.f90",
     "vremap.f90",
diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py
index 916dc98688..88cbe99233 100755
--- a/examples/nemo/scripts/omp_gpu_trans.py
+++ b/examples/nemo/scripts/omp_gpu_trans.py
@@ -39,7 +39,7 @@
 from utils import (
     insert_explicit_loop_parallelism, normalise_loops, add_profiling,
-    enhance_tree_information, NOT_PERFORMANT, NOT_WORKING)
+    enhance_tree_information, NOT_PERFORMANT)
 from psyclone.psyGen import TransInfo
 from psyclone.psyir.nodes import (
     Loop, Routine, Directive, Assignment, OMPAtomicDirective)
@@ -49,7 +49,7 @@
 PROFILING_ENABLED = True

 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_PERFORMANT + NOT_WORKING
+FILES_TO_SKIP = NOT_PERFORMANT


 def trans(psyir):
@@ -76,15 +76,6 @@ def trans(psyir):
         if PROFILING_ENABLED:
             add_profiling(subroutine.children)

-        # This are functions with scalar bodies, we don't want to parallelise
-        # them, but we could:
-        # - Inine them
-        # - Annotate them with 'omp declare target' and allow to call from gpus
-        if subroutine.name in ("q_sat", "sbc_dcy", "gamma_moist",
-                               "cd_neutral_10m", "psi_h", "psi_m"):
-            print("Skipping", subroutine.name)
-            continue
-
         print(f"Transforming subroutine: {subroutine.name}")

         enhance_tree_information(subroutine)
@@ -98,14 +89,14 @@ def trans(psyir):
             hoist_expressions=True
         )

-        # In the lib_fortran file we annotate each routine of the SIGN_*
-        # interface with the OpenMP Declare Target Directive
-        if psyir.name == "lib_fortran.f90":
-            if subroutine.name.lower().startswith("sign_"):
-                OMPDeclareTargetTrans().apply(subroutine)
-                # We continue parallelising inside the routine, but this could
-                # change if the parallelisation directive are not nestable, in
-                # which case we could add a 'continue' here
+        # These are functions that are called from inside parallel regions,
+        # so annotate them with 'omp declare target'
+        if subroutine.name.lower().startswith("sign_"):
+            OMPDeclareTargetTrans().apply(subroutine)
+            print(f"Marked {subroutine.name} as GPU-enabled")
+            # We continue parallelising inside the routine, but this could
+            # change if the parallelisation directives added below are not
+            # nestable, in which case we could add a 'continue' here

         # For now this is a special case for stpctl.f90 because it forces
         # loops to parallelise without many safety checks
diff --git a/examples/nemo/scripts/passthrough.py b/examples/nemo/scripts/passthrough.py
index 13ad293442..71bdd11fec 100755
--- a/examples/nemo/scripts/passthrough.py
+++ b/examples/nemo/scripts/passthrough.py
@@ -37,10 +37,8 @@
 ''' Process Nemo code with PSyclone but don't do any changes. This file
 is only needed to provide a FILES_TO_SKIP list. '''

-from utils import NOT_WORKING
-
 # List of all files that psyclone will skip processing
-FILES_TO_SKIP = NOT_WORKING
+FILES_TO_SKIP = []


 def trans(_):
diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py
index 0a2a239b72..202c7ca59a 100755
--- a/examples/nemo/scripts/utils.py
+++ b/examples/nemo/scripts/utils.py
@@ -61,14 +61,6 @@
     "tide_mod.f90",
     "zdfosm.f90",
 ]

-# Files that we won't touch at all, either because PSyclone actually fails
-# or because it produces incorrect Fortran.
-NOT_WORKING = [
-    # NEMOv4 bugs:
-    # TODO #717 - array accessed inside WHERE does not use array notation
-    "diurnal_bulk.f90",
-]
-
 # If routine names contain these substrings then we do not profile them
 PROFILING_IGNORE = ["_init", "_rst", "alloc", "agrif", "flo_dom",
                     "macho", "mpp_", "nemo_gcm",
@@ -78,6 +70,21 @@
                     "interp1", "interp2", "interp3", "integ_spline",
                     "sbc_dcy", "sum", "sign_", "ddpdd"]

+# Currently fparser has no way of distinguishing array accesses from
+# function calls if the symbol is imported from some other module.
+# We therefore work around this by keeping a list of known NEMO functions.
+NEMO_FUNCTIONS = ["alpha_charn", "cd_neutral_10m", "cpl_freq", "cp_air",
+                  "eos_pt_from_ct", "gamma_moist", "l_vap",
+                  "sbc_dcy", "solfrac", "psi_h", "psi_m", "psi_m_coare",
+                  "psi_h_coare", "psi_m_ecmwf", "psi_h_ecmwf", "q_sat",
+                  "rho_air", "visc_air", "sbc_dcy", "glob_sum",
+                  "glob_sum_full", "ptr_sj", "ptr_sjk", "interp1", "interp2",
+                  "interp3", "integ_spline"]
+
+# Currently fparser has no way of distinguishing array accesses from statement
+# functions; the following subroutines contain known statement functions
+CONTAINS_STMT_FUNCTIONS = ["sbc_dcy"]
+
 VERBOSE = False


@@ -139,15 +146,19 @@ def enhance_tree_information(schedule):
                              ArrayType.Extent.ATTRIBUTE,
                              ArrayType.Extent.ATTRIBUTE,
                              ArrayType.Extent.ATTRIBUTE]))
-        elif reference.symbol.name == "sbc_dcy":
-            # The parser gets this wrong, it is a Call not an Array access
-            if not isinstance(reference.symbol, RoutineSymbol):
-                # We haven't already specialised this Symbol.
-                reference.symbol.specialise(RoutineSymbol)
-            call = Call.create(reference.symbol)
-            for child in reference.children:
-                call.addchild(child.detach())
-            reference.replace_with(call)
+        elif reference.symbol.name in NEMO_FUNCTIONS:
+            if reference.symbol.is_import or reference.symbol.is_unresolved:
+                # The parser gets these wrong: they are Calls, not ArrayRefs
+                if not isinstance(reference.symbol, RoutineSymbol):
+                    # We need to specialise the generic Symbol to a Routine
+                    reference.symbol.specialise(RoutineSymbol)
+                if not (isinstance(reference.parent, Call) and
+                        reference.parent.routine is reference):
+                    # We also need to replace the Reference node with a Call
+                    call = Call.create(reference.symbol)
+                    for child in reference.children[:]:
+                        call.addchild(child.detach())
+                    reference.replace_with(call)


 def inline_calls(schedule):
@@ -226,9 +237,10 @@ def normalise_loops(
         statements out of the loop nest.
     '''
-    # TODO #1902: NEMO4 mpi_ini.f90 has a HoistLocalArraysTrans bug
-    if hoist_local_arrays and schedule.root.name != "mpp_ini.f90":
-        # Apply the HoistLocalArraysTrans when possible
+    if hoist_local_arrays and schedule.name not in CONTAINS_STMT_FUNCTIONS:
+        # Apply the HoistLocalArraysTrans when possible; it cannot be applied
+        # to files with statement functions because it will attempt to put
+        # the allocate above them, which is not valid Fortran.
         try:
             HoistLocalArraysTrans().apply(schedule)
         except TransformationError:
diff --git a/psyclone.pdf b/psyclone.pdf
index 05f320b09d..f857ff38a1 100644
Binary files a/psyclone.pdf and b/psyclone.pdf differ
diff --git a/src/psyclone/domain/lfric/algorithm/lfric_alg.py b/src/psyclone/domain/lfric/algorithm/lfric_alg.py
index 8eba62b28e..c2f53c338d 100644
--- a/src/psyclone/domain/lfric/algorithm/lfric_alg.py
+++ b/src/psyclone/domain/lfric/algorithm/lfric_alg.py
@@ -34,6 +34,7 @@
 # Author: A. R. Porter, STFC Daresbury Laboratory.
 # Modified by: R. W. Ford, STFC Daresbury Laboratory.
 #              L. Turner, Met Office
+#              T. Vockerodt, Met Office

 '''This module contains the LFRicAlg class which encapsulates tools for
 creating standalone LFRic algorithm-layer code.
@@ -270,7 +271,7 @@ def _create_function_spaces(self, prog, fspaces):

         # Initialise the function spaces required by the kernel arguments.
         const = LFRicConstants()

-        for space in fspaces:
+        for space in sorted(fspaces):

             if space.lower() not in const.VALID_FUNCTION_SPACE_NAMES:
                 raise InternalError(
diff --git a/src/psyclone/f2pygen.py b/src/psyclone/f2pygen.py
index 4db3407e44..ac01797ef9 100644
--- a/src/psyclone/f2pygen.py
+++ b/src/psyclone/f2pygen.py
@@ -138,7 +138,8 @@ class OMPDirective(Directive):
     '''
     def __init__(self, root, line, position, dir_type):
         self._types = ["parallel do", "parallel", "do", "master", "single",
-                       "taskloop", "taskwait", "declare"]
+                       "taskloop", "taskwait", "declare", "target", "teams",
+                       "teams distribute parallel do"]
         self._positions = ["begin", "end"]

         super(OMPDirective, self).__init__(root, line, position, dir_type)
diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py
index cfae861001..88fc6cc512 100644
--- a/src/psyclone/psyir/frontend/fparser2.py
+++ b/src/psyclone/psyir/frontend/fparser2.py
@@ -412,179 +412,6 @@ def _find_or_create_psyclone_internal_cmp(node):
             "not have an ancestor container in which to add it.")


-def _check_args(array, dim):
-    '''Utility routine used by the _check_bound_is_full_extent and
-    _check_array_range_literal functions to check common arguments.
-
-    This routine is only in fparser2.py until #717 is complete as it
-    is used to check that array syntax in a where statement is for the
-    full extent of the dimension. Once #717 is complete this routine
-    can be removed.
-
-    :param array: the node to check.
-    :type array: :py:class:`pysclone.psyir.node.array`
-    :param int dim: the dimension index to use.
-
-    :raises TypeError: if the supplied arguments are of the wrong type.
-    :raises ValueError: if the value of the supplied dim argument is \
-        less than 1 or greater than the number of dimensions in the \
-        supplied array argument.
-
-    '''
-    if not isinstance(array, ArrayMixin):
-        raise TypeError(
-            f"method _check_args 'array' argument should be some sort of "
-            f"array access (i.e. a sub-class of ArrayMixin) but found "
-            f"'{type(array).__name__}'.")
-
-    if not isinstance(dim, int):
-        raise TypeError(
-            f"method _check_args 'dim' argument should be an "
-            f"int type but found '{type(dim).__name__}'.")
-    if dim < 1:
-        raise ValueError(
-            f"method _check_args 'dim' argument should be at "
-            f"least 1 but found {dim}.")
-    if dim > len(array.children):
-        raise ValueError(
-            f"method _check_args 'dim' argument should be at most the number "
-            f"of dimensions of the array ({len(array.children)}) but found "
-            f"{dim}.")
-
-    # The first element of the array (index 0) relates to the first
-    # dimension (dim 1), so we need to reduce dim by 1.
-    if not isinstance(array.indices[dim-1], Range):
-        raise TypeError(
-            f"method _check_args 'array' argument index '{dim-1}' should be a "
-            f"Range type but found '{type(array.indices[dim-1]).__name__}'.")
-
-
-def _is_bound_full_extent(array, dim, intrinsic):
-    '''A Fortran array section with a missing lower bound implies the
-    access starts at the first element and a missing upper bound
-    implies the access ends at the last element e.g. a(:,:)
-    accesses all elements of array a and is equivalent to
-    a(lbound(a,1):ubound(a,1),lbound(a,2):ubound(a,2)). The PSyIR
-    does not support the shorthand notation, therefore the lbound
-    and ubound operators are used in the PSyIR.
-
-    This utility function checks that shorthand lower or upper
-    bound Fortran code is captured as longhand lbound and/or
-    ubound functions as expected in the PSyIR.
-
-    This routine is only in fparser2.py until #717 is complete as it
-    is used to check that array syntax in a where statement is for the
-    full extent of the dimension. Once #717 is complete this routine
-    can be moved into fparser2_test.py as it is used there in a
-    different context.
-
-    :param array: the node to check.
-    :type array: :py:class:`pysclone.psyir.nodes.ArrayMixin`
-    :param int dim: the dimension index to use.
-    :param intrinsic: the intrinsic to check.
-    :type intrinsic:
-        :py:class:`psyclone.psyir.nodes.IntrinsicCall.Intrinsic.LBOUND` |
-        :py:class:`psyclone.psyir.nodes.IntrinsicCall.Intrinsic.UBOUND`
-
-    :returns: True if the supplied array has the expected properties,
-        otherwise returns False.
-    :rtype: bool
-
-    :raises TypeError: if the supplied arguments are of the wrong type.
-
-    '''
-    _check_args(array, dim)
-
-    if intrinsic == IntrinsicCall.Intrinsic.LBOUND:
-        index = 0
-    elif intrinsic == IntrinsicCall.Intrinsic.UBOUND:
-        index = 1
-    else:
-        raise TypeError(
-            f"'intrinsic' argument expected to be LBOUND or UBOUND but "
-            f"found '{type(intrinsic).__name__}'.")
-
-    # The first element of the array (index 0) relates to the first
-    # dimension (dim 1), so we need to reduce dim by 1.
-    bound = array.indices[dim-1].children[index]
-
-    if not isinstance(bound, IntrinsicCall):
-        return False
-
-    reference = bound.arguments[0]
-    literal = bound.arguments[1]
-
-    if bound.intrinsic != intrinsic:
-        return False
-
-    if (not isinstance(literal, Literal) or
-            literal.datatype.intrinsic != ScalarType.Intrinsic.INTEGER or
-            literal.value != str(dim)):
-        return False
-
-    return isinstance(reference, Reference) and array.is_same_array(reference)
-
-
-def _is_array_range_literal(array, dim, index, value):
-    '''Utility function to check that the supplied array has an integer
-    literal at dimension index "dim" and range index "index" with
-    value "value".
-
-    The step part of the range node has an integer literal with
-    value 1 by default.
-
-    This routine is only in fparser2.py until #717 is complete as it
-    is used to check that array syntax in a where statement is for the
-    full extent of the dimension. Once #717 is complete this routine
-    can be moved into fparser2_test.py as it is used there in a
-    different context.
-
-    :param array: the node to check.
-    :type array: :py:class:`pysclone.psyir.node.ArrayReference`
-    :param int dim: the dimension index to check.
-    :param int index: the index of the range to check (0 is the \
-        lower bound, 1 is the upper bound and 2 is the step).
-    :param int value: the expected value of the literal.
-
-    :raises NotImplementedError: if the supplied argument does not \
-        have the required properties.
-
-    :returns: True if the supplied array has the expected properties, \
-        otherwise returns False.
-    :rtype: bool
-
-    :raises TypeError: if the supplied arguments are of the wrong type.
-    :raises ValueError: if the index argument has an incorrect value.
-
-    '''
-    _check_args(array, dim)
-
-    if not isinstance(index, int):
-        raise TypeError(
-            f"method _check_array_range_literal 'index' argument should be an "
-            f"int type but found '{type(index).__name__}'.")
-
-    if index < 0 or index > 2:
-        raise ValueError(
-            f"method _check_array_range_literal 'index' argument should be "
-            f"0, 1 or 2 but found {index}.")
-
-    if not isinstance(value, int):
-        raise TypeError(
-            f"method _check_array_range_literal 'value' argument should be an "
-            f"int type but found '{type(value).__name__}'.")
-
-    # The first child of the array (index 0) relates to the first
-    # dimension (dim 1), so we need to reduce dim by 1.
-    literal = array.children[dim-1].children[index]
-
-    if (isinstance(literal, Literal) and
-            literal.datatype.intrinsic == ScalarType.Intrinsic.INTEGER and
-            literal.value == str(value)):
-        return True
-    return False
-
-
 def _copy_full_base_reference(node):
     ''' Given the supplied node, creates a new node with the same access
diff --git a/src/psyclone/psyir/nodes/omp_directives.py b/src/psyclone/psyir/nodes/omp_directives.py
index 00318273ca..8001ae1775 100644
--- a/src/psyclone/psyir/nodes/omp_directives.py
+++ b/src/psyclone/psyir/nodes/omp_directives.py
@@ -1442,7 +1442,10 @@ def gen_code(self, parent):
         for call in reprod_red_call_list:
             call.reduction_sum_loop(parent)

-        self.gen_post_region_code(parent)
+        # If there are nested OMPRegions, the post region code should be after
+        # the top-level one
+        if not self.ancestor(OMPRegionDirective):
+            self.gen_post_region_code(parent)

     def lower_to_language_level(self):
         '''
@@ -2302,7 +2305,7 @@ def gen_code(self, parent):
         # Add directive to the f2pygen tree
         parent.add(
             DirectiveGen(
-                parent, "omp", "begin", "parallel do", ", ".join(
+                parent, "omp", "begin", self._directive_string, ", ".join(
                     text for text in [default_str, private_str, fprivate_str,
                                       schedule_str, self._reduction_string()]
                     if text)))
@@ -2312,10 +2315,23 @@
         # make sure the directive occurs straight after the loop body
         position = parent.previous_loop()
-        parent.add(DirectiveGen(parent, *self.end_string().split()),
+
+        # DirectiveGen only accepts 3 terms, e.g. "omp end loop", so for longer
"omp end teams distribute parallel do", we split them + # between arguments and content (which is an additional string appended + # at the end) + terms = self.end_string().split() + # If its < 3 the array slices still work as expected + arguments = terms[:3] + content = " ".join(terms[3:]) + + parent.add(DirectiveGen(parent, *arguments, content=content), position=["after", position]) - self.gen_post_region_code(parent) + # If there are nested OMPRegions, the post region code should be after + # the top-level one + if not self.ancestor(OMPRegionDirective): + self.gen_post_region_code(parent) def lower_to_language_level(self): ''' @@ -2402,6 +2418,31 @@ def end_string(self): ''' return "omp end target" + def gen_code(self, parent): + '''Generate the OpenMP Target Directive and any associated code. + + :param parent: the parent Node in the Schedule to which to add our + content. + :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` + ''' + # Check the constraints are correct + self.validate_global_constraints() + + # Generate the code for this Directive + parent.add(DirectiveGen(parent, "omp", "begin", "target")) + + # Generate the code for all of this node's children + for child in self.dir_body: + child.gen_code(parent) + + # Generate the end code for this node + parent.add(DirectiveGen(parent, "omp", "end", "target", "")) + + # If there are nested OMPRegions, the post region code should be after + # the top-level one + if not self.ancestor(OMPRegionDirective): + self.gen_post_region_code(parent) + class OMPLoopDirective(OMPRegionDirective): ''' Class for the !$OMP LOOP directive that specifies that the iterations diff --git a/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py b/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py index e1d2103057..193e4fd575 100644 --- a/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py +++ b/src/psyclone/psyir/transformations/intrinsics/dotproduct2code_trans.py @@ -69,12 +69,6 @@ def _get_array_bound(vector1, vector2): function makes use of these constraint, e.g. it always returns 1 for the stride. - Issue #717 requires similar functionality to this - function. However, to use this function safely in other situations - we would need to move the tests in validate into this function - first and then potentially add this function to the ArrayMixin - class, or a separate utils module. - :param array: the reference that we are interested in. :type array: :py:class:`psyir.nodes.Reference` :param int index: the (array) reference index that we are \ diff --git a/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py b/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py index b37cbb6fef..727b3ed274 100644 --- a/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py +++ b/src/psyclone/tests/domain/lfric/algorithm/lfric_alg_test.py @@ -34,6 +34,7 @@ # Author: A. R. Porter, STFC Daresbury Lab # Modified by: R. W. Ford, STFC Daresbury Lab # L. Turner, Met Office +# T. Vockerodt, Met Office ''' pytest tests for the LFRic-specific algorithm-generation functionality. ''' @@ -150,7 +151,9 @@ def test_create_function_spaces_invalid_space(prog): def test_create_function_spaces(prog, fortran_writer): ''' Check that a Routine is populated correctly when valid function-space names are supplied. ''' - LFRicAlg()._create_function_spaces(prog, ["w3", "w1"]) + # Using a set randomises ordering of fspaces, but function should + # produce consistent ordering in the algorithm. 
+    LFRicAlg()._create_function_spaces(prog, set(["w3", "w1"]))
     fe_config_mod = prog.symbol_table.lookup("finite_element_config_mod")
     element_order = prog.symbol_table.lookup("element_order")
     assert element_order.interface.container_symbol == fe_config_mod
@@ -159,10 +162,15 @@
     for space in ["w1", "w3"]:
         sym = prog.symbol_table.lookup(space)
         assert sym.interface.container_symbol is fs_mod_sym
-        assert (f"TYPE(function_space_type), POINTER :: "
-                f"vector_space_{space}_ptr" in gen)
-        assert (f"vector_space_{space}_ptr => function_space_collection%"
-                f"get_fs(mesh,element_order,{space})" in gen)
+        # Check that the function space ordering is consistent.
+        assert ("TYPE(function_space_type), POINTER :: "
+                "vector_space_w1_ptr\n  "
+                "TYPE(function_space_type), POINTER :: "
+                "vector_space_w3_ptr" in gen)
+        assert ("vector_space_w1_ptr => function_space_collection%"
+                "get_fs(mesh,element_order,w1)\n  "
+                "vector_space_w3_ptr => function_space_collection%"
+                "get_fs(mesh,element_order,w3)" in gen)


 def test_initialise_field(prog, fortran_writer):
diff --git a/src/psyclone/tests/psyad/main_test.py b/src/psyclone/tests/psyad/main_test.py
index cafd1ea466..8b93b49082 100644
--- a/src/psyclone/tests/psyad/main_test.py
+++ b/src/psyclone/tests/psyad/main_test.py
@@ -121,8 +121,9 @@ def test_main_h_option(capsys):
     assert str(info.value) == "0"
     output, error = capsys.readouterr()
     assert error == ""
-    # The name of the executable is replaced with either pytest or -c
-    # when using pytest, therefore we split this test into sections.
+    # Python usage messages have seen slight tweaks over the years; e.g.,
+    # Python >= 3.13 avoids repeating the args to an option between its
+    # aliases. We therefore split this test into sections.
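+    # (Pre-3.13 argparse repeats the metavar for every alias, e.g.
+    # '-a ACTIVE [ACTIVE ...], --active ACTIVE [ACTIVE ...]', whereas 3.13
+    # prints '-a, --active ACTIVE [ACTIVE ...]'; hence the alternative
+    # assertions below.)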
assert "usage: " in output expected2 = ( "[-h] [-oad OAD] [-v] [-t] [-api API] [-coord-arg COORD_ARG] " @@ -133,9 +134,12 @@ def test_main_h_option(capsys): "positional arguments:\n" " filename tangent-linear kernel source\n\n") assert expected2 in output + assert (" -h, --help show this help message and exit\n" + in output) + assert (" -a ACTIVE [ACTIVE ...], --active ACTIVE [ACTIVE ...]\n" + in output or + " -a, --active ACTIVE [ACTIVE ...]\n" in output) expected3 = ( - " -h, --help show this help message and exit\n" - " -a ACTIVE [ACTIVE ...], --active ACTIVE [ACTIVE ...]\n" " names of active variables\n" " -v, --verbose increase the verbosity of the output\n" " -t, --gen-test generate a standalone unit test for the " @@ -156,9 +160,6 @@ def test_main_h_option(capsys): " -otest TEST_FILENAME filename for the unit test (implies -t)\n" " -oad OAD filename for the transformed code\n") assert expected3 in output - assert ("-otest TEST_FILENAME filename for the unit test (implies -t)" - in output) - assert "-oad OAD filename for the transformed code" in output # no args diff --git a/src/psyclone/tests/psyir/frontend/fparser2_test.py b/src/psyclone/tests/psyir/frontend/fparser2_test.py index 2c7709199b..3135b37a58 100644 --- a/src/psyclone/tests/psyir/frontend/fparser2_test.py +++ b/src/psyclone/tests/psyir/frontend/fparser2_test.py @@ -51,13 +51,11 @@ from psyclone.errors import InternalError, GenerationError from psyclone.psyir.frontend.fparser2 import ( - Fparser2Reader, _is_array_range_literal, _is_bound_full_extent, - _check_args, default_precision, - default_integer_type, default_real_type, _first_type_match, - _get_arg_names) + Fparser2Reader, default_precision, default_integer_type, + default_real_type, _first_type_match, _get_arg_names) from psyclone.psyir.nodes import ( Schedule, CodeBlock, Assignment, Return, UnaryOperation, BinaryOperation, - IfBlock, Reference, ArrayReference, Literal, Range, KernelSchedule, + IfBlock, Reference, ArrayReference, Literal, KernelSchedule, RegionDirective, Routine, StandaloneDirective, Call, IntrinsicCall) from psyclone.psyir.symbols import ( @@ -103,192 +101,6 @@ def test_first_type_match(): _first_type_match(["a", "b"], int) -def test_check_args(): - ''' Test the _check_args function. ''' - - with pytest.raises(TypeError) as excinfo: - _check_args(None, None) - assert ("'array' argument should be some sort of array access (i.e. a " - "sub-class of ArrayMixin) but found 'NoneType'." in - str(excinfo.value)) - - one = Literal("1", INTEGER_TYPE) - array_type = ArrayType(REAL_TYPE, [20]) - symbol = DataSymbol('a', array_type) - array_reference = ArrayReference.create(symbol, [one]) - - with pytest.raises(TypeError) as excinfo: - _check_args(array_reference, None) - assert ("'dim' argument should be an int type but found 'NoneType'." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _check_args(array_reference, 0) - assert ("'dim' argument should be at least 1 but found 0." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _check_args(array_reference, 2) - assert ("'dim' argument should be at most the number of dimensions of " - "the array (1) but found 2." in str(excinfo.value)) - - with pytest.raises(TypeError) as excinfo: - _check_args(array_reference, 1) - assert ("'array' argument index '0' should be a Range type but " - "found 'Literal'." 
in str(excinfo.value)) - - -def test_is_bound_full_extent(): - ''' Test the _is_bound_full_extent function.''' - - # Check that _is_bound_full_extent calls the check_args function. - with pytest.raises(TypeError) as excinfo: - _is_bound_full_extent(None, None, None) - assert ("'array' argument should be some sort of array access (i.e. " - "a sub-class of ArrayMixin) but found 'NoneType'." in - str(excinfo.value)) - - one = Literal("1", INTEGER_TYPE) - array_type = ArrayType(REAL_TYPE, [20]) - symbol = DataSymbol('a', array_type) - my_range = Range.create(one.copy(), one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - with pytest.raises(TypeError) as excinfo: - _is_bound_full_extent(array_reference, 1, None) - assert ("'intrinsic' argument expected to be LBOUND or UBOUND but found " - "'NoneType'" in str(excinfo.value)) - - # Expecting BinaryOperation but found Literal - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.UBOUND, - [one.copy(), ("dim", one.copy())]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting intrinsic to be LBOUND, but found UBOUND - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [one.copy(), ("dim", one.copy())]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting Reference but found Literal - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(DataSymbol("x", INTEGER_TYPE)), ("dim", one.copy())]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting Reference symbol x to be the same as array symbol a - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("1.0", REAL_TYPE))]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting integer but found real - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("2", INTEGER_TYPE))]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # Expecting literal value 2 to be the same as the current array - # dimension 1 - assert not _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("1", INTEGER_TYPE))]) - my_range = Range.create(operator, one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - - # valid - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - - -def test_is_array_range_literal(): - ''' Test the _is_array_range_literal function.''' - - # Check that _is_array_range_literal calls the _check_args function. 
- with pytest.raises(TypeError) as excinfo: - _is_array_range_literal(None, None, None, None) - assert ("'array' argument should be some sort of array access (i.e. a " - "sub-class of ArrayMixin) but found 'NoneType'." in - str(excinfo.value)) - - one = Literal("1", INTEGER_TYPE) - array_type = ArrayType(REAL_TYPE, [20]) - symbol = DataSymbol('a', array_type) - operator = IntrinsicCall.create( - IntrinsicCall.Intrinsic.LBOUND, - [Reference(symbol), ("dim", Literal("1", INTEGER_TYPE))]) - my_range = Range.create(operator, one) - array_reference = ArrayReference.create(symbol, [my_range]) - - with pytest.raises(TypeError) as excinfo: - _is_array_range_literal(array_reference, 1, None, None) - assert ("'index' argument should be an int type but found 'NoneType'." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _is_array_range_literal(array_reference, 1, -1, None) - assert ("'index' argument should be 0, 1 or 2 but found -1." - in str(excinfo.value)) - - with pytest.raises(ValueError) as excinfo: - _is_array_range_literal(array_reference, 1, 3, None) - assert ("'index' argument should be 0, 1 or 2 but found 3." - in str(excinfo.value)) - - with pytest.raises(TypeError) as excinfo: - _is_array_range_literal(array_reference, 1, 2, None) - assert ("'value' argument should be an int type but found 'NoneType'." - in str(excinfo.value)) - - # 1st dimension, second argument to range is an integer literal - # with value 1 - assert _is_array_range_literal(array_reference, 1, 1, 1) - - # 1st dimension, first argument to range is an operator, not a literal - assert not _is_array_range_literal(array_reference, 1, 0, 1) - - my_range = Range.create(operator.copy(), one.copy()) - - # Range.create checks for valid datatype. Therefore change to - # invalid after creation. - my_range.children[1] = Literal("1.0", REAL_TYPE) - array_reference = ArrayReference.create(symbol, [my_range]) - - # 1st dimension, second argument to range is a real literal, - # not an integer literal. - assert not _is_array_range_literal(array_reference, 1, 1, 1) - - my_range = Range.create(operator.copy(), one.copy()) - array_reference = ArrayReference.create(symbol, [my_range]) - # 1st dimension, second argument to range has an unexpected - # value. - assert not _is_array_range_literal(array_reference, 1, 1, 2) - - @pytest.mark.parametrize("value", [ScalarType.Intrinsic.REAL, ScalarType.Intrinsic.INTEGER, @@ -1988,190 +1800,6 @@ def test_handling_parenthesis(): assert isinstance(new_node, BinaryOperation) -@pytest.mark.usefixtures("disable_declaration_check", "f2008_parser") -def test_array_section(): - ''' Check that we correctly handle an array section. - - TODO #754 fix test so that 'disable_declaration_check' fixture is not - required. - ''' - - def _array_create(code): - '''Utility function that takes the supplied Fortran code and returns - its PSyIR representation. - - :param str code: the executable code as a string. - - :returns: the executable code as PSyIR nodes. - :rtype: :py:class:`psyclone.psyir.nodes.Node` - - ''' - processor = Fparser2Reader() - fake_parent = Schedule() - reader = FortranStringReader(code) - fp2node = Execution_Part.match(reader)[0][0] - processor.process_nodes(fake_parent, [fp2node]) - return fake_parent.children[0].children[0] - - def _check_array(node, ndims): - '''Utility function that checks that the supplied node is an array and - has the expected number of dimensions. - - :param node: the node to check. 
- :type node: :py:class:`psyclone.psyir.nodes.ArrayReference` - :param int ndims: the number of expected array dimensions. - - ''' - assert isinstance(node, ArrayReference) - assert len(node.children) == ndims - - def _check_range(array, dim): - '''Utility function that checks that the "dim" index of the supplied - array contains a range node. Assumes that the supplied - argument "array" is an array. - - :param array: the node to check. - :type array: :py:class:`psyclone.psyir.nodes.ArrayReference` - :param int dim: the array dimension index to check. - - ''' - # Note, in Fortran the 1st dimension is 1, second is 2 - # etc. Therefore to obtain the correct child index we need to - # subtract 1. - range_node = array.children[dim-1] - assert isinstance(range_node, Range) - - def _check_reference(node, dim, index, name): - '''Utility function to check that the supplied array has a reference - at dimension index "dim" and range index "index" with name - "name". - - Assumes that the node argument is an array and that the - supplied dimension index is a Range node and that the supplied - range index is valid. - - :param array: the node to check. - :type array: :py:class:`pysclone.psyir.node.ArrayReference` - :param int dim: the dimension index to check. - :param int index: the index of the range to check (0 is the \ - lower bound, 1 is the upper bound). - :param str name: the expected name of the reference. - - ''' - # Note, in Fortran the 1st dimension is 1, second is 2 - # etc. Therefore to obtain the correct child index we need to - # subtract 1. - reference = node.children[dim-1].children[index] - assert isinstance(reference, Reference) - assert reference.name == name - - # Simple one-dimensional - for code in ["a(:) = 0.0", "a(::) = 0.0"]: - array_reference = _array_create(code) - _check_array(array_reference, ndims=1) - _check_range(array_reference, dim=1) - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal( - array_reference, dim=1, index=2, value=1) - # Simple multi-dimensional - for code in ["a(:,:,:) = 0.0", "a(::,::,::) = 0.0"]: - array_reference = _array_create(code) - _check_array(array_reference, ndims=3) - for dim in range(1, 4): - # Check each of the 3 dimensions (1, 2, 3) - _check_range(array_reference, dim=dim) - assert _is_bound_full_extent( - array_reference, dim, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_bound_full_extent( - array_reference, dim, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal( - array_reference, dim=dim, index=2, value=1) - # Simple values - code = "a(1:, 1:2, 1:2:3, :2, :2:3, ::3, 1::3) = 0.0" - array_reference = _array_create(code) - _check_array(array_reference, ndims=7) - # dim 1 - _check_range(array_reference, dim=1) - assert _is_array_range_literal(array_reference, dim=1, index=0, value=1) - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=1, index=2, value=1) - # dim 2 - _check_range(array_reference, dim=2) - assert _is_array_range_literal(array_reference, dim=2, index=0, value=1) - assert _is_array_range_literal(array_reference, dim=2, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=2, index=2, value=1) - # dim 3 - _check_range(array_reference, dim=3) - assert _is_array_range_literal(array_reference, dim=3, index=0, value=1) - assert 
_is_array_range_literal(array_reference, dim=3, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=3, index=2, value=3) - # dim 4 - _check_range(array_reference, dim=4) - assert _is_bound_full_extent(array_reference, 4, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_array_range_literal(array_reference, dim=4, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=4, index=2, value=1) - # dim 5 - _check_range(array_reference, dim=5) - assert _is_bound_full_extent(array_reference, 5, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_array_range_literal(array_reference, dim=5, index=1, value=2) - assert _is_array_range_literal(array_reference, dim=5, index=2, value=3) - # dim 6 - _check_range(array_reference, dim=6) - assert _is_bound_full_extent(array_reference, 6, - IntrinsicCall.Intrinsic.LBOUND) - assert _is_bound_full_extent(array_reference, 6, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=6, index=2, value=3) - # dim 7 - _check_range(array_reference, dim=7) - assert _is_array_range_literal(array_reference, dim=7, index=0, value=1) - assert _is_bound_full_extent(array_reference, 7, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=7, index=2, value=3) - - # Simple variables - code = "a(b:, b:c, b:c:d) = 0.0" - array_reference = _array_create(code) - _check_array(array_reference, ndims=3) - # dim 1 - _check_range(array_reference, dim=1) - _check_reference(array_reference, dim=1, index=0, name="b") - assert _is_bound_full_extent(array_reference, 1, - IntrinsicCall.Intrinsic.UBOUND) - assert _is_array_range_literal(array_reference, dim=1, index=2, value=1) - # dim 2 - _check_range(array_reference, dim=2) - _check_reference(array_reference, dim=2, index=0, name="b") - _check_reference(array_reference, dim=2, index=1, name="c") - assert _is_array_range_literal(array_reference, dim=2, index=2, value=1) - # dim 3 - _check_range(array_reference, dim=3) - _check_reference(array_reference, dim=3, index=0, name="b") - _check_reference(array_reference, dim=3, index=1, name="c") - _check_reference(array_reference, dim=3, index=2, name="d") - - # Expressions - code = "a(b*c:b+c:b/c) = 0.0" - array_reference = _array_create(code) - _check_array(array_reference, ndims=1) - _check_range(array_reference, dim=1) - my_range = array_reference.children[0] - assert isinstance(my_range.children[0], BinaryOperation) - assert my_range.children[0].operator == BinaryOperation.Operator.MUL - assert isinstance(my_range.children[1], BinaryOperation) - assert my_range.children[1].operator == BinaryOperation.Operator.ADD - assert isinstance(my_range.children[2], BinaryOperation) - assert my_range.children[2].operator == BinaryOperation.Operator.DIV - - @pytest.mark.usefixtures("disable_declaration_check", "f2008_parser") def test_handling_array_product(): ''' Check that we correctly handle array products. 
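The hunks above drop the tests for the private fparser2 helpers (_check_args, _is_bound_full_extent, _is_array_range_literal) together with test_array_section, which exercised how a full array section is normalised. That normalisation is still observable through the public frontend: a section such as a(:) becomes a Range whose bounds are LBOUND/UBOUND intrinsic calls with a unit-stride step. A minimal sketch, assuming a standard PSyclone installation (the subroutine itself is illustrative):

    from psyclone.psyir.frontend.fortran import FortranReader
    from psyclone.psyir.nodes import IntrinsicCall, Literal, Range

    # Parse a tiny routine containing a full array section.
    code = '''
    subroutine sub()
      real, dimension(20) :: a
      a(:) = 0.0
    end subroutine sub
    '''
    psyir = FortranReader().psyir_from_source(code)

    # 'a(:)' is expanded to LBOUND(a, 1) : UBOUND(a, 1) : 1.
    section = psyir.walk(Range)[0]
    assert section.start.intrinsic is IntrinsicCall.Intrinsic.LBOUND
    assert section.stop.intrinsic is IntrinsicCall.Intrinsic.UBOUND
    assert isinstance(section.step, Literal) and section.step.value == "1"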
diff --git a/src/psyclone/tests/psyir/nodes/omp_directives_test.py b/src/psyclone/tests/psyir/nodes/omp_directives_test.py index 2c8d2dbfd4..068c71896e 100644 --- a/src/psyclone/tests/psyir/nodes/omp_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/omp_directives_test.py @@ -4669,3 +4669,34 @@ def test_omp_serial_check_dependency_valid_pairing(): assert test_dir._check_dependency_pairing_valid( array_reference1, array_reference2, None, None ) + + +def test_omptarget_gen_code(): + ''' Check that the OMPTarget gen_code produces the right code ''' + _, invoke_info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), + api="lfric") + psy = PSyFactory("lfric", distributed_memory=True).create(invoke_info) + schedule = psy.invokes.invoke_list[0].schedule + kern = schedule.children[-1] + + # Add an OMPTarget and move the kernel inside it + target = OMPTargetDirective() + schedule.addchild(target) + target.dir_body.addchild(kern.detach()) + + # Check that the "omp target" is produced, and that the set_dirty is + # generated after it + code = str(psy.gen) + assert """ + !$omp target + DO cell = loop0_start, loop0_stop, 1 + CALL testkern_code(nlayers_f1, a, f1_data, f2_data, m1_data, \ +m2_data, ndf_w1, undf_w1, map_w1(:,cell), ndf_w2, undf_w2, map_w2(:,cell), \ +ndf_w3, undf_w3, map_w3(:,cell)) + END DO + !$omp end target + ! + ! Set halos dirty/clean for fields modified in the above loop(s) + ! + CALL f1_proxy%set_dirty() + """ in code diff --git a/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py b/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py index cf7658d585..abceb2eea8 100644 --- a/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py +++ b/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py @@ -50,7 +50,8 @@ from psyclone.psyir.nodes import Routine, FileContainer, IntrinsicCall, Call from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE from psyclone.psyir.transformations import TransformationError -from psyclone.transformations import ACCRoutineTrans, Dynamo0p3KernelConstTrans +from psyclone.transformations import ( + ACCRoutineTrans, OMPDeclareTargetTrans, Dynamo0p3KernelConstTrans) from psyclone.tests.gocean_build import GOceanBuild from psyclone.tests.lfric_build import LFRicBuild @@ -430,6 +431,24 @@ def test_gpumixin_validate_no_call(): in str(err.value)) +@pytest.mark.parametrize( + "rtrans, expected_directive", + [(ACCRoutineTrans(), "!$acc routine"), + (OMPDeclareTargetTrans(), "!$omp declare target")]) +def test_kernel_gpu_annotation_trans(rtrans, expected_directive, + fortran_writer): + ''' Check that the GPU annotation transformations insert the + proper directive inside PSyKAl kernel code ''' + _, invoke = get_invoke("1_single_invoke.f90", api="lfric", idx=0) + sched = invoke.schedule + kern = sched.coded_kernels()[0] + rtrans.apply(kern) + + # Check that the directive has been added to the kernel code + code = fortran_writer(kern.get_kernel_schedule()) + assert expected_directive in code + + def test_1kern_trans(kernel_outputdir): ''' Check that we generate the correct code when an invoke contains the same kernel more than once but only one of them is transformed. 
''' diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index ca34b76607..0e39523559 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -537,19 +537,35 @@ class OMPDeclareTargetTrans(Transformation, MarkRoutineForGPUMixin): ''' def apply(self, node, options=None): - ''' Insert an OMPDeclareTargetDirective inside the provided routine. + ''' Insert an OMPDeclareTargetDirective inside the provided routine or + associated PSyKAl kernel. - :param node: the PSyIR routine to insert the directive into. - :type node: :py:class:`psyclone.psyir.nodes.Routine` + :param node: the kernel or routine which is the target of this + transformation. + :type node: :py:class:`psyclone.psyir.nodes.Routine` | + :py:class:`psyclone.psyGen.Kern` :param options: a dictionary with options for transformations. :type options: Optional[Dict[str, Any]] + :param bool options["force"]: whether to allow routines with + CodeBlocks to run on the GPU. ''' self.validate(node, options) - for child in node.children: + + if isinstance(node, Kern): + # Flag that the kernel has been modified + node.modified = True + + # Get the schedule representing the kernel subroutine + routine = node.get_kernel_schedule() + else: + routine = node + + for child in routine.children: if isinstance(child, OMPDeclareTargetDirective): return # The routine is already marked with OMPDeclareTarget - node.children.insert(0, OMPDeclareTargetDirective()) + + routine.children.insert(0, OMPDeclareTargetDirective()) def validate(self, node, options=None): ''' Check that an OMPDeclareTargetDirective can be inserted. diff --git a/tutorial/practicals/nemo/2_nemo_profiling/Makefile b/tutorial/practicals/nemo/2_nemo_profiling/Makefile index ff957684ee..a8a59854e5 100644 --- a/tutorial/practicals/nemo/2_nemo_profiling/Makefile +++ b/tutorial/practicals/nemo/2_nemo_profiling/Makefile @@ -108,7 +108,7 @@ transform: -o output_3.f90 -l output tra_adv_mod.F90 compile: transform $(KERNELS) output.o solutions/runner.o - $(F90) $(KERNELS) output.o solutions/runner.o -o $(NAME) \ + $(F90) $(F90FLAGS) $(KERNELS) output.o solutions/runner.o -o $(NAME) \ $(PROFILE_WRAPPER_LINK) $(PROFILE_LINK) # Only used for the compile CI target to compile the solution file
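With the apply() change above, OMPDeclareTargetTrans accepts either a plain PSyIR Routine or a PSyKAl kernel: for a kernel it flags the node as modified and inserts the directive at the top of the kernel schedule. A minimal usage sketch in the spirit of the new tests (the algorithm file name is assumed to be the LFRic test file used above, reachable from the working directory):

    from psyclone.parse.algorithm import parse
    from psyclone.psyGen import PSyFactory
    from psyclone.transformations import OMPDeclareTargetTrans

    # Build the PSy layer for an LFRic algorithm file (path assumed).
    _, invoke_info = parse("1_single_invoke.f90", api="lfric")
    psy = PSyFactory("lfric", distributed_memory=False).create(invoke_info)

    # Apply the transformation directly to the coded kernel.
    kern = psy.invokes.invoke_list[0].schedule.coded_kernels()[0]
    OMPDeclareTargetTrans().apply(kern)

    # The kernel is flagged as modified and '!$omp declare target' now
    # appears in its regenerated source.
    assert kern.modified

The Makefile fix is related but independent: options such as OpenMP/OpenACC flags generally have to be passed at link time as well as at compile time, so the link line now forwards $(F90FLAGS).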