Skip to content

Commit 4e300bc

Browse files
committed
Merge branch 'release-2.7.0-rc2'
2 parents ba75a56 + 96b2be4 commit 4e300bc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+2088
-1911
lines changed
File renamed without changes.

.ci/daint.cscs.ch/ocl.build.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ if [ ! -d "${HOME}/libxsmm" ]; then
2727
fi
2828
cd "${HOME}/libxsmm"
2929
git fetch
30-
git checkout 05705477183444a82c8d9be8d7c2627efd6d67fa
30+
git checkout 2fe2b1a7077ddfbc9ab3b3f7ba1f5a45d52549cb
3131
make -j
3232
cd ..
3333

.ci/daint.cscs.ch/ocl.test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ export OMP_PROC_BIND=TRUE # set thread affinity
3232
# OMP_NUM_THREADS is set by cmake
3333

3434
# use default parameters (omit loading tuned parameters)
35-
export OPENCL_LIBSMM_SMM_PARAMS=0
35+
#export OPENCL_LIBSMM_SMM_PARAMS=0
3636

3737
# document the current environment
3838
env |& tee -a "${STAGE_NAME}.out"

.github/workflows/testing-linux.yml

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ jobs:
5757
-DMPI_EXECUTABLE_SUFFIX=.${{ matrix.mpi_suffix }} \
5858
-DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root --oversubscribe")" \
5959
-DLCOV_ARGS="--test-name;${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.use_smm }}-cpu" \
60+
-DTEST_MPI_RANKS=auto \
6061
..
6162
6263
- name: Build

.github/workflows/testing-macos.yml

+2-6
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@ on:
66
- 'develop'
77
pull_request:
88

9-
# Workaround issue in Xcode 14.1/2
10-
env:
11-
DEVELOPER_DIR: /Applications/Xcode_14.0.1.app/Contents/Developer
12-
139
jobs:
1410
build-and-test:
1511
runs-on: macos-latest
@@ -45,15 +41,15 @@ jobs:
4541
mkdir -p build
4642
cd build
4743
env \
48-
CC=gcc-12 CXX=g++-12 FC=gfortran-12 \
44+
CC=gcc-14 CXX=g++-14 FC=gfortran-14 \
4945
cmake -G Ninja \
5046
-DCMAKE_BUILD_TYPE=Release \
5147
-DUSE_${{ matrix.use_mpi }} \
5248
-DUSE_${{ matrix.use_openmp }} \
5349
-DUSE_${{ matrix.use_smm }} \
5450
$([ "${{ matrix.blas_impl }}" = "openblas" ] && echo '-DCMAKE_PREFIX_PATH=/usr/local/opt/openblas') \
5551
-DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root")" \
56-
-DTEST_MPI_RANKS=1 \
52+
-DTEST_MPI_RANKS=auto \
5753
..
5854
5955
- name: Build

.pre-commit-config.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ fail_fast: false
66
minimum_pre_commit_version: 3.2.0
77
repos:
88
- repo: https://github.com/astral-sh/ruff-pre-commit
9-
rev: 'v0.3.2'
9+
rev: 'v0.4.10'
1010
hooks:
1111
- id: ruff
1212
args: [ --fix, --exit-non-zero-on-fix ]
@@ -15,13 +15,13 @@ repos:
1515
.cp2k/.*|
1616
)$
1717
- repo: https://github.com/psf/black
18-
rev: 24.2.0
18+
rev: 24.4.2
1919
hooks:
2020
- id: black
2121
name: Reformat Python files with the black code formatter
2222
files: '^.*(/PACKAGE)|(\.py)$'
2323
- repo: https://github.com/pre-commit/pre-commit-hooks
24-
rev: v4.5.0
24+
rev: v4.6.0
2525
hooks:
2626
- id: check-ast
2727
- id: check-yaml

.pre-commit/headers/c_cpp.3

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/*------------------------------------------------------------------------------------------------*/
2+
/* Copyright (C) by the DBCSR developers group - All rights reserved */
3+
/* This file is part of the DBCSR library. */
4+
/* */
5+
/* For information on the license, see the LICENSE file. */
6+
/* For further information please visit https://dbcsr.cp2k.org */
7+
/* SPDX-License-Identifier: BSD-3-Clause */
8+
/*------------------------------------------------------------------------------------------------*/

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ cmake_dependent_option(WITH_EXAMPLES "Build the examples" ON "USE_MPI" OFF
8282
)# all examples require MPI
8383

8484
set(TEST_MPI_RANKS
85-
"auto"
85+
2
8686
CACHE STRING "Number of MPI ranks for testing")
8787
set(TEST_OMP_THREADS
8888
2

VERSION

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
MAJOR = 2
22
MINOR = 7
3-
PATCH = 0-rc1
3+
PATCH = 0-rc2
44
# A specific DATE (YYYY-MM-DD) fixes an official release, otherwise
55
# it is considered Development version.
6-
DATE = 2024-03-13
6+
DATE = 2024-06-27
77

88

cmake/CompilerConfiguration.cmake

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
2-
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffree-form -std=f2008ts -fimplicit-none -Werror=aliasing -Werror=ampersand -Werror=c-binding-type -Werror=intrinsic-shadow -Werror=intrinsics-std -Werror=line-truncation -Werror=tabs -Werror=target-lifetime -Werror=underflow -Werror=unused-but-set-parameter -Werror=unused-but-set-variable -Werror=unused-variable -Werror=unused-dummy-argument -Werror=conversion -Werror=zerotrip -Werror=uninitialized -Wno-maybe-uninitialized -Werror=unused-parameter")
3-
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)
4-
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Werror=argument-mismatch") # gcc 10+ has this automatically
2+
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffree-form -std=f2008ts -fimplicit-none -Werror=aliasing -Werror=ampersand -Werror=c-binding-type -Werror=intrinsic-shadow -Werror=intrinsics-std -Werror=line-truncation -Werror=tabs -Werror=target-lifetime -Werror=underflow -Werror=unused-but-set-parameter -Werror=unused-but-set-variable -Werror=unused-variable -Werror=unused-dummy-argument -Werror=conversion -Werror=zerotrip -Wno-maybe-uninitialized -Werror=unused-parameter")
3+
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) # comparison against CXX version rather than GFortran version
4+
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fallow-argument-mismatch") # required for 10+ (MPI wrap)
55
else ()
6-
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fallow-argument-mismatch") # requires for 10+ for the MPI wrap module
6+
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Werror=argument-mismatch") # gcc 10+ has this automatically
7+
endif ()
8+
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13) # comparison against CXX version rather than GFortran version
9+
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Wno-error=uninitialized") # false positive (allocatable array)
710
endif ()
811
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -g -funroll-loops")
912
set(CMAKE_Fortran_FLAGS_COVERAGE "-O0 -g --coverage -fno-omit-frame-pointer -fcheck=all,no-array-temps -ffpe-trap=invalid,zero,overflow -fbacktrace -finit-real=snan -finit-integer=-42 -finit-derived -Werror=realloc-lhs -finline-matmul-limit=0 -Werror")
@@ -48,6 +51,9 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
4851
if ((NOT (USE_MPI)) OR (NOT ("${MPI_Fortran_LIBRARY_VERSION_STRING}" MATCHES "Open MPI")))
4952
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=leak")
5053
endif ()
54+
if (USE_ACCEL MATCHES "hip" AND hip_VERSION GREATER_EQUAL 6.0.0) # Remove deprecated function error with ROCm v6+
55+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations")
56+
endif ()
5157
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
5258
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops")
5359
set(CMAKE_CXX_FLAGS_COVERAGE "-O0 -g --coverage")

docs/guide/2-user-guide/1-installation/index.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ make
7676
-DWITH_GPU=<P100|K20X|K40|K80|V100|Mi50|Mi100|Mi250>
7777
-DCMAKE_BUILD_TYPE=<Release|Debug|Coverage>
7878
-DBUILD_TESTING=<ON|OFF>
79-
-DTEST_MPI_RANKS=<auto,N>
80-
-DTEST_OMP_THREADS=<2,N>
79+
-DTEST_MPI_RANKS=<2|auto|N>
80+
-DTEST_OMP_THREADS=<2|N>
8181
```
8282

8383
When providing a build of LIBXSMM, make sure the `lib` directory is added to the `PKG_CONFIG_PATH` variable prior

examples/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ foreach (dbcsr_program_src ${DBCSR_PROGRAM_SRCS_FTN})
88
get_filename_component(dbcsr_program_name ${dbcsr_program_src} NAME_WE)
99
add_executable(${dbcsr_program_name} ${dbcsr_program_src})
1010
target_link_libraries(${dbcsr_program_name} dbcsr)
11+
if (OpenMP_FOUND)
12+
target_link_libraries(${dbcsr_program_name} OpenMP::OpenMP_Fortran)
13+
endif ()
1114

1215
# with the Intel compiler CMake 3.12 seems to forget that the source is
1316
# actually Fortran and needs to be told explicitly:
@@ -29,6 +32,10 @@ if (WITH_C_API)
2932
set(dbcsr_program_name ${dbcsr_program_name}_cpp)
3033
add_executable(${dbcsr_program_name} ${dbcsr_program_src})
3134
target_link_libraries(${dbcsr_program_name} dbcsr_c MPI::MPI_CXX)
35+
set_target_properties(${dbcsr_program_name} PROPERTIES LINKER_LANGUAGE CXX)
36+
if (OpenMP_FOUND)
37+
target_link_libraries(${dbcsr_program_name} OpenMP::OpenMP_CXX)
38+
endif ()
3239

3340
if (CMAKE_CXX_COMPILER_ID STREQUAL "Cray")
3441
# for recent Cray compiler versions CMake doesn't know

examples/dbcsr_example_3.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ int main(int argc, char* argv[]) {
5353

5454
for (int i = 0; i != mpi_size; ++i) {
5555
if (mpi_rank == i) {
56-
std::cout << "I'm processor " << mpi_rank << " over " << mpi_size << " proc" << ", (" << coord[0] << ", " << coord[1]
57-
<< ") in the 2D grid" << std::endl;
56+
std::cout << "I'm processor " << mpi_rank << " over " << mpi_size << " proc"
57+
<< ", (" << coord[0] << ", " << coord[1] << ") in the 2D grid" << std::endl;
5858
}
5959
MPI_Barrier(MPI_COMM_WORLD);
6060
}

src/acc/acc_bench.h

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
# define INLINE
2626
#endif
2727

28+
#if !defined(MAX_KERNEL_DIM)
29+
# define MAX_KERNEL_DIM 80
30+
#endif
31+
2832
#define INIT_MAT(ELEM_TYPE, SEED, MAT, M, N, SCALE) \
2933
do { \
3034
const double init_mat_seed1_ = (SCALE) * (SEED) + (SCALE); \

src/acc/acc_bench_smm.c

+2-4
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
STRIDE_C, INDEX_STRIDE, INDEX_BASE, BATCHSIZE) \
4040
ACC_BENCH_USEOMP(libxsmm_gemm_batch) \
4141
(IPREC, OPREC, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, STRIDE_A, B, LDB, STRIDE_B, BETA, C, LDC, STRIDE_C, INDEX_STRIDE, \
42-
INDEX_BASE, BATCHSIZE, 0 /*batchcheck*/)
42+
INDEX_BASE, BATCHSIZE)
4343
# define PRINTF(...) \
4444
do { \
4545
const size_t print_buffer_size = sizeof(print_buffer) - print_offset; \
@@ -227,9 +227,7 @@ int main(int argc, char* argv[]) {
227227
int ndevices = 0;
228228
result = c_dbcsr_acc_get_ndevices(&ndevices);
229229
if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) {
230-
#if defined(_DEBUG)
231-
fprintf(stderr, "Activated device %i of %i (device%i).\n", device + 1, ndevices, device);
232-
#endif
230+
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
233231
}
234232
else {
235233
if (0 >= ndevices) {

src/acc/acc_bench_trans.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,7 @@ int main(int argc, char* argv[]) {
123123
if (EXIT_SUCCESS == result) {
124124
result = c_dbcsr_acc_get_ndevices(&ndevices);
125125
if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) {
126-
#if defined(_DEBUG)
127-
fprintf(stderr, "Activated device %i of %i (device%i).\n", device + 1, ndevices, device);
128-
#endif
126+
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
129127
}
130128
else {
131129
if (0 >= ndevices) {

src/acc/cuda/Makefile

+12-8
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ OBJSMM := $(SRCSMM:.cpp=.o)
2222

2323
INCALL := $(INCACC) $(INCSMM)
2424

25-
LIBXSMMROOT := $(wildcard $(ACCDIR)/../../../libxsmm)
25+
LIBXSMMROOT := $(wildcard $(ACCDIR)/../../../../../libxsmm)
2626
ifeq (,$(LIBXSMMROOT))
2727
LIBXSMMROOT := $(wildcard $(HOME)/libxsmm)
2828
endif
@@ -33,6 +33,9 @@ INTEL ?= 0
3333
GNU ?= 0
3434
DEV ?= 0
3535

36+
# C++ baseline standard
37+
CXXSTD ?= -std=c++14
38+
3639
# select from set of predefined triplet specifications
3740
SPECID ?= 0
3841
# limit shape in tests (zero or negative for unlimited)
@@ -118,7 +121,7 @@ else ifneq (0,$(GNU))
118121
else
119122
override AR := ar
120123
endif
121-
override LD_LIBRARY_DIRS := $(NULL)
124+
#override LD_LIBRARY_DIRS := $(NULL)
122125
else
123126
CXX := g++
124127
CC := gcc
@@ -211,7 +214,7 @@ LD_LIBSTUB_PATH := $(wildcard $(patsubst %,%/stubs,$(LD_LIBRARY_DIRS)))
211214
LIBPATHS := $(foreach DIR,$(LD_LIBRARY_DIRS),$(if $(filter -L$(DIR),$(LDFLAGS)),$(NULL),-L$(DIR)))
212215
LIBSTUBS := $(foreach DIR,$(LD_LIBSTUB_PATH),$(if $(filter -L$(DIR),$(LDFLAGS)),$(NULL),-L$(DIR)))
213216
LDFLAGS += $(LIBPATHS) $(LIBSTUBS) -lcudart -lcublas -lnvrtc -lcuda
214-
CXXFLAGS += -std=c++11 $(CFLAGS)
217+
CXXFLAGS += $(CXXSTD) $(CFLAGS)
215218

216219
.PHONY: bench
217220
bench: $(ACCDIR)/acc_bench_smm $(ACCDIR)/acc_bench_trans
@@ -296,15 +299,16 @@ libsmm: $(ACCDIR)/dbcsr_acc_smm.a
296299
$(ACCDIR)/dbcsr_acc_smm.a: $(OBJSMM)
297300
$(AR) -rs $@ $^
298301

299-
%.o: %.cu $(INCALL) $(MAKDIR)/Makefile
300-
$(NVCC) $(DFLAGS) -allow-unsupported-compiler --compiler-options="$(CXXFLAGS) $(CFLAGS_XSMM)" -c $< -o $@
301-
302302
%.o: %.cpp $(INCALL) $(MAKDIR)/Makefile
303303
$(CXX) $(DFLAGS) $(CXXFLAGS) $(CFLAGS_XSMM) -c $< -o $@
304304

305+
%.o: %.cu $(INCALL) $(MAKDIR)/Makefile
306+
$(NVCC) $(DFLAGS) -allow-unsupported-compiler $(CXXSTD) \
307+
--compiler-options="$(filter-out $(CXXSTD),$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@
308+
305309
$(ACCDIR)/cuda_hip/calculate_norms.o: $(ACCDIR)/cuda_hip/calculate_norms.cpp $(INCALL) $(MAKDIR)/Makefile
306-
$(NVCC) $(DFLAGS) -x cu -allow-unsupported-compiler \
307-
--compiler-options="$(filter-out -pedantic,$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@
310+
$(NVCC) $(DFLAGS) -allow-unsupported-compiler $(CXXSTD) -x cu \
311+
--compiler-options="$(filter-out $(CXXSTD) -pedantic,$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@
308312

309313
$(MAKDIR)/acc_bench_smm.o: $(ACCDIR)/acc_bench_smm.c $(MAKDIR)/Makefile
310314
ifneq (0,$(LIBXSMM))

src/acc/cuda_hip/acc_dev.cpp

+4-7
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,6 @@
2020
#include <stdio.h>
2121
#include <math.h>
2222

23-
// for debug purpose
24-
#if defined(__HIP_PLATFORM_NVCC__)
25-
static const int verbose_print = 1;
26-
#endif
27-
2823
/****************************************************************************/
2924
extern "C" int c_dbcsr_acc_get_ndevices(int* n_devices) {
3025
ACC_API_CALL(GetDeviceCount, (n_devices));
@@ -49,9 +44,11 @@ extern "C" int c_dbcsr_acc_set_active_device(int device_id) {
4944
// establish context
5045
ACC_API_CALL(Free, (0));
5146

52-
#if defined(__HIP_PLATFORM_NVCC__)
53-
if (verbose_print) {
47+
#if defined(__CUDA) || defined(__HIP_PLATFORM_NVCC__)
48+
static bool once = false;
49+
if (!once) {
5450
ACC_API_CALL(DeviceSetLimit, (ACC(LimitPrintfFifoSize), (size_t)1000000000));
51+
once = true;
5552
}
5653
#endif
5754

src/acc/cuda_hip/acc_stream.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ extern "C" int c_dbcsr_acc_stream_create(void** stream_p, const char* name, int
5151
cErr = ACC(StreamCreate)(acc_stream);
5252
}
5353

54-
if (verbose_print) printf("StreamCreate : %p -> %p \n", *stream_p, *acc_stream);
54+
if (verbose_print) printf("StreamCreate : %p -> %p \n", *stream_p, (const void*)*acc_stream);
5555
if (acc_error_check(cErr)) return -1;
5656
if (acc_error_check(ACC(GetLastError)())) return -1;
5757

src/acc/libsmm_acc/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ The performance of the matrix-matrix multiplication kernels is highly dependent
5050

5151
## Contributing to libsmm_acc
5252

53+
We expect users to contribute to the library by providing new optimized kernels and support for new GPUs.
54+
5355
#### Autotuning procedure
5456

5557
Follow the [autotuning procedure](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/tune/README.md)

0 commit comments

Comments
 (0)