[WIP] Alpaka develop (pre-0.4.0) #2807

Closed
Changes from all commits

4 changes: 2 additions & 2 deletions include/pmacc/nvidia/atomic.hpp
@@ -160,8 +160,8 @@ T atomicAllInc(T *ptr)
 #ifdef __CUDA_ARCH__
     return atomicAllInc(alpaka::atomic::AtomicCudaBuiltIn(), ptr, ::alpaka::hierarchy::Grids());
 #else
-    // assume that we can use stl atomics if we are not on gpu
-    return atomicAllInc(alpaka::atomic::AtomicStlLock<16>(), ptr, ::alpaka::hierarchy::Grids());
+    // assume that we can use the standard library atomics if we are not on gpu
+    return atomicAllInc(alpaka::atomic::AtomicStdLibLock<16>(), ptr, ::alpaka::hierarchy::Grids());
 #endif
 }

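The rename in this hunk tracks alpaka's own renaming of `AtomicStlLock` to `AtomicStdLibLock`. Both name the host-side fallback that serializes atomic operations through a small table of standard-library mutexes; the `<16>` is the number of locks. A minimal sketch of such a lock-table atomic, assuming a simple address-hash mapping (illustrative only, not alpaka's actual implementation):

```c++
#include <cstddef>
#include <cstdint>
#include <mutex>

// Sketch of a lock-table ("striped lock") atomic: hash the target address
// onto one of TnLocks mutexes and do the read-modify-write under that lock.
template<std::size_t TnLocks>
class LockTableAtomicSketch
{
public:
    // fetch-and-increment, the operation atomicAllInc ultimately performs
    template<typename T>
    T fetchInc(T* const ptr)
    {
        std::lock_guard<std::mutex> guard(lockFor(ptr));
        T const old = *ptr;
        *ptr = old + T(1);
        return old;
    }

private:
    // map an address to one of the TnLocks mutexes
    std::mutex& lockFor(void const* const ptr)
    {
        auto const h = reinterpret_cast<std::uintptr_t>(ptr) >> 4;
        return m_locks[h % TnLocks];
    }

    std::mutex m_locks[TnLocks];
};
```

Hashing the address instead of taking one global mutex keeps unrelated counters from contending on the same lock while still serializing all access to any given address.
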
File renamed without changes.
1 change: 1 addition & 0 deletions thirdParty/alpaka/.gitignore
@@ -1,5 +1,6 @@
 /doc/doxygen/*
 !/doc/doxygen/Doxyfile
+!/doc/doxygen/alpaka_doxygen.png
 /doc/latex/*
 **/build

224 changes: 139 additions & 85 deletions thirdParty/alpaka/.travis.yml

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions thirdParty/alpaka/.zenodo.json
@@ -0,0 +1,66 @@
+{
+  "title": "Alpaka: Abstraction Library for Parallel Kernel Acceleration",
+  "description": "The alpaka library is a header-only C++11 abstraction library for accelerator development. Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism.",
+  "creators": [
+    {
+      "affiliation": "LogMeIn, Inc.",
+      "name": "Worpitz, Benjamin"
+    },
+    {
+      "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf, TU Dresden",
+      "name": "Matthes, Alexander",
+      "orcid": "0000-0002-6702-2015"
+    },
+    {
+      "affiliation": "LogMeIn, Inc.",
+      "name": "Zenker, Erik",
+      "orcid": "0000-0001-9417-8712"
+    },
+    {
+      "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf, TU Dresden",
+      "name": "Huebl, Axel",
+      "orcid": "0000-0003-1943-7141"
+    },
+    {
+      "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf",
+      "name": "Widera, René",
+      "orcid": "0000-0003-1642-0459"
+    }
+  ],
+  "access_right": "open",
+  "keywords": [
+    "HPC",
+    "CUDA",
+    "OpenMP",
+    "C++",
+    "GPU",
+    "HIP",
+    "heterogeneous computing",
+    "performance portability"
+  ],
+  "license": "LGPL-3.0",
+  "upload_type": "software",
+  "grants": [
+    {
+      "id": "654220"
+    }
+  ],
+  "related_identifiers": [
+    {
+      "identifier": "DOI:10.5281/zenodo.49768",
+      "relation": "isCitedBy"
+    },
+    {
+      "identifier": "DOI:10.1007/978-3-319-46079-6_21",
+      "relation": "cites"
+    },
+    {
+      "identifier": "DOI:10.1109/IPDPSW.2016.50",
+      "relation": "isCitedBy"
+    },
+    {
+      "identifier": "DOI:10.1007/978-3-319-67630-2_36",
+      "relation": "isCitedBy"
+    }
+  ]
+}
6 changes: 4 additions & 2 deletions thirdParty/alpaka/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright 2015-2017 Benjamin Worpitz
+# Copyright 2015-2018 Benjamin Worpitz
 #
 # This file is part of alpaka.
 #
@@ -25,6 +25,8 @@ cmake_minimum_required(VERSION 3.7.0)

 project("alpakaAll")

+SET_PROPERTY(GLOBAL PROPERTY USE_FOLDERS ON)
+
 ################################################################################
 # Options and Variants

@@ -34,7 +36,7 @@ include(CTest)
 # automatically defines: BUILD_TESTING, default is ON

 ################################################################################
-# Add subdirectories.
+# Add subdirectories

 if(alpaka_BUILD_EXAMPLES)
     add_subdirectory("example/")
4 changes: 2 additions & 2 deletions thirdParty/alpaka/Findalpaka.cmake
@@ -28,7 +28,7 @@
 # ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE will require Boost.Fiber to be built.
 # ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE and ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE will require a OpenMP 2.0+ capable compiler.
 # ALPAKA_ACC_CPU_BT_OMP4_ENABLE will require a OpenMP 4.0+ capable compiler.
-# ALPAKA_ACC_GPU_CUDA_ENABLE will require CUDA 7.0+ to be installed.
+# ALPAKA_ACC_GPU_CUDA_ENABLE will require CUDA 8.0+ to be installed.
 # ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE will require TBB 2.2+ to be installed
 #
 # Set the following CMake variables BEFORE calling find_packages to
@@ -42,7 +42,7 @@
 # - ``ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE`` {ON, OFF}
 # - ``ALPAKA_ACC_CPU_BT_OMP4_ENABLE`` {ON, OFF}
 # - ``ALPAKA_ACC_GPU_CUDA_ENABLE`` {ON, OFF}
-# - ``ALPAKA_CUDA_VERSION`` {7.0, ...}
+# - ``ALPAKA_CUDA_VERSION`` {8.0, ...}
 # - ``ALPAKA_CUDA_ARCH`` {sm_20, sm...}
 # - ``ALPAKA_CUDA_FAST_MATH`` {ON, OFF}
 # - ``ALPAKA_CUDA_FTZ`` {ON, OFF}

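The comments above double as usage documentation: back-ends are chosen by setting the `ALPAKA_ACC_*` variables before `find_package` is called. A minimal consumer sketch under those assumptions (the project name and source file are hypothetical; the variable names come from the comments above, and `alpaka_add_executable` is assumed to be the helper the alpaka CMake scripts provide for compiling device code with nvcc):

```cmake
cmake_minimum_required(VERSION 3.7.0)

project("myAlpakaApp")

# Back-end selection must happen BEFORE find_package(alpaka).
set(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE ON)  # serial CPU back-end
set(ALPAKA_ACC_GPU_CUDA_ENABLE ON)         # CUDA back-end, needs CUDA 8.0+
set(ALPAKA_CUDA_VERSION 9.2)               # installed CUDA SDK version
set(ALPAKA_CUDA_ARCH sm_60)                # target GPU architecture

find_package(alpaka REQUIRED)

# alpaka_add_executable wraps add_executable so sources are routed
# through the device compiler where required.
alpaka_add_executable(myAlpakaApp main.cpp)
target_link_libraries(myAlpakaApp PUBLIC alpaka)
```
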
29 changes: 16 additions & 13 deletions thirdParty/alpaka/README.md
@@ -7,6 +7,8 @@
 [![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20windows-lightgrey.svg)](https://github.com/ComputationalRadiationPhysics/alpaka)
 [![License](https://img.shields.io/badge/license-LGPLv3-blue.svg)](https://www.gnu.org/licenses/lgpl-3.0.de.html)

+![Alpaka](doc/images/alpaka_401x135.png)
+
 The **alpaka** library is a header-only C++11 abstraction library for accelerator development.

 Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism.
@@ -15,7 +17,7 @@ It is platform independent and supports the concurrent and cooperative use of multiple devices.
 A multitude of accelerator back-end variants using CUDA, OpenMP (2.0/4.0), Boost.Fiber, std::thread and also serial execution is provided and can be selected depending on the device.
 Only one implementation of the user kernel is required by representing them as function objects with a special interface.
 There is no need to write special CUDA, OpenMP or custom threading code.
-Accelerator back-ends can be mixed within a device stream.
+Accelerator back-ends can be mixed within a device queue.
 The decision which accelerator back-end executes which kernel can be made at runtime.

 The **alpaka** API is currently unstable (beta state).
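A concrete picture of the "function objects with a special interface" mentioned in the paragraph above: a kernel is a struct whose templated `operator()` receives the accelerator as its first argument. A minimal sketch against the 0.4-era API (the kernel name and arguments are invented for illustration; the indexing call follows the examples shipped with alpaka, and exact namespaces may shift in this development snapshot):

```c++
#include <alpaka/alpaka.hpp>

#include <cstddef>

// One kernel implementation serves every enabled back-end: the accelerator
// is a template parameter instead of a fixed CUDA/OpenMP/thread API.
struct ScaleKernel
{
    template<typename TAcc>
    ALPAKA_FN_ACC void operator()(
        TAcc const & acc,
        float * const buf,
        std::size_t const n,
        float const factor) const
    {
        // global thread index within the grid, queried through the accelerator
        auto const i = alpaka::idx::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
        if(i < n)
            buf[i] *= factor;
    }
};
```

Because the accelerator enters only through `TAcc`, the same struct can be enqueued for different back-ends, with the choice made at runtime as the paragraph above describes.
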
@@ -55,37 +57,38 @@ Accelerator Back-ends
 | std::thread | std::thread |Host CPU (multi core)|sequential|parallel (preemptive multitasking)|
 | Boost.Fiber | boost::fibers::fiber |Host CPU (single core)|sequential|parallel (cooperative multitasking)|
 |TBB|TBB 2.2+|Host CPU (multi core)|parallel (preemptive multitasking)|sequential (only 1 thread per block)|
-|CUDA|CUDA 7.0-9.2|NVIDIA GPUs|parallel (undefined)|parallel (lock-step within warps)|
+|CUDA|CUDA 8.0-10.0|NVIDIA GPUs|parallel (undefined)|parallel (lock-step within warps)|
 |HIP(nvcc)|[HIP 1.5.8292+](https://github.com/ROCm-Developer-Tools/HIP)|NVIDIA GPUs SM 2.0+|parallel (undefined)|parallel (lock-step within warps)|


 Supported Compilers
 -------------------

 This library uses C++11 (or newer when available).

-|Accelerator Back-end|gcc 4.9.2|gcc 5.4|gcc 6.3/7.2|clang 3.5/3.6|clang 3.7/3.8|clang 3.9|clang 4|clang 5|MSVC 2017.5|
-|---|---|---|---|---|---|---|---|
+|Accelerator Back-end|gcc 4.9.4|gcc 5.5|gcc 6.4/7.3|gcc 8.1|clang 4|clang 5|clang 6|clang 7|MSVC 2017.7|
+|---|---|---|---|---|---|---|---|---|---|
 |Serial|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|OpenMP 2.0+ blocks|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|OpenMP 2.0+ threads|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|OpenMP 4.0+ (CPU)|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:x:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|
+|OpenMP 2.0+ blocks|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+|OpenMP 2.0+ threads|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+|OpenMP 4.0+ (CPU)|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|
 | std::thread |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
 | Boost.Fiber |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
 |TBB|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|CUDA (nvcc)|:white_check_mark: <br/> (CUDA 7.0-9.2)|:white_check_mark: <br/> (CUDA 9.0-9.2)|:white_check_mark: <br/> (CUDA 9.2)|:white_check_mark: <br/> (CUDA 8.0)|:x:|:white_check_mark: <br/> (CUDA 9.1-9.2)|:white_check_mark: <br/> (CUDA 9.1-9.2)|:x:|:x:|
-|CUDA (clang) | - | - | - | - | - | - | :white_check_mark: <br/> (CUDA 8.0)| :white_check_mark: <br/> (CUDA 8.0) | - |
+|CUDA (nvcc)|:white_check_mark: <br/> (CUDA 8.0-10.0)|:white_check_mark: <br/> (CUDA 9.0-10.0)|:white_check_mark: <br/> (CUDA 9.2-10.0) |:x:|:white_check_mark: <br/> (CUDA 9.1-10.0)|:white_check_mark: <br/> (CUDA 10.0)|:white_check_mark: <br/> (CUDA 10.0)|:x:|:x:|
+|CUDA (clang) | - | - | - | - | :white_check_mark: <br/> (CUDA 8.0)| :white_check_mark: <br/> (CUDA 8.0)| :white_check_mark: <br/> (CUDA 8.0-9.0) | :white_check_mark: <br/> (CUDA 8.0-9.2) | - |
 |[HIP](doc/markdown/user/implementation/mapping/HIP.md) (nvcc)<br/>(HIP 1.5.8292+)|:white_check_mark: <br/> (nvcc 8.0-9.2)|:white_check_mark: <br/> (nvcc 9.0-9.2)|:white_check_mark: <br/> (nvcc 9.2)|:x:|:x:|:x:|:x:|:x:|:x:|

 Dependencies
 ------------

-[Boost](http://boost.org/) 1.62+ is the only mandatory external dependency (for CUDA 9+ Boost >=1.65.1 is required).
+[Boost](https://boost.org/) 1.62+ is the only mandatory external dependency (for CUDA 9+ Boost >=1.65.1 is required).
 The **alpaka** library itself just requires header-only libraries.
 However some of the accelerator back-end implementations require different boost libraries to be built.

 When an accelerator back-end using *Boost.Fiber* is enabled, `boost-fiber` and all of its dependencies are required to be build in C++11 mode `./b2 cxxflags="-std=c++11"`.

-When an accelerator back-end using *CUDA* is enabled, version *7.0* of the *CUDA SDK* is the minimum requirement.
+When an accelerator back-end using *CUDA* is enabled, version *8.0* of the *CUDA SDK* is the minimum requirement.
 *NOTE*: When using nvcc as *CUDA* compiler, the *CUDA accelerator back-end* can not be enabled together with the *Boost.Fiber accelerator back-end* due to bugs in the nvcc compiler.
 *NOTE*: When using clang as a native *CUDA* compiler, the *CUDA accelerator back-end* can not be enabled together with any *OpenMP accelerator back-end* because this combination is currently unsupported.

@@ -135,7 +138,7 @@ consider citing us accordingly in your derived work and publications:
 % "2017 ISC High Performance Conference"
 % in Frankfurt, Germany
 @inproceedings{MathesP3MA2017,
-  author = {{Matthes}, A. and {Widera}, R. and {Zenker}, E. and {Worpitz}, B. and
+  author = {{Matthes}, A. and {Widera}, R. and {Zenker}, E. and {Worpitz}, B. and
             {Huebl}, A. and {Bussmann}, M.},
   title = {Tuning and optimization for a variety of many-core architectures without changing a single line of implementation code
            using the Alpaka library},