diff --git a/src/libPMacc/include/archAlignment.hpp b/src/libPMacc/include/archAlignment.hpp
new file mode 100644
index 00000000000..fcc7710cd1e
--- /dev/null
+++ b/src/libPMacc/include/archAlignment.hpp
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2016 Rene Widera
+ *
+ * This file is part of libPMacc.
+ *
+ * libPMacc is free software: you can redistribute it and/or modify
+ * it under the terms of either the GNU General Public License or
+ * the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * libPMacc is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License and the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * and the GNU Lesser General Public License along with libPMacc.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#pragma once
+
+#include <boost/align/alignment_of.hpp>
+
+namespace PMacc
+{
+    /** helper object used to query a useful alignment
+     *
+     * The `aligned` attribute carries no explicit value, so the compiler
+     * aligns the member array to a useful, architecture-dependent value.
+     */
+    struct UsefulAlignTestObject
+    {
+        char x[512] __attribute__ ((aligned));
+    };
+
+    /** type whose `value` member is a useful alignment for the architecture */
+    typedef boost::alignment::alignment_of<UsefulAlignTestObject> useful_align_t;
+}
diff --git a/src/libPMacc/include/pmacc_types.hpp b/src/libPMacc/include/pmacc_types.hpp
index 601cc420a10..c002d7bd732 100644
--- a/src/libPMacc/include/pmacc_types.hpp
+++ b/src/libPMacc/include/pmacc_types.hpp
@@ -25,6 +25,8 @@
 #pragma once
 
 #include "debug/PMaccVerbose.hpp"
+#include "ppFunctions.hpp"
+#include "archAlignment.hpp"
 
 #define BOOST_MPL_LIMIT_VECTOR_SIZE 20
 #define BOOST_MPL_LIMIT_MAP_SIZE 20
@@ -176,16 +178,16 @@ enum EventType
  * you must align all array and structs which can used on device
  * @param byte byte of data which must aligned
  */
-#define __optimal_align__(byte)   \
-        __align__(                \
-        ((byte)==1?1:             \
-        ((byte)<=2?2:             \
-        ((byte)<=4?4:             \
-        ((byte)<=8?8:             \
-        ((byte)<=16?16:           \
-        ((byte)<=32?32:           \
-        ((byte)<=64?64:128        \
-        ))))))))
+#define __optimal_align__(byte)                                                \
+    __align__(                                                                 \
+        PMACC_MIN(                                                             \
+            /** limit to 32 byte, the L2 cache line size of NVIDIA GPUs        \
+             * \bug the 32 byte limit avoids https://github.com/ComputationalRadiationPhysics/picongpu/issues/1563 \
+             */                                                                \
+            PMACC_MIN(PMACC_ROUND_UP_NEXT_POW2(byte),32),                      \
+            PMacc::useful_align_t::value                                       \
+        )                                                                      \
+    )
 
 #define PMACC_ALIGN(var,...) __optimal_align__(sizeof(__VA_ARGS__)) __VA_ARGS__ var
 #define PMACC_ALIGN8(var,...) __align__(8) __VA_ARGS__ var
diff --git a/src/libPMacc/include/ppFunctions.hpp b/src/libPMacc/include/ppFunctions.hpp
index 4133e4f56ca..c17adea12f8 100644
--- a/src/libPMacc/include/ppFunctions.hpp
+++ b/src/libPMacc/include/ppFunctions.hpp
@@ -52,3 +52,22 @@
  * @return false if no arguments are given, else true
  */
 #define PMACC_HAS_ARGS(...)  ((sizeof((int[]){0, ##__VA_ARGS__}))==sizeof(int)?false:true)
+
+/** round a value up to the next power of two, saturating at 128
+ *
+ * - a power of two up to 128 is returned unchanged, larger values yield 128
+ * - values below 1 are not supported (they evaluate to 2)
+ * - the argument is evaluated several times and must be free of side effects
+ *
+ * @param value integral number >= 1
+ * @return smallest power of two >= value, capped at 128
+ */
+#define PMACC_ROUND_UP_NEXT_POW2(value) \
+        ((value)==1?1:                  \
+        ((value)<=2?2:                  \
+        ((value)<=4?4:                  \
+        ((value)<=8?8:                  \
+        ((value)<=16?16:                \
+        ((value)<=32?32:                \
+        ((value)<=64?64:128             \
+        )))))))