diff --git a/src/libPMacc/include/archAlignment.hpp b/src/libPMacc/include/archAlignment.hpp
new file mode 100644
index 00000000000..fcc7710cd1e
--- /dev/null
+++ b/src/libPMacc/include/archAlignment.hpp
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2016 Rene Widera
+ *
+ * This file is part of libPMacc.
+ *
+ * libPMacc is free software: you can redistribute it and/or modify
+ * it under the terms of either the GNU General Public License or
+ * the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * libPMacc is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License and the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * and the GNU Lesser General Public License along with libPMacc.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#pragma once
+
+#include <boost/align/alignment_of.hpp>
+
+namespace PMacc
+{
+    /** object to test for a useful alignment
+     *
+     * `__attribute__ ((aligned))` without a value lets the compiler align the
+     * member array to a useful, architecture-dependent default alignment.
+     */
+    struct UsefulAlignTestObject
+    {
+        char x[512] __attribute__ ((aligned));
+    };
+
+    /** type which defines a useful alignment for the architecture (read it via `::value`) */
+    typedef boost::alignment::alignment_of<UsefulAlignTestObject> useful_align_t;
+}
diff --git a/src/libPMacc/include/pmacc_types.hpp b/src/libPMacc/include/pmacc_types.hpp
index 601cc420a10..c002d7bd732 100644
--- a/src/libPMacc/include/pmacc_types.hpp
+++ b/src/libPMacc/include/pmacc_types.hpp
@@ -25,6 +25,8 @@
 #pragma once
 
 #include "debug/PMaccVerbose.hpp"
+#include "ppFunctions.hpp"
+#include "archAlignment.hpp"
 
 #define BOOST_MPL_LIMIT_VECTOR_SIZE 20
 #define BOOST_MPL_LIMIT_MAP_SIZE 20
@@ -176,16 +178,16 @@ enum EventType
  * you must align all array and structs which can used on device
  * @param byte byte of data which must aligned
  */
-#define __optimal_align__(byte)                                 \
-    __align__(                                                  \
-    ((byte)==1?1:                                               \
-    ((byte)<=2?2:                                               \
-    ((byte)<=4?4:                                               \
-    ((byte)<=8?8:                                               \
-    ((byte)<=16?16:                                             \
-    ((byte)<=32?32:                                             \
-    ((byte)<=64?64:128                                          \
-    ))))))))
+#define __optimal_align__(byte)                                                \
+    __align__(                                                                 \
+        PMACC_MIN(                                                             \
+            /** 32 byte is the L2 cache line size of NVIDIA GPUs               \
+             * \bug limiting the alignment to 32 byte avoids bug https://github.com/ComputationalRadiationPhysics/picongpu/issues/1563 \
+             */                                                                \
+            PMACC_MIN(PMACC_ROUND_UP_NEXT_POW2(byte),32),                      \
+            PMacc::useful_align_t::value                                       \
+        )                                                                      \
+    )
 
 #define PMACC_ALIGN(var,...) __optimal_align__(sizeof(__VA_ARGS__)) __VA_ARGS__ var
 #define PMACC_ALIGN8(var,...) __align__(8) __VA_ARGS__ var
diff --git a/src/libPMacc/include/ppFunctions.hpp b/src/libPMacc/include/ppFunctions.hpp
index 4133e4f56ca..c17adea12f8 100644
--- a/src/libPMacc/include/ppFunctions.hpp
+++ b/src/libPMacc/include/ppFunctions.hpp
@@ -52,3 +52,22 @@
  * @return false if no arguments are given, else true
  */
 #define PMACC_HAS_ARGS(...) ((sizeof((int[]){0, ##__VA_ARGS__}))==sizeof(int)?false:true)
+
+/** round up to the next power of two
+ *
+ * - if value is already a power of two, value is returned
+ * - the result is clamped: the maximal returned value is 128
+ * - zero and negative values are not supported
+ *
+ * @param value integral number in the range [1,Inf)
+ * @return smallest power of two >= value, but at most 128
+ */
+#define PMACC_ROUND_UP_NEXT_POW2(value)                                        \
+    ((value)==1?1:                                                             \
+    ((value)<=2?2:                                                             \
+    ((value)<=4?4:                                                             \
+    ((value)<=8?8:                                                             \
+    ((value)<=16?16:                                                           \
+    ((value)<=32?32:                                                           \
+    ((value)<=64?64:128                                                        \
+    )))))))