cima22 · alibuild · Nov 17, 2023 · Nov 17, 2023 · Nov 17, 2023 · Nov 19, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,6 +19,9 @@ project(O2 LANGUAGES C CXX VERSION 1.2.0)
 
 include(CTest)
 
+set(CUDA_COMPUTETARGET 75 CACHE STRING "Default CUDA compute target; a value passed with -DCUDA_COMPUTETARGET=<value> takes precedence")
+set(GPUCA_NO_FAST_MATH 1 CACHE BOOL "Default: build GPU code without fast math; a value passed with -DGPUCA_NO_FAST_MATH=<0|1> takes precedence")
+
 # Project wide setup
 
 # Would better fit inside GPU/CMakeLists.txt, but include GPU/Common directly

diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
@@ -156,6 +156,17 @@ struct ClusterNative {
       return (this->getFlags() < rhs.getFlags());
     }
   }
+
+  GPUd() bool operator==(const ClusterNative& rhs) const // field-wise equality: packed time, pad, both sigmas, both charges and flags must all match
+  {
+    if (getTimePacked() != rhs.getTimePacked() || padPacked != rhs.padPacked) {
+      return false;
+    }
+    const bool sameSigmas = sigmaTimePacked == rhs.sigmaTimePacked && sigmaPadPacked == rhs.sigmaPadPacked;
+    const bool sameCharges = qMax == rhs.qMax && qTot == rhs.qTot;
+    const bool sameFlags = getFlags() == rhs.getFlags();
+    return sameSigmas && sameCharges && sameFlags;
+  }
 };
 
 // This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to

diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt
@@ -18,8 +18,10 @@
 # HDRS_CINT_O2: Headers for ROOT dictionary (only for O2) HDRS_INSTALL: Headers
 # for installation only
 
+#set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...
+
 if(NOT DEFINED GPUCA_NO_FAST_MATH)
-  set(GPUCA_NO_FAST_MATH 0)
+  set(GPUCA_NO_FAST_MATH 1) # NOTE(review): flips the default so -fno-fast-math -ffp-contract=off is appended to CMAKE_CXX_FLAGS below whenever the variable is not predefined - confirm this is intended beyond debugging
 endif()
 if(${GPUCA_NO_FAST_MATH})
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math -ffp-contract=off")

diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h
@@ -46,6 +46,7 @@ class GPUTRDTracker_t
 #if defined(GPUCA_NOCOMPAT_ALLCINT) && (!defined(GPUCA_GPULIBRARY) || !defined(GPUCA_ALIROOT_LIB)) && defined(GPUCA_HAVE_O2HEADERS)
 #include "GPUTPCConvert.h"
 #include "GPUTPCCompression.h"
+#include "GPUTPCDecompression.h"
 #include "GPUITSFitter.h"
 #include "GPUTPCClusterFinder.h"
 #include "GPUTrackingRefit.h"
@@ -69,6 +70,7 @@ struct GPUConstantMem {
   tpcTrackers[GPUCA_NSLICES];
   GPUTPCConvert tpcConverter;
   GPUTPCCompression tpcCompressor;
+  GPUTPCDecompression tpcDecompressor;
   GPUTPCGMMerger tpcMerger;
   GPUTRDTrackerGPU trdTrackerGPU;
 #ifdef GPUCA_HAVE_O2HEADERS

diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h
@@ -511,7 +511,7 @@ inline void GPUReconstruction::RegisterGPUProcessor(T* proc, bool deviceSlave)
 template <class T>
 inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
 {
-  static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derrived class");
+  static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class");
   if (allocate) {
     proc->SetMaxData(mHostConstantMem->ioPtrs);
   }

diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h
@@ -39,6 +39,7 @@
 #include "GPUITSFitterKernels.h"
 #include "GPUTPCConvertKernel.h"
 #include "GPUTPCCompressionKernels.h"
+#include "GPUTPCDecompressionKernels.h"
 #include "GPUTPCClusterFinderKernels.h"
 #include "GPUTrackingRefitKernel.h"
 #include "GPUTPCGMO2Output.h"

diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h
@@ -75,6 +75,9 @@ using namespace GPUCA_NAMESPACE::gpu;
 #include "GPUTPCCompressionKernels.cxx"
 #include "GPUTPCCompressionTrackModel.cxx"
 
+// Files for TPC Decompression
+#include "GPUTPCDecompressionKernels.cxx"
+
 // Files for TPC Cluster Finder
 #include "ClusterAccumulator.cxx"
 #include "GPUTPCCFStreamCompaction.cxx"

diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt
@@ -11,7 +11,7 @@
 
 set(MODULE GPUTracking)
 
-# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -O0") # to uncomment if needed, tired of typing this...
+# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...
 
 include(cmake/helpers.cmake)
 
@@ -176,6 +176,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR CONFIG_O2_EXTENSIONS)
         DataCompression/GPUTPCCompression.cxx
         DataCompression/GPUTPCCompressionTrackModel.cxx
         DataCompression/GPUTPCCompressionKernels.cxx
+        DataCompression/GPUTPCDecompression.cxx
+        DataCompression/GPUTPCDecompressionKernels.cxx
         DataCompression/TPCClusterDecompressor.cxx
         DataCompression/GPUTPCClusterStatistics.cxx
         TPCClusterFinder/GPUTPCClusterFinder.cxx
@@ -469,6 +471,7 @@ endif()
 set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx
                             DataCompression/GPUTPCCompressionKernels.cxx
                             DataCompression/TPCClusterDecompressor.cxx
+                            DataCompression/GPUTPCDecompressionKernels.cxx
                             TARGET_DIRECTORY ${targetName}
                             PROPERTIES COMPILE_FLAGS "-fno-fast-math -ffp-contract=off")
 

diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
@@ -0,0 +1,99 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file GPUTPCDecompression.cxx
+/// \author Gabriele Cimador
+
+#include "GPUTPCDecompression.h"
+#include "GPUTPCCompression.h"
+#include "GPUReconstruction.h"
+#include "GPUO2DataTypes.h"
+#include "GPUMemorySizeScalers.h"
+#include "GPULogging.h"
+
+using namespace GPUCA_NAMESPACE::gpu;
+
+void GPUTPCDecompression::InitializeProcessor() {} // Empty: performs no setup.
+
+void* GPUTPCDecompression::SetPointersInputGPU(void* mem) // Pointer setter registered for the "TPCDecompressionInput" resource in RegisterMemoryAllocation.
+{
+  SetPointersCompressedClusters(mem, mInputGPU, mInputGPU.nAttachedClusters, mInputGPU.nTracks, mInputGPU.nUnattachedClusters, true); // mem is passed by reference and updated by the callee; the trailing `true` is reducedClA
+  return mem; // value of mem after the callee has updated it
+}
+
+template <class T>
+void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, unsigned int nClA, unsigned int nTr, unsigned int nClU, bool reducedClA) // Assigns the array pointers of c from mem, one computePointerWithAlignment call per array; mem is updated in place.
+{
+  computePointerWithAlignment(mem, c.qTotU, nClU); // Do not reorder, qTotU is used as first address in GPUChainTracking::RunTPCCompression
+  computePointerWithAlignment(mem, c.qMaxU, nClU);
+  computePointerWithAlignment(mem, c.flagsU, nClU);
+  computePointerWithAlignment(mem, c.padDiffU, nClU);
+  computePointerWithAlignment(mem, c.timeDiffU, nClU);
+  computePointerWithAlignment(mem, c.sigmaPadU, nClU);
+  computePointerWithAlignment(mem, c.sigmaTimeU, nClU);
+  computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSLICES); // last array that is set regardless of the track-model setting
+
+  unsigned int nClAreduced = reducedClA ? nClA - nTr : nClA; // with reducedClA the arrays sized by this get nTr fewer entries than nClA; assumes nClA >= nTr (unsigned subtraction) - TODO confirm
+
+  if (!(mRec->GetParam().rec.tpc.compressionTypeMask & GPUSettings::CompressionTrackModel)) {
+    return; // Track model disabled, do not allocate memory
+  }
+  computePointerWithAlignment(mem, c.qTotA, nClA); // arrays from here on are sized by nClA, nClAreduced or nTr
+  computePointerWithAlignment(mem, c.qMaxA, nClA);
+  computePointerWithAlignment(mem, c.flagsA, nClA);
+  computePointerWithAlignment(mem, c.rowDiffA, nClAreduced);
+  computePointerWithAlignment(mem, c.sliceLegDiffA, nClAreduced);
+  computePointerWithAlignment(mem, c.padResA, nClAreduced);
+  computePointerWithAlignment(mem, c.timeResA, nClAreduced);
+  computePointerWithAlignment(mem, c.sigmaPadA, nClA);
+  computePointerWithAlignment(mem, c.sigmaTimeA, nClA);
+
+  computePointerWithAlignment(mem, c.qPtA, nTr); // arrays with nTr entries
+  computePointerWithAlignment(mem, c.rowA, nTr);
+  computePointerWithAlignment(mem, c.sliceA, nTr);
+  computePointerWithAlignment(mem, c.timeA, nTr);
+  computePointerWithAlignment(mem, c.padA, nTr);
+
+  computePointerWithAlignment(mem, c.nTrackClusters, nTr);
+}
+
+void* GPUTPCDecompression::SetPointersTmpNativeBuffersGPU(void* mem) // Pointer setter registered for the "TPCDecompressionTmpBuffersGPU" resource.
+{
+  computePointerWithAlignment(mem, mTmpNativeClusters, NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer); // element count: mMaxNativeClustersPerBuffer times NSLICES * GPUCA_ROW_COUNT
+  return mem;
+}
+
+void* GPUTPCDecompression::SetPointersTmpNativeBuffersOutput(void* mem) // Pointer setter registered for the "TPCDecompressionTmpBuffersOutput" resource.
+{
+  computePointerWithAlignment(mem, mNativeClustersIndex, NSLICES * GPUCA_ROW_COUNT); // element count: NSLICES * GPUCA_ROW_COUNT
+  return mem;
+}
+
+void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem) // Pointer setter registered for the "TPCDecompressionTmpBuffersInput" resource.
+{
+  computePointerWithAlignment(mem, mUnattachedClustersOffsets, NSLICES * GPUCA_ROW_COUNT); // element count: NSLICES * GPUCA_ROW_COUNT
+  return mem;
+}
+
+void GPUTPCDecompression::RegisterMemoryAllocation() // Registers the four pointer setters of this processor with mRec.
+{
+  AllocateAndInitializeLate();
+  mMemoryResInputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputGPU, GPUMemoryResource::MEMORY_INPUT_FLAG | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM, "TPCDecompressionInput"); // return value kept in mMemoryResInputGPU
+  mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersGPU, GPUMemoryResource::MEMORY_GPU, "TPCDecompressionTmpBuffersGPU"); // return value is not stored
+  mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersOutput, GPUMemoryResource::MEMORY_OUTPUT, "TPCDecompressionTmpBuffersOutput"); // return value kept in mResourceTmpIndexes
+  mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersInput, GPUMemoryResource::MEMORY_INPUT, "TPCDecompressionTmpBuffersInput"); // return value kept in mResourceTmpClustersOffsets
+}
+
+void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io) // Sets the size factor used for mTmpNativeClusters; io is not read.
+{
+  // NOTE(review): hard-coded value, not derived from io; 81760 was the alternative left here as commented-out code - TODO confirm which bound is correct.
+  mMaxNativeClustersPerBuffer = 12000;
+}
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
@@ -0,0 +1,78 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file GPUTPCDecompression.h
+/// \author Gabriele Cimador
+
+#ifndef GPUTPCDECOMPRESSION_H
+#define GPUTPCDECOMPRESSION_H
+
+#include "GPUDef.h"
+#include "GPUProcessor.h"
+#include "GPUCommonMath.h"
+#include "GPUParam.h"
+#include "GPUO2DataTypes.h"
+
+#ifdef GPUCA_HAVE_O2HEADERS
+#include "DataFormatsTPC/CompressedClusters.h"
+#else
+namespace o2::tpc // Empty placeholder types, declared only when GPUCA_HAVE_O2HEADERS is not defined.
+{
+struct CompressedClustersPtrs {
+};
+struct CompressedClusters {
+};
+struct CompressedClustersFlat {
+};
+} // namespace o2::tpc
+#endif
+
+namespace GPUCA_NAMESPACE::gpu
+{
+
+class GPUTPCDecompression : public GPUProcessor // Processor that owns the compressed-cluster input pointers and the temporary native-cluster buffers.
+{
+  friend class GPUTPCDecompressionKernels;
+  friend class GPUChainTracking;
+
+ public:
+#ifndef GPUCA_GPUCODE
+  void InitializeProcessor();
+  void RegisterMemoryAllocation();                     // registers the SetPointers* callbacks below with mRec
+  void SetMaxData(const GPUTrackingInOutPointers& io); // sets mMaxNativeClustersPerBuffer
+
+  void* SetPointersInputGPU(void* mem);               // assigns the array pointers of mInputGPU
+  void* SetPointersTmpNativeBuffersGPU(void* mem);    // assigns mTmpNativeClusters
+  void* SetPointersTmpNativeBuffersOutput(void* mem); // assigns mNativeClustersIndex
+  void* SetPointersTmpNativeBuffersInput(void* mem);  // assigns mUnattachedClustersOffsets
+
+#endif
+
+ protected:
+  constexpr static unsigned int NSLICES = GPUCA_NSLICES;
+  o2::tpc::CompressedClusters mInputGPU; // array pointers are assigned in SetPointersInputGPU
+  // Size and pointers get explicit defaults (like the resource ids below) so nothing is indeterminate before SetMaxData / the pointer setters run.
+  unsigned int mMaxNativeClustersPerBuffer = 0;                 // size factor for mTmpNativeClusters, set in SetMaxData
+  unsigned int* mNativeClustersIndex = nullptr;                 // NSLICES * GPUCA_ROW_COUNT entries
+  unsigned int* mUnattachedClustersOffsets = nullptr;           // NSLICES * GPUCA_ROW_COUNT entries
+  o2::tpc::ClusterNative* mTmpNativeClusters = nullptr;         // NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer entries
+  o2::tpc::ClusterNativeAccess* mClusterNativeAccess = nullptr; // not assigned by this class's own pointer setters
+  o2::tpc::ClusterNative* mNativeClustersBuffer = nullptr;      // not assigned by this class's own pointer setters
+
+  template <class T>
+  void SetPointersCompressedClusters(void*& mem, T& c, unsigned int nClA, unsigned int nTr, unsigned int nClU, bool reducedClA);
+
+  short mMemoryResInputGPU = -1;          // return value of registering SetPointersInputGPU
+  short mResourceTmpIndexes = -1;         // return value of registering SetPointersTmpNativeBuffersOutput
+  short mResourceTmpClustersOffsets = -1; // return value of registering SetPointersTmpNativeBuffersInput
+};
+} // namespace GPUCA_NAMESPACE::gpu
+#endif // GPUTPCDECOMPRESSION_H