Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7d4340d
test
cima22 Nov 17, 2023
31e10d0
test2
cima22 Nov 17, 2023
ac8042f
Dummy Kernel Implementation
cima22 Nov 17, 2023
64c05c4
Working dummy kernel
cima22 Nov 19, 2023
5257993
Created GPUTPCDecompressor context and added to constant memory model
cima22 Nov 20, 2023
7405a42
GPUTPCDecompressor now able to move variables to GPU version
cima22 Nov 22, 2023
ceb00a6
Test memcopy from host to gpu working and fixed typo
cima22 Nov 23, 2023
ae2b6c4
Created inputBuffer on GPU for holding compressed clusters for decomp…
cima22 Dec 2, 2023
9fd661f
Correctly setted attributes on compressed clusters input for decompre…
cima22 Dec 4, 2023
12c85d5
successfully copied nSliceRowClusters from host to gpu through context
cima22 Dec 4, 2023
bc8b2d0
input completely copied from host to device (correctness of content h…
cima22 Dec 5, 2023
ac02a0b
addedo MEMORY::CUSTOM to GPUInput and single GPUMemCpy for whole input
cima22 Dec 5, 2023
47e7e34
refactoring and tested correctness of GPUMemCpy from host to device f…
cima22 Dec 5, 2023
a62865d
allocated output buffers for host and device
cima22 Dec 7, 2023
60a53c5
Decoding input from host to device made per internal buffer
cima22 Dec 11, 2023
636af96
Created tmp buffers to store native clusters per row and per slice
cima22 Dec 18, 2023
2d82384
Decoding of attached clusters on GPU
cima22 Jan 3, 2024
eee0fa5
removed debugging info in CPU decompressor
cima22 Jan 4, 2024
5b48d25
kernel for unattached clusters (not working)
cima22 Jan 5, 2024
7634c61
Added sorting algorithm
cima22 Jan 9, 2024
19cbc62
hard-coded tests -- need to remove after validaiton
cima22 Jan 25, 2024
2f773de
code cleaning and adapting to new kernel registration
cima22 Jan 29, 2024
d1d648a
Removed debugging messages and added no-fast-math option
cima22 Jan 30, 2024
f0249ef
removed unnecessary files
cima22 Jan 30, 2024
4892a14
Please consider the following formatting changes
alibuild Jan 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ project(O2 LANGUAGES C CXX VERSION 1.2.0)

include(CTest)

# Defaults only: keep any value supplied by the caller (e.g. -DCUDA_COMPUTETARGET=86 or
# -DGPUCA_NO_FAST_MATH=0) instead of unconditionally overriding it for the whole project.
if(NOT DEFINED CUDA_COMPUTETARGET)
  set(CUDA_COMPUTETARGET 75)
endif()
if(NOT DEFINED GPUCA_NO_FAST_MATH)
  set(GPUCA_NO_FAST_MATH 1)
endif()

# Project wide setup

# Would better fit inside GPU/CMakeLists.txt, but include GPU/Common directly
Expand Down
11 changes: 11 additions & 0 deletions DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,17 @@ struct ClusterNative {
return (this->getFlags() < rhs.getFlags());
}
}

/// Member-wise equality. Two clusters compare equal only when getTimePacked(), padPacked,
/// sigmaTimePacked, sigmaPadPacked, qMax, qTot and getFlags() all match.
GPUd() bool operator==(const ClusterNative& rhs) const
{
  if (this->getTimePacked() != rhs.getTimePacked()) {
    return false;
  }
  if (this->padPacked != rhs.padPacked) {
    return false;
  }
  if (this->sigmaTimePacked != rhs.sigmaTimePacked) {
    return false;
  }
  if (this->sigmaPadPacked != rhs.sigmaPadPacked) {
    return false;
  }
  if (this->qMax != rhs.qMax) {
    return false;
  }
  if (this->qTot != rhs.qTot) {
    return false;
  }
  // Flags are checked last, in the same order as the other fields are listed above.
  return this->getFlags() == rhs.getFlags();
}
};

// This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to
Expand Down
4 changes: 3 additions & 1 deletion GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
# HDRS_CINT_O2: Headers for ROOT dictionary (only for O2) HDRS_INSTALL: Headers
# for installation only

#set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...

if(NOT DEFINED GPUCA_NO_FAST_MATH)
set(GPUCA_NO_FAST_MATH 0)
set(GPUCA_NO_FAST_MATH 1)
endif()
if(${GPUCA_NO_FAST_MATH})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math -ffp-contract=off")
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPUTracking/Base/GPUConstantMem.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class GPUTRDTracker_t
#if defined(GPUCA_NOCOMPAT_ALLCINT) && (!defined(GPUCA_GPULIBRARY) || !defined(GPUCA_ALIROOT_LIB)) && defined(GPUCA_HAVE_O2HEADERS)
#include "GPUTPCConvert.h"
#include "GPUTPCCompression.h"
#include "GPUTPCDecompression.h"
#include "GPUITSFitter.h"
#include "GPUTPCClusterFinder.h"
#include "GPUTrackingRefit.h"
Expand All @@ -69,6 +70,7 @@ struct GPUConstantMem {
tpcTrackers[GPUCA_NSLICES];
GPUTPCConvert tpcConverter;
GPUTPCCompression tpcCompressor;
GPUTPCDecompression tpcDecompressor;
GPUTPCGMMerger tpcMerger;
GPUTRDTrackerGPU trdTrackerGPU;
#ifdef GPUCA_HAVE_O2HEADERS
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ inline void GPUReconstruction::RegisterGPUProcessor(T* proc, bool deviceSlave)
template <class T>
inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
{
static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derrived class");
static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class");
if (allocate) {
proc->SetMaxData(mHostConstantMem->ioPtrs);
}
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "GPUITSFitterKernels.h"
#include "GPUTPCConvertKernel.h"
#include "GPUTPCCompressionKernels.h"
#include "GPUTPCDecompressionKernels.h"
#include "GPUTPCClusterFinderKernels.h"
#include "GPUTrackingRefitKernel.h"
#include "GPUTPCGMO2Output.h"
Expand Down
3 changes: 3 additions & 0 deletions GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ using namespace GPUCA_NAMESPACE::gpu;
#include "GPUTPCCompressionKernels.cxx"
#include "GPUTPCCompressionTrackModel.cxx"

// Files for TPC Decompression
#include "GPUTPCDecompressionKernels.cxx"

// Files for TPC Cluster Finder
#include "ClusterAccumulator.cxx"
#include "GPUTPCCFStreamCompaction.cxx"
Expand Down
5 changes: 4 additions & 1 deletion GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

set(MODULE GPUTracking)

# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -O0") # to uncomment if needed, tired of typing this...
# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...

include(cmake/helpers.cmake)

Expand Down Expand Up @@ -176,6 +176,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR CONFIG_O2_EXTENSIONS)
DataCompression/GPUTPCCompression.cxx
DataCompression/GPUTPCCompressionTrackModel.cxx
DataCompression/GPUTPCCompressionKernels.cxx
DataCompression/GPUTPCDecompression.cxx
DataCompression/GPUTPCDecompressionKernels.cxx
DataCompression/TPCClusterDecompressor.cxx
DataCompression/GPUTPCClusterStatistics.cxx
TPCClusterFinder/GPUTPCClusterFinder.cxx
Expand Down Expand Up @@ -469,6 +471,7 @@ endif()
set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx
DataCompression/GPUTPCCompressionKernels.cxx
DataCompression/TPCClusterDecompressor.cxx
DataCompression/GPUTPCDecompressionKernels.cxx
TARGET_DIRECTORY ${targetName}
PROPERTIES COMPILE_FLAGS "-fno-fast-math -ffp-contract=off")

Expand Down
99 changes: 99 additions & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCDecompression.cxx
/// \author Gabriele Cimador

#include "GPUTPCDecompression.h"
#include "GPUTPCCompression.h"
#include "GPUReconstruction.h"
#include "GPUO2DataTypes.h"
#include "GPUMemorySizeScalers.h"
#include "GPULogging.h"

using namespace GPUCA_NAMESPACE::gpu;

// Intentionally empty: this processor performs no work at initialization time.
void GPUTPCDecompression::InitializeProcessor()
{
}

/// Memory-layout callback for the compressed-cluster input (mInputGPU).
/// Uses the cluster / track counts stored in mInputGPU itself and requests the reduced
/// layout for the attached-cluster difference arrays.
/// \param mem start of the buffer to lay the arrays out in
/// \return the pointer value after the layout call
void* GPUTPCDecompression::SetPointersInputGPU(void* mem)
{
  constexpr bool useReducedAttached = true;
  SetPointersCompressedClusters(mem,
                                mInputGPU,
                                mInputGPU.nAttachedClusters,
                                mInputGPU.nTracks,
                                mInputGPU.nUnattachedClusters,
                                useReducedAttached);
  return mem;
}

/// Assigns the array pointers of a compressed-cluster container inside one buffer.
/// \param mem        current buffer position, passed on to computePointerWithAlignment for every array
/// \param c          container whose array pointers are set
/// \param nClA       number of attached clusters
/// \param nTr        number of tracks
/// \param nClU       number of unattached clusters
/// \param reducedClA if true, the rowDiffA / sliceLegDiffA / padResA / timeResA arrays hold
///                   nClA - nTr entries instead of nClA
template <class T>
void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, unsigned int nClA, unsigned int nTr, unsigned int nClU, bool reducedClA)
{
  // --- Unattached clusters and per slice/row counters: always laid out ---
  // Do not reorder, qTotU is used as first address in GPUChainTracking::RunTPCCompression
  computePointerWithAlignment(mem, c.qTotU, nClU);
  computePointerWithAlignment(mem, c.qMaxU, nClU);
  computePointerWithAlignment(mem, c.flagsU, nClU);
  computePointerWithAlignment(mem, c.padDiffU, nClU);
  computePointerWithAlignment(mem, c.timeDiffU, nClU);
  computePointerWithAlignment(mem, c.sigmaPadU, nClU);
  computePointerWithAlignment(mem, c.sigmaTimeU, nClU);
  computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSLICES);

  const bool trackModelDisabled = !(mRec->GetParam().rec.tpc.compressionTypeMask & GPUSettings::CompressionTrackModel);
  if (trackModelDisabled) {
    return; // Track model disabled, do not allocate memory
  }

  // Size of the per-cluster difference arrays (unsigned arithmetic, as in the counts passed in).
  unsigned int nDiffEntries = nClA;
  if (reducedClA) {
    nDiffEntries -= nTr;
  }

  // --- Attached clusters ---
  computePointerWithAlignment(mem, c.qTotA, nClA);
  computePointerWithAlignment(mem, c.qMaxA, nClA);
  computePointerWithAlignment(mem, c.flagsA, nClA);
  computePointerWithAlignment(mem, c.rowDiffA, nDiffEntries);
  computePointerWithAlignment(mem, c.sliceLegDiffA, nDiffEntries);
  computePointerWithAlignment(mem, c.padResA, nDiffEntries);
  computePointerWithAlignment(mem, c.timeResA, nDiffEntries);
  computePointerWithAlignment(mem, c.sigmaPadA, nClA);
  computePointerWithAlignment(mem, c.sigmaTimeA, nClA);

  // --- Per-track arrays ---
  computePointerWithAlignment(mem, c.qPtA, nTr);
  computePointerWithAlignment(mem, c.rowA, nTr);
  computePointerWithAlignment(mem, c.sliceA, nTr);
  computePointerWithAlignment(mem, c.timeA, nTr);
  computePointerWithAlignment(mem, c.padA, nTr);

  computePointerWithAlignment(mem, c.nTrackClusters, nTr);
}

/// Memory-layout callback for mTmpNativeClusters: one region of mMaxNativeClustersPerBuffer
/// entries for each of the NSLICES * GPUCA_ROW_COUNT slice/row combinations.
/// \param mem start of the buffer
/// \return the pointer value after the layout call
void* GPUTPCDecompression::SetPointersTmpNativeBuffersGPU(void* mem)
{
  const auto nTmpClusters = NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer;
  computePointerWithAlignment(mem, mTmpNativeClusters, nTmpClusters);
  return mem;
}

/// Memory-layout callback for mNativeClustersIndex, which has NSLICES * GPUCA_ROW_COUNT entries.
/// \param mem start of the buffer
/// \return the pointer value after the layout call
void* GPUTPCDecompression::SetPointersTmpNativeBuffersOutput(void* mem)
{
  const auto nSliceRows = NSLICES * GPUCA_ROW_COUNT;
  computePointerWithAlignment(mem, mNativeClustersIndex, nSliceRows);
  return mem;
}

/// Memory-layout callback for mUnattachedClustersOffsets, which has NSLICES * GPUCA_ROW_COUNT entries.
/// \param mem start of the buffer
/// \return the pointer value after the layout call
void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem)
{
  const auto nSliceRows = NSLICES * GPUCA_ROW_COUNT;
  computePointerWithAlignment(mem, mUnattachedClustersOffsets, nSliceRows);
  return mem;
}

/// Registers the four memory resources of the decompressor with the reconstruction object.
/// The ids returned for the input, index and offset resources are stored in the matching
/// members; the id of the temporary GPU cluster buffer is not kept.
void GPUTPCDecompression::RegisterMemoryAllocation()
{
  AllocateAndInitializeLate();

  // Compressed-cluster input (custom handling, GPU side).
  const auto inputGPUType = GPUMemoryResource::MEMORY_INPUT_FLAG | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM;
  mMemoryResInputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputGPU, inputGPUType, "TPCDecompressionInput");

  // Temporary native-cluster storage.
  mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersGPU, GPUMemoryResource::MEMORY_GPU, "TPCDecompressionTmpBuffersGPU");

  // Per slice/row index buffer, registered as output.
  mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersOutput, GPUMemoryResource::MEMORY_OUTPUT, "TPCDecompressionTmpBuffersOutput");

  // Per slice/row unattached-cluster offsets, registered as input.
  mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersInput, GPUMemoryResource::MEMORY_INPUT, "TPCDecompressionTmpBuffersInput");
}

void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io)
{
// mMaxNativeClustersPerBuffer = 81760;
mMaxNativeClustersPerBuffer = 12000;
}
78 changes: 78 additions & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCDecompression.h
/// \author Gabriele Cimador

#ifndef GPUTPCDECOMPRESSION_H
#define GPUTPCDECOMPRESSION_H

#include "GPUDef.h"
#include "GPUProcessor.h"
#include "GPUCommonMath.h"
#include "GPUParam.h"
#include "GPUO2DataTypes.h"

#ifdef GPUCA_HAVE_O2HEADERS
#include "DataFormatsTPC/CompressedClusters.h"
#else
// Empty placeholder types used when GPUCA_HAVE_O2HEADERS is not defined, so that the
// o2::tpc compressed-cluster type names still exist.
namespace o2::tpc
{
struct CompressedClustersPtrs {};
struct CompressedClusters {};
struct CompressedClustersFlat {};
} // namespace o2::tpc
#endif

namespace GPUCA_NAMESPACE::gpu
{

/// GPU processor holding the buffers used for TPC cluster decompression.
/// The host-only interface registers the memory resources and provides the layout
/// callbacks; GPUTPCDecompressionKernels and GPUChainTracking access the members as friends.
class GPUTPCDecompression : public GPUProcessor
{
  friend class GPUTPCDecompressionKernels;
  friend class GPUChainTracking;

 public:
#ifndef GPUCA_GPUCODE
  void InitializeProcessor();
  void RegisterMemoryAllocation();
  void SetMaxData(const GPUTrackingInOutPointers& io);

  // Memory-layout callbacks, one per registered memory resource.
  void* SetPointersInputGPU(void* mem);
  void* SetPointersTmpNativeBuffersGPU(void* mem);
  void* SetPointersTmpNativeBuffersOutput(void* mem);
  void* SetPointersTmpNativeBuffersInput(void* mem);

#endif

 protected:
  constexpr static unsigned int NSLICES = GPUCA_NSLICES;
  o2::tpc::CompressedClusters mInputGPU; // compressed-cluster input; its arrays are laid out by SetPointersInputGPU

  // All scalars / pointers are given defined initial values so that nothing is read
  // uninitialized before SetMaxData and the layout callbacks have run.
  unsigned int mMaxNativeClustersPerBuffer = 0;              // capacity per slice/row buffer, set in SetMaxData
  unsigned int* mNativeClustersIndex = nullptr;              // NSLICES * GPUCA_ROW_COUNT entries
  unsigned int* mUnattachedClustersOffsets = nullptr;        // NSLICES * GPUCA_ROW_COUNT entries
  o2::tpc::ClusterNative* mTmpNativeClusters = nullptr;      // NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer entries
  o2::tpc::ClusterNativeAccess* mClusterNativeAccess = nullptr; // not assigned inside this class; presumably set by a friend — TODO confirm
  o2::tpc::ClusterNative* mNativeClustersBuffer = nullptr;      // not assigned inside this class; presumably set by a friend — TODO confirm

  template <class T>
  void SetPointersCompressedClusters(void*& mem, T& c, unsigned int nClA, unsigned int nTr, unsigned int nClU, bool reducedClA);

  // Ids returned by RegisterMemoryAllocation; -1 until registered.
  short mMemoryResInputGPU = -1;
  short mResourceTmpIndexes = -1;
  short mResourceTmpClustersOffsets = -1;
};
} // namespace GPUCA_NAMESPACE::gpu
#endif // GPUTPCDECOMPRESSION_H
Loading