From bc36b08c62c5931e1dfc666581b3a8589278647f Mon Sep 17 00:00:00 2001
From: "yuan.xiong"
Date: Fri, 28 Jul 2023 04:17:35 -0700
Subject: [PATCH] Fix the "numel needs to be smaller than int32_t max; otherwise, please use packed_accessor64" issue

Signed-off-by: yuan.xiong
---
 cxx/mcubes_cuda.cu | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/cxx/mcubes_cuda.cu b/cxx/mcubes_cuda.cu
index ba92ed9..6ff249f 100644
--- a/cxx/mcubes_cuda.cu
+++ b/cxx/mcubes_cuda.cu
@@ -331,13 +331,13 @@ __device__ float3 vertexInterp(float isolevel, float3 p1, float3 p2, float valp1
 }
 
 __global__ void mcubes_cuda_kernel(
-    const torch::PackedTensorAccessor32 vol,
-    torch::PackedTensorAccessor32 vertices,
-    torch::PackedTensorAccessor32 ntris_in_cells,
+    const torch::PackedTensorAccessor64 vol,
+    torch::PackedTensorAccessor64 vertices,
+    torch::PackedTensorAccessor64 ntris_in_cells,
     int3 nGrids,
     float threshold,
-    const torch::PackedTensorAccessor32 edgeTable,
-    const torch::PackedTensorAccessor32 triTable) {
+    const torch::PackedTensorAccessor64 edgeTable,
+    const torch::PackedTensorAccessor64 triTable) {
 
     const int ix = blockIdx.x * blockDim.x + threadIdx.x;
     const int iy = blockIdx.y * blockDim.y + threadIdx.y;
@@ -436,12 +436,12 @@ __global__ void mcubes_cuda_kernel(
 }
 
 __global__ void compaction(
-    const torch::PackedTensorAccessor32 vertBuf,
-    const torch::PackedTensorAccessor32 ntris,
-    const torch::PackedTensorAccessor32 offsets,
+    const torch::PackedTensorAccessor64 vertBuf,
+    const torch::PackedTensorAccessor64 ntris,
+    const torch::PackedTensorAccessor64 offsets,
     int3 nGrids,
-    torch::PackedTensorAccessor32 verts,
-    torch::PackedTensorAccessor32 faces) {
+    torch::PackedTensorAccessor64 verts,
+    torch::PackedTensorAccessor64 faces) {
 
     const int ix = blockIdx.x * blockDim.x + threadIdx.x;
     const int iy = blockIdx.y * blockDim.y + threadIdx.y;
@@ -521,13 +521,13 @@ std::vector mcubes_cuda(torch::Tensor vol, float threshold) {
     // Kernel call
     cudaSetDevice(deviceId);
     mcubes_cuda_kernel<<>>(
-        vol.packed_accessor32(),
-        vert_buffer.packed_accessor32(),
-        ntris_in_cells.packed_accessor32(),
+        vol.packed_accessor64(),
+        vert_buffer.packed_accessor64(),
+        ntris_in_cells.packed_accessor64(),
         nGrids,
         threshold,
-        edgeTableTensorCuda.packed_accessor32(),
-        triTableTensorCuda.packed_accessor32()
+        edgeTableTensorCuda.packed_accessor64(),
+        triTableTensorCuda.packed_accessor64()
     );
     cudaDeviceSynchronize();
 
@@ -549,12 +549,12 @@ std::vector mcubes_cuda(torch::Tensor vol, float threshold) {
 
     cudaSetDevice(deviceId);
     compaction<<>>(
-        vert_buffer.packed_accessor32(),
-        ntris_in_cells.packed_accessor32(),
-        offsets.packed_accessor32(),
+        vert_buffer.packed_accessor64(),
+        ntris_in_cells.packed_accessor64(),
+        offsets.packed_accessor64(),
         nGrids,
-        verts.packed_accessor32(),
-        faces.packed_accessor32()
+        verts.packed_accessor64(),
+        faces.packed_accessor64()
    );
     cudaDeviceSynchronize();
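
For reference, a minimal self-contained sketch (not part of this patch) of the packed_accessor64 pattern the change relies on: the 64-bit accessor carries int64_t sizes and strides, so it does not trip the "numel needs to be smaller than int32_t max" check that packed_accessor32 performs on large volumes. The kernel name scale_volume_kernel, the host wrapper scale_volume, and the template arguments <float, 3, torch::RestrictPtrTraits> are illustrative assumptions, not taken from mcubes_cuda.cu.

// Hypothetical example, not from the patched file: scales a 3D float volume in place
// using 64-bit packed accessors.
#include <torch/extension.h>
#include <cuda_runtime.h>

__global__ void scale_volume_kernel(
    torch::PackedTensorAccessor64<float, 3, torch::RestrictPtrTraits> vol,
    float factor) {
    // 64-bit indices: safe even when vol.numel() exceeds INT32_MAX.
    const int64_t ix = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
    const int64_t iy = blockIdx.y * (int64_t)blockDim.y + threadIdx.y;
    const int64_t iz = blockIdx.z * (int64_t)blockDim.z + threadIdx.z;
    if (ix >= vol.size(2) || iy >= vol.size(1) || iz >= vol.size(0)) {
        return;
    }
    vol[iz][iy][ix] *= factor;
}

void scale_volume(torch::Tensor vol, float factor) {
    TORCH_CHECK(vol.is_cuda(), "vol must be a CUDA tensor");
    TORCH_CHECK(vol.scalar_type() == torch::kFloat32, "vol must be float32");
    TORCH_CHECK(vol.dim() == 3, "vol must be a 3D volume");

    const dim3 block(8, 8, 8);
    const dim3 grid((unsigned int)((vol.size(2) + block.x - 1) / block.x),
                    (unsigned int)((vol.size(1) + block.y - 1) / block.y),
                    (unsigned int)((vol.size(0) + block.z - 1) / block.z));

    // packed_accessor64 hands the kernel int64_t sizes/strides, so PyTorch does not
    // raise the "numel needs to be smaller than int32_t max" error for large tensors.
    scale_volume_kernel<<<grid, block>>>(
        vol.packed_accessor64<float, 3, torch::RestrictPtrTraits>(),
        factor);
    cudaDeviceSynchronize();
}

For volumes guaranteed to stay below 2^31 elements, packed_accessor32 can be slightly cheaper because indices, sizes, and strides are carried as 32-bit integers; the patch trades that away so the marching-cubes kernels keep working on grids whose buffers exceed that limit.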