@@ -270,6 +270,8 @@ FoundationPoseRenderer::PrepareBuffer()
270270 // nvdiffrast render 用到的缓存以及渲染器
271271 size_t pose_clip_size = num_vertices_ * (kVertexPoints + 1 ) * input_poses_num_ * sizeof (float );
272272 size_t pts_cam_size = num_vertices_ * kVertexPoints * input_poses_num_ * sizeof (float );
273+ size_t diffuse_intensity_size = num_vertices_ * input_poses_num_ * sizeof (float );
274+ size_t diffuse_intensity_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * sizeof (float );
273275 size_t rast_out_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * (kVertexPoints + 1 ) * sizeof (float );
274276 size_t color_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof (float );
275277 size_t xyz_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof (float );
@@ -279,6 +281,8 @@ FoundationPoseRenderer::PrepareBuffer()
279281 float * _pose_clip_device;
280282 float * _rast_out_device;
281283 float * _pts_cam_device;
284+ float * _diffuse_intensity_device;
285+ float * _diffuse_intensity_map_device;
282286 float * _texcoords_out_device;
283287 float * _color_device;
284288 float * _xyz_map_device;
@@ -307,6 +311,14 @@ FoundationPoseRenderer::PrepareBuffer()
307311 " [FoundationPoseRenderer] cudaMalloc `_pts_cam_device` FAILED!!!" );
308312 pts_cam_device_ = DeviceBufferUniquePtrType<float >(_pts_cam_device, CudaMemoryDeleter<float >());
309313
314+ CHECK_CUDA (cudaMalloc (&_diffuse_intensity_device, diffuse_intensity_size),
315+ " [FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_device` FAILED!!!" );
316+ diffuse_intensity_device_ = DeviceBufferUniquePtrType<float >(_diffuse_intensity_device, CudaMemoryDeleter<float >());
317+
318+ CHECK_CUDA (cudaMalloc (&_diffuse_intensity_map_device, diffuse_intensity_map_size),
319+ " [FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_map_device` FAILED!!!" );
320+ diffuse_intensity_map_device_ = DeviceBufferUniquePtrType<float >(_diffuse_intensity_map_device, CudaMemoryDeleter<float >());
321+
310322 CHECK_CUDA (cudaMalloc (&_texcoords_out_device, texcoords_out_size),
311323 " [FoundationPoseRenderer] cudaMalloc `_texcoords_out_device` FAILED!!!" );
312324 texcoords_out_device_ = DeviceBufferUniquePtrType<float >(_texcoords_out_device, CudaMemoryDeleter<float >());
@@ -361,18 +373,24 @@ FoundationPoseRenderer::LoadTexturedMesh()
361373{
362374 const auto & mesh_model_center = mesh_loader_->GetMeshModelCenter ();
363375 const auto & mesh_vertices = mesh_loader_->GetMeshVertices ();
376+ const auto & mesh_vertex_normals = mesh_loader_->GetMeshVertexNormals ();
364377 const auto & mesh_texcoords = mesh_loader_->GetMeshTextureCoords ();
365378 const auto & mesh_faces = mesh_loader_->GetMeshFaces ();
366379 const auto & rgb_texture_map = mesh_loader_->GetTextureMap ();
367380 mesh_diameter_ = mesh_loader_->GetMeshDiameter ();
368381
382+ std::vector<float > vertex_normals;
369383
370384 // Walk through each of the mesh's vertices
371385 for (unsigned int v = 0 ; v < mesh_vertices.size (); v++) {
372386 vertices_.push_back (mesh_vertices[v].x - mesh_model_center[0 ]);
373387 vertices_.push_back (mesh_vertices[v].y - mesh_model_center[1 ]);
374388 vertices_.push_back (mesh_vertices[v].z - mesh_model_center[2 ]);
375389
390+ vertex_normals.push_back (mesh_vertex_normals[v].x );
391+ vertex_normals.push_back (mesh_vertex_normals[v].y );
392+ vertex_normals.push_back (mesh_vertex_normals[v].z );
393+
376394 // Check if the mesh has texture coordinates
377395 if (mesh_texcoords.size () >= 1 ) {
378396 texcoords_.push_back (mesh_texcoords[0 ][v].x );
@@ -422,6 +440,7 @@ FoundationPoseRenderer::LoadTexturedMesh()
422440 size_t texcoords_size = texcoords_.size () * sizeof (float );
423441
424442 float * _vertices_device;
443+ float * _vertex_normals_device;
425444 float * _texcoords_device;
426445 int32_t * _mesh_faces_device;
427446 uint8_t * _texture_map_device;
@@ -430,6 +449,10 @@ FoundationPoseRenderer::LoadTexturedMesh()
430449 " [FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!" );
431450 vertices_device_ = DeviceBufferUniquePtrType<float >(_vertices_device, CudaMemoryDeleter<float >());
432451
452+ CHECK_CUDA (cudaMalloc (&_vertex_normals_device, vertices_size),
453+ " [FoundationposeRender] cudaMalloc `vertex_normals_device` FAILED!!!" );
454+ vertex_normals_device_ = DeviceBufferUniquePtrType<float >(_vertex_normals_device, CudaMemoryDeleter<float >());
455+
433456 CHECK_CUDA (cudaMalloc (&_mesh_faces_device, faces_size),
434457 " [FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!" );
435458 mesh_faces_device_ = DeviceBufferUniquePtrType<int32_t >(_mesh_faces_device, CudaMemoryDeleter<int32_t >());
@@ -442,9 +465,14 @@ FoundationPoseRenderer::LoadTexturedMesh()
442465 " [FoundationposeRender] cudaMalloc `texture_map_device_` FAILED!!!" );
443466 texture_map_device_ = DeviceBufferUniquePtrType<uint8_t >(_texture_map_device, CudaMemoryDeleter<uint8_t >());
444467
445- CHECK_CUDA (cudaMemcpy (vertices_device_.get (),
446- vertices_.data (),
447- vertices_size,
468+ CHECK_CUDA (cudaMemcpy (vertices_device_.get (),
469+ vertices_.data (),
470+ vertices_size,
471+ cudaMemcpyHostToDevice),
472+ " [FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!" );
473+ CHECK_CUDA (cudaMemcpy (vertex_normals_device_.get (),
474+ vertex_normals.data (),
475+ vertices_size,
448476 cudaMemcpyHostToDevice),
449477 " [FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!" );
450478 CHECK_CUDA (cudaMemcpy (mesh_faces_device_.get (),
@@ -514,6 +542,36 @@ bool FoundationPoseRenderer::TransformVerticesOnCUDA(cudaStream_t stream,
514542 return true ;
515543}
516544
545+ bool FoundationPoseRenderer::TransformVertexNormalsOnCUDA (cudaStream_t stream,
546+ const std::vector<Eigen::MatrixXf>& tfs,
547+ float * output_buffer)
548+ {
549+ // Get the dimensions of the inputs
550+ int tfs_size = tfs.size ();
551+ CHECK_STATE (tfs_size != 0 ,
552+ " [FoundationposeRender] The transfomation matrix is empty! " );
553+
554+ CHECK_STATE (tfs[0 ].cols () == tfs[0 ].rows (),
555+ " [FoundationposeRender] The transfomation matrix has different rows and cols! " );
556+
557+ const int total_elements = tfs[0 ].cols () * tfs[0 ].rows ();
558+
559+ float * transform_device_buffer_ = nullptr ;
560+ cudaMallocAsync (&transform_device_buffer_, tfs_size * total_elements * sizeof (float ), stream);
561+
562+ for (int i = 0 ; i < tfs_size ; ++ i) {
563+ cudaMemcpyAsync (transform_device_buffer_ + i * total_elements,
564+ tfs[i].data (),
565+ total_elements * sizeof (float ),
566+ cudaMemcpyHostToDevice,
567+ stream);
568+ }
569+
570+ foundationpose_render::transform_normals (stream, transform_device_buffer_, tfs_size, vertex_normals_device_.get (), num_vertices_, output_buffer);
571+
572+ cudaFreeAsync (transform_device_buffer_, stream);
573+ return true ;
574+ }
517575
518576bool FoundationPoseRenderer::GeneratePoseClipOnCUDA (cudaStream_t stream,
519577 float * output_buffer,
@@ -595,15 +653,15 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream,
595653 foundationpose_render::interpolate (
596654 cuda_stream,
597655 pts_cam_device_.get (), rast_out_device_.get (), mesh_faces_device_.get (), xyz_map_device_.get (),
598- num_vertices_, num_faces_, kVertexPoints ,
656+ num_vertices_, num_faces_, 3 , kVertexPoints ,
599657 H, W, N);
600658 CHECK_CUDA (cudaGetLastError (),
601659 " [FoundationPoseRenderer] interpolate failed!!!" );
602660
603661 foundationpose_render::interpolate (
604662 cuda_stream,
605663 texcoords_device_.get (), rast_out_device_.get (), mesh_faces_device_.get (), texcoords_out_device_.get (),
606- num_vertices_, num_faces_, kTexcoordsDim ,
664+ num_vertices_, num_faces_, 2 , kTexcoordsDim ,
607665 H, W, N);
608666 CHECK_CUDA (cudaGetLastError (),
609667 " [FoundationPoseRenderer] interpolate failed!!!" );
@@ -619,6 +677,26 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream,
619677 CHECK_CUDA (cudaGetLastError (),
620678 " [FoundationPoseRenderer] texture failed!!!" );
621679
680+ CHECK_STATE (TransformVertexNormalsOnCUDA (cuda_stream, poses, diffuse_intensity_device_.get ()),
681+ " [FoundationPoseRenderer] Transform vertex normals failed!!!" );
682+
683+ foundationpose_render::interpolate (cuda_stream,
684+ diffuse_intensity_device_.get (),
685+ rast_out_device_.get (),
686+ mesh_faces_device_.get (),
687+ diffuse_intensity_map_device_.get (),
688+ num_vertices_, num_faces_, 3 , 1 , H, W, N);
689+ CHECK_CUDA (cudaGetLastError (),
690+ " [FoundationPoseRenderer] interpolate failed!!!" );
691+
692+ foundationpose_render::refine_color (cuda_stream, color_device_.get (),
693+ diffuse_intensity_map_device_.get (),
694+ rast_out_device_.get (),
695+ color_device_.get (),
696+ poses.size (), 0.8 , 0.5 , H, W);
697+ CHECK_CUDA (cudaGetLastError (),
698+ " [FoundationPoseRenderer] refine_color failed!!!" );
699+
622700 float min_value = 0.0 ;
623701 float max_value = 1.0 ;
624702 foundationpose_render::clamp (cuda_stream, color_device_.get (), min_value, max_value, N * H * W * kNumChannels );
0 commit comments