Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

## Update LOG

- [2025.04] 解耦`Register`和`Track`过程; 令算法输出mesh下的位姿,提供mesh_loader相关的接口和方法供外部拓展。
- [2025.03] 渲染过程与原Python工程对齐,支持无texture纹理输入渲染. [对应PR](https://github.com/zz990099/foundationpose_cpp/pull/13).
- [2025.03] 添加对Jetson Orin平台支持,[一键配置docker环境](docs/build_enviroment_on_jetson.md)

Expand Down
3 changes: 1 addition & 2 deletions detection_6d_foundationpose/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@ set(source_file src/nvdiffrast/common/cudaraster/impl/Buffer.cpp
src/foundationpose_render.cu
src/foundationpose_sampling.cpp
src/foundationpose_sampling.cu
src/foundationpose_utils.cpp
src/foundationpose_utils.cu
src/foundationpose_decoder.cpp
src/foundationpose_decoder.cu
src/foundationpose.cpp
src/mesh_loader/assimp_mesh_loader.cpp
)

include_directories(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,134 +1,103 @@
#ifndef __FOUNDATIONPOSE_H
#define __FOUNDATIONPOSE_H
#pragma once

#include <opencv2/core.hpp>
#include <Eigen/Dense>

#include "detection_6d_foundationpose/mesh_loader.hpp"
#include "deploy_core/base_infer_core.h"

namespace detection_6d {

/**
* @brief Abstract base class for 6-DoF object detection models
*
* @note Implementations should handle both object registration and tracking
*/
class Base6DofDetectionModel {
public:
/**
* @brief 实现的接口应支持动态输入尺寸,并检查rgb/depth/mask尺寸是否一致.
* @brief Register object instance with initial pose estimation
*
* @note
* 相机内参与原始图像的尺寸是绑定的,不可在外部直接对图像进行resize操作,若需要进行resize,应对内参intrinsic同样处理
* @note Implementation must:
* - Verify consistent RGB/depth/mask dimensions
* - Maintain binding between camera intrinsics and original image size
* (resizing requires corresponding intrinsic adjustment)
*
* @param rgb rgb图像,必须是`rgb`格式,从opencv-imread读取的图像默认是bgr格式,需经过转换
* @param depth 获取的深度图像,cv::Mat数据格式为CV_32F1
* @param mask 目标物的mask图像,cv::Mat数据格式为CV_8UC1,positive的像素值大于0即可
* @param target_name 目标物的名称,与构建时提供的name->mesh_path映射一致
* @param out_pose 输出位姿
* @return true
* @return false
* @param rgb Input RGB image (must be in RGB format, convert if using opencv-imread's default
* BGR)
* @param depth Depth image (CV_32FC1 format)
* @param mask Object mask (CV_8UC1 format, positive pixels > 0)
* @param target_name Object category name (must match construction mapping)
* @param out_pose_in_mesh Output pose in mesh coordinate frame
* @return true Registration successful
* @return false Registration failed
*/
virtual bool Register(const cv::Mat &rgb,
const cv::Mat &depth,
const cv::Mat &mask,
const std::string &target_name,
Eigen::Matrix4f &out_pose) = 0;
Eigen::Matrix4f &out_pose_in_mesh) = 0;

/**
* @brief
* 从第二帧开始的跟踪过程,是`Register`的轻量化版本,精度稍低但推理效率非常高,调用前必须先调用`Register`
* @brief Track object pose from subsequent frames (lightweight version of Register)
*
* @note Requires prior successful Register call
* - Lower accuracy but higher efficiency than Register
* - Accepts pose hypotheses from external sources
*
* @param rgb rgb图像,必须是`rgb`格式,从opencv-imread读取的图像默认是bgr格式,需经过转换
* @param depth 获取的深度图像,cv::Mat数据格式为CV_32F1
* @param target_name 目标物的名称,与构建时提供的name->mesh_path映射一致
* @param out_pose 输出位姿
* @return true
* @return false
* @param rgb Input RGB image (must be in RGB format)
* @param depth Depth image (CV_32FC1 format)
* @param hyp_pose_in_mesh Hypothesis pose in mesh frame (from Register or other sources)
* @param target_name Object category name (must match construction mapping)
* @param out_pose_in_mesh Output pose in mesh coordinate frame
* @return true Tracking successful
* @return false Tracking failed
*/
virtual bool Track(const cv::Mat &rgb,
const cv::Mat &depth,
const std::string &target_name,
Eigen::Matrix4f &out_pose) = 0;
virtual bool Track(const cv::Mat &rgb,
const cv::Mat &depth,
const Eigen::Matrix4f &hyp_pose_in_mesh,
const std::string &target_name,
Eigen::Matrix4f &out_pose_in_mesh) = 0;

/**
* @brief 获取某个输入mesh目标的三维尺寸(辅助功能,用户也可自己计算)
*
* @return Eigen::Vector3f
* @brief Virtual destructor for proper resource cleanup
*/
virtual Eigen::Vector3f GetObjectDimension(const std::string &target_name) const
{
throw std::runtime_error("[Base6DofDetectionModel] GetOjbectDimension NOT Implemented yet!!!");
};

virtual ~Base6DofDetectionModel() = default;

protected:
/**
* @brief Construct a new base 6-DoF detection model
* @note Protected constructor for abstract base class
*/
Base6DofDetectionModel() = default;
};

/**
* @brief 创建一个`FoundationPose`实例
* @brief Factory function for creating FoundationPose model instances
*
* @param refiner_core refiner推理核心
* @param scorer_core scorer推理核心
* @param mesh_file_path 使用的三维模型`mesh_file`路径
* @param texture_file_path 使用的三维模型外观特征图像路径
* @param intrinsic_in_vec 相机内参矩阵,std::vector<float>形式,`row_major`格式
* @return std::shared_ptr<Base6DofDetectionModel>
*/
std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
std::shared_ptr<inference_core::BaseInferCore> refiner_core,
std::shared_ptr<inference_core::BaseInferCore> scorer_core,
const std::string &target_name,
const std::string &mesh_file_path,
const std::string &texture_file_path,
const std::vector<float> &intrinsic_in_vec);

/**
* @brief 创建一个`FoundationPose`实例
* @param refiner_core Refinement inference core handle
* @param scorer_core Scoring inference core handle
* @param mesh_loaders Registered 3D mesh loaders
* @param intrinsic_in_mat Camera intrinsic matrix (3x3, Eigen::Matrix3f)
* @param max_input_image_height Maximum allowed input image height (pixels)
* @param max_input_image_width Maximum allowed input image width (pixels)
* @return std::shared_ptr<Base6DofDetectionModel> Initialized model instance
*
* @param refiner_core refiner推理核心
* @param scorer_core scorer推理核心
* @param mesh_file_path 使用的三维模型`mesh_file`路径
* @param texture_file_path 使用的三维模型外观特征图像路径
* @param intrinsic_in_mat 相机内参矩阵,Eigen::Matrix3f格式
* @return std::shared_ptr<Base6DofDetectionModel>
*/
std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
std::shared_ptr<inference_core::BaseInferCore> refiner_core,
std::shared_ptr<inference_core::BaseInferCore> scorer_core,
const std::string &target_name,
const std::string &mesh_file_path,
const std::string &texture_file_path,
const Eigen::Matrix3f &intrinsic_in_mat);

/**
* @brief 创建一个`FoundationPose`实例
* @note Creates an implementation of the 6-DoF detection pipeline using:
* - Pose refinement neural network
* - Pose scoring network
* - Pre-registered 3D mesh models
* - Camera calibration parameters
* - Input size constraints for memory optimization
*
* @param refiner_core refiner推理核心
* @param scorer_core scorer推理核心
* @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path],
* 键值[name]在后续检测过程中用于辨别特定种类目标,**保持一致**
* @param intrinsic_in_vec 相机内参矩阵,std::vector<float>形式,`row_major`格式
* @return std::shared_ptr<Base6DofDetectionModel>
* @warning Input images exceeding max dimensions will cause failure in processing.
*/
std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
std::shared_ptr<inference_core::BaseInferCore> refiner_core,
std::shared_ptr<inference_core::BaseInferCore> scorer_core,
const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
const std::vector<float> &intrinsic_in_vec);

/**
* @brief 创建一个`FoundationPose`实例
*
* @param refiner_core refiner推理核心
* @param scorer_core scorer推理核心
* @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path],
* 键值[name]在后续检测过程中用于辨别特定种类目标,**保持一致**
* @param intrinsic_in_mat 相机内参矩阵,Eigen::Matrix3f格式
* @return std::shared_ptr<Base6DofDetectionModel>
*/
std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
std::shared_ptr<inference_core::BaseInferCore> refiner_core,
std::shared_ptr<inference_core::BaseInferCore> scorer_core,
const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
const Eigen::Matrix3f &intrinsic_in_mat);
std::shared_ptr<inference_core::BaseInferCore> refiner_core,
std::shared_ptr<inference_core::BaseInferCore> scorer_core,
const std::vector<std::shared_ptr<BaseMeshLoader>> &mesh_loaders,
const Eigen::Matrix3f &intrinsic_in_mat,
const int max_input_image_height = 1080,
const int max_input_image_width = 1920);

} // namespace detection_6d

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#pragma once

#include <vector>
#include <Eigen/Dense>
#include <opencv2/core.hpp>

namespace detection_6d {

/**
* @brief Abstract base class for mesh loading and data access interfaces
*
* @note Implementations should handle different mesh formats while maintaining
* consistent vertex/face data organization
*/
class BaseMeshLoader {
public:
virtual ~BaseMeshLoader() = default;

using Vector3ui = Eigen::Matrix<uint32_t, 3, 1>;

/**
* @brief Get identifier name for the loaded mesh
* @return Mesh name string
*/
virtual std::string GetName() const noexcept = 0;

/** @brief Get diameter of the mesh's bounding sphere */
virtual float GetMeshDiameter() const noexcept = 0;

/** @brief Get number of vertices in the mesh */
virtual size_t GetMeshNumVertices() const noexcept = 0;

/** @brief Get number of triangular faces in the mesh */
virtual size_t GetMeshNumFaces() const noexcept = 0;

/** @brief Access array of vertex positions (3D coordinates) */
virtual const std::vector<Eigen::Vector3f> &GetMeshVertices() const noexcept = 0;

/** @brief Access array of vertex normals */
virtual const std::vector<Eigen::Vector3f> &GetMeshVertexNormals() const noexcept = 0;

/** @brief Access array of texture coordinates (UV mapping) */
virtual const std::vector<Eigen::Vector3f> &GetMeshTextureCoords() const noexcept = 0;

/** @brief Access array of triangular face indices */
virtual const std::vector<Vector3ui> &GetMeshTriangleFaces() const noexcept = 0;

/** @brief Get centroid position of the mesh model */
virtual const Eigen::Vector3f &GetMeshModelCenter() const noexcept = 0;

/**
* @brief Get orientation bounds transformation matrix
* @return 4x4 matrix containing axis-aligned bounding box orientation
*/
virtual const Eigen::Matrix4f &GetOrientBounds() const noexcept = 0;

/** @brief Get dimensional measurements of the object (width/height/depth) */
virtual const Eigen::Vector3f &GetObjectDimension() const noexcept = 0;

/** @brief Access texture map image for the mesh */
virtual const cv::Mat &GetTextureMap() const noexcept = 0;
};

/**
* @brief Convert pose from mesh coordinate frame to bounding box frame
*
* @param pose_in_mesh Input pose in mesh coordinate system
* @param mesh_loader Mesh loader containing transformation parameters
* @return Transformed pose in bounding box coordinate system
*
* @note Transformation formula:
* T_bbox = T_mesh * T_center * T_orient
* Where T_center translates to mesh center, T_orient aligns with bounds
*/
inline Eigen::Matrix4f ConvertPoseMesh2BBox(const Eigen::Matrix4f &pose_in_mesh,
const std::shared_ptr<BaseMeshLoader> &mesh_loader)
{
Eigen::Matrix4f tf_to_center = Eigen::Matrix4f::Identity();
tf_to_center.block<3, 1>(0, 3) = -mesh_loader->GetMeshModelCenter();
return pose_in_mesh * tf_to_center * mesh_loader->GetOrientBounds();
}

/**
* @brief Factory function for creating Assimp-based mesh loader
*
* @param name Identifier for the mesh
* @param mesh_file_path Path to mesh file (supports .obj/.ply/.stl etc.)
* @return Shared pointer to initialized mesh loader instance
*
* @note Throws std::runtime_error if mesh loading fails
*/
std::shared_ptr<BaseMeshLoader> CreateAssimpMeshLoader(const std::string &name,
const std::string &mesh_file_path);

} // namespace detection_6d
Loading