zz990099 · zz990099 · Apr 8, 2025 · Apr 7, 2025 · Apr 7, 2025
diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@
 
 ## Update LOG
 
+- [2025.04] 解耦`Register`和`Track`过程; 令算法输出mesh下的位姿，提供mesh_loader相关的接口和方法供外部拓展。
 - [2025.03] 渲染过程与原Python工程对齐，支持无texture纹理输入渲染. [对应PR](https://github.com/zz990099/foundationpose_cpp/pull/13).
 - [2025.03] 添加对Jetson Orin平台支持，[一键配置docker环境](docs/build_enviroment_on_jetson.md)
 

diff --git a/detection_6d_foundationpose/CMakeLists.txt b/detection_6d_foundationpose/CMakeLists.txt
@@ -39,11 +39,10 @@ set(source_file src/nvdiffrast/common/cudaraster/impl/Buffer.cpp
                 src/foundationpose_render.cu
                 src/foundationpose_sampling.cpp
                 src/foundationpose_sampling.cu
-                src/foundationpose_utils.cpp
                 src/foundationpose_utils.cu
-                src/foundationpose_decoder.cpp
                 src/foundationpose_decoder.cu
                 src/foundationpose.cpp
+                src/mesh_loader/assimp_mesh_loader.cpp
 )
 
 include_directories(

diff --git a/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp b/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
@@ -1,134 +1,103 @@
-#ifndef __FOUNDATIONPOSE_H
-#define __FOUNDATIONPOSE_H
+#pragma once
 
 #include <opencv2/core.hpp>
 #include <Eigen/Dense>
 
+#include "detection_6d_foundationpose/mesh_loader.hpp"
 #include "deploy_core/base_infer_core.h"
 
 namespace detection_6d {
 
+/**
+ * @brief Abstract base class for 6-DoF object detection models
+ *
+ * @note Implementations should handle both object registration and tracking
+ */
 class Base6DofDetectionModel {
 public:
   /**
-   * @brief 实现的接口应支持动态输入尺寸，并检查rgb/depth/mask尺寸是否一致.
+   * @brief Register object instance with initial pose estimation
    *
-   * @note
-   * 相机内参与原始图像的尺寸是绑定的，不可在外部直接对图像进行resize操作，若需要进行resize，应对内参intrinsic同样处理
+   * @note Implementation must:
+   *       - Verify consistent RGB/depth/mask dimensions
+   *       - Maintain binding between camera intrinsics and original image size
+   *         (resizing requires corresponding intrinsic adjustment)
    *
-   * @param rgb rgb图像，必须是`rgb`格式，从opencv-imread读取的图像默认是bgr格式，需经过转换
-   * @param depth 获取的深度图像，cv::Mat数据格式为CV_32F1
-   * @param mask 目标物的mask图像，cv::Mat数据格式为CV_8UC1，positive的像素值大于0即可
-   * @param target_name 目标物的名称，与构建时提供的name->mesh_path映射一致
-   * @param out_pose 输出位姿
-   * @return true
-   * @return false
+   * @param rgb Input RGB image (must be in RGB format, convert if using opencv-imread's default
+   * BGR)
+   * @param depth Depth image (CV_32FC1 format)
+   * @param mask Object mask (CV_8UC1 format, positive pixels > 0)
+   * @param target_name Object category name (must match construction mapping)
+   * @param out_pose_in_mesh Output pose in mesh coordinate frame
+   * @return true Registration successful
+   * @return false Registration failed
    */
   virtual bool Register(const cv::Mat     &rgb,
                         const cv::Mat     &depth,
                         const cv::Mat     &mask,
                         const std::string &target_name,
-                        Eigen::Matrix4f   &out_pose) = 0;
+                        Eigen::Matrix4f   &out_pose_in_mesh) = 0;
+
   /**
-   * @brief
-   * 从第二帧开始的跟踪过程，是`Register`的轻量化版本，精度稍低但推理效率非常高，调用前必须先调用`Register`
+   * @brief Track object pose from subsequent frames (lightweight version of Register)
+   *
+   * @note Requires prior successful Register call
+   *       - Lower accuracy but higher efficiency than Register
+   *       - Accepts pose hypotheses from external sources
    *
-   * @param rgb rgb图像，必须是`rgb`格式，从opencv-imread读取的图像默认是bgr格式，需经过转换
-   * @param depth 获取的深度图像，cv::Mat数据格式为CV_32F1
-   * @param target_name 目标物的名称，与构建时提供的name->mesh_path映射一致
-   * @param out_pose 输出位姿
-   * @return true
-   * @return false
+   * @param rgb Input RGB image (must be in RGB format)
+   * @param depth Depth image (CV_32FC1 format)
+   * @param hyp_pose_in_mesh Hypothesis pose in mesh frame (from Register or other sources)
+   * @param target_name Object category name (must match construction mapping)
+   * @param out_pose_in_mesh Output pose in mesh coordinate frame
+   * @return true Tracking successful
+   * @return false Tracking failed
    */
-  virtual bool Track(const cv::Mat     &rgb,
-                     const cv::Mat     &depth,
-                     const std::string &target_name,
-                     Eigen::Matrix4f   &out_pose) = 0;
+  virtual bool Track(const cv::Mat         &rgb,
+                     const cv::Mat         &depth,
+                     const Eigen::Matrix4f &hyp_pose_in_mesh,
+                     const std::string     &target_name,
+                     Eigen::Matrix4f       &out_pose_in_mesh) = 0;
 
   /**
-   * @brief 获取某个输入mesh目标的三维尺寸(辅助功能，用户也可自己计算)
-   *
-   * @return Eigen::Vector3f
+   * @brief Virtual destructor for proper resource cleanup
    */
-  virtual Eigen::Vector3f GetObjectDimension(const std::string &target_name) const
-  {
-    throw std::runtime_error("[Base6DofDetectionModel] GetOjbectDimension NOT Implemented yet!!!");
-  };
-
   virtual ~Base6DofDetectionModel() = default;
 
 protected:
+  /**
+   * @brief Construct a new base 6-DoF detection model
+   * @note Protected constructor for abstract base class
+   */
   Base6DofDetectionModel() = default;
 };
 
 /**
- * @brief 创建一个`FoundationPose`实例
+ * @brief Factory function for creating FoundationPose model instances
  *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param mesh_file_path 使用的三维模型`mesh_file`路径
- * @param texture_file_path 使用的三维模型外观特征图像路径
- * @param intrinsic_in_vec 相机内参矩阵，std::vector<float>形式，`row_major`格式
- * @return std::shared_ptr<Base6DofDetectionModel>
- */
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string                             &target_name,
-    const std::string                             &mesh_file_path,
-    const std::string                             &texture_file_path,
-    const std::vector<float>                      &intrinsic_in_vec);
-
-/**
- * @brief 创建一个`FoundationPose`实例
+ * @param refiner_core Refinement inference core handle
+ * @param scorer_core Scoring inference core handle
+ * @param mesh_loaders Registered 3D mesh loaders
+ * @param intrinsic_in_mat Camera intrinsic matrix (3x3, Eigen::Matrix3f)
+ * @param max_input_image_height Maximum allowed input image height (pixels)
+ * @param max_input_image_width Maximum allowed input image width (pixels)
+ * @return std::shared_ptr<Base6DofDetectionModel> Initialized model instance
  *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param mesh_file_path 使用的三维模型`mesh_file`路径
- * @param texture_file_path 使用的三维模型外观特征图像路径
- * @param intrinsic_in_mat 相机内参矩阵，Eigen::Matrix3f格式
- * @return std::shared_ptr<Base6DofDetectionModel>
- */
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string                             &target_name,
-    const std::string                             &mesh_file_path,
-    const std::string                             &texture_file_path,
-    const Eigen::Matrix3f                         &intrinsic_in_mat);
-
-/**
- * @brief 创建一个`FoundationPose`实例
+ * @note Creates an implementation of the 6-DoF detection pipeline using:
+ *       - Pose refinement neural network
+ *       - Pose scoring network
+ *       - Pre-registered 3D mesh models
+ *       - Camera calibration parameters
+ *       - Input size constraints for memory optimization
  *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
- *             键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
- * @param intrinsic_in_vec 相机内参矩阵，std::vector<float>形式，`row_major`格式
- * @return std::shared_ptr<Base6DofDetectionModel>
+ * @warning Input images exceeding max dimensions will cause failure in processing.
  */
 std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const std::vector<float>                                                   &intrinsic_in_vec);
-
-/**
- * @brief 创建一个`FoundationPose`实例
- *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
- *               键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
- * @param intrinsic_in_mat 相机内参矩阵，Eigen::Matrix3f格式
- * @return std::shared_ptr<Base6DofDetectionModel>
- */
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const Eigen::Matrix3f                                                      &intrinsic_in_mat);
+    std::shared_ptr<inference_core::BaseInferCore>      refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>      scorer_core,
+    const std::vector<std::shared_ptr<BaseMeshLoader>> &mesh_loaders,
+    const Eigen::Matrix3f                              &intrinsic_in_mat,
+    const int                                           max_input_image_height = 1080,
+    const int                                           max_input_image_width  = 1920);
 
 } // namespace detection_6d
-
-#endif
diff --git a/detection_6d_foundationpose/include/detection_6d_foundationpose/mesh_loader.hpp b/detection_6d_foundationpose/include/detection_6d_foundationpose/mesh_loader.hpp
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <vector>
+#include <Eigen/Dense>
+#include <opencv2/core.hpp>
+
+namespace detection_6d {
+
+/**
+ * @brief Abstract base class for mesh loading and data access interfaces
+ *
+ * @note Implementations should handle different mesh formats while maintaining
+ *       consistent vertex/face data organization
+ */
+class BaseMeshLoader {
+public:
+  virtual ~BaseMeshLoader() = default;
+
+  using Vector3ui = Eigen::Matrix<uint32_t, 3, 1>;
+
+  /**
+   * @brief Get identifier name for the loaded mesh
+   * @return Mesh name string
+   */
+  virtual std::string GetName() const noexcept = 0;
+
+  /** @brief Get diameter of the mesh's bounding sphere */
+  virtual float GetMeshDiameter() const noexcept = 0;
+
+  /** @brief Get number of vertices in the mesh */
+  virtual size_t GetMeshNumVertices() const noexcept = 0;
+
+  /** @brief Get number of triangular faces in the mesh */
+  virtual size_t GetMeshNumFaces() const noexcept = 0;
+
+  /** @brief Access array of vertex positions (3D coordinates) */
+  virtual const std::vector<Eigen::Vector3f> &GetMeshVertices() const noexcept = 0;
+
+  /** @brief Access array of vertex normals */
+  virtual const std::vector<Eigen::Vector3f> &GetMeshVertexNormals() const noexcept = 0;
+
+  /** @brief Access array of texture coordinates (UV mapping) */
+  virtual const std::vector<Eigen::Vector3f> &GetMeshTextureCoords() const noexcept = 0;
+
+  /** @brief Access array of triangular face indices */
+  virtual const std::vector<Vector3ui> &GetMeshTriangleFaces() const noexcept = 0;
+
+  /** @brief Get centroid position of the mesh model */
+  virtual const Eigen::Vector3f &GetMeshModelCenter() const noexcept = 0;
+
+  /**
+   * @brief Get orientation bounds transformation matrix
+   * @return 4x4 matrix containing axis-aligned bounding box orientation
+   */
+  virtual const Eigen::Matrix4f &GetOrientBounds() const noexcept = 0;
+
+  /** @brief Get dimensional measurements of the object (width/height/depth) */
+  virtual const Eigen::Vector3f &GetObjectDimension() const noexcept = 0;
+
+  /** @brief Access texture map image for the mesh */
+  virtual const cv::Mat &GetTextureMap() const noexcept = 0;
+};
+
+/**
+ * @brief Convert pose from mesh coordinate frame to bounding box frame
+ *
+ * @param pose_in_mesh Input pose in mesh coordinate system
+ * @param mesh_loader Mesh loader containing transformation parameters
+ * @return Transformed pose in bounding box coordinate system
+ *
+ * @note Transformation formula:
+ *       T_bbox = T_mesh * T_center * T_orient
+ *       Where T_center translates to mesh center, T_orient aligns with bounds
+ */
+inline Eigen::Matrix4f ConvertPoseMesh2BBox(const Eigen::Matrix4f                 &pose_in_mesh,
+                                            const std::shared_ptr<BaseMeshLoader> &mesh_loader)
+{
+  Eigen::Matrix4f tf_to_center   = Eigen::Matrix4f::Identity();
+  tf_to_center.block<3, 1>(0, 3) = -mesh_loader->GetMeshModelCenter();
+  return pose_in_mesh * tf_to_center * mesh_loader->GetOrientBounds();
+}
+
+/**
+ * @brief Factory function for creating Assimp-based mesh loader
+ *
+ * @param name Identifier for the mesh
+ * @param mesh_file_path Path to mesh file (supports .obj/.ply/.stl etc.)
+ * @return Shared pointer to initialized mesh loader instance
+ *
+ * @note Throws std::runtime_error if mesh loading fails
+ */
+std::shared_ptr<BaseMeshLoader> CreateAssimpMeshLoader(const std::string &name,
+                                                       const std::string &mesh_file_path);
+
+} // namespace detection_6d