bugfix: fix pybind class bindings (#255)

yzh119 · web-flow · commit ed2030459730 · 2024-05-23T19:00:43.000-07:00
Previously we bound a factory method as the init function for the C++ classes in pybind11; the factory returns the object by value instead of by reference. As a result, the destructors of the handlers introduced in #253 are triggered twice, which leads to segmentation faults. This PR bypasses the factory method and initializes the C++ classes directly.
diff --git a/python/csrc/flashinfer_ops.cu b/python/csrc/flashinfer_ops.cu
@@ -37,19 +37,19 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("rmsnorm", &rmsnorm, "Root mean square normalization");
   py::class_<BatchDecodeWithPagedKVCachePyTorchWrapper>(m,
                                                         "BatchDecodeWithPagedKVCachePyTorchWrapper")
-      .def(py::init(&BatchDecodeWithPagedKVCachePyTorchWrapper::Create))
+      .def(py::init<unsigned int>())
       .def("begin_forward", &BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward)
       .def("end_forward", &BatchDecodeWithPagedKVCachePyTorchWrapper::EndForward)
       .def("forward", &BatchDecodeWithPagedKVCachePyTorchWrapper::Forward);
   py::class_<BatchPrefillWithPagedKVCachePyTorchWrapper>(
       m, "BatchPrefillWithPagedKVCachePyTorchWrapper")
-      .def(py::init(&BatchPrefillWithPagedKVCachePyTorchWrapper::Create))
+      .def(py::init<unsigned int>())
       .def("begin_forward", &BatchPrefillWithPagedKVCachePyTorchWrapper::BeginForward)
       .def("end_forward", &BatchPrefillWithPagedKVCachePyTorchWrapper::EndForward)
       .def("forward", &BatchPrefillWithPagedKVCachePyTorchWrapper::Forward);
   py::class_<BatchPrefillWithRaggedKVCachePyTorchWrapper>(
       m, "BatchPrefillWithRaggedKVCachePyTorchWrapper")
-      .def(py::init(&BatchPrefillWithRaggedKVCachePyTorchWrapper::Create))
+      .def(py::init<unsigned int>())
       .def("begin_forward", &BatchPrefillWithRaggedKVCachePyTorchWrapper::BeginForward)
       .def("end_forward", &BatchPrefillWithRaggedKVCachePyTorchWrapper::EndForward)
       .def("forward", &BatchPrefillWithRaggedKVCachePyTorchWrapper::Forward);
diff --git a/python/csrc/flashinfer_ops.h b/python/csrc/flashinfer_ops.h
@@ -65,9 +65,6 @@ torch::Tensor rmsnorm(torch::Tensor x, torch::Tensor w, double eps);
 
 class BatchDecodeWithPagedKVCachePyTorchWrapper {
  public:
-  static BatchDecodeWithPagedKVCachePyTorchWrapper Create(unsigned int layout) {
-    return BatchDecodeWithPagedKVCachePyTorchWrapper(layout);
-  }
   void BeginForward(torch::Tensor workspace_buffer, torch::Tensor indptr,
                     torch::Tensor last_page_len, unsigned int batch_size, unsigned int num_qo_heads,
                     unsigned int num_kv_heads, unsigned int head_dim, unsigned int page_size,
@@ -78,19 +75,16 @@ class BatchDecodeWithPagedKVCachePyTorchWrapper {
                                      torch::Tensor paged_kv_last_page_len,
                                      unsigned int pos_encoding_mode, float sm_scale,
                                      float rope_scale, float rope_theta, bool return_lse);
-
- private:
   BatchDecodeWithPagedKVCachePyTorchWrapper(unsigned int layout)
       : kv_layout_(flashinfer::QKVLayout(layout)) {}
+
+ private:
   flashinfer::BatchDecodeHandler handler_;
   flashinfer::QKVLayout kv_layout_;
 };
 
 class BatchPrefillWithPagedKVCachePyTorchWrapper {
  public:
-  static BatchPrefillWithPagedKVCachePyTorchWrapper Create(unsigned int layout) {
-    return BatchPrefillWithPagedKVCachePyTorchWrapper(layout);
-  }
   void BeginForward(torch::Tensor workspace_buffer, torch::Tensor qo_indptr,
                     unsigned int batch_size, unsigned int num_qo_heads, unsigned int num_kv_heads,
                     unsigned int head_dim);
@@ -102,19 +96,16 @@ class BatchPrefillWithPagedKVCachePyTorchWrapper {
                                      unsigned int pos_encoding_mode, bool allow_fp16_qk_reduction,
                                      float sm_scale, float rope_scale, float rope_theta,
                                      bool return_lse);
-
- private:
   BatchPrefillWithPagedKVCachePyTorchWrapper(unsigned int layout)
       : kv_layout_(flashinfer::QKVLayout(layout)) {}
+
+ private:
   flashinfer::BatchPrefillHandler handler_;
   flashinfer::QKVLayout kv_layout_;
 };
 
 class BatchPrefillWithRaggedKVCachePyTorchWrapper {
  public:
-  static BatchPrefillWithRaggedKVCachePyTorchWrapper Create(unsigned int layout) {
-    return BatchPrefillWithRaggedKVCachePyTorchWrapper(layout);
-  }
   void BeginForward(torch::Tensor workspace_buffer, torch::Tensor qo_indptr,
                     unsigned int batch_size, unsigned int num_qo_heads, unsigned int num_kv_heads,
                     unsigned int head_dim);
@@ -124,10 +115,10 @@ class BatchPrefillWithRaggedKVCachePyTorchWrapper {
                                      unsigned int pos_encoding_mode, bool allow_fp16_qk_reduction,
                                      float sm_scale, float rope_scale, float rope_theta,
                                      bool return_lse);
-
- private:
   BatchPrefillWithRaggedKVCachePyTorchWrapper(unsigned int layout)
       : kv_layout_(flashinfer::QKVLayout(layout)) {}
+
+ private:
   flashinfer::BatchPrefillHandler handler_;
   flashinfer::QKVLayout kv_layout_;
 };