Implement proper call interface

siboehm · May 1, 2021 · 1a3dc5a · 1a3dc5a
1 parent 6e9694a
commit 1a3dc5a
Show file tree

Hide file tree

Showing 5 changed files with 118 additions and 28 deletions.
diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py
@@ -1,6 +1,8 @@
-from ctypes import CFUNCTYPE, c_double, c_int
+import ctypes
+from ctypes import CFUNCTYPE, POINTER, c_double
 
 import llvmlite.binding as llvm
+import numpy as np
 
 from lleaves.tree_compiler import ir_from_model_file
 from lleaves.tree_compiler.ast import parser
@@ -60,24 +62,51 @@ def compile(self):
         Generate the LLVM IR for this model and compile it to ASM
         This function can be called multiple times, but will only compile once.
         """
-        if not self._compiled_module:
-            # Create a LLVM module object from the IR
-            module = llvm.parse_assembly(str(self.ir_module))
-            module.verify()
-
-            # add module and make sure it is ready for execution
-            self.execution_engine.add_module(module)
-            self.execution_engine.finalize_object()
-            self.execution_engine.run_static_constructors()
-            self._compiled_module = module
-
-            # construct entry func
-            addr = self._execution_engine.get_function_address("forest_root")
-            self._c_entry_func = CFUNCTYPE(
-                c_double,
-                *[c_int if is_int else c_double for is_int in self.categorical_bitmap]
-            )(addr)
-
-    def predict(self, arrs: list):
+        if self._compiled_module:
+            return
+
+        # Create a LLVM module object from the IR
+        module = llvm.parse_assembly(str(self.ir_module))
+        module.verify()
+
+        # add module and make sure it is ready for execution
+        self.execution_engine.add_module(module)
+        self.execution_engine.finalize_object()
+        self.execution_engine.run_static_constructors()
+        self._compiled_module = module
+
+        # construct entry func
+        addr = self._execution_engine.get_function_address("forest_root")
+        self._c_entry_func = CFUNCTYPE(None, POINTER(c_double), POINTER(c_double))(addr)
+
+    def predict(self, data):
+        """
+        Run the compiled model on ``data`` and return the predictions.
+
+        :param data: 2D list or numpy.ndarray (validated by ``_to_1d_ndarray``).
+        :return: 1D float64 numpy.ndarray with one slot per input row.
+        :raises ValueError: propagated from ``_to_1d_ndarray`` for invalid input.
+        """
         self.compile()
-        return [self._c_entry_func(*arr) for arr in arrs]
+
+        data, n_preds = self._to_1d_ndarray(data)
+        # raw double* into the flattened input buffer for the native function
+        ptr_data = data.ctypes.data_as(POINTER(c_double))
+
+        # output buffer, zero-initialised, filled in place by the native function
+        preds = np.zeros(n_preds, dtype=np.float64)
+        ptr_preds = preds.ctypes.data_as(POINTER(c_double))
+        # NOTE(review): the column count of `data` is not checked against the
+        # number of values the compiled function reads, and forest_root as
+        # generated in nodes.py in this commit stores a single result at the
+        # start of the output buffer, so rows after the first appear to keep
+        # their initial 0.0 - confirm multi-row handling.
+        self._c_entry_func(ptr_data, ptr_preds)
+        return preds
+
+    def _to_1d_ndarray(self, data):
+        """
+        Validate ``data`` and flatten it for the compiled entry function.
+
+        :param data: 2D list or numpy.ndarray, one row per prediction.
+        :return: tuple ``(flat, n_preds)``; ``flat`` is a 1D, C-contiguous
+            float64 array holding the rows back to back, ``n_preds`` is the
+            number of rows.
+        :raises ValueError: if ``data`` is neither a list nor an ndarray,
+            cannot be converted to an array, or is not 2 dimensional.
+        """
+        if isinstance(data, list):
+            try:
+                data = np.array(data)
+            except Exception as err:
+                # Exception rather than BaseException, so KeyboardInterrupt and
+                # SystemExit are not turned into a ValueError
+                raise ValueError(
+                    "Cannot convert data list to appropriate np array"
+                ) from err
+
+        if not isinstance(data, np.ndarray):
+            raise ValueError(f"Expecting list or numpy.ndarray, got {type(data)}")
+        if data.ndim != 2:
+            raise ValueError(
+                f"Data must be 2 dimensional, is {data.ndim} dimensional"
+            )
+        n_preds = data.shape[0]
+        # copies only when the dtype or memory layout is not already
+        # float64 / C-order; the reshape of the contiguous result is a view
+        flat = np.ascontiguousarray(data, dtype=np.float64).reshape(-1)
+        return flat, n_preds
diff --git a/lleaves/tree_compiler/ast/nodes.py b/lleaves/tree_compiler/ast/nodes.py
@@ -6,9 +6,12 @@
 )
 
 BOOL = ir.IntType(bits=1)
-ZERO_V = ir.Constant(BOOL, 0)
 DOUBLE = ir.DoubleType()
+FLOAT = ir.FloatType()
 INT_CAT = ir.IntType(bits=32)
+ZERO_V = ir.Constant(BOOL, 0)
+FLOAT_POINTER = ir.PointerType(FLOAT)
+DOUBLE_PTR = ir.PointerType(DOUBLE)
 
 
 def scalar_func(cat_bitmap):
@@ -30,6 +33,7 @@ class Forest:
     def __init__(self, trees, categorical_bitmap):
+        # trees: the trees making up this forest (consumed outside this hunk)
+        # categorical_bitmap: one flag per feature, truthy for categorical ones
         self.trees = trees
         self.categorical_bitmap = categorical_bitmap
+        # number of features, i.e. values get_ir loads from the input pointer
+        self.n_args = len(categorical_bitmap)
 
     def get_ir(self):
         module = ir.Module(name="forest")
@@ -38,17 +42,32 @@ def get_ir(self):
 
         # entry function, do not change name
         root_func = ir.Function(
-            module, scalar_func(self.categorical_bitmap), name="forest_root"
+            module,
+            ir.FunctionType(ir.VoidType(), (DOUBLE_PTR, DOUBLE_PTR)),
+            name="forest_root",
         )
         block = root_func.append_basic_block()
         builder = ir.IRBuilder(block)
 
-        res = builder.call(tree_funcs[0], root_func.args)
+        args = []
+        raw_ptrs = [
+            builder.gep(root_func.args[0], (ir.Constant(INT_CAT, i),))
+            for i in range(self.n_args)
+        ]
+        for is_cat, ptr in zip(self.categorical_bitmap, raw_ptrs):
+            el = builder.load(ptr)
+            if is_cat:
+                args.append(builder.fptoui(el, INT_CAT))
+            else:
+                args.append(el)
+
+        res = builder.call(tree_funcs[0], args)
         for func in tree_funcs[1:]:
             # should probably inline this, but optimizer does it automatically
-            tmp = builder.call(func, root_func.args)
+            tmp = builder.call(func, args)
             res = builder.fadd(tmp, res)
-        builder.ret(res)
+        builder.store(res, root_func.args[1])
+        builder.ret_void()
 
         return module
 

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="lleaves",
-    version="",
+    version="0.0.1",
     packages=["lleaves", "lleaves.tree_compiler"],
     url="https://github.com/siboehm/LLeaVes",
     license="MIT",

diff --git a/tests/test_predict_interface.py b/tests/test_predict_interface.py
@@ -0,0 +1,40 @@
+import lightgbm as lgb
+import numpy as np
+import pytest
+
+import lleaves
+
+
+def test_interface():
+    """1D input (ndarray or list) must be rejected by both libraries alike."""
+    path = "tests/models/tiniest_single_tree/model.txt"
+    lgbm, llvm = lgb.Booster(model_file=path), lleaves.Model(path)
+
+    for flat in (np.array([1.0, 1.0, 1.0]), [1.0, 1.0, 1.0]):
+        with pytest.raises(ValueError) as llvm_err:
+            llvm.predict(flat)
+        with pytest.raises(ValueError) as lgbm_err:
+            lgbm.predict(flat)
+
+        # both error messages must name the dimensionality requirement
+        for raised in (llvm_err, lgbm_err):
+            assert "2 dimensional" in raised.value.args[0]
+
+
+@pytest.mark.parametrize(
+    "model_file, n_args",
+    [
+        ("tests/models/pure_categorical/model.txt", 3),
+        ("tests/models/tiniest_single_tree/model.txt", 3),
+    ],
+)
+def test_input_dtypes(model_file, n_args):
+    """Predictions must match LightGBM for every accepted container and dtype."""
+    lgbm = lgb.Booster(model_file=model_file)
+    llvm = lleaves.Model(model_file)
+
+    # build the single-row inputs from n_args instead of hard-coding the width
+    inputs = [
+        np.ones((1, n_args), dtype=np.float32),
+        np.ones((1, n_args), dtype=np.float64),
+        np.zeros((1, n_args), dtype=np.int32),
+        [[0] * n_args],
+    ]
+    for arr in inputs:
+        # array_equal yields one boolean even if a result has several elements
+        assert np.array_equal(llvm.predict(arr), lgbm.predict(arr))
diff --git a/tests/test_tree_output.py b/tests/test_tree_output.py
@@ -1,4 +1,5 @@
 import lightgbm
+import numpy as np
 import pytest
 from hypothesis import given, settings
 from hypothesis import strategies as st
@@ -95,7 +96,8 @@ def test_forest_llvm_mode(data, llvm_lgbm_model):
             min_size=llvm_model.num_feature(),
         )
     )
-    assert llvm_model.predict([input_data]) == lightgbm_model.predict([input_data])
+    input_data = np.array([input_data])
+    assert llvm_model.predict(input_data) == lightgbm_model.predict(input_data)
 
 
 @pytest.mark.parametrize(