Skip to content

Commit

Permalink
Implement proper call interface
Browse files Browse the repository at this point in the history
  • Loading branch information
siboehm committed May 1, 2021
1 parent 6e9694a commit 1a3dc5a
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 28 deletions.
71 changes: 50 additions & 21 deletions lleaves/lleaves.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from ctypes import CFUNCTYPE, c_double, c_int
import ctypes
from ctypes import CFUNCTYPE, POINTER, c_double

import llvmlite.binding as llvm
import numpy as np

from lleaves.tree_compiler import ir_from_model_file
from lleaves.tree_compiler.ast import parser
Expand Down Expand Up @@ -60,24 +62,51 @@ def compile(self):
Generate the LLVM IR for this model and compile it to ASM
This function can be called multiple times, but will only compile once.
"""
if not self._compiled_module:
# Create a LLVM module object from the IR
module = llvm.parse_assembly(str(self.ir_module))
module.verify()

# add module and make sure it is ready for execution
self.execution_engine.add_module(module)
self.execution_engine.finalize_object()
self.execution_engine.run_static_constructors()
self._compiled_module = module

# construct entry func
addr = self._execution_engine.get_function_address("forest_root")
self._c_entry_func = CFUNCTYPE(
c_double,
*[c_int if is_int else c_double for is_int in self.categorical_bitmap]
)(addr)

def predict(self, arrs: list):
if self._compiled_module:
return

# Create a LLVM module object from the IR
module = llvm.parse_assembly(str(self.ir_module))
module.verify()

# add module and make sure it is ready for execution
self.execution_engine.add_module(module)
self.execution_engine.finalize_object()
self.execution_engine.run_static_constructors()
self._compiled_module = module

# construct entry func
addr = self._execution_engine.get_function_address("forest_root")
self._c_entry_func = CFUNCTYPE(None, POINTER(c_double), POINTER(c_double))(addr)

def predict(self, data):
    """
    Return the model's prediction for every row of ``data``.

    :param data: 2D ``numpy.ndarray`` or nested list of shape
        (n_rows, n_features); validated by ``_to_1d_ndarray``.
    :return: 1D ``numpy.ndarray`` of dtype float64 with one entry per row.
    :raises ValueError: if ``data`` is not a 2D list / ndarray.
    """
    self.compile()

    flat, n_preds = self._to_1d_ndarray(data)
    preds = np.zeros(n_preds, dtype=np.float64)
    if n_preds == 0:
        # never hand pointers to empty buffers to native code
        return preds

    # The compiled entry point reads the features of ONE row from its first
    # pointer and stores ONE result through its second pointer, so it is
    # invoked once per row on views into the flat buffers.
    n_features = flat.size // n_preds
    for row in range(n_preds):
        row_in = flat[row * n_features : (row + 1) * n_features]
        row_out = preds[row : row + 1]
        self._c_entry_func(
            row_in.ctypes.data_as(POINTER(c_double)),
            row_out.ctypes.data_as(POINTER(c_double)),
        )
    return preds

def _to_1d_ndarray(self, data):
    """
    Validate ``data`` and flatten it into a dense 1D float64 array.

    :param data: 2D ``numpy.ndarray`` or nested list.
    :return: tuple ``(flat, n_preds)``. ``flat`` is a C-contiguous 1D
        float64 array with the rows of ``data`` laid out back to back,
        ``n_preds`` is the number of rows (``data.shape[0]``).
    :raises ValueError: if ``data`` is neither a list nor an ndarray, cannot
        be converted to an array, or is not 2 dimensional.
    """
    if isinstance(data, list):
        try:
            data = np.array(data)
        except Exception as err:
            # Exception (not BaseException) so Ctrl-C / SystemExit propagate
            raise ValueError(
                "Cannot convert data list to appropriate np array"
            ) from err

    if not isinstance(data, np.ndarray):
        raise ValueError(f"Expecting list or numpy.ndarray, got {type(data)}")
    if data.ndim != 2:
        raise ValueError(f"Data must be 2 dimensional, is {data.ndim} dimensional")
    n_preds = data.shape[0]

    # The result is handed to native code as a raw pointer, so it has to be
    # dense: ascontiguousarray copies only when the dtype or memory layout
    # requires it, and flattening a C-contiguous array is a free view.
    flat = np.ascontiguousarray(data, dtype=np.float64).reshape(-1)
    return flat, n_preds
29 changes: 24 additions & 5 deletions lleaves/tree_compiler/ast/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
)

# LLVM IR types and constants used when emitting the forest IR
BOOL = ir.IntType(bits=1)
DOUBLE = ir.DoubleType()
FLOAT = ir.FloatType()
# categorical feature values are converted to this integer type
INT_CAT = ir.IntType(bits=32)
ZERO_V = ir.Constant(BOOL, 0)
FLOAT_POINTER = ir.PointerType(FLOAT)
DOUBLE_PTR = ir.PointerType(DOUBLE)


def scalar_func(cat_bitmap):
Expand All @@ -30,6 +33,7 @@ class Forest:
def __init__(self, trees, categorical_bitmap):
    """Keep the trees and the per-feature categorical flags of the forest."""
    self.categorical_bitmap = categorical_bitmap
    self.trees = trees
    # one entry function argument per flag in the bitmap
    self.n_args = len(categorical_bitmap)

def get_ir(self):
module = ir.Module(name="forest")
Expand All @@ -38,17 +42,32 @@ def get_ir(self):

# entry function, do not change name
root_func = ir.Function(
module, scalar_func(self.categorical_bitmap), name="forest_root"
module,
ir.FunctionType(ir.VoidType(), (DOUBLE_PTR, DOUBLE_PTR)),
name="forest_root",
)
block = root_func.append_basic_block()
builder = ir.IRBuilder(block)

res = builder.call(tree_funcs[0], root_func.args)
args = []
raw_ptrs = [
builder.gep(root_func.args[0], (ir.Constant(INT_CAT, i),))
for i in range(self.n_args)
]
for is_cat, ptr in zip(self.categorical_bitmap, raw_ptrs):
el = builder.load(ptr)
if is_cat:
args.append(builder.fptoui(el, INT_CAT))
else:
args.append(el)

res = builder.call(tree_funcs[0], args)
for func in tree_funcs[1:]:
# should probably inline this, but optimizer does it automatically
tmp = builder.call(func, root_func.args)
tmp = builder.call(func, args)
res = builder.fadd(tmp, res)
builder.ret(res)
builder.store(res, root_func.args[1])
builder.ret_void()

return module

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="lleaves",
version="",
version="0.0.1",
packages=["lleaves", "lleaves.tree_compiler"],
url="https://github.com/siboehm/LLeaVes",
license="MIT",
Expand Down
40 changes: 40 additions & 0 deletions tests/test_predict_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import lightgbm as lgb
import numpy as np
import pytest

import lleaves


def test_interface():
    """1D input must be rejected by both libraries with a "2 dimensional" ValueError."""
    lgbm = lgb.Booster(model_file="tests/models/tiniest_single_tree/model.txt")
    llvm = lleaves.Model("tests/models/tiniest_single_tree/model.txt")

    one_dim_inputs = [np.array([1.0, 1.0, 1.0]), [1.0, 1.0, 1.0]]
    for bad_input in one_dim_inputs:
        with pytest.raises(ValueError) as llvm_err:
            llvm.predict(bad_input)
        with pytest.raises(ValueError) as lgbm_err:
            lgbm.predict(bad_input)

        # same wording expected from the compiled model and from LightGBM
        llvm_msg = llvm_err.value.args[0]
        lgbm_msg = lgbm_err.value.args[0]
        assert "2 dimensional" in llvm_msg
        assert "2 dimensional" in lgbm_msg


@pytest.mark.parametrize(
    "model_file, n_args",
    [
        ("tests/models/pure_categorical/model.txt", 3),
        ("tests/models/tiniest_single_tree/model.txt", 3),
    ],
)
def test_input_dtypes(model_file, n_args):
    """Predictions must match LightGBM for every supported input container / dtype."""
    lgbm = lgb.Booster(model_file=model_file)
    llvm = lleaves.Model(model_file)

    # a single row of width n_args, once per accepted dtype / container
    inputs = [
        np.ones((1, n_args), dtype=np.float32),
        np.ones((1, n_args), dtype=np.float64),
        np.zeros((1, n_args), dtype=np.int32),
        [[0] * n_args],
    ]
    for arr in inputs:
        # element-wise comparison; does not rely on single-element truthiness
        np.testing.assert_array_equal(llvm.predict(arr), lgbm.predict(arr))
4 changes: 3 additions & 1 deletion tests/test_tree_output.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import lightgbm
import numpy as np
import pytest
from hypothesis import given, settings
from hypothesis import strategies as st
Expand Down Expand Up @@ -95,7 +96,8 @@ def test_forest_llvm_mode(data, llvm_lgbm_model):
min_size=llvm_model.num_feature(),
)
)
assert llvm_model.predict([input_data]) == lightgbm_model.predict([input_data])
input_data = np.array([input_data])
assert llvm_model.predict(input_data) == lightgbm_model.predict(input_data)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 1a3dc5a

Please sign in to comment.