[CI] Add xgboost and scikit-learn to requirements #15183

Merged: 4 commits, Apr 9, 2024
@@ -12,12 +12,13 @@
import cppyy


-def SaveXGBoost(self, xgb_model, key_name, output_path, num_inputs=None, tmp_path="/tmp", threshold_dtype="float"):
+def SaveXGBoost(self, xgb_model, key_name, output_path, num_inputs, tmp_path="/tmp", threshold_dtype="float"):
    # Extract objective
    objective_map = {
        "multi:softprob": "softmax", # Naming the objective softmax is more common today
        "binary:logistic": "logistic",
        "reg:linear": "identity",
+       "reg:squarederror": "identity",
    }
    model_objective = xgb_model.objective
    if model_objective not in objective_map:
@@ -48,7 +49,8 @@ def SaveXGBoost(self, xgb_model, key_name, output_path, num_inputs=None, tmp_pat

    import json

-   forest = json.load(open(tmp_path, "r"))
+   with open(tmp_path, "r") as json_file:
+       forest = json.load(json_file)

    # Determine whether the model has a bias parameter and write bias trees
    if hasattr(xgb_model, "base_score") and "reg:" in model_objective:
@@ -96,16 +98,6 @@ def fill_arrays(node, index, inputs_base, thresholds_base):
    for i in range(num_trees):
        outputs[i] = int(i % num_outputs)

-   # Determine number of input variables
-   if not num_inputs is None:
-       pass
-   elif hasattr(xgb_model, "_features_count"):
-       num_inputs = xgb_model._features_count
-   else:
-       raise Exception(
-           "Failed to get number of input variables from XGBoost model. Please provide the additional keyword argument 'num_inputs' to this function."
-       )
-
    # Store arrays in a ROOT file in a folder with the given key name
    # TODO: Write single values as simple integers and not vectors.
    f = cppyy.gbl.TFile(output_path, "RECREATE")
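With this change, num_inputs is a required argument of SaveXGBoost instead of being inferred from the model's _features_count attribute. A minimal usage sketch, mirroring the updated tests below (the toy dataset and file names are illustrative, not from the PR):

import numpy as np
import xgboost  # imported before ROOT; see the tutorial change at the bottom of this diff

import ROOT

# Illustrative toy dataset with 10 input features.
x = np.random.randn(1000, 10)
y = np.random.randint(0, 2, 1000)

model = xgboost.XGBClassifier(n_estimators=100, max_depth=3)
model.fit(x, y)

# num_inputs is now mandatory and must match the number of training features.
ROOT.TMVA.Experimental.SaveXGBoost(model, "myModel", "model.root", num_inputs=10)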
2 changes: 2 additions & 0 deletions requirements.txt
@@ -3,6 +3,8 @@
# PyROOT: Interoperability with numpy arrays
# TMVA: PyMVA interfaces
numpy>=1.4.1
+scikit-learn
+xgboost

# PyROOT: ROOT.Numba.Declare decorator
numba>=0.47.0 ; python_version < "3.11" # See https://github.com/numba/numba/issues/8304
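These two entries make the packages available wherever CI installs requirements.txt. A quick sanity check that both import cleanly (versions will vary by environment):

# Verify the newly listed CI dependencies resolve.
import sklearn
import xgboost

print("scikit-learn", sklearn.__version__)
print("xgboost", xgboost.__version__)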
14 changes: 8 additions & 6 deletions tmva/tmva/test/CMakeLists.txt
@@ -31,12 +31,14 @@ if(dataframe)
  ROOT_ADD_GTEST(rbdt rbdt.cxx LIBRARIES ROOTVecOps TMVAUtils)
endif()

-if(dataframe)
-  find_python_module(xgboost QUIET)
-  if (PY_XGBOOST_FOUND)
-    ROOT_ADD_PYUNITTEST(rbdt_xgboost rbdt_xgboost.py)
-  endif()
-endif()
+# Disabled because RBDT doesn't support the imbalanced tree structure of
+# XGBoost models.
+# if(dataframe)
+#   find_python_module(xgboost QUIET)
+#   if (PY_XGBOOST_FOUND)
+#     ROOT_ADD_PYUNITTEST(rbdt_xgboost rbdt_xgboost.py)
+#   endif()
+# endif()

#--stressTMVA--------------------------------------------------------------------------------------

4 changes: 2 additions & 2 deletions tmva/tmva/test/rbdt_xgboost.py
@@ -21,7 +21,7 @@ def _test_XGBBinary(backend, label):
    x, y = create_dataset(1000, 10, 2)
    xgb = xgboost.XGBClassifier(n_estimators=100, max_depth=3)
    xgb.fit(x, y)
-   ROOT.TMVA.Experimental.SaveXGBoost(xgb, "myModel", "testXGBBinary{}.root".format(label))
+   ROOT.TMVA.Experimental.SaveXGBoost(xgb, "myModel", "testXGBBinary{}.root".format(label), num_inputs=10)
    bdt = ROOT.TMVA.Experimental.RBDT[backend]("myModel", "testXGBBinary{}.root".format(label))

    y_xgb = xgb.predict_proba(x)[:, 1].squeeze()
@@ -51,7 +51,7 @@ def _test_XGBMulticlass(backend, label):
    x, y = create_dataset(1000, 10, 3)
    xgb = xgboost.XGBClassifier(n_estimators=100, max_depth=3)
    xgb.fit(x, y)
-   ROOT.TMVA.Experimental.SaveXGBoost(xgb, "myModel", "testXGBMulticlass{}.root".format(label))
+   ROOT.TMVA.Experimental.SaveXGBoost(xgb, "myModel", "testXGBMulticlass{}.root".format(label), num_inputs=10)
    bdt = ROOT.TMVA.Experimental.RBDT[backend]("myModel", "testXGBMulticlass{}.root".format(label))

    y_xgb = xgb.predict_proba(x)
7 changes: 5 additions & 2 deletions tutorials/tmva/tmva101_Training.py
@@ -12,9 +12,13 @@
## \date August 2019
## \author Stefan Wunsch

+# XGBoost has to be imported before ROOT to avoid crashes because of clashing
+# std::regexp symbols that are exported by cppyy.
+# See also: https://github.com/wlav/cppyy/issues/227
+from xgboost import XGBClassifier
import ROOT
import numpy as np
import pickle

from tmva100_DataPreparation import variables

@@ -45,7 +49,6 @@ def load_data(signal_filename, background_filename):
x, y, w = load_data("train_signal.root", "train_background.root")

# Fit xgboost model
-from xgboost import XGBClassifier
bdt = XGBClassifier(max_depth=3, n_estimators=500)
bdt.fit(x, y, sample_weight=w)

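The import-order constraint added to the tutorial applies to any script that mixes the two libraries. A minimal sketch of the safe pattern, assuming both packages are installed:

# xgboost must be imported before ROOT: cppyy exports clashing std::regexp
# symbols, and the reverse order can crash the interpreter
# (https://github.com/wlav/cppyy/issues/227).
from xgboost import XGBClassifier

import ROOT

# The model can then be trained and saved as in tmva101_Training.py.
bdt = XGBClassifier(max_depth=3, n_estimators=500)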