From 70e2a983bc623da6c3e0b342cf7a480e5663281a Mon Sep 17 00:00:00 2001
From: Adrian Gonzalez-Martin
Date: Thu, 22 Sep 2022 16:05:49 +0200
Subject: [PATCH] Update example to avoid using sparse matrix during training (#736)

---
 docs/examples/mms/README.ipynb                       |  8 +++++---
 docs/examples/mms/README.md                          |  4 +++-
 docs/examples/mms/models/mushroom-xgboost/model.json |  2 +-
 docs/examples/xgboost/README.ipynb                   |  8 +++++---
 docs/examples/xgboost/README.md                      | 11 +++--------
 docs/examples/xgboost/mushroom-xgboost.json          |  2 +-
 6 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/docs/examples/mms/README.ipynb b/docs/examples/mms/README.ipynb
index da74612af..a565461da 100644
--- a/docs/examples/mms/README.ipynb
+++ b/docs/examples/mms/README.ipynb
@@ -128,6 +128,8 @@
     "# NOTE: Workaround to load SVMLight files from the XGBoost example\n",
     "X_train, y_train = load_svmlight_file(train_dataset_path)\n",
     "X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)\n",
+    "X_train = X_train.toarray()\n",
+    "X_test_agar = X_test_agar.toarray()\n",
     "\n",
     "# read in data\n",
     "dtrain = xgb.DMatrix(data=X_train, label=y_train)\n",
@@ -318,7 +320,7 @@
     "            \"name\": \"predict\",\n",
     "            \"shape\": x_0.shape,\n",
     "            \"datatype\": \"FP32\",\n",
-    "            \"data\": x_0.toarray().tolist()\n",
+    "            \"data\": x_0.tolist()\n",
     "        }\n",
     "    ]\n",
     "}\n",
@@ -339,7 +341,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -353,7 +355,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.8"
+   "version": "3.9.8"
   }
  },
  "nbformat": 4,
diff --git a/docs/examples/mms/README.md b/docs/examples/mms/README.md
index 6822c3e33..bced2ded3 100644
--- a/docs/examples/mms/README.md
+++ b/docs/examples/mms/README.md
@@ -95,6 +95,8 @@ test_dataset_path = _download_file(TEST_DATASET_URL)
 # NOTE: Workaround to load SVMLight files from the XGBoost example
 X_train, y_train = load_svmlight_file(train_dataset_path)
 X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)
+X_train = X_train.toarray()
+X_test_agar = X_test_agar.toarray()
 
 # read in data
 dtrain = xgb.DMatrix(data=X_train, label=y_train)
@@ -223,7 +225,7 @@ inference_request = {
             "name": "predict",
             "shape": x_0.shape,
             "datatype": "FP32",
-            "data": x_0.toarray().tolist()
+            "data": x_0.tolist()
         }
     ]
 }
diff --git a/docs/examples/mms/models/mushroom-xgboost/model.json b/docs/examples/mms/models/mushroom-xgboost/model.json
index 5547e6516..05990f0d2 100644
--- a/docs/examples/mms/models/mushroom-xgboost/model.json
+++ b/docs/examples/mms/models/mushroom-xgboost/model.json
@@ -1 +1 @@
-{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.15052932500839233e-02,1.29551589488983154e+00,-1.86661934852600098e+00,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"default_left":[true,true,true,false,false,false,false],"id":0,"leaf_child_counts":[0,0,0,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.00053100585937500e+03,1.15821203613281250e+03,1.98173828125000000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-9.53674316406250000e-07,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"split_indices":[28,55,108,0,0,0,0],"sum_hessian":[1.62825000000000000e+03,9.24500000000000000e+02,7.03750000000000000e+02,8.12000000000000000e+02,1.12500000000000000e+02,6.90500000000000000e+02,1.32500000000000000e+01],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.31910648941993713e-02,7.82926455140113831e-02,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"default_left":[true,true,false,false,false],"id":1,"leaf_child_counts":[0,0,0,0,0],"left_children":[1,3,-1,-1,-1],"loss_changes":[8.32545043945312500e+02,5.69725097656250000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"split_indices":[59,28,0,0,0],"sum_hessian":[7.88852050781250000e+02,7.68389709472656250e+02,2.04623889923095703e+01,4.58936859130859375e+02,3.09452819824218750e+02],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,1,1]} \ No newline at end of file 
+{"learner":{"attributes":{"best_iteration":"1","best_ntree_limit":"2"},"feature_names":[],"feature_types":[],"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.150529E-2,1.2955159E0,-1.8666193E0,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,true,false,false,false,false],"id":0,"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.000531E3,1.158212E3,1.9817383E2,0E0,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5E-1,5E-1,5E-1,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"split_indices":[28,55,108,0,0,0,0],"split_type":[0,0,0,0,0,0,0],"sum_hessian":[1.62825E3,9.245E2,7.0375E2,8.12E2,1.125E2,6.905E2,1.325E1],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.3191065E-2,7.8292646E-2,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,false,false,false],"id":1,"left_children":[1,3,-1,-1,-1],"loss_changes":[8.3254504E2,5.697251E2,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[5E-1,5E-1,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"split_indices":[59,28,0,0,0],"split_type":[0,0,0,0,0],"sum_hessian":[7.8885205E2,7.683897E2,2.0462389E1,4.5893686E2,3.0945282E2],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"5E-1","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,5,0]} \ No newline at end of file diff --git a/docs/examples/xgboost/README.ipynb b/docs/examples/xgboost/README.ipynb index 5b6e33c34..425f8c131 100644 --- a/docs/examples/xgboost/README.ipynb +++ b/docs/examples/xgboost/README.ipynb @@ -61,6 +61,8 @@ "# NOTE: Workaround to load SVMLight files from the XGBoost example\n", "X_train, y_train = load_svmlight_file(train_dataset_path)\n", "X_test, y_test = load_svmlight_file(test_dataset_path)\n", + "X_train = X_train.toarray()\n", + "X_test = X_test.toarray()\n", "\n", "# read in data\n", "dtrain = xgb.DMatrix(data=X_train, label=y_train)\n", @@ -193,7 +195,7 @@ " \"name\": \"predict\",\n", " \"shape\": x_0.shape,\n", " \"datatype\": \"FP32\",\n", - " \"data\": x_0.toarray().tolist()\n", + " \"data\": x_0.tolist()\n", " }\n", " ]\n", "}\n", @@ -230,7 +232,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -244,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.9.8" } }, "nbformat": 4, diff --git a/docs/examples/xgboost/README.md b/docs/examples/xgboost/README.md index 6a7649dac..e5b2d3c1b 100644 --- a/docs/examples/xgboost/README.md +++ b/docs/examples/xgboost/README.md @@ -45,6 +45,8 @@ test_dataset_path = _download_file(TEST_DATASET_URL) # NOTE: Workaround to load SVMLight files from the XGBoost example X_train, y_train = load_svmlight_file(train_dataset_path) X_test, y_test = load_svmlight_file(test_dataset_path) +X_train = X_train.toarray() +X_test = X_test.toarray() # read in data dtrain = xgb.DMatrix(data=X_train, label=y_train) @@ -113,13 +115,6 @@ mlserver 
start . Since this command will start the server and block the terminal, waiting for requests, this will need to be ran in the background on a separate terminal. ---- -**NOTE** - -You may first need to install the XGBoost inference runtime for MLServer using `pip install mlserver-xgboost` - ---- - ### Send test inference request We now have our model being served by `mlserver`. @@ -138,7 +133,7 @@ inference_request = { "name": "predict", "shape": x_0.shape, "datatype": "FP32", - "data": x_0.toarray().tolist() + "data": x_0.tolist() } ] } diff --git a/docs/examples/xgboost/mushroom-xgboost.json b/docs/examples/xgboost/mushroom-xgboost.json index 5547e6516..05990f0d2 100644 --- a/docs/examples/xgboost/mushroom-xgboost.json +++ b/docs/examples/xgboost/mushroom-xgboost.json @@ -1 +1 @@ -{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.15052932500839233e-02,1.29551589488983154e+00,-1.86661934852600098e+00,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"default_left":[true,true,true,false,false,false,false],"id":0,"leaf_child_counts":[0,0,0,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.00053100585937500e+03,1.15821203613281250e+03,1.98173828125000000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-9.53674316406250000e-07,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"split_indices":[28,55,108,0,0,0,0],"sum_hessian":[1.62825000000000000e+03,9.24500000000000000e+02,7.03750000000000000e+02,8.12000000000000000e+02,1.12500000000000000e+02,6.90500000000000000e+02,1.32500000000000000e+01],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.31910648941993713e-02,7.82926455140113831e-02,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"default_left":[true,true,false,false,false],"id":1,"leaf_child_counts":[0,0,0,0,0],"left_children":[1,3,-1,-1,-1],"loss_changes":[8.32545043945312500e+02,5.69725097656250000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"split_indices":[59,28,0,0,0],"sum_hessian":[7.88852050781250000e+02,7.68389709472656250e+02,2.04623889923095703e+01,4.58936859130859375e+02,3.09452819824218750e+02],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,1,1]} \ No newline at end of file 
+{"learner":{"attributes":{"best_iteration":"1","best_ntree_limit":"2"},"feature_names":[],"feature_types":[],"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.150529E-2,1.2955159E0,-1.8666193E0,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,true,false,false,false,false],"id":0,"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.000531E3,1.158212E3,1.9817383E2,0E0,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5E-1,5E-1,5E-1,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"split_indices":[28,55,108,0,0,0,0],"split_type":[0,0,0,0,0,0,0],"sum_hessian":[1.62825E3,9.245E2,7.0375E2,8.12E2,1.125E2,6.905E2,1.325E1],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.3191065E-2,7.8292646E-2,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,false,false,false],"id":1,"left_children":[1,3,-1,-1,-1],"loss_changes":[8.3254504E2,5.697251E2,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[5E-1,5E-1,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"split_indices":[59,28,0,0,0],"split_type":[0,0,0,0,0],"sum_hessian":[7.8885205E2,7.683897E2,2.0462389E1,4.5893686E2,3.0945282E2],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"5E-1","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,5,0]} \ No newline at end of file