From 70e2a983bc623da6c3e0b342cf7a480e5663281a Mon Sep 17 00:00:00 2001
From: Adrian Gonzalez-Martin
Date: Thu, 22 Sep 2022 16:05:49 +0200
Subject: [PATCH] Update example to avoid using sparse matrix during training (#736)

---
 docs/examples/mms/README.ipynb                       |  8 +++++---
 docs/examples/mms/README.md                          |  4 +++-
 docs/examples/mms/models/mushroom-xgboost/model.json |  2 +-
 docs/examples/xgboost/README.ipynb                   |  8 +++++---
 docs/examples/xgboost/README.md                      | 11 +++--------
 docs/examples/xgboost/mushroom-xgboost.json          |  2 +-
 6 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/docs/examples/mms/README.ipynb b/docs/examples/mms/README.ipynb
index da74612af..a565461da 100644
--- a/docs/examples/mms/README.ipynb
+++ b/docs/examples/mms/README.ipynb
@@ -128,6 +128,8 @@
     "# NOTE: Workaround to load SVMLight files from the XGBoost example\n",
     "X_train, y_train = load_svmlight_file(train_dataset_path)\n",
     "X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)\n",
+    "X_train = X_train.toarray()\n",
+    "X_test_agar = X_test_agar.toarray()\n",
     "\n",
     "# read in data\n",
     "dtrain = xgb.DMatrix(data=X_train, label=y_train)\n",
@@ -318,7 +320,7 @@
     "            \"name\": \"predict\",\n",
     "            \"shape\": x_0.shape,\n",
     "            \"datatype\": \"FP32\",\n",
-    "            \"data\": x_0.toarray().tolist()\n",
+    "            \"data\": x_0.tolist()\n",
     "        }\n",
     "    ]\n",
     "}\n",
@@ -339,7 +341,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -353,7 +355,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.8"
+   "version": "3.9.8"
   }
  },
  "nbformat": 4,
diff --git a/docs/examples/mms/README.md b/docs/examples/mms/README.md
index 6822c3e33..bced2ded3 100644
--- a/docs/examples/mms/README.md
+++ b/docs/examples/mms/README.md
@@ -95,6 +95,8 @@ test_dataset_path = _download_file(TEST_DATASET_URL)
 # NOTE: Workaround to load SVMLight files from the XGBoost example
 X_train, y_train = load_svmlight_file(train_dataset_path)
 X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)
+X_train = X_train.toarray()
+X_test_agar = X_test_agar.toarray()
 
 # read in data
 dtrain = xgb.DMatrix(data=X_train, label=y_train)
@@ -223,7 +225,7 @@ inference_request = {
             "name": "predict",
             "shape": x_0.shape,
             "datatype": "FP32",
-            "data": x_0.toarray().tolist()
+            "data": x_0.tolist()
         }
     ]
 }
diff --git a/docs/examples/mms/models/mushroom-xgboost/model.json b/docs/examples/mms/models/mushroom-xgboost/model.json
index 5547e6516..05990f0d2 100644
--- a/docs/examples/mms/models/mushroom-xgboost/model.json
+++ b/docs/examples/mms/models/mushroom-xgboost/model.json
@@ -1 +1 @@
-{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.15052932500839233e-02,1.29551589488983154e+00,-1.86661934852600098e+00,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"default_left":[true,true,true,false,false,false,false],"id":0,"leaf_child_counts":[0,0,0,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.00053100585937500e+03,1.15821203613281250e+03,1.98173828125000000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-9.53674316406250000e-07,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"split_indices":[28,55,108,0,0,0,0],"sum_hessian":[1.62825000000000000e+03,9.24500000000000000e+02,7.03750000000000000e+02,8.12000000000000000e+02,1.12500000000000000e+02,6.90500000000000000e+02,1.32500000000000000e+01],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.31910648941993713e-02,7.82926455140113831e-02,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"default_left":[true,true,false,false,false],"id":1,"leaf_child_counts":[0,0,0,0,0],"left_children":[1,3,-1,-1,-1],"loss_changes":[8.32545043945312500e+02,5.69725097656250000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"split_indices":[59,28,0,0,0],"sum_hessian":[7.88852050781250000e+02,7.68389709472656250e+02,2.04623889923095703e+01,4.58936859130859375e+02,3.09452819824218750e+02],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,1,1]} \ No newline at end of file 
+{"learner":{"attributes":{"best_iteration":"1","best_ntree_limit":"2"},"feature_names":[],"feature_types":[],"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.150529E-2,1.2955159E0,-1.8666193E0,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,true,false,false,false,false],"id":0,"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.000531E3,1.158212E3,1.9817383E2,0E0,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5E-1,5E-1,5E-1,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"split_indices":[28,55,108,0,0,0,0],"split_type":[0,0,0,0,0,0,0],"sum_hessian":[1.62825E3,9.245E2,7.0375E2,8.12E2,1.125E2,6.905E2,1.325E1],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.3191065E-2,7.8292646E-2,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,false,false,false],"id":1,"left_children":[1,3,-1,-1,-1],"loss_changes":[8.3254504E2,5.697251E2,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[5E-1,5E-1,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"split_indices":[59,28,0,0,0],"split_type":[0,0,0,0,0],"sum_hessian":[7.8885205E2,7.683897E2,2.0462389E1,4.5893686E2,3.0945282E2],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"5E-1","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,5,0]} \ No newline at end of file diff --git a/docs/examples/xgboost/README.ipynb b/docs/examples/xgboost/README.ipynb index 5b6e33c34..425f8c131 100644 --- a/docs/examples/xgboost/README.ipynb +++ b/docs/examples/xgboost/README.ipynb @@ -61,6 +61,8 @@ "# NOTE: Workaround to load SVMLight files from the XGBoost example\n", "X_train, y_train = load_svmlight_file(train_dataset_path)\n", "X_test, y_test = load_svmlight_file(test_dataset_path)\n", + "X_train = X_train.toarray()\n", + "X_test = X_test.toarray()\n", "\n", "# read in data\n", "dtrain = xgb.DMatrix(data=X_train, label=y_train)\n", @@ -193,7 +195,7 @@ " \"name\": \"predict\",\n", " \"shape\": x_0.shape,\n", " \"datatype\": \"FP32\",\n", - " \"data\": x_0.toarray().tolist()\n", + " \"data\": x_0.tolist()\n", " }\n", " ]\n", "}\n", @@ -230,7 +232,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -244,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.9.8" } }, "nbformat": 4, diff --git a/docs/examples/xgboost/README.md b/docs/examples/xgboost/README.md index 6a7649dac..e5b2d3c1b 100644 --- a/docs/examples/xgboost/README.md +++ b/docs/examples/xgboost/README.md @@ -45,6 +45,8 @@ test_dataset_path = _download_file(TEST_DATASET_URL) # NOTE: Workaround to load SVMLight files from the XGBoost example X_train, y_train = load_svmlight_file(train_dataset_path) X_test, y_test = load_svmlight_file(test_dataset_path) +X_train = X_train.toarray() +X_test = X_test.toarray() # read in data dtrain = xgb.DMatrix(data=X_train, label=y_train) @@ -113,13 +115,6 @@ mlserver 
start . Since this command will start the server and block the terminal, waiting for requests, this will need to be ran in the background on a separate terminal. ---- -**NOTE** - -You may first need to install the XGBoost inference runtime for MLServer using `pip install mlserver-xgboost` - ---- - ### Send test inference request We now have our model being served by `mlserver`. @@ -138,7 +133,7 @@ inference_request = { "name": "predict", "shape": x_0.shape, "datatype": "FP32", - "data": x_0.toarray().tolist() + "data": x_0.tolist() } ] } diff --git a/docs/examples/xgboost/mushroom-xgboost.json b/docs/examples/xgboost/mushroom-xgboost.json index 5547e6516..05990f0d2 100644 --- a/docs/examples/xgboost/mushroom-xgboost.json +++ b/docs/examples/xgboost/mushroom-xgboost.json @@ -1 +1 @@ -{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.15052932500839233e-02,1.29551589488983154e+00,-1.86661934852600098e+00,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"default_left":[true,true,true,false,false,false,false],"id":0,"leaf_child_counts":[0,0,0,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.00053100585937500e+03,1.15821203613281250e+03,1.98173828125000000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-9.53674316406250000e-07,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"split_indices":[28,55,108,0,0,0,0],"sum_hessian":[1.62825000000000000e+03,9.24500000000000000e+02,7.03750000000000000e+02,8.12000000000000000e+02,1.12500000000000000e+02,6.90500000000000000e+02,1.32500000000000000e+01],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.31910648941993713e-02,7.82926455140113831e-02,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"default_left":[true,true,false,false,false],"id":1,"leaf_child_counts":[0,0,0,0,0],"left_children":[1,3,-1,-1,-1],"loss_changes":[8.32545043945312500e+02,5.69725097656250000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"split_indices":[59,28,0,0,0],"sum_hessian":[7.88852050781250000e+02,7.68389709472656250e+02,2.04623889923095703e+01,4.58936859130859375e+02,3.09452819824218750e+02],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,1,1]} \ No newline at end of file 
+{"learner":{"attributes":{"best_iteration":"1","best_ntree_limit":"2"},"feature_names":[],"feature_types":[],"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.150529E-2,1.2955159E0,-1.8666193E0,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,true,false,false,false,false],"id":0,"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.000531E3,1.158212E3,1.9817383E2,0E0,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5E-1,5E-1,5E-1,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"split_indices":[28,55,108,0,0,0,0],"split_type":[0,0,0,0,0,0,0],"sum_hessian":[1.62825E3,9.245E2,7.0375E2,8.12E2,1.125E2,6.905E2,1.325E1],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.3191065E-2,7.8292646E-2,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,false,false,false],"id":1,"left_children":[1,3,-1,-1,-1],"loss_changes":[8.3254504E2,5.697251E2,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[5E-1,5E-1,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"split_indices":[59,28,0,0,0],"split_type":[0,0,0,0,0],"sum_hessian":[7.8885205E2,7.683897E2,2.0462389E1,4.5893686E2,3.0945282E2],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"5E-1","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,5,0]} \ No newline at end of file