Update example to avoid using sparse matrix during training (#736)
Adrian Gonzalez-Martin authored Sep 22, 2022
1 parent cd4ee9a commit 70e2a98
Showing 6 changed files with 18 additions and 17 deletions.
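
The change is the same in both examples: `load_svmlight_file` returns a SciPy sparse (CSR) matrix, which the examples now convert to a dense NumPy array with `.toarray()` before building the `xgb.DMatrix`, so the rest of each walkthrough (including the inference request) works on plain arrays. Below is a minimal sketch of that pattern; the dataset file paths are placeholders standing in for the agaricus (mushroom) files the examples download.

```python
import xgboost as xgb
from sklearn.datasets import load_svmlight_file

# Placeholder paths standing in for the files the examples download
train_dataset_path = "agaricus.txt.train"
test_dataset_path = "agaricus.txt.test"

# load_svmlight_file() returns a SciPy sparse (CSR) matrix and a label vector
X_train, y_train = load_svmlight_file(train_dataset_path)
X_test, y_test = load_svmlight_file(test_dataset_path)

# Densify up front so the rest of the example works on plain NumPy arrays
X_train = X_train.toarray()
X_test = X_test.toarray()

# Build the DMatrix objects from the dense arrays, as in the updated docs
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)
```
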
8 changes: 5 additions & 3 deletions docs/examples/mms/README.ipynb
@@ -128,6 +128,8 @@
"# NOTE: Workaround to load SVMLight files from the XGBoost example\n",
"X_train, y_train = load_svmlight_file(train_dataset_path)\n",
"X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)\n",
"X_train = X_train.toarray()\n",
"X_test_agar = X_test_agar.toarray()\n",
"\n",
"# read in data\n",
"dtrain = xgb.DMatrix(data=X_train, label=y_train)\n",
@@ -318,7 +320,7 @@
" \"name\": \"predict\",\n",
" \"shape\": x_0.shape,\n",
" \"datatype\": \"FP32\",\n",
" \"data\": x_0.toarray().tolist()\n",
" \"data\": x_0.tolist()\n",
" }\n",
" ]\n",
"}\n",
@@ -339,7 +341,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -353,7 +355,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.9.8"
}
},
"nbformat": 4,
4 changes: 3 additions & 1 deletion docs/examples/mms/README.md
@@ -95,6 +95,8 @@ test_dataset_path = _download_file(TEST_DATASET_URL)
# NOTE: Workaround to load SVMLight files from the XGBoost example
X_train, y_train = load_svmlight_file(train_dataset_path)
X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)
X_train = X_train.toarray()
X_test_agar = X_test_agar.toarray()

# read in data
dtrain = xgb.DMatrix(data=X_train, label=y_train)
@@ -223,7 +225,7 @@ inference_request = {
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.toarray().tolist()
"data": x_0.tolist()
}
]
}
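
With the test set already densified, the request payload no longer needs a `.toarray()` call at serialization time: the sampled row goes through `.tolist()` directly. A sketch of the updated request body under that assumption follows; the file path is a placeholder and the row is simply the first test sample.

```python
from sklearn.datasets import load_svmlight_file

# Placeholder path standing in for the downloaded agaricus test split
X_test, y_test = load_svmlight_file("agaricus.txt.test")
X_test = X_test.toarray()          # densify once, as the examples now do

x_0 = X_test[0:1]                  # first test sample, shape (1, num_features)

inference_request = {
    "inputs": [
        {
            "name": "predict",
            "shape": x_0.shape,    # a tuple; serialized as a JSON list when posted
            "datatype": "FP32",
            "data": x_0.tolist(),  # already dense, so no .toarray() needed here
        }
    ]
}
```
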
2 changes: 1 addition & 1 deletion docs/examples/mms/models/mushroom-xgboost/model.json
@@ -1 +1 @@
{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.15052932500839233e-02,1.29551589488983154e+00,-1.86661934852600098e+00,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"default_left":[true,true,true,false,false,false,false],"id":0,"leaf_child_counts":[0,0,0,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.00053100585937500e+03,1.15821203613281250e+03,1.98173828125000000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-9.53674316406250000e-07,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"split_indices":[28,55,108,0,0,0,0],"sum_hessian":[1.62825000000000000e+03,9.24500000000000000e+02,7.03750000000000000e+02,8.12000000000000000e+02,1.12500000000000000e+02,6.90500000000000000e+02,1.32500000000000000e+01],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.31910648941993713e-02,7.82926455140113831e-02,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"default_left":[true,true,false,false,false],"id":1,"leaf_child_counts":[0,0,0,0,0],"left_children":[1,3,-1,-1,-1],"loss_changes":[8.32545043945312500e+02,5.69725097656250000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"split_indices":[59,28,0,0,0],"sum_hessian":[7.88852050781250000e+02,7.68389709472656250e+02,2.04623889923095703e+01,4.58936859130859375e+02,3.09452819824218750e+02],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,1,1]}
{"learner":{"attributes":{"best_iteration":"1","best_ntree_limit":"2"},"feature_names":[],"feature_types":[],"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.150529E-2,1.2955159E0,-1.8666193E0,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,true,false,false,false,false],"id":0,"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.000531E3,1.158212E3,1.9817383E2,0E0,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5E-1,5E-1,5E-1,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"split_indices":[28,55,108,0,0,0,0],"split_type":[0,0,0,0,0,0,0],"sum_hessian":[1.62825E3,9.245E2,7.0375E2,8.12E2,1.125E2,6.905E2,1.325E1],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.3191065E-2,7.8292646E-2,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,false,false,false],"id":1,"left_children":[1,3,-1,-1,-1],"loss_changes":[8.3254504E2,5.697251E2,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[5E-1,5E-1,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"split_indices":[59,28,0,0,0],"split_type":[0,0,0,0,0],"sum_hessian":[7.8885205E2,7.683897E2,2.0462389E1,4.5893686E2,3.0945282E2],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"5E-1","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,5,0]}
8 changes: 5 additions & 3 deletions docs/examples/xgboost/README.ipynb
@@ -61,6 +61,8 @@
"# NOTE: Workaround to load SVMLight files from the XGBoost example\n",
"X_train, y_train = load_svmlight_file(train_dataset_path)\n",
"X_test, y_test = load_svmlight_file(test_dataset_path)\n",
"X_train = X_train.toarray()\n",
"X_test = X_test.toarray()\n",
"\n",
"# read in data\n",
"dtrain = xgb.DMatrix(data=X_train, label=y_train)\n",
@@ -193,7 +195,7 @@
" \"name\": \"predict\",\n",
" \"shape\": x_0.shape,\n",
" \"datatype\": \"FP32\",\n",
" \"data\": x_0.toarray().tolist()\n",
" \"data\": x_0.tolist()\n",
" }\n",
" ]\n",
"}\n",
@@ -230,7 +232,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -244,7 +246,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.9.8"
}
},
"nbformat": 4,
11 changes: 3 additions & 8 deletions docs/examples/xgboost/README.md
@@ -45,6 +45,8 @@ test_dataset_path = _download_file(TEST_DATASET_URL)
# NOTE: Workaround to load SVMLight files from the XGBoost example
X_train, y_train = load_svmlight_file(train_dataset_path)
X_test, y_test = load_svmlight_file(test_dataset_path)
X_train = X_train.toarray()
X_test = X_test.toarray()

# read in data
dtrain = xgb.DMatrix(data=X_train, label=y_train)
@@ -113,13 +115,6 @@ mlserver start .

Since this command will start the server and block the terminal, waiting for requests, this will need to be run in the background on a separate terminal.

---
**NOTE**

You may first need to install the XGBoost inference runtime for MLServer using `pip install mlserver-xgboost`

---

### Send test inference request

We now have our model being served by `mlserver`.
@@ -138,7 +133,7 @@ inference_request = {
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.toarray().tolist()
"data": x_0.tolist()
}
]
}
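
For completeness, a sketch of posting the updated payload to a server started with `mlserver start .`. It assumes MLServer's default HTTP port (8080), the V2 inference endpoint, and the `mushroom-xgboost` model name used by this example; an illustrative all-zeros row stands in for `x_0`.

```python
import requests

# Illustrative 126-feature row (the model expects 126 features); in the
# example this would be the densified first test sample, x_0.
x_0 = [[0.0] * 126]

inference_request = {
    "inputs": [
        {
            "name": "predict",
            "shape": [1, 126],
            "datatype": "FP32",
            "data": x_0,
        }
    ]
}

# Assumed default endpoint for a locally running MLServer instance
endpoint = "http://localhost:8080/v2/models/mushroom-xgboost/infer"
response = requests.post(endpoint, json=inference_request)
print(response.json())
```
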
2 changes: 1 addition & 1 deletion docs/examples/xgboost/mushroom-xgboost.json
@@ -1 +1 @@
{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.15052932500839233e-02,1.29551589488983154e+00,-1.86661934852600098e+00,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"default_left":[true,true,true,false,false,false,false],"id":0,"leaf_child_counts":[0,0,0,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.00053100585937500e+03,1.15821203613281250e+03,1.98173828125000000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-9.53674316406250000e-07,1.71217715740203857e+00,-1.70044052600860596e+00,-1.94070863723754883e+00,1.85964918136596680e+00],"split_indices":[28,55,108,0,0,0,0],"sum_hessian":[1.62825000000000000e+03,9.24500000000000000e+02,7.03750000000000000e+02,8.12000000000000000e+02,1.12500000000000000e+02,6.90500000000000000e+02,1.32500000000000000e+01],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.31910648941993713e-02,7.82926455140113831e-02,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"default_left":[true,true,false,false,false],"id":1,"leaf_child_counts":[0,0,0,0,0],"left_children":[1,3,-1,-1,-1],"loss_changes":[8.32545043945312500e+02,5.69725097656250000e+02,0.00000000000000000e+00,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[-9.53674316406250000e-07,-9.53674316406250000e-07,-6.23624467849731445e+00,7.84717559814453125e-01,-9.68530356884002686e-01],"split_indices":[59,28,0,0,0],"sum_hessian":[7.88852050781250000e+02,7.68389709472656250e+02,2.04623889923095703e+01,4.58936859130859375e+02,3.09452819824218750e+02],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,1,1]}
{"learner":{"attributes":{"best_iteration":"1","best_ntree_limit":"2"},"feature_names":[],"feature_types":[],"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"2","size_leaf_vector":"0"},"tree_info":[0,0],"trees":[{"base_weights":[-7.150529E-2,1.2955159E0,-1.8666193E0,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,true,false,false,false,false],"id":0,"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[4.000531E3,1.158212E3,1.9817383E2,0E0,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5E-1,5E-1,5E-1,1.7121772E0,-1.7004405E0,-1.9407086E0,1.8596492E0],"split_indices":[28,55,108,0,0,0,0],"split_type":[0,0,0,0,0,0,0],"sum_hessian":[1.62825E3,9.245E2,7.0375E2,8.12E2,1.125E2,6.905E2,1.325E1],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-9.3191065E-2,7.8292646E-2,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"categories":[],"categories_nodes":[],"categories_segments":[],"categories_sizes":[],"default_left":[true,true,false,false,false],"id":1,"left_children":[1,3,-1,-1,-1],"loss_changes":[8.3254504E2,5.697251E2,0E0,0E0,0E0],"parents":[2147483647,0,0,1,1],"right_children":[2,4,-1,-1,-1],"split_conditions":[5E-1,5E-1,-6.2362447E0,7.8471756E-1,-9.6853036E-1],"split_indices":[59,28,0,0,0],"split_type":[0,0,0,0,0],"sum_hessian":[7.8885205E2,7.683897E2,2.0462389E1,4.5893686E2,3.0945282E2],"tree_param":{"num_deleted":"0","num_feature":"126","num_nodes":"5","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"5E-1","num_class":"0","num_feature":"126"},"objective":{"name":"binary:logistic","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,5,0]}
