forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DIST] Add Distributed XGBoost on AWS Tutorial
- Loading branch information
Showing
11 changed files
with
355 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# General Parameters, see comment for each definition | ||
# choose the booster, can be gbtree or gblinear | ||
booster = gbtree | ||
# choose logistic regression loss function for binary classification | ||
objective = binary:logistic | ||
|
||
# Tree Booster Parameters | ||
# step size shrinkage | ||
eta = 1.0 | ||
# minimum loss reduction required to make a further partition | ||
gamma = 1.0 | ||
# minimum sum of instance weight(hessian) needed in a child | ||
min_child_weight = 1 | ||
# maximum depth of a tree | ||
max_depth = 3 | ||
|
||
# Task Parameters | ||
# the number of boosting rounds to run | ||
num_round = 2 | ||
# 0 means do not save any model except the final round model | ||
save_period = 0 | ||
# The path of training data | ||
data = "s3://mybucket/xgb-demo/train" | ||
# The path of validation data, used to monitor the training process, is supplied on the command line as eval[test]=<path>; [test] sets the name of the validation set | ||
# evaluate on training data as well each round | ||
eval_train = 1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# XGBoost Model Analysis\n", | ||
"\n", | ||
"This notebook can be used to load and analyze models learned by any of the xgboost bindings, including distributed training. " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys\n", | ||
"import os\n", | ||
"%matplotlib inline " | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Please change ```pkg_path``` and ```model_file``` to the correct paths" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"pkg_path = '../../python-package/'\n", | ||
"model_file = 's3://my-bucket/xgb-demo/model/0002.model'\n", | ||
"sys.path.insert(0, pkg_path)\n", | ||
"import xgboost as xgb" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Plot the Feature Importance" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# load the trained model and plot its feature importance\n", | ||
"bst = xgb.Booster(model_file=model_file)\n", | ||
"xgb.plot_importance(bst)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Plot the First Tree" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"tree_id = 0\n", | ||
"xgb.to_graphviz(bst, tree_id)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# This is an example script to run distributed xgboost on AWS. | ||
# Change the following line (BUCKET) to match your configuration | ||
|
||
export BUCKET=mybucket | ||
|
||
# Submit the training job to YARN through the dmlc-submit tracker: 2 workers with 2 cores each. | ||
# The arguments after ../../xgboost are passed to xgboost: the config file, then key=value settings. | ||
# Keep a space before every trailing backslash so the joined arguments stay separate words, | ||
# and keep the arguments quoted so the shell never treats [test] as a glob pattern. | ||
../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2 \ | ||
    ../../xgboost mushroom.aws.conf nthread=2 \ | ||
    "data=s3://${BUCKET}/xgb-demo/train" \ | ||
    "eval[test]=s3://${BUCKET}/xgb-demo/test" \ | ||
    "model_dir=s3://${BUCKET}/xgb-demo/model" |
This file was deleted.
Oops, something went wrong.
Submodule dmlc-core
updated
26 files
+4 −1 | Makefile | |
+1 −1 | src/io/input_split_base.h | |
+42 −4 | tracker/README.md | |
+9 −0 | tracker/dmlc-submit | |
+0 −100 | tracker/dmlc_local.py | |
+0 −71 | tracker/dmlc_mpi.py | |
+0 −77 | tracker/dmlc_sge.py | |
+2 −0 | tracker/dmlc_tracker/__init__.py | |
+68 −0 | tracker/dmlc_tracker/launcher.py | |
+83 −0 | tracker/dmlc_tracker/local.py | |
+63 −0 | tracker/dmlc_tracker/mpi.py | |
+147 −0 | tracker/dmlc_tracker/opts.py | |
+48 −0 | tracker/dmlc_tracker/sge.py | |
+50 −0 | tracker/dmlc_tracker/submit.py | |
+58 −60 | tracker/dmlc_tracker/tracker.py | |
+125 −0 | tracker/dmlc_tracker/yarn.py | |
+0 −200 | tracker/dmlc_yarn.py | |
+0 −0 | tracker/yarn/.gitignore | |
+3 −3 | tracker/yarn/README.md | |
+0 −0 | tracker/yarn/build.bat | |
+0 −0 | tracker/yarn/build.sh | |
+0 −0 | tracker/yarn/pom.xml | |
+107 −52 | tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/ApplicationMaster.java | |
+17 −2 | tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/Client.java | |
+0 −0 | tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/TaskRecord.java | |
+0 −49 | yarn/run_hdfs_prog.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.