Skip to content

Commit c0ed329

Browse files
committed
Added xgboost examples for DLTK 3.3
1 parent 97d63ee commit c0ed329

File tree

4 files changed

+1380
-0
lines changed

4 files changed

+1380
-0
lines changed

app/model/shap_xgboost.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
5+
6+
# In[31]:
7+
8+
9+
# this definition exposes all python module imports that should be available in all subsequent commands
10+
import json
11+
import numpy as np
12+
import pandas as pd
13+
import xgboost
14+
import shap
15+
import matplotlib.pyplot as plt
16+
# global constants
17+
MODEL_DIRECTORY = "/srv/app/model/data/"
18+
19+
20+
21+
22+
23+
24+
25+
26+
# In[33]:
27+
28+
29+
# this cell is not executed from MLTK and should only be used for staging data into the notebook environment
30+
def stage(name):
    """Load a staged dataset and its fit parameters from the local data folder.

    Not executed from MLTK; used only to pull CSV/JSON fixtures into the
    notebook environment during development.
    """
    frame = pd.read_csv("data/" + name + ".csv")
    with open("data/" + name + ".json", 'r') as meta:
        settings = json.load(meta)
    return frame, settings
36+
37+
38+
39+
40+
41+
42+
43+
44+
# In[35]:
45+
46+
47+
# initialize your model
48+
# available inputs: data and parameters
49+
# returns the model object which will be used as a reference to call fit, apply and summary subsequently
50+
def init(df, param):
    """Create the model container dict.

    df and param are accepted for interface compatibility but are not used;
    the returned dict is filled in later by fit().
    """
    return {}
53+
54+
55+
56+
57+
58+
59+
60+
61+
# In[37]:
62+
63+
64+
# train your model
65+
# returns a fit info json object and may modify the model object
66+
def fit(model, df, param):
    """Train an XGBoost booster and precompute SHAP values for it.

    Reads an optional 'learning_rate' search option (default 0.01), trains
    for 100 boosting rounds on the configured feature/target variables, and
    stores both the booster and its SHAP values on the model dict.

    Returns the (modified) model object.
    """
    X = df[param['feature_variables']]
    y = df[param['target_variables']]
    learning_rate = 0.01
    if 'learning_rate' in param['options']['params']:
        learning_rate = float(param['options']['params']['learning_rate'].lstrip("\"").rstrip("\""))
    # BUG FIX: the parsed learning_rate was previously ignored — the call
    # always passed a hard-coded {"learning_rate": 0.01} to xgboost.train.
    model['xgboost'] = xgboost.train({"learning_rate": learning_rate}, xgboost.DMatrix(X, label=y), 100)
    # explain the model's prediction using SHAP values
    model['shap_values'] = shap.TreeExplainer(model['xgboost']).shap_values(X)
    return model
76+
77+
78+
79+
80+
81+
82+
83+
84+
# In[51]:
85+
86+
87+
# apply your model
88+
# returns the calculated results
89+
def plot_to_base64(plot):
    """Render `plot` to PNG and return the image base64-encoded as bytes.

    `plot` must expose a matplotlib-style savefig(buffer, format=...) method.
    """
    import base64
    import io
    buffer = io.BytesIO()
    plot.savefig(buffer, format='png')
    buffer.seek(0)
    return base64.b64encode(buffer.read())
97+
98+
def apply(model, df, param):
    """Score the fitted booster on df and optionally render SHAP plots.

    When a 'plot' option is present (comma-separated list of plot types)
    and SHAP values were computed during fit, each requested summary plot
    is drawn on the current matplotlib figure, which is then stored
    base64-encoded under model["plot_shap"].

    Returns the raw prediction array from the booster.
    """
    X = df[param['feature_variables']]
    result = model['xgboost'].predict(xgboost.DMatrix(X))
    if 'plot' in param['options']['params']:
        requested = param['options']['params']['plot'].lstrip("\"").rstrip("\"").lower().split(',')
        if 'shap_values' in model:
            shap_values = model['shap_values']
            plt.clf()
            for kind in requested:
                print(kind)
                if kind == 'violin':
                    shap.summary_plot(shap_values, X, show=False, plot_type="violin")
                elif kind == 'layered_violin':
                    shap.summary_plot(shap_values, X, show=False, plot_type="layered_violin", color='coolwarm')
                elif kind == 'bar':
                    shap.summary_plot(shap_values, X, show=False, plot_type="bar")
                else:
                    shap.summary_plot(shap_values, X, show=False)
            # export the current matplotlib figure
            plt.gcf().set_size_inches(10, 4)
            plt.tight_layout()
            model["plot_shap"] = plot_to_base64(plt)
    return result
121+
122+
123+
124+
125+
126+
127+
128+
129+
# In[11]:
130+
131+
132+
# save model to name in expected convention "<algo_name>_<model_name>"
133+
def save(model, name):
    """Persist the model under "<algo_name>_<model_name>" (stub).

    Serialization is intentionally left as a TODO; the model dict is
    returned unchanged.
    """
    # TODO if needed
    # with open(MODEL_DIRECTORY + name + ".json", 'w') as file:
    #     json.dump(model, file)
    return model
138+
139+
140+
141+
142+
143+
144+
145+
146+
# In[13]:
147+
148+
149+
# load model from name in expected convention "<algo_name>_<model_name>"
150+
def load(name):
    """Load a model saved under "<algo_name>_<model_name>" (stub).

    Deserialization is intentionally left as a TODO; always returns an
    empty model dict.
    """
    # TODO if needed
    # with open(MODEL_DIRECTORY + name + ".json", 'r') as file:
    #     model = json.load(file)
    return {}
156+
157+
158+
159+
160+
161+
162+
163+
164+
# In[15]:
165+
166+
167+
# return a model summary
168+
def summary(model=None):
    """Report the xgboost and shap library versions used by this algorithm."""
    versions = {"xgboost": xgboost.__version__, "shap": shap.__version__}
    return {"version": versions}
171+
172+
173+
174+
175+

app/model/xgboost_regressor.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
5+
6+
# In[1]:
7+
8+
9+
# this definition exposes all python module imports that should be available in all subsequent commands
10+
import json
11+
import numpy as np
12+
import pandas as pd
13+
from sklearn.model_selection import train_test_split
14+
from sklearn.metrics import mean_absolute_error
15+
from xgboost import XGBRegressor
16+
# ...
17+
# global constants
18+
MODEL_DIRECTORY = "/srv/app/model/data/"
19+
20+
21+
22+
23+
24+
25+
26+
27+
# In[9]:
28+
29+
30+
# this cell is not executed from MLTK and should only be used for staging data into the notebook environment
31+
def stage(name):
    """Stage a dataset and its parameters into the notebook environment.

    Not executed from MLTK; reads data/<name>.csv and data/<name>.json
    for local development only.
    """
    frame = pd.read_csv(f"data/{name}.csv")
    with open(f"data/{name}.json", 'r') as handle:
        options = json.load(handle)
    return frame, options
37+
38+
39+
40+
41+
42+
43+
44+
45+
# In[11]:
46+
47+
48+
# initialize your model
49+
# available inputs: data and parameters
50+
# returns the model object which will be used as a reference to call fit, apply and summary subsequently
51+
def init(df, param):
    """Return a fresh, unfitted XGBRegressor.

    df and param are accepted for interface compatibility but are not used.
    """
    return XGBRegressor()
56+
57+
58+
59+
60+
61+
62+
63+
64+
# In[13]:
65+
66+
67+
# train your model
68+
# returns a fit info json object and may modify the model object
69+
def fit(model, df, param):
    """Fit the regressor on the configured feature/target variables.

    The model object is trained in place; a small info dict is returned
    to the caller as the fit summary.

    Cleanup: removed the unused local `returns` and the dead commented-out
    train/test-split validation code from the original.
    """
    X = df[param['feature_variables']]
    y = df[param['target_variables']]
    model.fit(X, y, verbose=False)
    info = {"message": "model trained"}
    return info
82+
83+
84+
85+
86+
87+
88+
89+
90+
# In[17]:
91+
92+
93+
# apply your model
94+
# returns the calculated results
95+
def apply(model, df, param):
    """Predict with the fitted regressor on the feature columns of df.

    Returns a single-column DataFrame named 'predicted_value'.
    """
    features = df[param['feature_variables']]
    predictions = model.predict(features)
    return pd.DataFrame(predictions, columns=['predicted_value'])
102+
103+
104+
105+
106+
107+
108+
109+
110+
# In[20]:
111+
112+
113+
# save model to name in expected convention "<algo_name>_<model_name>"
114+
def save(model, name):
    """Write the fitted regressor to MODEL_DIRECTORY/<name>.json.

    Uses XGBoost's native JSON model serialization; returns the model
    so the caller keeps a reference.
    """
    model.save_model(MODEL_DIRECTORY + name + ".json")
    return model
119+
120+
121+
122+
123+
124+
125+
# In[21]:
126+
127+
128+
# load model from name in expected convention "<algo_name>_<model_name>"
129+
def load(name):
    """Restore a regressor previously written by save().

    Creates a fresh XGBRegressor and loads its booster from
    MODEL_DIRECTORY/<name>.json.
    """
    restored = XGBRegressor()
    restored.load_model(MODEL_DIRECTORY + name + ".json")
    return restored
133+
134+
135+
136+
137+
138+
139+
# In[22]:
140+
141+
142+
# return a model summary
143+
def summary(model=None):
    """Report the numpy and pandas library versions in use."""
    versions = {"numpy": np.__version__, "pandas": pd.__version__}
    return {"version": versions}
146+
147+
148+

0 commit comments

Comments
 (0)