Add Ensemble support in SageMaker MME, improve 507 on SM Invoke OOM and entrypoint changes (triton-inference-server#5002)

* Use personal repo

* Correct personal repo

* Fix: Improve 507 on SM Invoke OOM

* Revert custom repo changes

* Personal repo fix

* Add Ensemble support in SageMaker MME

* Update ensemble test

* Update ensemble test to include invoke and fix comment
nikhil-sk committed Dec 7, 2022
1 parent ce4e008 commit 3e1258d
Showing 5 changed files with 220 additions and 33 deletions.
13 changes: 12 additions & 1 deletion docker/sagemaker/serve
@@ -35,7 +35,7 @@ if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
     if [ "$SAGEMAKER_MULTI_MODEL" == "true" ]; then
         SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
         is_mme_mode=true
-        echo "Triton is running in SageMaker MME mode"
+        echo "Triton is running in SageMaker MME mode."
     fi
 fi

@@ -66,13 +66,24 @@ if [ -n "$SAGEMAKER_TRITON_LOG_ERROR" ]; then
 fi
 if [ -n "$SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE" ]; then
     SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=${SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE}"
+else
+    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=16777216" #16MB
 fi
 if [ -n "$SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE" ]; then
     SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=${SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE}"
+else
+    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=1048576" #1MB
 fi
 if [ -n "$SAGEMAKER_TRITON_TENSORFLOW_VERSION" ]; then
     SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=tensorflow,version=${SAGEMAKER_TRITON_TENSORFLOW_VERSION}"
 fi
+if [ -n "$SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT" ]; then
+    num_gpus=$(nvidia-smi -L | wc -l)
+    for ((i=0; i<${num_gpus}; i++)); do
+        SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-load-gpu-limit ${i}:${SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT}"
+    done
+fi
+
 
 if [ "${is_mme_mode}" = false ] && [ -f "${SAGEMAKER_MODEL_REPO}/config.pbtxt" ]; then
     echo "ERROR: Incorrect directory structure."
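
The serve changes above do two things: when SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE or SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE is unset, the Python backend's shared-memory pool now gets explicit defaults (16MB initial, 1MB growth), and a new SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT variable applies a per-GPU model-load limit to every GPU that nvidia-smi reports. A minimal launch sketch (the image name, volume path, and override values are illustrative, not part of this commit):

# Hypothetical image and values: 32MB shared-memory pool, 2MB growth,
# and at most 50% of each GPU's memory used for loading models.
docker run --gpus all -p 8080:8080 \
    -v /path/to/models:/opt/ml/models \
    -e SAGEMAKER_MULTI_MODEL=true \
    -e SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE=33554432 \
    -e SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE=2097152 \
    -e SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT=0.5 \
    my-triton-sagemaker-image serve

On a four-GPU host the new loop expands that last variable into --model-load-gpu-limit 0:0.5 1:0.5 2:0.5 3:0.5 on the tritonserver command line.
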
63 changes: 58 additions & 5 deletions qa/L0_sagemaker/sagemaker_multi_model_test.py
@@ -97,6 +97,10 @@ def setUp(self):
         # Output is same as input since this is an identity model
         self.model2_input_data_ = [0, 1, 2, 3, 4, 5, 6, 7]
 
+        # Ensemble model setup
+        self.model3_name = "123456789ensemble"
+        self.model3_url = "/opt/ml/models/123456789ensemble/model"
+
     def test_sm_0_environment_variables_set(self):
         self.assertEqual(os.getenv("SAGEMAKER_MULTI_MODEL"), "true",
                          "Variable SAGEMAKER_MULTI_MODEL must be set to true")
@@ -142,23 +146,23 @@ def test_sm_2_model_list(self):
             "models": [
                 {
                     "modelName": self.model1_name,
-                    "modelUrl": self.model1_url
+                    "modelUrl": self.model1_url.rstrip("/model")
                 },
                 {
                     "modelName": self.model2_name,
-                    "modelUrl": self.model2_url
+                    "modelUrl": self.model2_url.rstrip("/model")
                 },
             ]
         }
         expected_response_2 = {
             "models": [
                 {
                     "modelName": self.model2_name,
-                    "modelUrl": self.model2_url
+                    "modelUrl": self.model2_url.rstrip("/model")
                 },
                 {
                     "modelName": self.model1_name,
-                    "modelUrl": self.model1_url
+                    "modelUrl": self.model1_url.rstrip("/model")
                 },
             ]
         }
@@ -177,7 +181,7 @@ def test_sm_3_model_get(self):
         time.sleep(3)
         expected_response = {
             "modelName": self.model1_name,
-            "modelUrl": self.model1_url
+            "modelUrl": self.model1_url.rstrip("/model")
         }
         self.assertEqual(
             r.json(), expected_response,
@@ -280,6 +284,55 @@ def test_sm_5_model_unload(self):
             r.status_code, 404,
             "Expected status code 404, received {}".format(r.status_code))
 
+    def test_sm_6_ensemble_model(self):
+        # Load ensemble model
+        request_body = {"model_name": self.model3_name, "url": self.model3_url}
+        headers = {"Content-Type": "application/json", "X-Amzn-SageMaker-Target-Model": f"{self.model3_name}"}
+        r = requests.post(self.url_mme_,
+                          data=json.dumps(request_body),
+                          headers=headers)
+        time.sleep(5)  # wait for model to load
+        self.assertEqual(
+            r.status_code, 200,
+            "Expected status code 200, received {}".format(r.status_code))
+
+        # Invoke ensemble model
+        inputs = []
+        outputs = []
+        inputs.append(httpclient.InferInput("INPUT0", [1, 16], "FP32"))
+        inputs.append(httpclient.InferInput("INPUT1", [1, 16], "FP32"))
+
+        # Initialize the data
+        input_data = np.array(self.model1_input_data_, dtype=np.float32)
+        input_data = np.expand_dims(input_data, axis=0)
+        inputs[0].set_data_from_numpy(input_data, binary_data=False)
+        inputs[1].set_data_from_numpy(input_data, binary_data=False)
+
+        outputs.append(
+            httpclient.InferRequestedOutput("OUTPUT0", binary_data=False))
+        outputs.append(
+            httpclient.InferRequestedOutput("OUTPUT1", binary_data=False))
+        request_body, _ = httpclient.InferenceServerClient.generate_request_body(
+            inputs, outputs=outputs)
+
+        headers = {"Content-Type": "application/json"}
+        invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model3_name)
+        r = requests.post(invoke_url, data=request_body, headers=headers)
+        print(f"response: {r.text}")
+        r.raise_for_status()
+        self.assertEqual(
+            r.status_code, 200,
+            "Expected status code 200, received {}".format(r.status_code))
+
+        # Unload ensemble model
+        unload_url = "{}/{}".format(self.url_mme_, self.model3_name)
+        r = requests.delete(unload_url, headers=headers)
+        time.sleep(5)
+        self.assertEqual(
+            r.status_code, 200,
+            "Expected status code 200, received {}".format(r.status_code))
+
+
 
 if __name__ == "__main__":
     unittest.main()
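
test_sm_6_ensemble_model drives SageMaker's MME repository API end to end. A rough curl equivalent of its load/invoke/unload sequence, assuming the container's MME endpoint is http://localhost:8080/models (the test reads the real value from self.url_mme_ in setUp(), which is outside this diff) and a request.json file holding the payload that generate_request_body() returns:

MME=http://localhost:8080/models

# Load the ensemble by name and repository path (mirrors the test's load step).
curl -X POST ${MME} \
    -H "Content-Type: application/json" \
    -d '{"model_name": "123456789ensemble", "url": "/opt/ml/models/123456789ensemble/model"}'

# Invoke the loaded model (per the commit title, OOM during invoke
# should now surface as HTTP 507 rather than a generic failure).
curl -X POST ${MME}/123456789ensemble/invoke \
    -H "Content-Type: application/json" \
    -d @request.json

# Unload it.
curl -X DELETE ${MME}/123456789ensemble
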
33 changes: 30 additions & 3 deletions qa/L0_sagemaker/test.sh
@@ -56,11 +56,12 @@ rm -f *.out

 SAGEMAKER_TEST=sagemaker_test.py
 SAGEMAKER_MULTI_MODEL_TEST=sagemaker_multi_model_test.py
-MULTI_MODEL_UNIT_TEST_COUNT=6
+MULTI_MODEL_UNIT_TEST_COUNT=7
 UNIT_TEST_COUNT=9
 CLIENT_LOG="./client.log"
 
 DATADIR=/data/inferenceserver/${REPO_VERSION}
+ENSEMBLEDIR=/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository
 SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_LOG="./server.log"
 # Link model repository to "/opt/ml/model"
@@ -382,6 +383,34 @@ cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32/* ${MODEL1_PATH} && \
 cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32/* ${MODEL2_PATH} && \
     sed -i "s/onnx_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt
 
+# Ensemble model
+ENSEMBLE_MODEL_PATH="models/123456789ensemble/model"
+mkdir -p "${ENSEMBLE_MODEL_PATH}"
+
+model_name=python_float32_float32_float32
+
+mkdir -p ${ENSEMBLE_MODEL_PATH}/${model_name}/1 && \
+    cp ../python_models/add_sub/model.py ${ENSEMBLE_MODEL_PATH}/${model_name}/1/. && \
+    cp ../python_models/add_sub/config.pbtxt ${ENSEMBLE_MODEL_PATH}/${model_name}/.
+(cd ${ENSEMBLE_MODEL_PATH}/${model_name} && \
+    sed -i "s/label_filename:.*//" config.pbtxt && \
+    sed -i "0,/name:.*/{s/name:.*/name: \"${model_name}\"/}" config.pbtxt && \
+    echo "max_batch_size: 64" >> config.pbtxt)
+
+# Ensemble part
+mkdir -p ${ENSEMBLE_MODEL_PATH}/fan_${model_name}/1 && \
+    cp ../python_models/add_sub/model.py ${ENSEMBLE_MODEL_PATH}/fan_${model_name}/1/. && \
+    cp ../python_models/fan_add_sub/config.pbtxt ${ENSEMBLE_MODEL_PATH}/fan_${model_name}/. && \
+    (cd ${ENSEMBLE_MODEL_PATH}/fan_${model_name} && \
+        sed -i "s/label_filename:.*//" config.pbtxt && \
+        sed -i "s/model_name: \"ENSEMBLE_MODEL_NAME\"/model_name: \"${model_name}\"/" config.pbtxt && \
+        sed -i "0,/name:.*/{s/name:.*/name: \"fan_${model_name}\"/}" config.pbtxt && \
+        echo "max_batch_size: 64" >> config.pbtxt)
+
+# Custom float32 component of ensemble
+cp -r $ENSEMBLEDIR/nop_TYPE_FP32_-1 ${ENSEMBLE_MODEL_PATH}/. && \
+    mkdir -p ${ENSEMBLE_MODEL_PATH}/nop_TYPE_FP32_-1/1
+
 # Start server with 'serve' script
 export SAGEMAKER_MULTI_MODEL=true
 export SAGEMAKER_TRITON_LOG_VERBOSE=true

kill $SERVER_PID
wait $SERVE_PID

# MME end


unlink /opt/ml/model
rm -rf /opt/ml/model

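
For orientation, the ensemble setup above should leave the MME repository looking roughly like this (a sketch; the contents of nop_TYPE_FP32_-1 come from $ENSEMBLEDIR and are not part of this diff):

models/123456789ensemble/model/
├── python_float32_float32_float32/       # add_sub Python model, max_batch_size 64
│   ├── 1/model.py
│   └── config.pbtxt
├── fan_python_float32_float32_float32/   # ensemble config rewritten to reference the model above
│   ├── 1/model.py
│   └── config.pbtxt
└── nop_TYPE_FP32_-1/                     # identity component copied from $ENSEMBLEDIR
    ├── 1/
    └── config.pbtxt

test_sm_6_ensemble_model then loads this whole directory as a single SageMaker model named 123456789ensemble.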