From 509e0ea97bce82d8c36e695157cd81ddd15fdbd7 Mon Sep 17 00:00:00 2001 From: rnyak Date: Wed, 27 Apr 2022 12:12:19 -0400 Subject: [PATCH] Replace run_ensemble_on_tritonserver() with send_triton_request() and do minor updates (#244) * update inf nbs * Update README.md --- ...ding-Recommender-Systems-with-Merlin.ipynb | 453 ++++++++++-------- ...lti-stage-RecSys-with-Merlin-Systems.ipynb | 380 ++++----------- .../README.md | 0 examples/README.md | 2 +- 4 files changed, 348 insertions(+), 487 deletions(-) rename examples/{Deploying-multi-stage-RecSys => Building-and-deploying-multi-stage-RecSys}/01-Building-Recommender-Systems-with-Merlin.ipynb (82%) rename examples/{Deploying-multi-stage-RecSys => Building-and-deploying-multi-stage-RecSys}/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb (53%) rename examples/{Deploying-multi-stage-RecSys => Building-and-deploying-multi-stage-RecSys}/README.md (100%) diff --git a/examples/Deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb similarity index 82% rename from examples/Deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb rename to examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index 5b0186e46..9a0a9a9ad 100644 --- a/examples/Deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -106,9 +106,19 @@ "### Import required libraries and functions" ] }, + { + "cell_type": "markdown", + "id": "6c1586d8-e5a6-40c3-b6bb-61a3e62fa34c", + "metadata": {}, + "source": [ + "**Compatibility:**\n", + "\n", + "These notebooks are developed and tested using our latest inference container on [NVIDIA's docker 
registry](https://catalog.ngc.nvidia.com/containers?filters=&orderBy=dateModifiedDESC&query=merlin)." + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "2cd8cc8d-5cc7-4a9f-91e5-3deec6f1fe74", "metadata": {}, "outputs": [], @@ -118,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "08cdbfcc", "metadata": {}, "outputs": [ @@ -126,10 +136,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-04-05 18:55:54.740755: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", + "2022-04-26 19:18:30.785739: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-04-05 18:55:55.858389: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0\n", - "2022-04-05 18:55:55.858513: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16254 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n" + "2022-04-26 19:18:31.885961: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0\n", + "2022-04-26 19:18:31.886097: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16254 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n" ] } ], @@ -154,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 3, + 
"execution_count": 4, "id": "028a1398-76a8-4998-97d8-34a806e130d3", "metadata": {}, "outputs": [], @@ -176,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "81ddb370", "metadata": {}, "outputs": [], @@ -195,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "b44b3378-7297-4946-a271-742a9239bc3e", "metadata": {}, "outputs": [], @@ -232,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "550d45c9", "metadata": {}, "outputs": [ @@ -240,8 +250,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 165 µs, sys: 27 µs, total: 192 µs\n", - "Wall time: 195 µs\n" + "CPU times: user 172 µs, sys: 29 µs, total: 201 µs\n", + "Wall time: 204 µs\n" ] } ], @@ -273,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "e117e7b5-5007-424b-8d3f-9e1db245fd4c", "metadata": {}, "outputs": [ @@ -281,7 +291,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/lib/python3.8/site-packages/cudf/core/dataframe.py:1253: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + "/usr/lib/python3.8/site-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", " warnings.warn(\n" ] } @@ -314,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "cb870461-6ac2-49b2-ba6a-2da6ecb57f1d", "metadata": {}, "outputs": [], @@ -329,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "30e4ebc2", "metadata": {}, "outputs": [ @@ -339,7 +349,7 @@ "'click'" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -359,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "e4325080", "metadata": {}, "outputs": [], @@ -375,7 +385,7 @@ }, { 
"cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "bfe2aa9e", "metadata": {}, "outputs": [ @@ -383,37 +393,37 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-04-05 18:56:11.250713: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" + "2022-04-26 19:18:47.296000: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "610/611 [============================>.] - ETA: 0s - auc: 0.5003 - loss: 0.6932 - regularization_loss: 0.0000e+00 - total_loss: 0.6932" + "610/611 [============================>.] - ETA: 0s - auc: 0.4999 - loss: 0.6932 - regularization_loss: 0.0000e+00 - total_loss: 0.6932" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2022-04-05 18:57:02.828073: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/then/_0/cond/cond/branch_executed/_161\n" + "2022-04-26 19:19:38.592641: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/branch_executed/_13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "611/611 [==============================] - 51s 39ms/step - auc: 0.5003 - loss: 0.6932 - regularization_loss: 0.0000e+00 - total_loss: 0.6932 - val_auc: 0.5012 - val_loss: 0.6930 - val_regularization_loss: 0.0000e+00 - val_total_loss: 0.6930\n" + "611/611 [==============================] - 50s 38ms/step - auc: 0.4999 - loss: 0.6932 - regularization_loss: 0.0000e+00 - total_loss: 0.6932 - val_auc: 0.5000 - val_loss: 0.6932 - val_regularization_loss: 0.0000e+00 - val_total_loss: 0.6932\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 11, + "execution_count": 12, 
"metadata": {}, "output_type": "execute_result" } @@ -433,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "dd78a82e", "metadata": {}, "outputs": [], @@ -459,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "00de24e9-331a-486e-9843-6c554ad2ec77", "metadata": {}, "outputs": [], @@ -477,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "22a7d605-478f-40e6-a5dc-3e7a61e9b035", "metadata": {}, "outputs": [ @@ -485,7 +495,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/lib/python3.8/site-packages/cudf/core/dataframe.py:1253: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + "/usr/lib/python3.8/site-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", " warnings.warn(\n" ] } @@ -510,7 +520,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "dc150549-6fa0-441f-939d-a358e56d5e43", "metadata": {}, "outputs": [], @@ -524,7 +534,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "02471088-0ed8-42e7-968e-b7e68865d55c", "metadata": {}, "outputs": [], @@ -541,7 +551,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "d6703d7c-d38f-4d6d-a20a-9ee95ff1e256", "metadata": {}, "outputs": [ @@ -549,30 +559,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "610/611 [============================>.] - ETA: 0s - recall_at_10: 0.0337 - ndcg_10: 0.0331 - loss: 8.9147 - regularization_loss: 0.0000e+00 - total_loss: 8.9147" + "610/611 [============================>.] 
- ETA: 0s - recall_at_10: 0.0344 - ndcg_10: 0.0334 - loss: 8.6966 - regularization_loss: 0.0000e+00 - total_loss: 8.6966" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2022-04-05 18:58:03.440488: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/then/_0/cond/cond/branch_executed/_189\n" + "2022-04-26 19:20:33.534489: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/branch_executed/_24\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "611/611 [==============================] - 48s 70ms/step - recall_at_10: 0.0337 - ndcg_10: 0.0331 - loss: 8.9114 - regularization_loss: 0.0000e+00 - total_loss: 8.9114 - val_recall_at_10: 0.0038 - val_ndcg_10: 0.0015 - val_loss: 6.4472 - val_regularization_loss: 0.0000e+00 - val_total_loss: 6.4472\n" + "611/611 [==============================] - 43s 61ms/step - recall_at_10: 0.0344 - ndcg_10: 0.0334 - loss: 8.6942 - regularization_loss: 0.0000e+00 - total_loss: 8.6942 - val_recall_at_10: 0.0345 - val_ndcg_10: 0.0342 - val_loss: 6.4634 - val_regularization_loss: 0.0000e+00 - val_total_loss: 6.4634\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -608,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "fa00071a-f840-47f9-8734-75ed2c99eb92", "metadata": {}, "outputs": [], @@ -627,7 +637,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "2e7e96d2-9cd2-40d1-b356-8cd76b57bb4a", "metadata": {}, "outputs": [ @@ -655,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "26ba2521-ed1b-4c2b-afdd-26b4a5a9c008", "metadata": {}, "outputs": [], @@ -674,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, 
"id": "2af24597-e89c-43a4-9a13-458d8bed7c8a", "metadata": {}, "outputs": [], @@ -693,7 +703,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "ea0b369c-2f01-42e3-9f3c-74c3ff4a6d64", "metadata": {}, "outputs": [], @@ -704,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "id": "6b0949f9-e67a-414f-9d74-65f138e820a8", "metadata": {}, "outputs": [ @@ -846,7 +856,7 @@ "4 5 5 " ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -865,7 +875,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "id": "d30bd2f8-8a78-4df7-9bc4-42bd741c5b99", "metadata": {}, "outputs": [], @@ -879,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "d4998cd1-9dcd-4911-8f23-372e197b41e9", "metadata": {}, "outputs": [ @@ -935,8 +945,8 @@ " 1\n", " 1\n", " 1\n", - " 2022-04-05 18:58:11.594310\n", - " 2022-04-05 18:58:11.960006\n", + " 2022-04-26 19:20:41.830940\n", + " 2022-04-26 19:20:42.199991\n", " \n", " \n", " 1\n", @@ -952,8 +962,8 @@ " 2\n", " 2\n", " 2\n", - " 2022-04-05 18:58:11.594310\n", - " 2022-04-05 18:58:11.960006\n", + " 2022-04-26 19:20:41.830940\n", + " 2022-04-26 19:20:42.199991\n", " \n", " \n", " 2\n", @@ -969,8 +979,8 @@ " 3\n", " 3\n", " 3\n", - " 2022-04-05 18:58:11.594310\n", - " 2022-04-05 18:58:11.960006\n", + " 2022-04-26 19:20:41.830940\n", + " 2022-04-26 19:20:42.199991\n", " \n", " \n", " 3\n", @@ -986,8 +996,8 @@ " 4\n", " 4\n", " 4\n", - " 2022-04-05 18:58:11.594310\n", - " 2022-04-05 18:58:11.960006\n", + " 2022-04-26 19:20:41.830940\n", + " 2022-04-26 19:20:42.199991\n", " \n", " \n", " 4\n", @@ -1003,8 +1013,8 @@ " 5\n", " 5\n", " 5\n", - " 2022-04-05 18:58:11.594310\n", - " 2022-04-05 18:58:11.960006\n", + " 2022-04-26 19:20:41.830940\n", + " 2022-04-26 19:20:42.199991\n", " \n", " \n", "\n", @@ -1026,21 +1036,21 @@ "4 1 1 1 5 \n", "\n", " user_brands 
user_categories datetime \\\n", - "0 1 1 2022-04-05 18:58:11.594310 \n", - "1 2 2 2022-04-05 18:58:11.594310 \n", - "2 3 3 2022-04-05 18:58:11.594310 \n", - "3 4 4 2022-04-05 18:58:11.594310 \n", - "4 5 5 2022-04-05 18:58:11.594310 \n", + "0 1 1 2022-04-26 19:20:41.830940 \n", + "1 2 2 2022-04-26 19:20:41.830940 \n", + "2 3 3 2022-04-26 19:20:41.830940 \n", + "3 4 4 2022-04-26 19:20:41.830940 \n", + "4 5 5 2022-04-26 19:20:41.830940 \n", "\n", " created \n", - "0 2022-04-05 18:58:11.960006 \n", - "1 2022-04-05 18:58:11.960006 \n", - "2 2022-04-05 18:58:11.960006 \n", - "3 2022-04-05 18:58:11.960006 \n", - "4 2022-04-05 18:58:11.960006 " + "0 2022-04-26 19:20:42.199991 \n", + "1 2022-04-26 19:20:42.199991 \n", + "2 2022-04-26 19:20:42.199991 \n", + "3 2022-04-26 19:20:42.199991 \n", + "4 2022-04-26 19:20:42.199991 " ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1051,7 +1061,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "id": "2981b3ed-6156-49f0-aa14-326a3853a58a", "metadata": {}, "outputs": [], @@ -1061,7 +1071,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "id": "0a33a668-8e2a-4546-8f54-0060d405ba91", "metadata": {}, "outputs": [], @@ -1071,17 +1081,17 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "id": "97189581-473c-4928-8be7-ec31b86d69ee", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1307, 4)" + "(1331, 4)" ] }, - "execution_count": 28, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1092,7 +1102,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "id": "68a694d6-926f-4b0f-8edc-8cc7ac85ade7", "metadata": {}, "outputs": [], @@ -1105,7 +1115,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "id": "6c03fa22-b112-4243-bbe1-1cd7260cb85b", "metadata": {}, "outputs": [ @@ -1145,8 +1155,8 @@ " 1\n", " 
1\n", " 1\n", - " 2022-04-05 18:58:12.165964\n", - " 2022-04-05 18:58:12.167496\n", + " 2022-04-26 19:20:42.415442\n", + " 2022-04-26 19:20:42.417166\n", " \n", " \n", " 1\n", @@ -1154,8 +1164,8 @@ " 2\n", " 2\n", " 2\n", - " 2022-04-05 18:58:12.165964\n", - " 2022-04-05 18:58:12.167496\n", + " 2022-04-26 19:20:42.415442\n", + " 2022-04-26 19:20:42.417166\n", " \n", " \n", " 2\n", @@ -1163,8 +1173,8 @@ " 3\n", " 3\n", " 3\n", - " 2022-04-05 18:58:12.165964\n", - " 2022-04-05 18:58:12.167496\n", + " 2022-04-26 19:20:42.415442\n", + " 2022-04-26 19:20:42.417166\n", " \n", " \n", " 3\n", @@ -1172,8 +1182,8 @@ " 4\n", " 4\n", " 4\n", - " 2022-04-05 18:58:12.165964\n", - " 2022-04-05 18:58:12.167496\n", + " 2022-04-26 19:20:42.415442\n", + " 2022-04-26 19:20:42.417166\n", " \n", " \n", " 4\n", @@ -1181,8 +1191,8 @@ " 5\n", " 5\n", " 5\n", - " 2022-04-05 18:58:12.165964\n", - " 2022-04-05 18:58:12.167496\n", + " 2022-04-26 19:20:42.415442\n", + " 2022-04-26 19:20:42.417166\n", " \n", " \n", "\n", @@ -1190,21 +1200,21 @@ ], "text/plain": [ " item_id item_category item_shop item_brand datetime \\\n", - "0 1 1 1 1 2022-04-05 18:58:12.165964 \n", - "1 2 2 2 2 2022-04-05 18:58:12.165964 \n", - "2 3 3 3 3 2022-04-05 18:58:12.165964 \n", - "3 4 4 4 4 2022-04-05 18:58:12.165964 \n", - "4 5 5 5 5 2022-04-05 18:58:12.165964 \n", + "0 1 1 1 1 2022-04-26 19:20:42.415442 \n", + "1 2 2 2 2 2022-04-26 19:20:42.415442 \n", + "2 3 3 3 3 2022-04-26 19:20:42.415442 \n", + "3 4 4 4 4 2022-04-26 19:20:42.415442 \n", + "4 5 5 5 5 2022-04-26 19:20:42.415442 \n", "\n", " created \n", - "0 2022-04-05 18:58:12.167496 \n", - "1 2022-04-05 18:58:12.167496 \n", - "2 2022-04-05 18:58:12.167496 \n", - "3 2022-04-05 18:58:12.167496 \n", - "4 2022-04-05 18:58:12.167496 " + "0 2022-04-26 19:20:42.417166 \n", + "1 2022-04-26 19:20:42.417166 \n", + "2 2022-04-26 19:20:42.417166 \n", + "3 2022-04-26 19:20:42.417166 \n", + "4 2022-04-26 19:20:42.417166 " ] }, - "execution_count": 30, + "execution_count": 31, 
"metadata": {}, "output_type": "execute_result" } @@ -1215,7 +1225,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "id": "c312884b-a1f8-4e08-8068-696e06a9bf46", "metadata": {}, "outputs": [], @@ -1234,7 +1244,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "id": "00f1fe65-882e-4962-bb16-19a130fda215", "metadata": {}, "outputs": [], @@ -1308,122 +1318,122 @@ " \n", " 0\n", " 1\n", - " -0.076915\n", - " 0.072023\n", - " 0.138205\n", - " 0.122796\n", - " -0.002307\n", - " -0.160278\n", - " 0.183420\n", - " -0.219937\n", - " -0.043195\n", + " 0.480469\n", + " -0.313522\n", + " 0.448558\n", + " -0.265470\n", + " -0.448500\n", + " -0.408890\n", + " -0.519680\n", + " 0.478366\n", + " -0.219287\n", " ...\n", - " 0.133487\n", - " -0.181939\n", - " -0.129924\n", - " -0.025064\n", - " -0.007420\n", - " -0.160271\n", - " -0.118174\n", - " -0.166227\n", - " 0.158589\n", - " -0.094942\n", + " 0.274009\n", + " -0.514024\n", + " 0.632400\n", + " 0.010554\n", + " 0.395779\n", + " -0.203225\n", + " 0.453499\n", + " -0.461412\n", + " 0.655305\n", + " -0.193951\n", " \n", " \n", " 1\n", " 2\n", - " 0.082813\n", - " -0.074872\n", - " -0.135630\n", - " -0.116752\n", - " 0.001305\n", - " 0.156757\n", - " -0.191399\n", - " 0.214292\n", - " 0.044232\n", + " -0.011039\n", + " -0.120592\n", + " -0.145036\n", + " 0.028828\n", + " 0.129078\n", + " 0.045131\n", + " 0.080241\n", + " -0.115238\n", + " 0.071108\n", " ...\n", - " -0.130633\n", - " 0.177906\n", - " 0.123571\n", - " 0.023416\n", - " 0.007120\n", - " 0.167303\n", - " 0.115787\n", - " 0.165574\n", - " -0.158438\n", - " 0.093160\n", + " -0.091944\n", + " 0.024271\n", + " -0.144494\n", + " -0.145458\n", + " -0.045634\n", + " 0.020440\n", + " -0.114459\n", + " 0.092429\n", + " -0.158437\n", + " 0.095762\n", " \n", " \n", " 2\n", " 3\n", - " 0.079956\n", - " -0.072920\n", - " -0.137390\n", - " -0.121113\n", - " 0.005027\n", - " 0.158485\n", - " -0.185455\n", - " 0.219271\n", 
- " 0.042939\n", + " 0.031866\n", + " -0.073354\n", + " -0.172772\n", + " 0.034140\n", + " 0.128028\n", + " 0.056722\n", + " 0.051130\n", + " -0.116283\n", + " 0.115144\n", " ...\n", - " -0.127179\n", - " 0.184853\n", - " 0.134282\n", - " 0.020782\n", - " 0.005622\n", - " 0.161732\n", - " 0.120336\n", - " 0.165003\n", - " -0.158491\n", - " 0.096599\n", + " -0.067889\n", + " -0.007854\n", + " -0.136730\n", + " -0.078143\n", + " 0.004284\n", + " 0.012736\n", + " -0.090365\n", + " 0.101954\n", + " -0.207631\n", + " 0.081567\n", " \n", " \n", " 3\n", " 4\n", - " 0.073565\n", - " -0.072655\n", - " -0.136048\n", - " -0.122305\n", - " 0.002802\n", - " 0.163697\n", - " -0.181483\n", - " 0.221638\n", - " 0.040415\n", + " 0.049146\n", + " -0.112506\n", + " -0.121235\n", + " 0.029425\n", + " 0.111503\n", + " 0.050727\n", + " 0.101112\n", + " -0.127668\n", + " 0.117911\n", " ...\n", - " -0.135436\n", - " 0.185165\n", - " 0.123064\n", - " 0.028292\n", - " 0.006705\n", - " 0.163032\n", - " 0.116881\n", - " 0.168350\n", - " -0.160161\n", - " 0.093206\n", + " -0.095640\n", + " 0.017186\n", + " -0.115709\n", + " -0.136599\n", + " -0.035997\n", + " 0.046666\n", + " -0.061223\n", + " 0.046752\n", + " -0.213177\n", + " 0.110818\n", " \n", " \n", " 4\n", " 5\n", - " 0.079179\n", - " -0.069680\n", - " -0.135506\n", - " -0.120732\n", - " 0.003728\n", - " 0.156643\n", - " -0.186335\n", - " 0.216001\n", - " 0.045899\n", + " 0.002982\n", + " -0.124910\n", + " -0.155769\n", + " -0.006471\n", + " 0.138367\n", + " 0.054510\n", + " 0.063956\n", + " -0.108430\n", + " 0.063554\n", " ...\n", - " -0.129166\n", - " 0.178323\n", - " 0.130659\n", - " 0.028560\n", - " 0.010816\n", - " 0.160520\n", - " 0.121017\n", - " 0.168797\n", - " -0.160206\n", - " 0.091859\n", + " -0.097628\n", + " -0.003393\n", + " -0.095431\n", + " -0.146156\n", + " -0.047555\n", + " 0.056421\n", + " -0.067992\n", + " 0.107304\n", + " -0.153172\n", + " 0.040540\n", " \n", " \n", "\n", @@ -1432,25 +1442,25 @@ ], "text/plain": [ " 
item_id 0 1 2 3 4 5 \\\n", - "0 1 -0.076915 0.072023 0.138205 0.122796 -0.002307 -0.160278 \n", - "1 2 0.082813 -0.074872 -0.135630 -0.116752 0.001305 0.156757 \n", - "2 3 0.079956 -0.072920 -0.137390 -0.121113 0.005027 0.158485 \n", - "3 4 0.073565 -0.072655 -0.136048 -0.122305 0.002802 0.163697 \n", - "4 5 0.079179 -0.069680 -0.135506 -0.120732 0.003728 0.156643 \n", + "0 1 0.480469 -0.313522 0.448558 -0.265470 -0.448500 -0.408890 \n", + "1 2 -0.011039 -0.120592 -0.145036 0.028828 0.129078 0.045131 \n", + "2 3 0.031866 -0.073354 -0.172772 0.034140 0.128028 0.056722 \n", + "3 4 0.049146 -0.112506 -0.121235 0.029425 0.111503 0.050727 \n", + "4 5 0.002982 -0.124910 -0.155769 -0.006471 0.138367 0.054510 \n", "\n", " 6 7 8 ... 54 55 56 57 \\\n", - "0 0.183420 -0.219937 -0.043195 ... 0.133487 -0.181939 -0.129924 -0.025064 \n", - "1 -0.191399 0.214292 0.044232 ... -0.130633 0.177906 0.123571 0.023416 \n", - "2 -0.185455 0.219271 0.042939 ... -0.127179 0.184853 0.134282 0.020782 \n", - "3 -0.181483 0.221638 0.040415 ... -0.135436 0.185165 0.123064 0.028292 \n", - "4 -0.186335 0.216001 0.045899 ... -0.129166 0.178323 0.130659 0.028560 \n", + "0 -0.519680 0.478366 -0.219287 ... 0.274009 -0.514024 0.632400 0.010554 \n", + "1 0.080241 -0.115238 0.071108 ... -0.091944 0.024271 -0.144494 -0.145458 \n", + "2 0.051130 -0.116283 0.115144 ... -0.067889 -0.007854 -0.136730 -0.078143 \n", + "3 0.101112 -0.127668 0.117911 ... -0.095640 0.017186 -0.115709 -0.136599 \n", + "4 0.063956 -0.108430 0.063554 ... 
-0.097628 -0.003393 -0.095431 -0.146156 \n", "\n", " 58 59 60 61 62 63 \n", - "0 -0.007420 -0.160271 -0.118174 -0.166227 0.158589 -0.094942 \n", - "1 0.007120 0.167303 0.115787 0.165574 -0.158438 0.093160 \n", - "2 0.005622 0.161732 0.120336 0.165003 -0.158491 0.096599 \n", - "3 0.006705 0.163032 0.116881 0.168350 -0.160161 0.093206 \n", - "4 0.010816 0.160520 0.121017 0.168797 -0.160206 0.091859 \n", + "0 0.395779 -0.203225 0.453499 -0.461412 0.655305 -0.193951 \n", + "1 -0.045634 0.020440 -0.114459 0.092429 -0.158437 0.095762 \n", + "2 0.004284 0.012736 -0.090365 0.101954 -0.207631 0.081567 \n", + "3 -0.035997 0.046666 -0.061223 0.046752 -0.213177 0.110818 \n", + "4 -0.047555 0.056421 -0.067992 0.107304 -0.153172 0.040540 \n", "\n", "[5 rows x 65 columns]" ] @@ -1472,7 +1482,7 @@ "outputs": [], "source": [ "# save to disk\n", - "item_embeddings.to_parquet('item_embeddings.parquet')" + "item_embeddings.to_parquet(os.path.join(BASE_DIR,'item_embeddings.parquet'))" ] }, { @@ -1597,10 +1607,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "57133c1e-18d9-4ccb-9704-cdebd271985e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Get:1 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", + "Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Release\n", + "Hit:4 http://archive.ubuntu.com/ubuntu focal InRelease \n", + "Get:5 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:8 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [870 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1154 kB]\n", + "Get:10 http://security.ubuntu.com/ubuntu focal-security/main 
amd64 Packages [1773 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [2188 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [1216 kB]\n", + "Fetched 7535 kB in 2s (3099 kB/s) \n", + "Reading package lists... Done\n", + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "tree is already the newest version (1.8.0-1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 74 not upgraded.\n" + ] + } + ], "source": [ "# install tree\n", "!apt-get update\n", diff --git a/examples/Deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb similarity index 53% rename from examples/Deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb rename to examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index e4dd66e34..046774ce8 100644 --- a/examples/Deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "ea3756f8-a115-436a-b5d4-48f0641451b9", "metadata": {}, "outputs": [], @@ -73,12 +73,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "4db1b5f1-c8fa-4e03-8744-1197873c5bee", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/faiss/loader.py:28: DeprecationWarning: distutils Version classes are deprecated. 
Use packaging.version instead.\n", + " if LooseVersion(numpy.__version__) >= \"1.19\":\n", + "/usr/local/lib/python3.8/dist-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " other = LooseVersion(other)\n", + "04/26/2022 07:11:22 PM INFO:Loading faiss with AVX2 support.\n", + "04/26/2022 07:11:22 PM INFO:Could not load library with AVX2 support due to:\n", + "ModuleNotFoundError(\"No module named 'faiss.swigfaiss_avx2'\")\n", + "04/26/2022 07:11:22 PM INFO:Loading faiss.\n", + "04/26/2022 07:11:22 PM INFO:Successfully loaded faiss.\n", + "/usr/lib/python3.8/site-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/lib/python3.8/site-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:37: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/lib/python3.8/site-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:30: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _INTEGERSTATISTICS = _descriptor.Descriptor(\n", + "/usr/lib/python3.8/site-packages/dask_cudf/core.py:32: DeprecationWarning: distutils Version classes are deprecated. 
Use packaging.version instead.\n", + " DASK_VERSION = LooseVersion(dask.__version__)\n", + "/usr/local/lib/python3.8/dist-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " other = LooseVersion(other)\n", + "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:33: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.EnumValueDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:27: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _DATATYPE = _descriptor.EnumDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:330: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:323: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _MODELRATELIMITER_RESOURCE = _descriptor.Descriptor(\n", + "/usr/local/lib/python3.8/dist-packages/flatbuffers/compat.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses\n", + " import imp\n" + ] + } + ], "source": [ "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\" # prevent TF to claim entire GPU memory\n", + "\n", "import numpy as np\n", "import pandas as pd\n", "import feast\n", @@ -93,43 +133,6 @@ "from merlin.systems.triton.utils import run_triton_server, run_ensemble_on_tritonserver" ] }, - { - "cell_type": "markdown", - "id": "3356b2ed-3d94-4165-b311-9560212e55d3", - "metadata": {}, - "source": [ - "We use `configure_tensorflow` function to prevent the Tensorflow to claim entire GPU memory. With this func, we let TF to allocate 50% of the available GPU memory. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "13d75d74-b513-451d-b72d-790fef388e66", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "04/05/2022 06:52:55 PM INFO:init\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from nvtabular.loader.tf_utils import configure_tensorflow\n", - "configure_tensorflow()" - ] - }, { "cell_type": "markdown", "id": "55ead20e-c573-462e-9aa2-c3494bf0129f", @@ -174,16 +177,8 @@ "/Merlin/examples/Deploying-multi-stage-RecSys/feature_repo\n", "/usr/local/lib/python3.8/dist-packages/feast/feature_view.py:100: DeprecationWarning: The argument 'input' is being deprecated. Please use 'batch_source' instead. 
Feast 0.13 and onwards will not support the argument 'input'.\n", " warnings.warn(\n", - "Created data source \u001b[1m\u001b[32m/Merlin/examples/Deploying-multi-stage-RecSys/feature_repo/data/user_features.parquet\u001b[0m\n", - "Created data source \u001b[1m\u001b[32m/Merlin/examples/Deploying-multi-stage-RecSys/feature_repo/data/item_features.parquet\u001b[0m\n", - "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", - "Created entity \u001b[1m\u001b[32muser_id\u001b[0m\n", - "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", - "Created feature view \u001b[1m\u001b[32muser_features\u001b[0m\n", - "\n", - "Created sqlite table \u001b[1m\u001b[32mfeature_repo_item_features\u001b[0m\n", - "Created sqlite table \u001b[1m\u001b[32mfeature_repo_user_features\u001b[0m\n", - "\n" + "\u001b[1m\u001b[94mNo changes to registry\n", + "\u001b[1m\u001b[94mNo changes to infrastructure\n" ] } ], @@ -219,9 +214,9 @@ "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views from \u001b[1m\u001b[32m1995-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", - "100%|█████████████████████████████████████████████████████████| 1298/1298 [00:00<00:00, 5214.52it/s]\n", + "100%|█████████████████████████████████████████████████████████| 1303/1303 [00:00<00:00, 5734.96it/s]\n", "\u001b[1m\u001b[32muser_features\u001b[0m:\n", - "100%|█████████████████████████████████████████████████████████| 1322/1322 [00:00<00:00, 1621.24it/s]\n" + "100%|█████████████████████████████████████████████████████████| 1326/1326 [00:00<00:00, 1730.98it/s]\n" ] } ], @@ -433,10 +428,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-04-05 18:53:03.680149: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in 
performance-critical operations: AVX2 AVX512F FMA\n", + "2022-04-26 19:11:32.908702: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-04-05 18:53:04.788233: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16254 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", - "04/05/2022 06:53:06 PM WARNING:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + "2022-04-26 19:11:34.310185: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0\n", + "2022-04-26 19:11:34.310342: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30667 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", + "04/26/2022 07:11:35 PM WARNING:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" ] } ], @@ -610,6 +606,8 @@ "\u001b[01;34m/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble\u001b[00m\n", "├── \u001b[01;34m0_queryfeast\u001b[00m\n", "│   ├── \u001b[01;34m1\u001b[00m\n", + "│   │   ├── \u001b[01;34m__pycache__\u001b[00m\n", + "│   │   │   └── model.cpython-38.pyc\n", "│   │   └── model.py\n", "│   └── config.pbtxt\n", "├── \u001b[01;34m1_predicttensorflow\u001b[00m\n", @@ -654,7 +652,7 @@ " ├── \u001b[01;34m1\u001b[00m\n", " └── config.pbtxt\n", "\n", - "23 directories, 22 files\n" + "24 directories, 23 files\n" ] } ], @@ -664,261 +662,89 @@ }, { "cell_type": "markdown", - "id": "6c0a798f-6abf-4cbb-87f8-f60a6e757092", - "metadata": {}, + "id": "fe7962cc-f26d-4a4a-b5a3-d214e0f37456", + "metadata": { + "tags": [] + }, "source": [ - "### Retrieving Recommendations from Triton" + "### Starting Triton Server" ] }, { "cell_type": "markdown", - "id": "4e3fe264-e4a4-4dab-9b04-f83fb696d7d1", + "id": "8c07c620-7d6c-4275-87fe-e5b94335bdb9", "metadata": {}, "source": [ - "It is time to deploy the all the models as an ensemble model to Triton Inference very easily using Merlin Systems library. Now we can launch our triton server and load our models, and get a response for our query with a utility function `run_ensemble_on_tritonserver()`." + "It is time to deploy all the models as an ensemble model to Triton Inference Server [TIS](https://github.com/triton-inference-server). After we export the ensemble, we are ready to start the TIS. You can start Triton Server by using the following command in your terminal:\n", + "\n", + "```\n", + "tritonserver --model-repository=/ensemble_export_path/ --backend-config=tensorflow,version=2\n", + "```\n", + "\n", + "For the `--model-repository` argument, specify the same path as the `export_path` that you specified previously in the `ensemble.export` method. This command will launch the server and load all the models to the server. 
Once all the models are loaded successfully, you should see `READY` status printed out in the terminal for each loaded model." ] }, { - "cell_type": "code", - "execution_count": 22, - "id": "b7896ec0-db89-4642-bfb6-eebf9afe77ae", + "cell_type": "markdown", + "id": "6c0a798f-6abf-4cbb-87f8-f60a6e757092", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0405 18:53:12.285197 12809 tensorflow.cc:2176] TRITONBACKEND_Initialize: tensorflow\n", - "I0405 18:53:12.285284 12809 tensorflow.cc:2186] Triton TRITONBACKEND API version: 1.8\n", - "I0405 18:53:12.285289 12809 tensorflow.cc:2192] 'tensorflow' TRITONBACKEND API version: 1.8\n", - "I0405 18:53:12.285293 12809 tensorflow.cc:2216] backend configuration:\n", - "{\"cmdline\":{\"version\":\"2\"}}\n", - "I0405 18:53:12.434720 12809 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7fd4f8000000' with size 268435456\n", - "I0405 18:53:12.435114 12809 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", - "I0405 18:53:12.440615 12809 model_repository_manager.cc:994] loading: 0_queryfeast:1\n", - "I0405 18:53:12.541023 12809 model_repository_manager.cc:994] loading: 1_predicttensorflow:1\n", - "I0405 18:53:12.544542 12809 backend.cc:46] TRITONBACKEND_Initialize: nvtabular\n", - "I0405 18:53:12.544572 12809 backend.cc:53] Triton TRITONBACKEND API version: 1.8\n", - "I0405 18:53:12.544585 12809 backend.cc:56] 'nvtabular' TRITONBACKEND API version: 1.8\n", - "I0405 18:53:12.544858 12809 backend.cc:76] Loaded libpython successfully\n", - "I0405 18:53:12.641295 12809 model_repository_manager.cc:994] loading: 2_queryfaiss:1\n", - "I0405 18:53:12.712613 12809 backend.cc:89] Python interpreter is initialized\n", - "I0405 18:53:12.713774 12809 tensorflow.cc:2276] TRITONBACKEND_ModelInitialize: 1_predicttensorflow (version 1)\n", - "I0405 18:53:12.715248 12809 model_inst_state.hpp:64] Loading TritonPythonnModel from model.py in path 
'/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/0_queryfeast/1'\n", - "I0405 18:53:12.743586 12809 model_repository_manager.cc:994] loading: 3_queryfeast:1\n", - "I0405 18:53:12.844174 12809 model_repository_manager.cc:994] loading: 4_unrollfeatures:1\n", - "I0405 18:53:12.944631 12809 model_repository_manager.cc:994] loading: 5_predicttensorflow:1\n", - "I0405 18:53:13.044995 12809 model_repository_manager.cc:994] loading: 6_softmaxsampling:1\n", - "I0405 18:53:14.728975 12809 tensorflow.cc:2325] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflow (GPU device 0)\n", - "I0405 18:53:14.729115 12809 model_repository_manager.cc:1149] successfully loaded '0_queryfeast' version 1\n", - "2022-04-05 18:53:15.913993: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/1_predicttensorflow/1/model.savedmodel\n", - "2022-04-05 18:53:15.919091: I tensorflow/cc/saved_model/reader.cc:107] Reading meta graph with tags { serve }\n", - "2022-04-05 18:53:15.919132: I tensorflow/cc/saved_model/reader.cc:148] Reading SavedModel debug info (if present) from: /Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/1_predicttensorflow/1/model.savedmodel\n", - "2022-04-05 18:53:15.925459: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13584 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", - "2022-04-05 18:53:15.972201: W tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", - " /job:localhost/replica:0/task:0/device:CPU:0].\n", - "See below for details of this colocation group:\n", - "Colocation Debug Info:\n", - "Colocation group had the following types and supported devices: \n", - "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", - "ReadVariableOp: GPU CPU \n", - "VarHandleOp: CPU \n", - "\n", - "Colocation members, user-requested devices, and framework assigned devices, if any:\n", - " retrieval_model/sequential_block_20/item_id (VarHandleOp) /gpu:0\n", - " retrieval_model/sequential_block_20/item_id/Read/ReadVariableOp (ReadVariableOp) /gpu:0\n", - "\n", - "2022-04-05 18:53:15.972306: I tensorflow/cc/saved_model/loader.cc:212] Restoring SavedModel bundle.\n", - "2022-04-05 18:53:16.051512: I tensorflow/cc/saved_model/loader.cc:196] Running initialization op on SavedModel bundle at path: /Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/1_predicttensorflow/1/model.savedmodel\n", - "2022-04-05 18:53:16.076336: I tensorflow/cc/saved_model/loader.cc:303] SavedModel load for tags { serve }; Status: success: OK. Took 162361 microseconds.\n", - "I0405 18:53:16.076597 12809 model_repository_manager.cc:1149] successfully loaded '1_predicttensorflow' version 1\n", - "I0405 18:53:16.079944 12809 tensorflow.cc:2276] TRITONBACKEND_ModelInitialize: 5_predicttensorflow (version 1)\n", - "I0405 18:53:16.082241 12809 model_inst_state.hpp:64] Loading TritonPythonnModel from model.py in path '/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/2_queryfaiss/1'\n", - "/systems/merlin/systems/dag/ops/feast.py:15: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. 
If you specifically wanted the numpy scalar type, use `np.float64` here.\n", - "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", - " ValueType.FLOAT: (np.float, False, False),\n", - "/usr/local/lib/python3.8/dist-packages/faiss/loader.py:28: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", - " if LooseVersion(numpy.__version__) >= \"1.19\":\n", - "/usr/local/lib/python3.8/dist-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", - " other = LooseVersion(other)\n", - "04/05/2022 06:53:16 PM INFO:Loading faiss with AVX2 support.\n", - "04/05/2022 06:53:16 PM INFO:Could not load library with AVX2 support due to:\n", - "ModuleNotFoundError(\"No module named 'faiss.swigfaiss_avx2'\")\n", - "04/05/2022 06:53:16 PM INFO:Loading faiss.\n", - "04/05/2022 06:53:16 PM INFO:Successfully loaded faiss.\n", - "I0405 18:53:16.110800 12809 model_inst_state.hpp:64] Loading TritonPythonnModel from model.py in path '/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/3_queryfeast/1'\n", - "I0405 18:53:16.111056 12809 model_repository_manager.cc:1149] successfully loaded '2_queryfaiss' version 1\n", - "I0405 18:53:16.119599 12809 model_repository_manager.cc:1149] successfully loaded '3_queryfeast' version 1\n", - "I0405 18:53:16.124341 12809 model_inst_state.hpp:64] Loading TritonPythonnModel from model.py in path '/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/4_unrollfeatures/1'\n", - "I0405 18:53:16.126394 12809 tensorflow.cc:2325] TRITONBACKEND_ModelInstanceInitialize: 5_predicttensorflow (GPU device 0)\n", - "I0405 18:53:16.126561 12809 model_repository_manager.cc:1149] successfully loaded '4_unrollfeatures' version 1\n", - "2022-04-05 18:53:16.127020: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: 
/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/5_predicttensorflow/1/model.savedmodel\n", - "2022-04-05 18:53:16.148400: I tensorflow/cc/saved_model/reader.cc:107] Reading meta graph with tags { serve }\n", - "2022-04-05 18:53:16.148442: I tensorflow/cc/saved_model/reader.cc:148] Reading SavedModel debug info (if present) from: /Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/5_predicttensorflow/1/model.savedmodel\n", - "2022-04-05 18:53:16.150520: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13584 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", - "2022-04-05 18:53:16.176360: I tensorflow/cc/saved_model/loader.cc:212] Restoring SavedModel bundle.\n", - "2022-04-05 18:53:16.330717: I tensorflow/cc/saved_model/loader.cc:196] Running initialization op on SavedModel bundle at path: /Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/5_predicttensorflow/1/model.savedmodel\n", - "2022-04-05 18:53:16.383740: I tensorflow/cc/saved_model/loader.cc:303] SavedModel load for tags { serve }; Status: success: OK. 
Took 256733 microseconds.\n", - "I0405 18:53:16.383873 12809 model_inst_state.hpp:64] Loading TritonPythonnModel from model.py in path '/Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble/6_softmaxsampling/1'\n", - "I0405 18:53:16.383965 12809 model_repository_manager.cc:1149] successfully loaded '5_predicttensorflow' version 1\n", - "I0405 18:53:16.385192 12809 model_repository_manager.cc:1149] successfully loaded '6_softmaxsampling' version 1\n", - "I0405 18:53:16.389471 12809 model_repository_manager.cc:994] loading: ensemble_model:1\n", - "I0405 18:53:16.490208 12809 model_repository_manager.cc:1149] successfully loaded 'ensemble_model' version 1\n", - "I0405 18:53:16.490451 12809 server.cc:522] \n", - "+------------------+------+\n", - "| Repository Agent | Path |\n", - "+------------------+------+\n", - "+------------------+------+\n", - "\n", - "I0405 18:53:16.490576 12809 server.cc:549] \n", - "+------------+-----------------------------------------------------------------+-----------------------------+\n", - "| Backend | Path | Config |\n", - "+------------+-----------------------------------------------------------------+-----------------------------+\n", - "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"version\":\"2\"}} |\n", - "| nvtabular | /opt/tritonserver/backends/nvtabular/libtriton_nvtabular.so | {} |\n", - "+------------+-----------------------------------------------------------------+-----------------------------+\n", - "\n", - "I0405 18:53:16.490758 12809 server.cc:592] \n", - "+---------------------+---------+--------+\n", - "| Model | Version | Status |\n", - "+---------------------+---------+--------+\n", - "| 0_queryfeast | 1 | READY |\n", - "| 1_predicttensorflow | 1 | READY |\n", - "| 2_queryfaiss | 1 | READY |\n", - "| 3_queryfeast | 1 | READY |\n", - "| 4_unrollfeatures | 1 | READY |\n", - "| 5_predicttensorflow | 1 | READY |\n", - "| 6_softmaxsampling | 1 | READY |\n", - "| 
ensemble_model | 1 | READY |\n", - "+---------------------+---------+--------+\n", - "\n", - "I0405 18:53:16.546009 12809 metrics.cc:623] Collecting metrics for GPU 0: Quadro GV100\n", - "I0405 18:53:16.546474 12809 tritonserver.cc:1932] \n", - "+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "| Option | Value |\n", - "+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "| server_id | triton |\n", - "| server_version | 2.19.0 |\n", - "| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |\n", - "| model_repository_path[0] | /Merlin/examples/Deploying-multi-stage-RecSys/poc_ensemble |\n", - "| model_control_mode | MODE_NONE |\n", - "| strict_model_config | 1 |\n", - "| rate_limit | OFF |\n", - "| pinned_memory_pool_byte_size | 268435456 |\n", - "| cuda_memory_pool_byte_size{0} | 67108864 |\n", - "| response_cache_byte_size | 0 |\n", - "| min_supported_compute_capability | 6.0 |\n", - "| strict_readiness | 1 |\n", - "| exit_timeout | 30 |\n", - "+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "\n", - "I0405 18:53:16.547652 12809 grpc_server.cc:4375] Started GRPCInferenceService at 0.0.0.0:8001\n", - "I0405 18:53:16.548288 12809 http_server.cc:3075] Started HTTPService at 0.0.0.0:8000\n", - "I0405 18:53:16.590481 12809 http_server.cc:178] Started Metrics Service at 
0.0.0.0:8002\n", - "I0405 18:53:22.789904 12809 server.cc:252] Waiting for in-flight requests to complete.\n", - "I0405 18:53:22.789934 12809 model_repository_manager.cc:1026] unloading: ensemble_model:1\n", - "I0405 18:53:22.790047 12809 model_repository_manager.cc:1026] unloading: 6_softmaxsampling:1\n", - "I0405 18:53:22.790157 12809 model_repository_manager.cc:1026] unloading: 5_predicttensorflow:1\n", - "I0405 18:53:22.790212 12809 model_repository_manager.cc:1132] successfully unloaded 'ensemble_model' version 1\n", - "I0405 18:53:22.790326 12809 model_repository_manager.cc:1026] unloading: 4_unrollfeatures:1I0405 18:53:22.790324 12809 backend.cc:160] TRITONBACKEND_ModelInstanceFinalize: delete instance state\n", - "\n", - "Signal (11) received.\n", - "I0405 18:53:22.790397 12809 model_repository_manager.cc:1026] unloading: 3_queryfeast:1\n", - "I0405 18:53:22.790436 12809 model_repository_manager.cc:1026] unloading: 2_queryfaiss:1\n", - "I0405 18:53:22.790500 12809 tensorflow.cc:2363] TRITONBACKEND_ModelInstanceFinalize: delete instance state\n", - "I0405 18:53:22.790501 12809 backend.cc:160] TRITONBACKEND_ModelInstanceFinalize: delete instance stateI0405 18:53:22.790543 12809 backend.cc:160] TRITONBACKEND_ModelInstanceFinalize: delete instance state\n", - "\n", - "Signal (11) received.\n", - "Signal (11) received.\n", - "I0405 18:53:22.790619 12809 model_repository_manager.cc:1026] unloading: 1_predicttensorflow:1\n", - "I0405 18:53:22.790695 12809 model_repository_manager.cc:1026] unloading: 0_queryfeast:1\n", - "I0405 18:53:22.790803 12809 server.cc:267] Timeout 30: Found 7 live models and 0 in-flight non-inference requests\n", - "I0405 18:53:22.790697 12809 tensorflow.cc:2302] TRITONBACKEND_ModelFinalize: delete model state\n", - "I0405 18:53:22.791279 12809 backend.cc:160] TRITONBACKEND_ModelInstanceFinalize: delete instance state\n", - "Signal (11) received.\n", - "I0405 18:53:22.791393 12809 tensorflow.cc:2363] TRITONBACKEND_ModelInstanceFinalize: 
delete instance state\n", - "I0405 18:53:22.818461 12809 backend.cc:160] TRITONBACKEND_ModelInstanceFinalize: delete instance state\n", - "Signal (11) received.\n", - "I0405 18:53:22.847960 12809 tensorflow.cc:2302] TRITONBACKEND_ModelFinalize: delete model state\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Signal (2) received.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0405 18:53:23.800413 12809 server.cc:267] Timeout 29: Found 7 live models and 0 in-flight non-inference requests\n", - "I0405 18:53:24.811206 12809 server.cc:267] Timeout 28: Found 7 live models and 0 in-flight non-inference requests\n", - " 0# 0x0000555EA4C27299 in /opt/tritonserver/bin/tritonserver\n", - " 1# 0x00007FD58C687210 in /usr/lib/x86_64-linux-gnu/libc.so.6\n", - " 2# 0x00007FD531CF2F2E in /usr/lib/x86_64-linux-gnu/libpython3.8.so.1.0\n", - " 3# TRITONBACKEND_ModelInstanceFinalize in /opt/tritonserver/backends/nvtabular/libtriton_nvtabular.so\n", - " 4# 0x00007FD58D224FC4 in /opt/tritonserver/bin/../lib/libtritonserver.so\n", - " 5# 0x00007FD58D21E3B9 in /opt/tritonserver/bin/../lib/libtritonserver.so\n", - " 6# 0x00007FD58D21EB1D in /opt/tritonserver/bin/../lib/libtritonserver.so\n", - " 7# 0x00007FD58D0A20D7 in /opt/tritonserver/bin/../lib/libtritonserver.so\n", - " 8# 0x00007FD58CA75DE4 in /usr/lib/x86_64-linux-gnu/libstdc++.so.6\n", - " 9# 0x00007FD58CEF3609 in /usr/lib/x86_64-linux-gnu/libpthread.so.0\n", - "10# clone in /usr/lib/x86_64-linux-gnu/libc.so.6\n", - "\n" - ] - } - ], "source": [ - "# create a request to be sent to TIS\n", - "from merlin.core.dispatch import make_df\n", - "\n", - "request = make_df({\"user_id\": [1]})\n", - "request[\"user_id\"] = request[\"user_id\"].astype(np.int32)\n", - "\n", - "response = run_ensemble_on_tritonserver(\n", - " export_path, ensemble.graph.output_schema.column_names, request, \"ensemble_model\"\n", - ")" + "### Retrieving Recommendations from Triton" ] }, { 
"cell_type": "markdown", - "id": "4840b471-8153-4d5f-82f8-f77614470ca4", + "id": "4e3fe264-e4a4-4dab-9b04-f83fb696d7d1", "metadata": {}, "source": [ - "Convert our response to a numpy array and print it out." + "Once our models are successfully loaded to the TIS, we can now easily send a request to TIS and get a response for our query with `send_triton_request` utility function." ] }, { "cell_type": "code", - "execution_count": 23, - "id": "583e6354-183a-4dae-8533-bfc643d4452f", + "execution_count": 22, + "id": "e95f1d85-9cbc-423b-9de1-91d1e421e5e4", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "04/26/2022 07:13:11 PM INFO:init\n" + ] + }, { "data": { "text/plain": [ - "array([[ 392],\n", - " [ 267],\n", - " [1107],\n", - " [ 968],\n", - " [ 457],\n", - " [ 750],\n", - " [ 669],\n", - " [ 789],\n", - " [1237],\n", - " [1164]], dtype=int32)" + "{'ordered_ids': array([[1207],\n", + " [ 900],\n", + " [1299],\n", + " [1172],\n", + " [1045],\n", + " [ 999],\n", + " [1007],\n", + " [1248],\n", + " [1014],\n", + " [1246]], dtype=int32)}" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "output= response.as_numpy('ordered_ids')\n", - "output" + "from merlin.systems.triton.utils import send_triton_request\n", + "from merlin.core.dispatch import make_df\n", + "\n", + "# create a request to be sent to TIS\n", + "request = make_df({\"user_id\": [1]})\n", + "request[\"user_id\"] = request[\"user_id\"].astype(np.int32)\n", + "\n", + "outputs = ensemble.graph.output_schema.column_names\n", + "\n", + "response = send_triton_request(request, outputs)\n", + "response" ] }, { diff --git a/examples/Deploying-multi-stage-RecSys/README.md b/examples/Building-and-deploying-multi-stage-RecSys/README.md similarity index 100% rename from examples/Deploying-multi-stage-RecSys/README.md rename to examples/Building-and-deploying-multi-stage-RecSys/README.md diff --git 
a/examples/README.md b/examples/README.md index 2193db768..f7b447725 100644 --- a/examples/README.md +++ b/examples/README.md @@ -4,7 +4,7 @@ We have created a collection of Jupyter example notebooks based on different dat ## Inventory -### 1. [Deploying multi stage RecSys](https://github.com/NVIDIA-Merlin/Merlin/tree/main/examples/Deploying-multi-stage-RecSys) +### 1. [Building and Deploying multi stage RecSys](https://github.com/NVIDIA-Merlin/Merlin/tree/main/examples/Building-and-deploying-multi-stage-RecSys) Recommender system pipelines are often based on multiple stages: Retrievel, Filtering, Scoring and Ordering. This example provides an end-to-end pipelines leveraging the Merlin framework by - Processing the dataset using NVTabular