
Commit 539b308

Merge branch 'feat/mlflow-models-from-code-migration-vacation-recommendation-with-bert' of https://github.com/HPInc/AI-Blueprints into feat/mlflow-models-from-code-migration-vacation-recommendation-with-bert

2 parents: 8d7ffe6 + 01709ab

File tree

5 files changed (+115 -184 lines)
Binary file not shown.

ngc-integration/vacation-recommendation-with-bert/notebooks/register-model.ipynb

Lines changed: 44 additions, 140 deletions
@@ -74,7 +74,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:06:35 - INFO - Notebook execution started.\n"
+"2025-09-08 19:29:03 - INFO - Notebook execution started.\n"
 ]
 }
 ],
@@ -102,8 +102,8 @@
 "output_type": "stream",
 "text": [
 "Note: you may need to restart the kernel to use updated packages.\n",
-"CPU times: user 96.5 ms, sys: 61.4 ms, total: 158 ms\n",
-"Wall time: 3.76 s\n"
+"CPU times: user 97 ms, sys: 41 ms, total: 138 ms\n",
+"Wall time: 3.87 s\n"
 ]
 }
 ],
@@ -123,7 +123,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:06:46 nemo_logging:349] /opt/conda/envs/aistudio/lib/python3.10/site-packages/_distutils_hack/__init__.py:53: UserWarning: Reliance on distutils from stdlib is deprecated. Users must rely on setuptools to provide the distutils module. Avoid importing distutils or import setuptools first, and avoid setting SETUPTOOLS_USE_DISTUTILS=stdlib. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml\n",
+"[NeMo W 2025-09-08 19:29:14 nemo_logging:349] /opt/conda/envs/aistudio/lib/python3.10/site-packages/_distutils_hack/__init__.py:53: UserWarning: Reliance on distutils from stdlib is deprecated. Users must rely on setuptools to provide the distutils module. Avoid importing distutils or import setuptools first, and avoid setting SETUPTOOLS_USE_DISTUTILS=stdlib. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml\n",
 " warnings.warn(\n",
 " \n"
 ]
@@ -250,9 +250,9 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:06:56 - INFO - Copied corpus to data structure: ../data/raw/corpus.csv\n",
-"2025-09-08 18:06:58 - INFO - Copied embeddings to data structure: ../data/processed/embeddings.csv\n",
-"2025-09-08 18:06:59 - INFO - Copied tokenizer to data structure: ../artifacts/tokenizer\n"
+"2025-09-08 19:29:24 - INFO - Copied corpus to data structure: ../data/raw/corpus.csv\n",
+"2025-09-08 19:29:26 - INFO - Copied embeddings to data structure: ../data/processed/embeddings.csv\n",
+"2025-09-08 19:29:26 - INFO - Copied tokenizer to data structure: ../artifacts/tokenizer\n"
 ]
 }
 ],
@@ -309,111 +309,16 @@
 {
 "cell_type": "code",
 "execution_count": 9,
-"id": "fcbffbf6-ddcf-4a04-bfca-57141e7cb61d",
-"metadata": {},
-"outputs": [
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"[NeMo W 2025-09-08 18:09:57 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
-" Train config : \n",
-" data_file: /home/yzhang/data/nlp/bert/47316/hdf5/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/books_wiki_en_corpus/training/\n",
-" max_predictions_per_seq: 80\n",
-" batch_size: 16\n",
-" shuffle: true\n",
-" num_samples: -1\n",
-" num_workers: 2\n",
-" drop_last: false\n",
-" pin_memory: false\n",
-" \n",
-"[NeMo W 2025-09-08 18:09:59 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[NeMo I 2025-09-08 18:09:59 modelPT:728] Optimizer config = AdamW (\n",
-" Parameter Group 0\n",
-" amsgrad: False\n",
-" betas: (0.9, 0.999)\n",
-" capturable: False\n",
-" differentiable: False\n",
-" eps: 1e-08\n",
-" foreach: None\n",
-" fused: None\n",
-" lr: 4.375e-05\n",
-" maximize: False\n",
-" weight_decay: 0.01\n",
-" )\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"[NeMo W 2025-09-08 18:09:59 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
-" Scheduler will not be instantiated !\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[NeMo I 2025-09-08 18:10:01 save_restore_connector:249] Model BERTLMModel was successfully restored from /home/jovyan/datafabric/Bertlargeuncased/bertlargeuncased.nemo.\n"
-]
-}
-],
-"source": [
-"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-"bert_model = BERTLMModel.restore_from(BERT_MODEL_DATAFABRIC_PATH, strict=False).to(device)\n",
-"bert_model.eval() \n",
-"\n",
-"wrapped_model = BERTModelWithHiddenStates(bert_model) #it doesn't have oficial nemo export function so its necessary to recreate the model as torch to use torch conversion\n",
-"\n",
-"batch_size = 1\n",
-"seq_len = 128\n",
-"vocab_size = 30522\n",
-"\n",
-"input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), dtype=torch.long)\n",
-"attention_mask = torch.ones((batch_size, seq_len), dtype=torch.long)\n",
-"token_type_ids = torch.zeros((batch_size, seq_len), dtype=torch.long)\n",
-"\n",
-"model_configs = [\n",
-" ModelExportConfig(\n",
-" model=wrapped_model, # 🚀 Pre-loaded model object!\n",
-" model_name=\"bert_tourism_onnx\", # ONNX file naming\n",
-" input_sample=( \n",
-" input_ids.to(device),\n",
-" attention_mask.to(device),\n",
-" token_type_ids.to(device)\n",
-" ),\n",
-" input_names=[\"input_ids\", \"attention_mask\", \"token_type_ids\"],\n",
-" output_names=[\"embedding\"],\n",
-" dynamic_axes={\n",
-" \"input_ids\": {0: \"batch\", 1: \"sequence\"},\n",
-" \"attention_mask\": {0: \"batch\", 1: \"sequence\"},\n",
-" \"token_type_ids\": {0: \"batch\", 1: \"sequence\"},\n",
-" \"embedding\": {0: \"batch_size\"}\n",
-" },\n",
-" ) \n",
-"]"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 10,
 "id": "a3476776",
 "metadata": {},
 "outputs": [
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:10:01 - INFO - Starting the experiment: BERT_Tourism_Experiment\n",
-"2025-09-08 18:10:01 - INFO - Using MLflow tracking URI: /phoenix/mlflow\n",
-"[NeMo W 2025-09-08 18:10:39 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
+"2025-09-08 19:29:26 - INFO - Starting the experiment: BERT_Tourism_Experiment\n",
+"2025-09-08 19:29:26 - INFO - Using MLflow tracking URI: /phoenix/mlflow\n",
+"[NeMo W 2025-09-08 19:30:00 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
 " Train config : \n",
 " data_file: /home/yzhang/data/nlp/bert/47316/hdf5/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/books_wiki_en_corpus/training/\n",
 " max_predictions_per_seq: 80\n",
@@ -424,14 +329,14 @@
 " drop_last: false\n",
 " pin_memory: false\n",
 " \n",
-"[NeMo W 2025-09-08 18:10:40 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
+"[NeMo W 2025-09-08 19:30:02 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:10:40 modelPT:728] Optimizer config = AdamW (\n",
+"[NeMo I 2025-09-08 19:30:02 modelPT:728] Optimizer config = AdamW (\n",
 " Parameter Group 0\n",
 " amsgrad: False\n",
 " betas: (0.9, 0.999)\n",
@@ -450,44 +355,43 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:10:40 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
+"[NeMo W 2025-09-08 19:30:02 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
 " Scheduler will not be instantiated !\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:10:41 save_restore_connector:249] Model BERTLMModel was successfully restored from /tmp/model_artifacts/models/bertlargeuncased.nemo.\n"
+"[NeMo I 2025-09-08 19:30:03 save_restore_connector:249] Model BERTLMModel was successfully restored from /tmp/model_artifacts/models/bertlargeuncased.nemo.\n"
 ]
 },
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:10:41 - INFO - 🔧 Generating ONNX model(s) for specified models...\n",
-"2025-09-08 18:10:41 - INFO - 🔄 Converting pytorch model: bert_tourism_onnx\n",
-"2025-09-08 18:10:41 - INFO - 📁 Model directory: bert_tourism_onnx\n",
-"2025-09-08 18:10:41 - INFO - 🔍 Model identified as: pytorch\n",
-"2025-09-08 18:10:41 - INFO - 🔄 Exporting loaded PyTorch model with opset 12...\n",
-"2025-09-08 18:11:12 - INFO - ✅ PyTorch model exported to: bert_tourism_onnx/model.onnx\n",
-"2025-09-08 18:11:12 - INFO - ✅ Converted bert_tourism_onnx to directory: bert_tourism_onnx\n",
-"2025-09-08 18:11:12 - INFO - 📦 Added model directory artifact: model_directory -> bert_tourism_onnx\n",
-"2025-09-08 18:11:12 - INFO - No Triton structure requested, using model directories as-is\n",
-"2025/09/08 18:11:12 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n",
-"2025-09-08 18:11:44 - INFO - Model logged with artifacts: ['model_directory']\n",
-"2025-09-08 18:11:44 - INFO - ✅ Model logged with model directory created!\n",
+"2025-09-08 19:30:04 - INFO - 🔧 Generating ONNX model(s) for specified models...\n",
+"2025-09-08 19:30:04 - INFO - 🔄 Converting pytorch model: bert_tourism_onnx\n",
+"2025-09-08 19:30:04 - INFO - 📁 Model directory: bert_tourism_onnx\n",
+"2025-09-08 19:30:04 - INFO - 🔍 Model identified as: pytorch\n",
+"2025-09-08 19:30:04 - INFO - 🔄 Exporting loaded PyTorch model with opset 12...\n",
+"2025-09-08 19:30:34 - INFO - ✅ PyTorch model exported to: bert_tourism_onnx/model.onnx\n",
+"2025-09-08 19:30:34 - INFO - ✅ Converted bert_tourism_onnx to directory: bert_tourism_onnx\n",
+"2025-09-08 19:30:34 - INFO - 📦 Added model directory artifact: model_directory -> bert_tourism_onnx\n",
+"2025-09-08 19:30:34 - INFO - No Triton structure requested, using model directories as-is\n",
+"2025-09-08 19:31:05 - INFO - Model logged with artifacts: ['model_directory']\n",
+"2025-09-08 19:31:05 - INFO - ✅ Model logged with model directory created!\n",
 "Registered model 'BERT_Tourism_Model' already exists. Creating a new version of this model...\n",
-"2025/09/08 18:11:46 WARNING mlflow.tracking._model_registry.fluent: Run with id 44e291bed47646fba60df08727e859f9 has no artifacts at artifact path 'BERT_Tourism_Model', registering model based on models:/m-e5dbf9c8b6864246867ae9dd568b8a41 instead\n",
-"Created version '6' of model 'BERT_Tourism_Model'.\n",
-"2025-09-08 18:11:49 - INFO - ✅ Model 'BERT_Tourism_Model' successfully logged and registered under experiment 'BERT_Tourism_Experiment'.\n"
+"2025/09/08 19:31:10 WARNING mlflow.tracking._model_registry.fluent: Run with id f26b2629be514f5198e80ac6b1d4043d has no artifacts at artifact path 'BERT_Tourism_Model', registering model based on models:/m-756a73fd51eb411ea7878ac7bc41311e instead\n",
+"Created version '12' of model 'BERT_Tourism_Model'.\n",
+"2025-09-08 19:31:15 - INFO - ✅ Model 'BERT_Tourism_Model' successfully logged and registered under experiment 'BERT_Tourism_Experiment'.\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"CPU times: user 25.7 s, sys: 11.8 s, total: 37.4 s\n",
+"CPU times: user 26.3 s, sys: 12.6 s, total: 38.9 s\n",
 "Wall time: 1min 48s\n"
 ]
 }
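The log lines in the hunk above come from the notebook's MLflow logging helper, which logs the exported ONNX directory as an artifact and registers a new version of the existing model name. A minimal sketch of the underlying MLflow pattern, assuming a hypothetical model_code.py as the models-from-code entry point (the tracking URI, experiment, and model names are taken from the logs; everything else is an assumption):

import mlflow

mlflow.set_tracking_uri("/phoenix/mlflow")
mlflow.set_experiment("BERT_Tourism_Experiment")

with mlflow.start_run():
    mlflow.pyfunc.log_model(
        name="BERT_Tourism_Model",                            # `name` rather than the deprecated `artifact_path`
        python_model="model_code.py",                         # models-from-code: hypothetical script defining the pyfunc model
        artifacts={"model_directory": "bert_tourism_onnx"},   # exported ONNX directory logged alongside the model
        registered_model_name="BERT_Tourism_Model",           # creates a new version under the existing registered name
    )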
@@ -556,15 +460,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 10,
 "id": "18436de8-af7f-4573-a1b7-01084b7d1633",
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Latest registered version of 'BERT_Tourism_Model': 6\n",
+"Latest registered version of 'BERT_Tourism_Model': 12\n",
 "Signature: inputs: \n",
 " ['query': string (required)]\n",
 "outputs: \n",
@@ -595,15 +499,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": 11,
 "id": "a68f80f5",
 "metadata": {},
 "outputs": [
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:14:47 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
+"[NeMo W 2025-09-08 19:33:46 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
 " Train config : \n",
 " data_file: /home/yzhang/data/nlp/bert/47316/hdf5/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/books_wiki_en_corpus/training/\n",
 " max_predictions_per_seq: 80\n",
@@ -614,14 +518,14 @@
 " drop_last: false\n",
 " pin_memory: false\n",
 " \n",
-"[NeMo W 2025-09-08 18:14:48 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
+"[NeMo W 2025-09-08 19:33:47 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:14:48 modelPT:728] Optimizer config = AdamW (\n",
+"[NeMo I 2025-09-08 19:33:47 modelPT:728] Optimizer config = AdamW (\n",
 " Parameter Group 0\n",
 " amsgrad: False\n",
 " betas: (0.9, 0.999)\n",
@@ -640,18 +544,18 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:14:48 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
+"[NeMo W 2025-09-08 19:33:47 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
 " Scheduler will not be instantiated !\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:14:49 save_restore_connector:249] Model BERTLMModel was successfully restored from /phoenix/mlflow/245450959274739937/models/m-e5dbf9c8b6864246867ae9dd568b8a41/artifacts/data/model_artifacts/models/bertlargeuncased.nemo.\n",
-"Successfully loaded model 'BERT_Tourism_Model' version 6 for inference.\n",
-"CPU times: user 35.6 s, sys: 11.4 s, total: 47 s\n",
-"Wall time: 2min 56s\n"
+"[NeMo I 2025-09-08 19:33:48 save_restore_connector:249] Model BERTLMModel was successfully restored from /phoenix/mlflow/245450959274739937/models/m-756a73fd51eb411ea7878ac7bc41311e/artifacts/data/model_artifacts/models/bertlargeuncased.nemo.\n",
+"Successfully loaded model 'BERT_Tourism_Model' version 12 for inference.\n",
+"CPU times: user 31.6 s, sys: 10.5 s, total: 42.1 s\n",
+"Wall time: 2min 28s\n"
 ]
 }
 ],
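The hunk above shows the registered version being loaded back for inference. A minimal sketch of that step with the generic pyfunc API, matching the required 'query' string column in the signature above (the version number comes from this run; the example query text is an assumption):

import mlflow
import pandas as pd

# Load version 12 of the registered model (the version created by this run) as a pyfunc model.
model = mlflow.pyfunc.load_model("models:/BERT_Tourism_Model/12")

# The signature declares a single required string column named 'query'.
result = model.predict(pd.DataFrame({"query": ["family-friendly beach destination with snorkeling"]}))
print(result)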
@@ -671,7 +575,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 12,
 "id": "120bd381-f0bb-428a-ac49-ad2ed81fe00c",
 "metadata": {},
 "outputs": [
@@ -711,16 +615,16 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 13,
 "id": "36c4f0a8",
 "metadata": {},
 "outputs": [
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:14:49 - INFO - ⏱️ Total execution time: 8m 14.50s\n",
-"2025-09-08 18:14:49 - INFO - ✅ Notebook execution completed successfully.\n"
+"2025-09-08 19:33:48 - INFO - ⏱️ Total execution time: 4m 45.34s\n",
+"2025-09-08 19:33:48 - INFO - ✅ Notebook execution completed successfully.\n"
 ]
 }
 ],

0 commit comments