
Commit 539b308

Merge branch 'feat/mlflow-models-from-code-migration-vacation-recommendation-with-bert' of https://github.com/HPInc/AI-Blueprints into feat/mlflow-models-from-code-migration-vacation-recommendation-with-bert

2 parents: 8d7ffe6 + 01709ab

File tree

5 files changed (+115 -184 lines)
Binary file not shown.

ngc-integration/vacation-recommendation-with-bert/notebooks/register-model.ipynb

Lines changed: 44 additions, 140 deletions
@@ -74,7 +74,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:06:35 - INFO - Notebook execution started.\n"
+"2025-09-08 19:29:03 - INFO - Notebook execution started.\n"
 ]
 }
 ],
@@ -102,8 +102,8 @@
 "output_type": "stream",
 "text": [
 "Note: you may need to restart the kernel to use updated packages.\n",
-"CPU times: user 96.5 ms, sys: 61.4 ms, total: 158 ms\n",
-"Wall time: 3.76 s\n"
+"CPU times: user 97 ms, sys: 41 ms, total: 138 ms\n",
+"Wall time: 3.87 s\n"
 ]
 }
 ],
@@ -123,7 +123,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:06:46 nemo_logging:349] /opt/conda/envs/aistudio/lib/python3.10/site-packages/_distutils_hack/__init__.py:53: UserWarning: Reliance on distutils from stdlib is deprecated. Users must rely on setuptools to provide the distutils module. Avoid importing distutils or import setuptools first, and avoid setting SETUPTOOLS_USE_DISTUTILS=stdlib. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml\n",
+"[NeMo W 2025-09-08 19:29:14 nemo_logging:349] /opt/conda/envs/aistudio/lib/python3.10/site-packages/_distutils_hack/__init__.py:53: UserWarning: Reliance on distutils from stdlib is deprecated. Users must rely on setuptools to provide the distutils module. Avoid importing distutils or import setuptools first, and avoid setting SETUPTOOLS_USE_DISTUTILS=stdlib. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml\n",
 " warnings.warn(\n",
 " \n"
 ]
@@ -250,9 +250,9 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:06:56 - INFO - Copied corpus to data structure: ../data/raw/corpus.csv\n",
-"2025-09-08 18:06:58 - INFO - Copied embeddings to data structure: ../data/processed/embeddings.csv\n",
-"2025-09-08 18:06:59 - INFO - Copied tokenizer to data structure: ../artifacts/tokenizer\n"
+"2025-09-08 19:29:24 - INFO - Copied corpus to data structure: ../data/raw/corpus.csv\n",
+"2025-09-08 19:29:26 - INFO - Copied embeddings to data structure: ../data/processed/embeddings.csv\n",
+"2025-09-08 19:29:26 - INFO - Copied tokenizer to data structure: ../artifacts/tokenizer\n"
 ]
 }
 ],
@@ -309,111 +309,16 @@
 {
 "cell_type": "code",
 "execution_count": 9,
-"id": "fcbffbf6-ddcf-4a04-bfca-57141e7cb61d",
-"metadata": {},
-"outputs": [
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"[NeMo W 2025-09-08 18:09:57 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
-" Train config : \n",
-" data_file: /home/yzhang/data/nlp/bert/47316/hdf5/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/books_wiki_en_corpus/training/\n",
-" max_predictions_per_seq: 80\n",
-" batch_size: 16\n",
-" shuffle: true\n",
-" num_samples: -1\n",
-" num_workers: 2\n",
-" drop_last: false\n",
-" pin_memory: false\n",
-" \n",
-"[NeMo W 2025-09-08 18:09:59 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[NeMo I 2025-09-08 18:09:59 modelPT:728] Optimizer config = AdamW (\n",
-" Parameter Group 0\n",
-" amsgrad: False\n",
-" betas: (0.9, 0.999)\n",
-" capturable: False\n",
-" differentiable: False\n",
-" eps: 1e-08\n",
-" foreach: None\n",
-" fused: None\n",
-" lr: 4.375e-05\n",
-" maximize: False\n",
-" weight_decay: 0.01\n",
-" )\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"[NeMo W 2025-09-08 18:09:59 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
-" Scheduler will not be instantiated !\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[NeMo I 2025-09-08 18:10:01 save_restore_connector:249] Model BERTLMModel was successfully restored from /home/jovyan/datafabric/Bertlargeuncased/bertlargeuncased.nemo.\n"
-]
-}
-],
-"source": [
-"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-"bert_model = BERTLMModel.restore_from(BERT_MODEL_DATAFABRIC_PATH, strict=False).to(device)\n",
-"bert_model.eval() \n",
-"\n",
-"wrapped_model = BERTModelWithHiddenStates(bert_model) #it doesn't have oficial nemo export function so its necessary to recreate the model as torch to use torch conversion\n",
-"\n",
-"batch_size = 1\n",
-"seq_len = 128\n",
-"vocab_size = 30522\n",
-"\n",
-"input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), dtype=torch.long)\n",
-"attention_mask = torch.ones((batch_size, seq_len), dtype=torch.long)\n",
-"token_type_ids = torch.zeros((batch_size, seq_len), dtype=torch.long)\n",
-"\n",
-"model_configs = [\n",
-" ModelExportConfig(\n",
-" model=wrapped_model, # 🚀 Pre-loaded model object!\n",
-" model_name=\"bert_tourism_onnx\", # ONNX file naming\n",
-" input_sample=( \n",
-" input_ids.to(device),\n",
-" attention_mask.to(device),\n",
-" token_type_ids.to(device)\n",
-" ),\n",
-" input_names=[\"input_ids\", \"attention_mask\", \"token_type_ids\"],\n",
-" output_names=[\"embedding\"],\n",
-" dynamic_axes={\n",
-" \"input_ids\": {0: \"batch\", 1: \"sequence\"},\n",
-" \"attention_mask\": {0: \"batch\", 1: \"sequence\"},\n",
-" \"token_type_ids\": {0: \"batch\", 1: \"sequence\"},\n",
-" \"embedding\": {0: \"batch_size\"}\n",
-" },\n",
-" ) \n",
-"]"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 10,
 "id": "a3476776",
 "metadata": {},
 "outputs": [
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:10:01 - INFO - Starting the experiment: BERT_Tourism_Experiment\n",
-"2025-09-08 18:10:01 - INFO - Using MLflow tracking URI: /phoenix/mlflow\n",
-"[NeMo W 2025-09-08 18:10:39 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
+"2025-09-08 19:29:26 - INFO - Starting the experiment: BERT_Tourism_Experiment\n",
+"2025-09-08 19:29:26 - INFO - Using MLflow tracking URI: /phoenix/mlflow\n",
+"[NeMo W 2025-09-08 19:30:00 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
 " Train config : \n",
 " data_file: /home/yzhang/data/nlp/bert/47316/hdf5/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/books_wiki_en_corpus/training/\n",
 " max_predictions_per_seq: 80\n",
@@ -424,14 +329,14 @@
 " drop_last: false\n",
 " pin_memory: false\n",
 " \n",
-"[NeMo W 2025-09-08 18:10:40 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
+"[NeMo W 2025-09-08 19:30:02 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:10:40 modelPT:728] Optimizer config = AdamW (\n",
+"[NeMo I 2025-09-08 19:30:02 modelPT:728] Optimizer config = AdamW (\n",
 " Parameter Group 0\n",
 " amsgrad: False\n",
 " betas: (0.9, 0.999)\n",
@@ -450,44 +355,43 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:10:40 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
+"[NeMo W 2025-09-08 19:30:02 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
 " Scheduler will not be instantiated !\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:10:41 save_restore_connector:249] Model BERTLMModel was successfully restored from /tmp/model_artifacts/models/bertlargeuncased.nemo.\n"
+"[NeMo I 2025-09-08 19:30:03 save_restore_connector:249] Model BERTLMModel was successfully restored from /tmp/model_artifacts/models/bertlargeuncased.nemo.\n"
 ]
 },
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:10:41 - INFO - 🔧 Generating ONNX model(s) for specified models...\n",
-"2025-09-08 18:10:41 - INFO - 🔄 Converting pytorch model: bert_tourism_onnx\n",
-"2025-09-08 18:10:41 - INFO - 📁 Model directory: bert_tourism_onnx\n",
-"2025-09-08 18:10:41 - INFO - 🔍 Model identified as: pytorch\n",
-"2025-09-08 18:10:41 - INFO - 🔄 Exporting loaded PyTorch model with opset 12...\n",
-"2025-09-08 18:11:12 - INFO - ✅ PyTorch model exported to: bert_tourism_onnx/model.onnx\n",
-"2025-09-08 18:11:12 - INFO - ✅ Converted bert_tourism_onnx to directory: bert_tourism_onnx\n",
-"2025-09-08 18:11:12 - INFO - 📦 Added model directory artifact: model_directory -> bert_tourism_onnx\n",
-"2025-09-08 18:11:12 - INFO - No Triton structure requested, using model directories as-is\n",
-"2025/09/08 18:11:12 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n",
-"2025-09-08 18:11:44 - INFO - Model logged with artifacts: ['model_directory']\n",
-"2025-09-08 18:11:44 - INFO - ✅ Model logged with model directory created!\n",
+"2025-09-08 19:30:04 - INFO - 🔧 Generating ONNX model(s) for specified models...\n",
+"2025-09-08 19:30:04 - INFO - 🔄 Converting pytorch model: bert_tourism_onnx\n",
+"2025-09-08 19:30:04 - INFO - 📁 Model directory: bert_tourism_onnx\n",
+"2025-09-08 19:30:04 - INFO - 🔍 Model identified as: pytorch\n",
+"2025-09-08 19:30:04 - INFO - 🔄 Exporting loaded PyTorch model with opset 12...\n",
+"2025-09-08 19:30:34 - INFO - ✅ PyTorch model exported to: bert_tourism_onnx/model.onnx\n",
+"2025-09-08 19:30:34 - INFO - ✅ Converted bert_tourism_onnx to directory: bert_tourism_onnx\n",
+"2025-09-08 19:30:34 - INFO - 📦 Added model directory artifact: model_directory -> bert_tourism_onnx\n",
+"2025-09-08 19:30:34 - INFO - No Triton structure requested, using model directories as-is\n",
+"2025-09-08 19:31:05 - INFO - Model logged with artifacts: ['model_directory']\n",
+"2025-09-08 19:31:05 - INFO - ✅ Model logged with model directory created!\n",
 "Registered model 'BERT_Tourism_Model' already exists. Creating a new version of this model...\n",
-"2025/09/08 18:11:46 WARNING mlflow.tracking._model_registry.fluent: Run with id 44e291bed47646fba60df08727e859f9 has no artifacts at artifact path 'BERT_Tourism_Model', registering model based on models:/m-e5dbf9c8b6864246867ae9dd568b8a41 instead\n",
-"Created version '6' of model 'BERT_Tourism_Model'.\n",
-"2025-09-08 18:11:49 - INFO - ✅ Model 'BERT_Tourism_Model' successfully logged and registered under experiment 'BERT_Tourism_Experiment'.\n"
+"2025/09/08 19:31:10 WARNING mlflow.tracking._model_registry.fluent: Run with id f26b2629be514f5198e80ac6b1d4043d has no artifacts at artifact path 'BERT_Tourism_Model', registering model based on models:/m-756a73fd51eb411ea7878ac7bc41311e instead\n",
+"Created version '12' of model 'BERT_Tourism_Model'.\n",
+"2025-09-08 19:31:15 - INFO - ✅ Model 'BERT_Tourism_Model' successfully logged and registered under experiment 'BERT_Tourism_Experiment'.\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"CPU times: user 25.7 s, sys: 11.8 s, total: 37.4 s\n",
+"CPU times: user 26.3 s, sys: 12.6 s, total: 38.9 s\n",
 "Wall time: 1min 48s\n"
 ]
 }
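The log lines in the hunk above come from the notebook's MLflow logging helper, which logs the exported ONNX directory as an artifact and registers a new version of the existing model name. A minimal sketch of the underlying MLflow pattern, assuming a hypothetical model_code.py as the models-from-code entry point (the tracking URI, experiment, and model names are taken from the logs; everything else is an assumption):

import mlflow

mlflow.set_tracking_uri("/phoenix/mlflow")
mlflow.set_experiment("BERT_Tourism_Experiment")

with mlflow.start_run():
    mlflow.pyfunc.log_model(
        name="BERT_Tourism_Model",                            # `name` rather than the deprecated `artifact_path`
        python_model="model_code.py",                         # models-from-code: hypothetical script defining the pyfunc model
        artifacts={"model_directory": "bert_tourism_onnx"},   # exported ONNX directory logged alongside the model
        registered_model_name="BERT_Tourism_Model",           # creates a new version under the existing registered name
    )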
@@ -556,15 +460,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 10,
 "id": "18436de8-af7f-4573-a1b7-01084b7d1633",
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Latest registered version of 'BERT_Tourism_Model': 6\n",
+"Latest registered version of 'BERT_Tourism_Model': 12\n",
 "Signature: inputs: \n",
 " ['query': string (required)]\n",
 "outputs: \n",
@@ -595,15 +499,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": 11,
 "id": "a68f80f5",
 "metadata": {},
 "outputs": [
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:14:47 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
+"[NeMo W 2025-09-08 19:33:46 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
 " Train config : \n",
 " data_file: /home/yzhang/data/nlp/bert/47316/hdf5/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/books_wiki_en_corpus/training/\n",
 " max_predictions_per_seq: 80\n",
@@ -614,14 +518,14 @@
 " drop_last: false\n",
 " pin_memory: false\n",
 " \n",
-"[NeMo W 2025-09-08 18:14:48 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
+"[NeMo W 2025-09-08 19:33:47 modelPT:617] Trainer wasn't specified in model constructor. Make sure that you really wanted it.\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:14:48 modelPT:728] Optimizer config = AdamW (\n",
+"[NeMo I 2025-09-08 19:33:47 modelPT:728] Optimizer config = AdamW (\n",
 " Parameter Group 0\n",
 " amsgrad: False\n",
 " betas: (0.9, 0.999)\n",
@@ -640,18 +544,18 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"[NeMo W 2025-09-08 18:14:48 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
+"[NeMo W 2025-09-08 19:33:47 lr_scheduler:890] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !\n",
 " Scheduler will not be instantiated !\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"[NeMo I 2025-09-08 18:14:49 save_restore_connector:249] Model BERTLMModel was successfully restored from /phoenix/mlflow/245450959274739937/models/m-e5dbf9c8b6864246867ae9dd568b8a41/artifacts/data/model_artifacts/models/bertlargeuncased.nemo.\n",
-"Successfully loaded model 'BERT_Tourism_Model' version 6 for inference.\n",
-"CPU times: user 35.6 s, sys: 11.4 s, total: 47 s\n",
-"Wall time: 2min 56s\n"
+"[NeMo I 2025-09-08 19:33:48 save_restore_connector:249] Model BERTLMModel was successfully restored from /phoenix/mlflow/245450959274739937/models/m-756a73fd51eb411ea7878ac7bc41311e/artifacts/data/model_artifacts/models/bertlargeuncased.nemo.\n",
+"Successfully loaded model 'BERT_Tourism_Model' version 12 for inference.\n",
+"CPU times: user 31.6 s, sys: 10.5 s, total: 42.1 s\n",
+"Wall time: 2min 28s\n"
 ]
 }
 ],
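The hunk above shows the registered version being loaded back for inference. A minimal sketch of that step with the generic pyfunc API, matching the required 'query' string column in the signature above (the version number comes from this run; the example query text is an assumption):

import mlflow
import pandas as pd

# Load version 12 of the registered model (the version created by this run) as a pyfunc model.
model = mlflow.pyfunc.load_model("models:/BERT_Tourism_Model/12")

# The signature declares a single required string column named 'query'.
result = model.predict(pd.DataFrame({"query": ["family-friendly beach destination with snorkeling"]}))
print(result)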
@@ -671,7 +575,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 12,
 "id": "120bd381-f0bb-428a-ac49-ad2ed81fe00c",
 "metadata": {},
 "outputs": [
@@ -711,16 +615,16 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 13,
 "id": "36c4f0a8",
 "metadata": {},
 "outputs": [
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"2025-09-08 18:14:49 - INFO - ⏱️ Total execution time: 8m 14.50s\n",
-"2025-09-08 18:14:49 - INFO - ✅ Notebook execution completed successfully.\n"
+"2025-09-08 19:33:48 - INFO - ⏱️ Total execution time: 4m 45.34s\n",
+"2025-09-08 19:33:48 - INFO - ✅ Notebook execution completed successfully.\n"
 ]
 }
 ],

0 commit comments