opensearch-project · dhrubo-os · Jun 20, 2023 · Jun 12, 2023 · Jun 12, 2023 · Jun 20, 2023
@@ -1,5 +1,12 @@
 # CHANGELOG
 Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
+
+## [1.1.0]
+
+### Added
+
+- Add documentation for model group id @dhrubo-os ([#176](https://github.com/opensearch-project/opensearch-py-ml/pull/176))
+
 ## [1.0.0]    
 
 ### Added

@@ -7,6 +7,7 @@ sphinx-rtd-theme
 sphinx_rtd_theme
 nbsphinx
 pandoc
+deprecated
 # using in SentenceTransformerModel
 torch
 pyyaml

@@ -8,5 +8,7 @@
 # Integrating MLCommons plugin
 
 from opensearch_py_ml.ml_commons.ml_commons_client import MLCommonClient
+from opensearch_py_ml.ml_commons.model_execute import ModelExecute
+from opensearch_py_ml.ml_commons.model_uploader import ModelUploader
 
-__all__ = ["MLCommonClient"]
+__all__ = ["MLCommonClient", "ModelExecute", "ModelUploader"]
@@ -10,3 +10,14 @@
 MODEL_MAX_SIZE = 4_000_000_000
 BUF_SIZE = 65536  # lets read stuff in 64kb chunks!
 TIMEOUT = 120  # timeout for synchronous method calls in seconds
+META_API_ENDPOINT = "models/meta"
+MODEL_NAME_FIELD = "name"
+MODEL_VERSION_FIELD = "version"
+MODEL_FORMAT_FIELD = "model_format"
+TOTAL_CHUNKS_FIELD = "total_chunks"
+MODEL_CONFIG_FIELD = "model_config"
+MODEL_TYPE = "model_type"
+EMBEDDING_DIMENSION = "embedding_dimension"
+FRAMEWORK_TYPE = "framework_type"
+MODEL_CONTENT_HASH_VALUE = "model_content_hash_value"
+MODEL_GROUP_ID = "model_group_id"
@@ -12,7 +12,14 @@
 from deprecated.sphinx import deprecated
 from opensearchpy import OpenSearch
 
-from opensearch_py_ml.ml_commons.ml_common_utils import ML_BASE_URI, TIMEOUT
+from opensearch_py_ml.ml_commons.ml_common_utils import (
+    ML_BASE_URI,
+    MODEL_FORMAT_FIELD,
+    MODEL_GROUP_ID,
+    MODEL_NAME_FIELD,
+    MODEL_VERSION_FIELD,
+    TIMEOUT,
+)
 from opensearch_py_ml.ml_commons.model_execute import ModelExecute
 from opensearch_py_ml.ml_commons.model_uploader import ModelUploader
 
@@ -101,6 +108,7 @@ def register_model(
         self,
         model_path: str,
         model_config_path: str,
+        model_group_id: str = "",
         isVerbose: bool = False,
         deploy_model: bool = True,
         wait_until_deployed: bool = True,
@@ -128,6 +136,8 @@ def register_model(
             refer to:
             https://opensearch.org/docs/latest/ml-commons-plugin/model-serving-framework/#upload-model-to-opensearch
         :type model_config_path: string
+        :param model_group_id: Model group id
+        :type model_group_id: string
         :param isVerbose: if isVerbose is true method will print more messages. default False
         :type isVerbose: boolean
         :param deploy_model: Whether to deploy the model using uploaded model chunks
@@ -138,7 +148,7 @@ def register_model(
         :rtype: string
         """
         model_id = self._model_uploader._register_model(
-            model_path, model_config_path, isVerbose
+            model_path, model_config_path, model_group_id, isVerbose
         )
 
         # loading the model chunks from model index
@@ -179,7 +189,7 @@ def upload_pretrained_model(
         """
         # creating model meta doc
         model_config_json = {
-            "name": model_name,
+            MODEL_NAME_FIELD: model_name,
             "version": model_version,
             "model_format": model_format,
         }
@@ -196,6 +206,7 @@ def register_pretrained_model(
         model_name: str,
         model_version: str,
         model_format: str,
+        model_group_id: str = "",
         deploy_model: bool = True,
         wait_until_deployed: bool = True,
     ):
@@ -210,6 +221,8 @@ def register_pretrained_model(
         :type model_version: string
         :param model_format: "TORCH_SCRIPT" or "ONNX"
         :type model_format: string
+        :param model_group_id: Model group id
+        :type model_group_id: string
         :param deploy_model: Whether to deploy the model using uploaded model chunks
         :type deploy_model: bool
         :param wait_until_deployed: If deploy_model is true, whether to wait until the model is deployed
@@ -219,12 +232,15 @@ def register_pretrained_model(
         """
         # creating model meta doc
         model_config_json = {
-            "name": model_name,
-            "version": model_version,
-            "model_format": model_format,
+            MODEL_NAME_FIELD: model_name,
+            MODEL_VERSION_FIELD: model_version,
+            MODEL_FORMAT_FIELD: model_format,
+            MODEL_GROUP_ID: model_group_id,
         }
         model_id = self._send_model_info(model_config_json)
 
+        print(model_id)
+
         # loading the model chunks from model index
         if deploy_model:
             self.deploy_model(model_id, wait_until_deployed=wait_until_deployed)

@@ -15,9 +15,20 @@
 
 from opensearch_py_ml.ml_commons.ml_common_utils import (
     BUF_SIZE,
+    EMBEDDING_DIMENSION,
+    FRAMEWORK_TYPE,
+    META_API_ENDPOINT,
     ML_BASE_URI,
     MODEL_CHUNK_MAX_SIZE,
+    MODEL_CONFIG_FIELD,
+    MODEL_CONTENT_HASH_VALUE,
+    MODEL_FORMAT_FIELD,
+    MODEL_GROUP_ID,
     MODEL_MAX_SIZE,
+    MODEL_NAME_FIELD,
+    MODEL_TYPE,
+    MODEL_VERSION_FIELD,
+    TOTAL_CHUNKS_FIELD,
 )
 
 
@@ -26,22 +37,15 @@ class ModelUploader:
     Class for registering a model using ml-commons apis in opensearch cluster.
     """
 
-    META_API_ENDPOINT = "models/meta"
-    MODEL_NAME_FIELD = "name"
-    MODEL_VERSION_FIELD = "version"
-    MODEL_FORMAT_FIELD = "model_format"
-    TOTAL_CHUNKS_FIELD = "total_chunks"
-    MODEL_CONFIG_FIELD = "model_config"
-    MODEL_TYPE = "model_type"
-    EMBEDDING_DIMENSION = "embedding_dimension"
-    FRAMEWORK_TYPE = "framework_type"
-    MODEL_CONTENT_HASH_VALUE = "model_content_hash_value"
-
     def __init__(self, os_client: OpenSearch):
         self._client = os_client
 
     def _register_model(
-        self, model_path: str, model_meta_path: str, isVerbose: bool
+        self,
+        model_path: str,
+        model_meta_path: str,
+        model_group_id: str = "",
+        isVerbose: bool = False,
     ) -> str:
         """
         This method registers the model in the opensearch cluster using ml-common plugin's register model api.
@@ -67,6 +71,8 @@ def _register_model(
             refer to:
                 https://opensearch.org/docs/latest/ml-commons-plugin/model-serving-framework/#upload-model-to-opensearch
         :type model_meta_path: string
+        :param model_group_id: Model group id
+        :type model_group_id: string
         :param isVerbose: if isVerbose is true method will print more messages
         :type isVerbose: bool
         :return: returns model id which is created by the model metadata
@@ -89,13 +95,14 @@ def _register_model(
         model_meta_json: dict[str, Union[str, dict[str, str]]] = json.load(
             model_meta_json_file
         )
-        model_meta_json[self.TOTAL_CHUNKS_FIELD] = total_num_chunks
-        model_meta_json[self.MODEL_CONTENT_HASH_VALUE] = hash_val_model_file
+        model_meta_json[TOTAL_CHUNKS_FIELD] = total_num_chunks
+        model_meta_json[MODEL_CONTENT_HASH_VALUE] = hash_val_model_file
+        model_meta_json[MODEL_GROUP_ID] = model_group_id
 
         if self._check_mandatory_field(model_meta_json):
             meta_output: Union[bool, Any] = self._client.transport.perform_request(
                 method="POST",
-                url=f"{ML_BASE_URI}/{self.META_API_ENDPOINT}",
+                url=f"{ML_BASE_URI}/{META_API_ENDPOINT}",
                 body=model_meta_json,
             )
             print(
@@ -152,30 +159,30 @@ def _check_mandatory_field(self, model_meta: dict) -> bool:
         """
 
         if model_meta:
-            if not model_meta.get(self.MODEL_NAME_FIELD):
-                raise ValueError(f"{self.MODEL_NAME_FIELD} can not be empty")
-            if not model_meta.get(self.MODEL_VERSION_FIELD):
-                raise ValueError(f"{self.MODEL_VERSION_FIELD} can not be empty")
-            if not model_meta.get(self.MODEL_FORMAT_FIELD):
-                raise ValueError(f"{self.MODEL_FORMAT_FIELD} can not be empty")
-            if not model_meta.get(self.MODEL_CONTENT_HASH_VALUE):
-                raise ValueError(f"{self.MODEL_CONTENT_HASH_VALUE} can not be empty")
-            if not model_meta.get(self.TOTAL_CHUNKS_FIELD):
-                raise ValueError(f"{self.TOTAL_CHUNKS_FIELD} can not be empty")
-            if not model_meta.get(self.MODEL_CONFIG_FIELD):
-                raise ValueError(f"{self.MODEL_CONFIG_FIELD} can not be empty")
+            if not model_meta.get(MODEL_NAME_FIELD):
+                raise ValueError(f"{MODEL_NAME_FIELD} can not be empty")
+            if not model_meta.get(MODEL_VERSION_FIELD):
+                raise ValueError(f"{MODEL_VERSION_FIELD} can not be empty")
+            if not model_meta.get(MODEL_FORMAT_FIELD):
+                raise ValueError(f"{MODEL_FORMAT_FIELD} can not be empty")
+            if not model_meta.get(MODEL_CONTENT_HASH_VALUE):
+                raise ValueError(f"{MODEL_CONTENT_HASH_VALUE} can not be empty")
+            if not model_meta.get(TOTAL_CHUNKS_FIELD):
+                raise ValueError(f"{TOTAL_CHUNKS_FIELD} can not be empty")
+            if not model_meta.get(MODEL_CONFIG_FIELD):
+                raise ValueError(f"{MODEL_CONFIG_FIELD} can not be empty")
             else:
-                if not isinstance(model_meta.get(self.MODEL_CONFIG_FIELD), dict):
+                if not isinstance(model_meta.get(MODEL_CONFIG_FIELD), dict):
                     raise TypeError(
-                        f"{self.MODEL_CONFIG_FIELD} is expecting to be an object"
+                        f"{MODEL_CONFIG_FIELD} is expecting to be an object"
                     )
-                model_config = model_meta.get(self.MODEL_CONFIG_FIELD)
-                if not model_config.get(self.MODEL_TYPE):
-                    raise ValueError(f"{self.MODEL_TYPE} can not be empty")
-                if not model_config.get(self.EMBEDDING_DIMENSION):
-                    raise ValueError(f"{self.EMBEDDING_DIMENSION} can not be empty")
-                if not model_config.get(self.FRAMEWORK_TYPE):
-                    raise ValueError(f"{self.FRAMEWORK_TYPE} can not be empty")
+                model_config = model_meta.get(MODEL_CONFIG_FIELD)
+                if not model_config.get(MODEL_TYPE):
+                    raise ValueError(f"{MODEL_TYPE} can not be empty")
+                if not model_config.get(EMBEDDING_DIMENSION):
+                    raise ValueError(f"{EMBEDDING_DIMENSION} can not be empty")
+                if not model_config.get(FRAMEWORK_TYPE):
+                    raise ValueError(f"{FRAMEWORK_TYPE} can not be empty")
             return True
         else:
             raise ValueError("Model metadata can't be empty")

@@ -318,7 +318,10 @@ def test_integration_model_train_register_full_cycle():
         raised = False
         try:
             ml_client.register_model(
-                MODEL_PATH, MODEL_CONFIG_FILE_PATH, deploy_model=True, isVerbose=True
+                model_path=MODEL_PATH,
+                model_config_path=MODEL_CONFIG_FILE_PATH,
+                deploy_model=True,
+                isVerbose=True,
             )
         except:  # noqa: E722
             raised = True
@@ -327,7 +330,10 @@ def test_integration_model_train_register_full_cycle():
         raised = False
         try:
             model_id = ml_client.register_model(
-                MODEL_PATH, MODEL_CONFIG_FILE_PATH, deploy_model=False, isVerbose=True
+                model_path=MODEL_PATH,
+                model_config_path=MODEL_CONFIG_FILE_PATH,
+                deploy_model=False,
+                isVerbose=True,
             )
             print("Model_id:", model_id)
         except:  # noqa: E722