Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding documentation for model group id #176

Merged
merged 5 commits into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# CHANGELOG
Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## [1.1.0]

### Added

- adding documentation for model group id @dhrubo-os ([#176](https://github.com/opensearch-project/opensearch-py-ml/pull/176))

## [1.0.0]

### Added
Expand Down
1 change: 1 addition & 0 deletions docs/requirements-docs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ sphinx-rtd-theme
sphinx_rtd_theme
nbsphinx
pandoc
deprecated
# using in SentenceTransformerModel
torch
pyyaml
Expand Down
230 changes: 166 additions & 64 deletions docs/source/examples/demo_ml_commons_integration.ipynb

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion opensearch_py_ml/ml_commons/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,7 @@
# Integrating MLCommons plugin

from opensearch_py_ml.ml_commons.ml_commons_client import MLCommonClient
from opensearch_py_ml.ml_commons.model_execute import ModelExecute
from opensearch_py_ml.ml_commons.model_uploader import ModelUploader

__all__ = ["MLCommonClient"]
__all__ = ["MLCommonClient", "ModelExecute", "ModelUploader"]
11 changes: 11 additions & 0 deletions opensearch_py_ml/ml_commons/ml_common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,14 @@
MODEL_MAX_SIZE = 4_000_000_000
BUF_SIZE = 65536 # read data in 64 KiB chunks
TIMEOUT = 120 # timeout for synchronous method calls in seconds
META_API_ENDPOINT = "models/meta"
MODEL_NAME_FIELD = "name"
MODEL_VERSION_FIELD = "version"
MODEL_FORMAT_FIELD = "model_format"
TOTAL_CHUNKS_FIELD = "total_chunks"
MODEL_CONFIG_FIELD = "model_config"
MODEL_TYPE = "model_type"
EMBEDDING_DIMENSION = "embedding_dimension"
FRAMEWORK_TYPE = "framework_type"
MODEL_CONTENT_HASH_VALUE = "model_content_hash_value"
MODEL_GROUP_ID = "model_group_id"
28 changes: 22 additions & 6 deletions opensearch_py_ml/ml_commons/ml_commons_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@
from deprecated.sphinx import deprecated
from opensearchpy import OpenSearch

from opensearch_py_ml.ml_commons.ml_common_utils import ML_BASE_URI, TIMEOUT
from opensearch_py_ml.ml_commons.ml_common_utils import (
ML_BASE_URI,
MODEL_FORMAT_FIELD,
MODEL_GROUP_ID,
MODEL_NAME_FIELD,
MODEL_VERSION_FIELD,
TIMEOUT,
)
from opensearch_py_ml.ml_commons.model_execute import ModelExecute
from opensearch_py_ml.ml_commons.model_uploader import ModelUploader

Expand Down Expand Up @@ -101,6 +108,7 @@ def register_model(
self,
model_path: str,
model_config_path: str,
model_group_id: str = "",
isVerbose: bool = False,
deploy_model: bool = True,
wait_until_deployed: bool = True,
Expand Down Expand Up @@ -128,6 +136,8 @@ def register_model(
refer to:
https://opensearch.org/docs/latest/ml-commons-plugin/model-serving-framework/#upload-model-to-opensearch
:type model_config_path: string
:param model_group_id: Model group id
:type model_group_id: string
:param isVerbose: if True, the method prints more messages. Defaults to False
:type isVerbose: boolean
:param deploy_model: Whether to deploy the model using uploaded model chunks
Expand All @@ -138,7 +148,7 @@ def register_model(
:rtype: string
"""
model_id = self._model_uploader._register_model(
model_path, model_config_path, isVerbose
model_path, model_config_path, model_group_id, isVerbose
)

# loading the model chunks from model index
Expand Down Expand Up @@ -179,7 +189,7 @@ def upload_pretrained_model(
"""
# creating model meta doc
model_config_json = {
"name": model_name,
MODEL_NAME_FIELD: model_name,
"version": model_version,
"model_format": model_format,
}
Expand All @@ -196,6 +206,7 @@ def register_pretrained_model(
model_name: str,
model_version: str,
model_format: str,
model_group_id: str = "",
deploy_model: bool = True,
wait_until_deployed: bool = True,
):
Expand All @@ -210,6 +221,8 @@ def register_pretrained_model(
:type model_version: string
:param model_format: "TORCH_SCRIPT" or "ONNX"
:type model_format: string
:param model_group_id: Model group id
:type model_group_id: string
:param deploy_model: Whether to deploy the model using uploaded model chunks
:type deploy_model: bool
:param wait_until_deployed: If deploy_model is true, whether to wait until the model is deployed
Expand All @@ -219,12 +232,15 @@ def register_pretrained_model(
"""
# creating model meta doc
model_config_json = {
"name": model_name,
"version": model_version,
"model_format": model_format,
MODEL_NAME_FIELD: model_name,
MODEL_VERSION_FIELD: model_version,
MODEL_FORMAT_FIELD: model_format,
MODEL_GROUP_ID: model_group_id,
}
model_id = self._send_model_info(model_config_json)

print(model_id)

# loading the model chunks from model index
if deploy_model:
self.deploy_model(model_id, wait_until_deployed=wait_until_deployed)
Expand Down
79 changes: 43 additions & 36 deletions opensearch_py_ml/ml_commons/model_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,20 @@

from opensearch_py_ml.ml_commons.ml_common_utils import (
BUF_SIZE,
EMBEDDING_DIMENSION,
FRAMEWORK_TYPE,
META_API_ENDPOINT,
ML_BASE_URI,
MODEL_CHUNK_MAX_SIZE,
MODEL_CONFIG_FIELD,
MODEL_CONTENT_HASH_VALUE,
MODEL_FORMAT_FIELD,
MODEL_GROUP_ID,
MODEL_MAX_SIZE,
MODEL_NAME_FIELD,
MODEL_TYPE,
MODEL_VERSION_FIELD,
TOTAL_CHUNKS_FIELD,
)


Expand All @@ -26,22 +37,15 @@ class ModelUploader:
Class for registering a model using ml-commons apis in opensearch cluster.
"""

META_API_ENDPOINT = "models/meta"
MODEL_NAME_FIELD = "name"
MODEL_VERSION_FIELD = "version"
MODEL_FORMAT_FIELD = "model_format"
TOTAL_CHUNKS_FIELD = "total_chunks"
MODEL_CONFIG_FIELD = "model_config"
MODEL_TYPE = "model_type"
EMBEDDING_DIMENSION = "embedding_dimension"
FRAMEWORK_TYPE = "framework_type"
MODEL_CONTENT_HASH_VALUE = "model_content_hash_value"

def __init__(self, os_client: OpenSearch):
self._client = os_client

def _register_model(
self, model_path: str, model_meta_path: str, isVerbose: bool
self,
model_path: str,
model_meta_path: str,
model_group_id: str = "",
isVerbose: bool = False,
) -> str:
"""
This method registers the model in the opensearch cluster using ml-common plugin's register model api.
Expand All @@ -67,6 +71,8 @@ def _register_model(
refer to:
https://opensearch.org/docs/latest/ml-commons-plugin/model-serving-framework/#upload-model-to-opensearch
:type model_meta_path: string
:param model_group_id: Model group id
:type model_group_id: string
:param isVerbose: if True, the method prints more messages
:type isVerbose: bool
:return: returns model id which is created by the model metadata
Expand All @@ -89,13 +95,14 @@ def _register_model(
model_meta_json: dict[str, Union[str, dict[str, str]]] = json.load(
model_meta_json_file
)
model_meta_json[self.TOTAL_CHUNKS_FIELD] = total_num_chunks
model_meta_json[self.MODEL_CONTENT_HASH_VALUE] = hash_val_model_file
model_meta_json[TOTAL_CHUNKS_FIELD] = total_num_chunks
model_meta_json[MODEL_CONTENT_HASH_VALUE] = hash_val_model_file
model_meta_json[MODEL_GROUP_ID] = model_group_id

if self._check_mandatory_field(model_meta_json):
meta_output: Union[bool, Any] = self._client.transport.perform_request(
method="POST",
url=f"{ML_BASE_URI}/{self.META_API_ENDPOINT}",
url=f"{ML_BASE_URI}/{META_API_ENDPOINT}",
body=model_meta_json,
)
print(
Expand Down Expand Up @@ -152,30 +159,30 @@ def _check_mandatory_field(self, model_meta: dict) -> bool:
"""

if model_meta:
if not model_meta.get(self.MODEL_NAME_FIELD):
raise ValueError(f"{self.MODEL_NAME_FIELD} can not be empty")
if not model_meta.get(self.MODEL_VERSION_FIELD):
raise ValueError(f"{self.MODEL_VERSION_FIELD} can not be empty")
if not model_meta.get(self.MODEL_FORMAT_FIELD):
raise ValueError(f"{self.MODEL_FORMAT_FIELD} can not be empty")
if not model_meta.get(self.MODEL_CONTENT_HASH_VALUE):
raise ValueError(f"{self.MODEL_CONTENT_HASH_VALUE} can not be empty")
if not model_meta.get(self.TOTAL_CHUNKS_FIELD):
raise ValueError(f"{self.TOTAL_CHUNKS_FIELD} can not be empty")
if not model_meta.get(self.MODEL_CONFIG_FIELD):
raise ValueError(f"{self.MODEL_CONFIG_FIELD} can not be empty")
if not model_meta.get(MODEL_NAME_FIELD):
raise ValueError(f"{MODEL_NAME_FIELD} can not be empty")
if not model_meta.get(MODEL_VERSION_FIELD):
raise ValueError(f"{MODEL_VERSION_FIELD} can not be empty")
if not model_meta.get(MODEL_FORMAT_FIELD):
raise ValueError(f"{MODEL_FORMAT_FIELD} can not be empty")
if not model_meta.get(MODEL_CONTENT_HASH_VALUE):
raise ValueError(f"{MODEL_CONTENT_HASH_VALUE} can not be empty")
if not model_meta.get(TOTAL_CHUNKS_FIELD):
raise ValueError(f"{TOTAL_CHUNKS_FIELD} can not be empty")
if not model_meta.get(MODEL_CONFIG_FIELD):
raise ValueError(f"{MODEL_CONFIG_FIELD} can not be empty")
else:
if not isinstance(model_meta.get(self.MODEL_CONFIG_FIELD), dict):
if not isinstance(model_meta.get(MODEL_CONFIG_FIELD), dict):
raise TypeError(
f"{self.MODEL_CONFIG_FIELD} is expecting to be an object"
f"{MODEL_CONFIG_FIELD} is expecting to be an object"
)
model_config = model_meta.get(self.MODEL_CONFIG_FIELD)
if not model_config.get(self.MODEL_TYPE):
raise ValueError(f"{self.MODEL_TYPE} can not be empty")
if not model_config.get(self.EMBEDDING_DIMENSION):
raise ValueError(f"{self.EMBEDDING_DIMENSION} can not be empty")
if not model_config.get(self.FRAMEWORK_TYPE):
raise ValueError(f"{self.FRAMEWORK_TYPE} can not be empty")
model_config = model_meta.get(MODEL_CONFIG_FIELD)
if not model_config.get(MODEL_TYPE):
raise ValueError(f"{MODEL_TYPE} can not be empty")
if not model_config.get(EMBEDDING_DIMENSION):
raise ValueError(f"{EMBEDDING_DIMENSION} can not be empty")
if not model_config.get(FRAMEWORK_TYPE):
raise ValueError(f"{FRAMEWORK_TYPE} can not be empty")
return True
else:
raise ValueError("Model metadata can't be empty")
Expand Down
10 changes: 8 additions & 2 deletions tests/ml_commons/test_ml_commons_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,10 @@ def test_integration_model_train_register_full_cycle():
raised = False
try:
ml_client.register_model(
MODEL_PATH, MODEL_CONFIG_FILE_PATH, deploy_model=True, isVerbose=True
model_path=MODEL_PATH,
model_config_path=MODEL_CONFIG_FILE_PATH,
deploy_model=True,
isVerbose=True,
)
except: # noqa: E722
raised = True
Expand All @@ -327,7 +330,10 @@ def test_integration_model_train_register_full_cycle():
raised = False
try:
model_id = ml_client.register_model(
MODEL_PATH, MODEL_CONFIG_FILE_PATH, deploy_model=False, isVerbose=True
model_path=MODEL_PATH,
model_config_path=MODEL_CONFIG_FILE_PATH,
deploy_model=False,
isVerbose=True,
)
print("Model_id:", model_id)
except: # noqa: E722
Expand Down