Sherif akoush/quickfix/get names from metadata endpoint #393

3 changes: 3 additions & 0 deletions .gitignore
@@ -46,3 +46,6 @@ mlruns

# Sphinx documentation
docs/_build/

# alibi .data
runtimes/alibi-explain/tests/.data/**
2 changes: 1 addition & 1 deletion Dockerfile
@@ -24,7 +24,7 @@ ENV MLSERVER_MODELS_DIR=/mnt/models \

RUN apt-get update && \
apt-get -y --no-install-recommends install \
libgomp1 libgl1-mesa-dev libglib2.0-0
libgomp1 libgl1-mesa-dev libglib2.0-0 build-essential

RUN mkdir /opt/mlserver
WORKDIR /opt/mlserver
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -24,7 +24,7 @@
author = "Seldon Technologies"

# The full version, including alpha/beta/rc tags
release = "0.6.0.dev3"
release = "0.6.0.dev4"


# -- General configuration ---------------------------------------------------
2 changes: 1 addition & 1 deletion mlserver/version.py
@@ -1 +1 @@
__version__ = "0.6.0.dev3"
__version__ = "0.6.0.dev4"
2 changes: 1 addition & 1 deletion runtimes/alibi-detect/mlserver_alibi_detect/version.py
@@ -1 +1 @@
__version__ = "0.6.0.dev3"
__version__ = "0.6.0.dev4"
@@ -15,6 +15,7 @@ class ExplainerDependencyReference:
_ANCHOR_IMAGE_TAG = "anchor_image"
_ANCHOR_TEXT_TAG = "anchor_text"
_ANCHOR_TABULAR_TAG = "anchor_tabular"
_KERNEL_SHAP_TAG = "kernel_shap"
_INTEGRATED_GRADIENTS_TAG = "integrated_gradients"


@@ -23,6 +24,10 @@ class ExplainerDependencyReference:
# and the specific alibi explain class.
# this can be simplified when alibi moves to a config based init.

# Steps to add a new explainer:
#  1. update _TAG_TO_RT_IMPL
#  2. update ExplainerEnum

_BLACKBOX_MODULDE = "mlserver_alibi_explain.explainers.black_box_runtime"
_INTEGRATED_GRADIENTS_MODULE = "mlserver_alibi_explain.explainers.integrated_gradients"

@@ -42,6 +47,11 @@ class ExplainerDependencyReference:
runtime_class=f"{_BLACKBOX_MODULDE}.AlibiExplainBlackBoxRuntime",
alibi_class="alibi.explainers.AnchorText",
),
_KERNEL_SHAP_TAG: ExplainerDependencyReference(
explainer_name=_KERNEL_SHAP_TAG,
runtime_class=f"{_BLACKBOX_MODULDE}.AlibiExplainBlackBoxRuntime",
alibi_class="alibi.explainers.KernelShap",
),
_INTEGRATED_GRADIENTS_TAG: ExplainerDependencyReference(
explainer_name=_INTEGRATED_GRADIENTS_TAG,
runtime_class=f"{_INTEGRATED_GRADIENTS_MODULE}.IntegratedGradientsWrapper",
@@ -54,6 +64,7 @@ class ExplainerEnum(str, Enum):
anchor_image = _ANCHOR_IMAGE_TAG
anchor_text = _ANCHOR_TEXT_TAG
anchor_tabular = _ANCHOR_TABULAR_TAG
kernel_shap = _KERNEL_SHAP_TAG
integrated_gradients = _INTEGRATED_GRADIENTS_TAG


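For reference, the new tag means the runtime can now be configured with explainer_type="kernel_shap". Below is a minimal sketch of such a configuration, mirroring the fixture construction used in this PR's conftest.py; the model URI and infer_uri values are placeholders, not taken from the PR.

# Hypothetical configuration selecting the new KernelShap explainer; the
# artefact URI and infer_uri below are placeholders for illustration only.
from mlserver.settings import ModelSettings, ModelParameters
from mlserver_alibi_explain.common import AlibiExplainSettings
from mlserver_alibi_explain.runtime import AlibiExplainRuntime

settings = ModelSettings(
    parallel_workers=0,
    parameters=ModelParameters(
        uri="./data/income_kernel_shap",  # saved explainer artefact (placeholder)
        extra=AlibiExplainSettings(
            explainer_type="kernel_shap",
            infer_uri="http://localhost:8080/v2/models/income-model/infer",
        ),
    ),
)
runtime = AlibiExplainRuntime(settings)  # the tag is resolved via the registry above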
55 changes: 52 additions & 3 deletions runtimes/alibi-explain/mlserver_alibi_explain/common.py
@@ -1,6 +1,7 @@
import asyncio
import contextvars
import functools
import re
from asyncio import AbstractEventLoop
from importlib import import_module
from typing import Any, Optional, Type, Callable, Awaitable, Union, List
@@ -16,7 +17,11 @@
InferenceResponse,
InferenceRequest,
Parameters,
MetadataModelResponse,
)
from mlserver.utils import generate_uuid

_DEFAULT_INPUT_NAME = "predict"

EXPLAINER_TYPE_TAG = "explainer_type"

@@ -41,7 +46,7 @@ def convert_from_bytes(output: ResponseOutput, ty: Optional[Type]) -> Any:
return literal_eval(py_str)


# TODO: add retry
# TODO: add retry and better exception handling
def remote_predict(
v2_payload: InferenceRequest, predictor_url: str
) -> InferenceResponse:
@@ -51,6 +56,19 @@ def remote_predict(
return InferenceResponse.parse_raw(response_raw.text)


def remote_metadata(url: str) -> MetadataModelResponse:
"""Get metadata from v2 endpoint"""
response_raw = requests.get(url)
Contributor:
Not that it matters much, but should we make this call asynchronous?

Member Author:
That's a good point. Looking at the caller (load), it also calls load_explainer, which we would need to make async so that the entire function (i.e. load) is truly async.

I am tempted, however, to leave that as a separate ticket?

if response_raw.status_code != 200:
raise RemoteInferenceError(response_raw.status_code, response_raw.reason)
return MetadataModelResponse.parse_raw(response_raw.text)
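
Following up on the thread above, here is a rough sketch of how the call could be made non-blocking without introducing a new HTTP client. It is purely illustrative and not part of this PR.

# Hypothetical async wrapper around the synchronous remote_metadata above,
# sketched only to illustrate the reviewer's suggestion; not part of this PR.
import asyncio
import functools

from mlserver.types import MetadataModelResponse
from mlserver_alibi_explain.common import remote_metadata


async def remote_metadata_async(url: str) -> MetadataModelResponse:
    loop = asyncio.get_event_loop()
    # requests.get inside remote_metadata blocks, so offload the whole call to
    # the default thread pool executor instead of blocking the event loop.
    return await loop.run_in_executor(None, functools.partial(remote_metadata, url))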


def construct_metadata_url(infer_url: str) -> str:
"""Construct v2 metadata endpoint from v2 infer endpoint"""
return re.sub(r"/infer$", "", infer_url)
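
As a concrete illustration of the rewrite (host and model name are placeholders):

# Illustration only: the trailing "/infer" segment is stripped, yielding the
# v2 model metadata endpoint; host and model name are placeholders.
infer_uri = "http://localhost:8080/v2/models/income-model/infer"
assert construct_metadata_url(infer_uri) == "http://localhost:8080/v2/models/income-model"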


# TODO: this is very similar to `asyncio.to_thread` (python 3.9+),
# so lets use it at some point.
def execute_async(
@@ -82,17 +100,48 @@ def import_and_get_class(class_path: str) -> type:
return klass


def to_v2_inference_request(input_data: Union[np.ndarray, List[str]]):
def to_v2_inference_request(
input_data: Union[np.ndarray, List[str]],
metadata: Optional[MetadataModelResponse],
) -> InferenceRequest:
"""
Encode numpy payload to v2 protocol.
Note: We only fetch the first-input name and the list of outputs from the metadata
endpoint currently. We should consider wider reconciliation with data types etc.
Parameters
----------
input_data
Numpy ndarray to encode
metadata
Extra metadata that can help encode the payload.
"""

# MLServer does not really care about a correct input name!
input_name = _DEFAULT_INPUT_NAME
id_name = generate_uuid()
outputs = []

if metadata is not None:
if metadata.inputs:
# we only support a big single input numpy
input_name = metadata.inputs[0].name
Contributor:
Do we also need to pass on the datatype, shape and parameters from the metadata?

Member Author:
The type and shape are inferred from the payload. I think we can add a warning if they don't match? I didn't want to get into converting the payload according to the metadata as part of this ticket.
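A possible shape of that warning, sketched from the discussion above; none of this is implemented in the PR, and metadata_input is assumed to be the MetadataTensor-like object returned by the v2 metadata endpoint.

# Hypothetical follow-up: warn when the payload disagrees with the metadata,
# rather than converting it; not part of this PR.
import logging

import numpy as np

logger = logging.getLogger(__name__)


def _warn_on_metadata_mismatch(payload: np.ndarray, metadata_input) -> None:
    expected_shape = list(metadata_input.shape or [])
    if expected_shape and len(expected_shape) != payload.ndim:
        logger.warning(
            "Payload rank %s does not match metadata shape %s for input %s",
            payload.ndim,
            expected_shape,
            metadata_input.name,
        )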

if metadata.outputs:
outputs = metadata.outputs

# For List[str] (e.g. AnchorText), we use StringCodec for input
input_payload_codec = StringCodec if type(input_data) == list else NumpyCodec
v2_request = InferenceRequest(
id=id_name,
Contributor:
I'm not 100% sure, but I think the executor will set this to a unique value. Therefore, should we leave the id field empty?

Member Author:
I am not sure about SC; maybe that is the case. However, locally it will not work with a None id (for the Triton case).

parameters=Parameters(content_type=input_payload_codec.ContentType),
# TODO: we probably need to tell alibi about the expected types to use
# or even whether it is a probability of classes or targets etc
inputs=[
input_payload_codec.encode_request_input( # type: ignore
name="predict", payload=input_data
name=input_name, payload=input_data
)
],
outputs=outputs,
)
return v2_request
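
A quick sketch of how the new signature behaves; the metadata below is hand-built to mimic what remote_metadata would return, and the names and shapes are illustrative only.

# Illustrative call of the new signature; metadata values are placeholders.
import numpy as np

from mlserver.types import MetadataModelResponse, MetadataTensor
from mlserver_alibi_explain.common import to_v2_inference_request

metadata = MetadataModelResponse(
    name="income-model",
    platform="sklearn",
    inputs=[MetadataTensor(name="income", datatype="FP64", shape=[-1, 12])],
    outputs=[MetadataTensor(name="probabilities", datatype="FP64", shape=[-1, 2])],
)

request = to_v2_inference_request(np.zeros((1, 12)), metadata)
assert request.inputs[0].name == "income"  # input name now comes from metadata
# request.outputs is populated from metadata.outputs instead of being left empty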
@@ -1,14 +1,17 @@
from typing import Type, Any, Dict, List, Union
from typing import Type, Any, Dict, List, Union, Optional

import numpy as np
from alibi.api.interfaces import Explanation, Explainer

from mlserver import ModelSettings
from mlserver.codecs import NumpyCodec
from mlserver.types import MetadataModelResponse
from mlserver_alibi_explain.common import (
AlibiExplainSettings,
remote_predict,
to_v2_inference_request,
remote_metadata,
construct_metadata_url,
)
from mlserver_alibi_explain.runtime import AlibiExplainRuntimeBase

@@ -29,11 +32,15 @@ def __init__(self, settings: ModelSettings, explainer_class: Type[Explainer]):
explainer_settings = AlibiExplainSettings(**extra) # type: ignore

self.infer_uri = explainer_settings.infer_uri
self.infer_metadata: Optional[MetadataModelResponse] = None

# TODO: validate the settings are ok with this specific explainer
super().__init__(settings, explainer_settings)

async def load(self) -> bool:
# get the metadata of the underlying inference model via v2 metadata endpoint
self.infer_metadata = remote_metadata(construct_metadata_url(self.infer_uri))

# TODO: use init explainer field instead?
if self.alibi_explain_settings.init_parameters is not None:
init_parameters = self.alibi_explain_settings.init_parameters
@@ -59,8 +66,7 @@ def _infer_impl(self, input_data: Union[np.ndarray, List[str]]) -> np.ndarray:
# in the case of AnchorText, we have a list of strings instead though.
# TODO: for now we only support v2 protocol, do we need more support?

v2_request = to_v2_inference_request(input_data)

v2_request = to_v2_inference_request(input_data, self.infer_metadata)
v2_response = remote_predict(
v2_payload=v2_request, predictor_url=self.infer_uri
)
2 changes: 1 addition & 1 deletion runtimes/alibi-explain/mlserver_alibi_explain/version.py
@@ -1 +1 @@
__version__ = "0.6.0.dev3"
__version__ = "0.6.0.dev4"
6 changes: 1 addition & 5 deletions runtimes/alibi-explain/setup.py
@@ -35,11 +35,7 @@ def _load_description() -> str:
packages=find_packages(exclude=["tests", "tests.*"]),
install_requires=[
"mlserver",
"alibi",
# Pin TF to avoid previous issues with 2.6.0 and 2.6.1.
# This should be removed when we move to a new version of alibi that
# would deal internally with TF versions
"tensorflow==2.6.2",
"alibi[shap]",
],
long_description=_load_description(),
long_description_content_type="text/markdown",
77 changes: 42 additions & 35 deletions runtimes/alibi-explain/tests/conftest.py
@@ -19,6 +19,7 @@
from mlserver.repository import ModelRepository
from mlserver.rest import RESTServer
from mlserver.settings import ModelSettings, ModelParameters, Settings
from mlserver.types import MetadataModelResponse
from mlserver_alibi_explain.common import AlibiExplainSettings
from mlserver_alibi_explain.runtime import AlibiExplainRuntime
from helpers.tf_model import TFMNISTModel, get_tf_mnist_model_uri
@@ -29,7 +30,7 @@
nest_asyncio.apply()

TESTS_PATH = Path(os.path.dirname(__file__))
_ANCHOR_IMAGE_DIR = TESTS_PATH / "data" / "mnist_anchor_image"
_ANCHOR_IMAGE_DIR = TESTS_PATH / ".data" / "mnist_anchor_image"


# TODO: how to make this in utils?
@@ -136,45 +137,51 @@ async def anchor_image_runtime_with_remote_predict_patch(
anchor_image_directory,
custom_runtime_tf: MLModel,
remote_predict_mock_path: str = "mlserver_alibi_explain.common.remote_predict",
remote_metadata_mock_path: str = "mlserver_alibi_explain.common.remote_metadata",
) -> AlibiExplainRuntime:
with patch(remote_predict_mock_path) as remote_predict:

def mock_predict(*args, **kwargs):
# note: sometimes the event loop is not running and in this case
# we create a new one otherwise
# we use the existing one.
# this mock implementation is required as we dont want to spin up a server,
# we just use MLModel.predict
try:
loop = asyncio.get_event_loop()
res = loop.run_until_complete(
custom_runtime_tf.predict(kwargs["v2_payload"])
)
return res
except Exception:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
res = loop.run_until_complete(
custom_runtime_tf.predict(kwargs["v2_payload"])
)
return res

remote_predict.side_effect = mock_predict

rt = AlibiExplainRuntime(
ModelSettings(
parallel_workers=0,
parameters=ModelParameters(
uri=str(anchor_image_directory),
extra=AlibiExplainSettings(
explainer_type="anchor_image", infer_uri="dummy_call"
with patch(remote_metadata_mock_path) as remote_metadata:

def mock_metadata(*args, **kwargs):
return MetadataModelResponse(name="dummy", platform="dummy")

def mock_predict(*args, **kwargs):
# note: sometimes the event loop is not running and in this case
# we create a new one otherwise
# we use the existing one.
# mock implementation is required as we dont want to spin up a server,
# we just use MLModel.predict
try:
loop = asyncio.get_event_loop()
res = loop.run_until_complete(
custom_runtime_tf.predict(kwargs["v2_payload"])
)
return res
except Exception:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
res = loop.run_until_complete(
custom_runtime_tf.predict(kwargs["v2_payload"])
)
return res

remote_predict.side_effect = mock_predict
remote_metadata.side_effect = mock_metadata

rt = AlibiExplainRuntime(
ModelSettings(
parallel_workers=0,
parameters=ModelParameters(
uri=str(anchor_image_directory),
extra=AlibiExplainSettings(
explainer_type="anchor_image", infer_uri="dummy_call"
),
),
),
)
)
)
await rt.load()
await rt.load()

return rt
return rt
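
For completeness, here is a hedged sketch of how a test might consume this fixture; the payload shape and the pytest-asyncio marker are assumptions, not taken from this PR.

# Hypothetical test using the patched fixture; payload shape and the asyncio
# marker are assumptions for illustration.
import numpy as np
import pytest

from mlserver.codecs import NumpyCodec
from mlserver.types import InferenceRequest


@pytest.mark.asyncio
async def test_anchor_image_explain(anchor_image_runtime_with_remote_predict_patch):
    runtime = anchor_image_runtime_with_remote_predict_patch
    payload = InferenceRequest(
        inputs=[
            NumpyCodec.encode_request_input(
                name="predict",
                payload=np.zeros((1, 28, 28, 1), dtype=np.float32),
            )
        ]
    )
    response = await runtime.predict(payload)
    assert response.outputs  # the explanation comes back as a response output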


@pytest.fixture
2 changes: 1 addition & 1 deletion runtimes/alibi-explain/tests/helpers/tf_model.py
@@ -12,7 +12,7 @@
from mlserver.types import InferenceRequest, InferenceResponse


_MODEL_PATH = Path(os.path.dirname(__file__)).parent / "data" / "tf_mnist" / "model.h5"
_MODEL_PATH = Path(os.path.dirname(__file__)).parent / ".data" / "tf_mnist" / "model.h5"


def get_tf_mnist_model_uri() -> Path: