From 8fea140a5f8ac595b10f3aafe79f70d77763667f Mon Sep 17 00:00:00 2001
From: Adrian Gonzalez-Martin
Date: Tue, 4 Jul 2023 10:38:10 +0100
Subject: [PATCH] Rename HF codec to `hf` (#1268)

---
 runtimes/huggingface/README.md            | 10 ++++++++++
 .../mlserver_huggingface/codecs/base.py   |  4 ++--
 .../mlserver_huggingface/runtime.py       |  2 +-
 .../tests/test_codecs/test_base.py        | 24 ++++++++++----------
 4 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/runtimes/huggingface/README.md b/runtimes/huggingface/README.md
index dfd0bdbfb..499cb80ff 100644
--- a/runtimes/huggingface/README.md
+++ b/runtimes/huggingface/README.md
@@ -13,6 +13,16 @@ pip install mlserver mlserver-huggingface
 For further information on how to use MLServer with HuggingFace, you can check
 out this [worked out example](../../docs/examples/huggingface/README.md).
 
+## Content Types
+
+The HuggingFace runtime will always decode the input request using its own
+built-in codec.
+Therefore, [content type annotations](../../docs/user-guide/content-type) at
+the request level will **be ignored**.
+Note that this **doesn't include [input-level content
+type](../../docs/user-guide/content-type#Codecs) annotations**, which will be
+respected as usual.
+
 ## Settings
 
 The HuggingFace runtime exposes a couple extra parameters which can be used to
diff --git a/runtimes/huggingface/mlserver_huggingface/codecs/base.py b/runtimes/huggingface/mlserver_huggingface/codecs/base.py
index 4885101f6..4fdfcbb08 100644
--- a/runtimes/huggingface/mlserver_huggingface/codecs/base.py
+++ b/runtimes/huggingface/mlserver_huggingface/codecs/base.py
@@ -44,7 +44,7 @@ class MultiInputRequestCodec(RequestCodec):
     DefaultCodec: Type["InputCodecTy"] = StringCodec
     InputCodecsWithPriority: List[Type[InputCodecTy]] = []
 
-    ContentType = StringCodec.ContentType
+    ContentType = ""
 
     @classmethod
     def _find_encode_codecs(
@@ -194,5 +194,5 @@ class HuggingfaceRequestCodec(MultiInputRequestCodec):
         NumpyListCodec,
         RawCodec,
     ]
-    ContentType = StringCodec.ContentType
+    ContentType = "hf"
     DefaultCodec = StringCodec
diff --git a/runtimes/huggingface/mlserver_huggingface/runtime.py b/runtimes/huggingface/mlserver_huggingface/runtime.py
index 45d4ecf08..501181af8 100644
--- a/runtimes/huggingface/mlserver_huggingface/runtime.py
+++ b/runtimes/huggingface/mlserver_huggingface/runtime.py
@@ -38,7 +38,7 @@ async def load(self) -> bool:
 
     async def predict(self, payload: InferenceRequest) -> InferenceResponse:
         # TODO: convert and validate?
-        kwargs = self.decode_request(payload, default_codec=HuggingfaceRequestCodec)
+        kwargs = HuggingfaceRequestCodec.decode_request(payload)
         args = kwargs.pop("args", [])
 
         array_inputs = kwargs.pop("array_inputs", [])
diff --git a/runtimes/huggingface/tests/test_codecs/test_base.py b/runtimes/huggingface/tests/test_codecs/test_base.py
index 70ca235f5..3ade8c4d8 100644
--- a/runtimes/huggingface/tests/test_codecs/test_base.py
+++ b/runtimes/huggingface/tests/test_codecs/test_base.py
@@ -26,7 +26,7 @@
         {"foo": ["bar1", "bar2"], "foo2": ["var1"]},
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="foo",
@@ -49,7 +49,7 @@
         {"foo": ["bar1", "bar2"], "foo2": ["var1"]},
         True,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="foo",
@@ -77,7 +77,7 @@
         },
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="images",
@@ -98,7 +98,7 @@
         },
         True,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="images",
@@ -128,7 +128,7 @@
         },
         True,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="conversations",
@@ -158,7 +158,7 @@
         },
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="conversations",
@@ -182,7 +182,7 @@
         },
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="singlejson",
@@ -207,7 +207,7 @@
         },
         True,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="singlejson",
@@ -241,7 +241,7 @@
         },
         True,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="jsonlist",
@@ -277,7 +277,7 @@
         },
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="jsonlist",
@@ -299,7 +299,7 @@
         {"nplist": [np.int8([[2, 2], [2, 2]]), np.float64([[2, 2], [2, 2]])]},
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="nplist",
@@ -319,7 +319,7 @@
         },
         False,
         InferenceRequest(
-            parameters=Parameters(content_type="str"),
+            parameters=Parameters(content_type="hf"),
             inputs=[
                 RequestInput(
                     name="raw_int",
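
For reference, a minimal sketch of what the rename means for callers, assuming mlserver and mlserver-huggingface are installed. The payload mirrors the first test case above; the `use_bytes` keyword name is an assumption about what the boolean flag in the parametrized tests controls.

# Sketch: round-trip through the renamed codec. `use_bytes` is an assumed
# name for the boolean flag exercised by the parametrized tests above.
from mlserver_huggingface.codecs.base import HuggingfaceRequestCodec

# Encoding a dict of named inputs now tags the request with the "hf" content
# type (previously "str").
inference_request = HuggingfaceRequestCodec.encode_request(
    {"foo": ["bar1", "bar2"], "foo2": ["var1"]},
    use_bytes=False,
)
assert inference_request.parameters.content_type == "hf"

# The runtime now calls the codec directly (see the runtime.py hunk above)
# instead of going through self.decode_request with a default codec.
kwargs = HuggingfaceRequestCodec.decode_request(inference_request)
assert kwargs == {"foo": ["bar1", "bar2"], "foo2": ["var1"]}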