diff --git a/runtimes/huggingface/mlserver_huggingface/runtime.py b/runtimes/huggingface/mlserver_huggingface/runtime.py index de1c25bf8..90ed9f398 100644 --- a/runtimes/huggingface/mlserver_huggingface/runtime.py +++ b/runtimes/huggingface/mlserver_huggingface/runtime.py @@ -19,6 +19,7 @@ from mlserver_huggingface.codecs import MultiStringRequestCodec from transformers.pipelines import SUPPORTED_TASKS from optimum.pipelines import SUPPORTED_TASKS as SUPPORTED_OPTIMUM_TASKS +from mlserver.logging import logger class HuggingFaceRuntime(MLModel): @@ -57,6 +58,12 @@ def __init__(self, settings: ModelSettings): ), ) + if settings.max_batch_size != self.hf_settings.batch_size: + logger.warning( + f"hf batch_size: {self.hf_settings.batch_size} is different " + f"from MLServer max_batch_size: {settings.max_batch_size}" + ) + super().__init__(settings) async def load(self) -> bool: