From 46ed6c091c9bc1c7b65dee70346c6da2af773589 Mon Sep 17 00:00:00 2001 From: Qing Lan Date: Fri, 19 May 2023 09:11:25 -0700 Subject: [PATCH] bug fixes (#732) --- engines/python/setup/djl_python/deepspeed.py | 2 +- engines/python/setup/djl_python/huggingface.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/engines/python/setup/djl_python/deepspeed.py b/engines/python/setup/djl_python/deepspeed.py index 53c619793..9f0dcdca1 100644 --- a/engines/python/setup/djl_python/deepspeed.py +++ b/engines/python/setup/djl_python/deepspeed.py @@ -298,7 +298,7 @@ def inference(self, inputs: Input): return_tensors="pt").to(torch.cuda.current_device()) with torch.no_grad(): output_tokens = self.model.generate( - input_id=tokenized_inputs.input_id, + input_ids=tokenized_inputs.input_ids, attention_mask=tokenized_inputs.attention_mask, **model_kwargs) generated_text = self.tokenizer.batch_decode( diff --git a/engines/python/setup/djl_python/huggingface.py b/engines/python/setup/djl_python/huggingface.py index 120bc9574..7493898de 100644 --- a/engines/python/setup/djl_python/huggingface.py +++ b/engines/python/setup/djl_python/huggingface.py @@ -241,7 +241,7 @@ def wrapped_pipeline(inputs, *args, **kwargs): with torch.no_grad(): output_tokens = model.generate( *args, - input_id=input_tokens.input_id, + input_ids=input_tokens.input_ids, attention_mask=input_tokens.attention_mask, **kwargs) generated_text = tokenizer.batch_decode(output_tokens,