From 46ed6c091c9bc1c7b65dee70346c6da2af773589 Mon Sep 17 00:00:00 2001
From: Qing Lan <qingla@amazon.com>
Date: Fri, 19 May 2023 09:11:25 -0700
Subject: [PATCH] Fix input_id -> input_ids typo in model.generate() calls (#732)

---
 engines/python/setup/djl_python/deepspeed.py   | 2 +-
 engines/python/setup/djl_python/huggingface.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/engines/python/setup/djl_python/deepspeed.py b/engines/python/setup/djl_python/deepspeed.py
index 53c619793..9f0dcdca1 100644
--- a/engines/python/setup/djl_python/deepspeed.py
+++ b/engines/python/setup/djl_python/deepspeed.py
@@ -298,7 +298,7 @@ def inference(self, inputs: Input):
                     return_tensors="pt").to(torch.cuda.current_device())
                 with torch.no_grad():
                     output_tokens = self.model.generate(
-                        input_id=tokenized_inputs.input_id,
+                        input_ids=tokenized_inputs.input_ids,
                         attention_mask=tokenized_inputs.attention_mask,
                         **model_kwargs)
                 generated_text = self.tokenizer.batch_decode(
diff --git a/engines/python/setup/djl_python/huggingface.py b/engines/python/setup/djl_python/huggingface.py
index 120bc9574..7493898de 100644
--- a/engines/python/setup/djl_python/huggingface.py
+++ b/engines/python/setup/djl_python/huggingface.py
@@ -241,7 +241,7 @@ def wrapped_pipeline(inputs, *args, **kwargs):
             with torch.no_grad():
                 output_tokens = model.generate(
                     *args,
-                    input_id=input_tokens.input_id,
+                    input_ids=input_tokens.input_ids,
                     attention_mask=input_tokens.attention_mask,
                     **kwargs)
             generated_text = tokenizer.batch_decode(output_tokens,