Skip to content

Commit

Permalink
Set sharded to false when tensor parallel degree (tp) is 1
Browse files Browse the repository at this point in the history
  • Loading branch information
xyang16 committed Jun 28, 2023
1 parent bfb5466 commit 4829afd
Showing 1 changed file with 3 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
)

import torch
import logging

ARCHITECTURE_2_BATCH_CLS = {
"RWForCausalLM": FlashCausalLMBatch,
Expand Down Expand Up @@ -57,6 +56,7 @@ def __init__(self, model_id_or_path, device, properties, **kwargs):
"""

super().__init__(device)
self.properties = properties
self.batch_cls = None
self._init_model(kwargs, model_id_or_path)
self.batch_id_counter = 0
Expand All @@ -66,9 +66,10 @@ def _init_model(self, kwargs, model_id_or_path):
self.config = AutoConfig.from_pretrained(model_id_or_path,
**kwargs)
self.batch_cls = get_batch_cls_from_architecture(self.config.architectures[0])
sharded = int(self.properties.get("tensor_parallel_degree", "-1")) > 1
self.model = get_model(model_id_or_path,
revision=None,
sharded=True,
sharded=sharded,
quantize=None,
trust_remote_code=kwargs.get("trust_remote_code"))

Expand Down

0 comments on commit 4829afd

Please sign in to comment.