oumi-ai · nikg4 · Jan 25, 2025 · Jan 25, 2025 · Jan 25, 2025
diff --git a/configs/recipes/vision/llama3_2_vision/evaluation/11b_eval.yaml b/configs/recipes/vision/llama3_2_vision/evaluation/11b_eval.yaml
@@ -7,6 +7,7 @@ model:
   model_max_length: 1024
   torch_dtype_str: "bfloat16"
   attn_implementation: "sdpa"
+  shard_for_eval: True
 
 generation:
   batch_size: 1

diff --git a/configs/recipes/vision/llama3_2_vision/evaluation/11b_gcp_job.yaml b/configs/recipes/vision/llama3_2_vision/evaluation/11b_gcp_job.yaml
@@ -8,7 +8,7 @@ name: llama32v-11b-eval
 
 resources:
   cloud: gcp
-  accelerators: "A100:1"
+  accelerators: "A100:4"
   use_spot: false
   disk_size: 1000 # Disk size in GBs
 

diff --git a/configs/recipes/vision/qwen2_vl_2b/evaluation/eval.yaml b/configs/recipes/vision/qwen2_vl_2b/evaluation/eval.yaml
@@ -9,6 +9,7 @@ model:
   attn_implementation: "sdpa"
   load_pretrained_weights: True
   trust_remote_code: True
+  shard_for_eval: True
 
 generation:
   batch_size: 2

diff --git a/src/oumi/core/configs/params/model_params.py b/src/oumi/core/configs/params/model_params.py
@@ -8,7 +8,6 @@
 
 from oumi.core.configs.params.base_params import BaseParams
 from oumi.core.types.exceptions import HardwareException
-from oumi.utils.distributed_utils import is_using_accelerate
 from oumi.utils.logging import logger
 from oumi.utils.torch_utils import get_torch_dtype
 
@@ -162,10 +161,6 @@ class ModelParams(BaseParams):
 
     This is needed for large models that do not fit on a single GPU.
     It is used as the value for the `parallelize` argument in LM Harness.
-
-    If this is enabled, the eval job must be kicked off with `python` as opposed to
-    `accelerate launch`, as described here:
-    https://github.com/EleutherAI/lm-evaluation-harness?tab=readme-ov-file#multi-gpu-evaluation-with-hugging-face-accelerate
     """
 
     freeze_layers: list[str] = field(default_factory=list)
@@ -258,11 +253,5 @@ def __finalize_and_validate__(self):
                 "consider installing it: pip install -U flash-attn --no-build-isolation"
             )
 
-        if self.shard_for_eval and is_using_accelerate():
-            raise ValueError(
-                "Sharded-model evaluations with LM Harness should be invoked with "
-                "`python`, not `accelerate launch`."
-            )
-
         if self.model_max_length is not None and self.model_max_length <= 0:
             raise ValueError("model_max_length must be a positive integer or None.")