Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
_deps = [
"Pillow>=10.0.1,<=15.0",
"accelerate>=0.21.0",
"accelerate>=0.26.0",
"av==9.2.0", # Latest version of PyAV (10.0.0) has issues with audio stream.
"beautifulsoup4",
"codecarbon==1.2.0",
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/dependency_versions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# 2. run `make deps_table_update``
deps = {
"Pillow": "Pillow>=10.0.1,<=15.0",
"accelerate": "accelerate>=0.21.0",
"accelerate": "accelerate>=0.26.0",
"av": "av==9.2.0",
"beautifulsoup4": "beautifulsoup4",
"codecarbon": "codecarbon==1.2.0",
Expand Down
17 changes: 8 additions & 9 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4671,16 +4671,15 @@ def create_accelerator_and_postprocess(self):
fsdp_plugin.limit_all_gathers = self.args.fsdp_config.get(
"limit_all_gathers", fsdp_plugin.limit_all_gathers
)
if is_accelerate_available("0.23.0"):
fsdp_plugin.activation_checkpointing = self.args.fsdp_config.get(
"activation_checkpointing", fsdp_plugin.activation_checkpointing
fsdp_plugin.activation_checkpointing = self.args.fsdp_config.get(
"activation_checkpointing", fsdp_plugin.activation_checkpointing
)
if fsdp_plugin.activation_checkpointing and self.args.gradient_checkpointing:
raise ValueError(
"The activation_checkpointing in FSDP config and the gradient_checkpointing in training arg "
"can't be set to True simultaneously. Please use FSDP's activation_checkpointing logic "
"when using FSDP."
)
if fsdp_plugin.activation_checkpointing and self.args.gradient_checkpointing:
raise ValueError(
"The activation_checkpointing in FSDP config and the gradient_checkpointing in training arg "
"can't be set to True simultaneously. Please use FSDP's activation_checkpointing logic "
"when using FSDP."
)

if self.is_deepspeed_enabled and getattr(self.args, "hf_deepspeed_config", None) is None:
self.propagate_args_to_deepspeed()
Expand Down
6 changes: 2 additions & 4 deletions src/transformers/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -1913,10 +1913,8 @@ def __post_init__(self):
for fsdp_option in self.fsdp:
if fsdp_option.upper() in FSDP_SHARDING_STRATEGY:
# set environment variable for FSDP sharding strategy
os.environ[f"{prefix}SHARDING_STRATEGY"] = (
str(FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1)
if is_accelerate_available("0.26.0")
else fsdp_option.upper()
os.environ[f"{prefix}SHARDING_STRATEGY"] = str(
FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1
Comment on lines +1918 to +1919
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I can't wait until these hacks aren't needed anymore :)

)
elif fsdp_option == FSDPOption.OFFLOAD:
os.environ[f"{prefix}OFFLOAD_PARAMS"] = "true"
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
# This is the version of torch required to run torch.fx features and torch.onnx with dictionary inputs.
TORCH_FX_REQUIRED_VERSION = version.parse("1.10")

ACCELERATE_MIN_VERSION = "0.21.0"
ACCELERATE_MIN_VERSION = "0.26.0"
FSDP_MIN_VERSION = "1.12.0"
XLA_FSDPV2_MIN_VERSION = "2.2.0"

Expand Down
6 changes: 1 addition & 5 deletions tests/fsdp/test_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,7 @@ def test_fsdp_config_transformers_auto_wrap(self, sharding_strategy, dtype):
self.assertEqual(trainer.args.fsdp[0], sharding_strategy)
self.assertEqual(trainer.args.fsdp[1], FSDPOption.OFFLOAD)
self.assertEqual(trainer.args.fsdp[2], FSDPOption.AUTO_WRAP)
fsdp_sharding_strategy = (
str(FSDP_SHARDING_STRATEGY.index(sharding_strategy.upper()) + 1)
if is_accelerate_available("0.26.0")
else sharding_strategy.upper()
)
fsdp_sharding_strategy = str(FSDP_SHARDING_STRATEGY.index(sharding_strategy.upper()) + 1)
self.assertEqual(os.environ[f"{prefix}SHARDING_STRATEGY"], fsdp_sharding_strategy)
self.assertEqual(os.environ[f"{prefix}OFFLOAD_PARAMS"], "true")
self.assertEqual(os.environ[f"{prefix}AUTO_WRAP_POLICY"], "TRANSFORMER_BASED_WRAP")
Expand Down