Potential fix for saving quantized models trained using fsdp

neuralmagic · Satrat · Mar 18, 2024 · Mar 15, 2024 · Mar 18, 2024 · cc05f07abb0d246a9f1ec2617310f43e8bb33950
commit cc05f07abb0d246a9f1ec2617310f43e8bb33950
diff --git a/src/sparseml/transformers/finetune/runner.py b/src/sparseml/transformers/finetune/runner.py
@@ -40,7 +40,11 @@
 )
 from sparseml.transformers.finetune.model_args import ModelArguments
 from sparseml.transformers.finetune.training_args import TrainingArguments
-from sparseml.utils.fsdp.helpers import is_fsdp_model, unwrap_and_export_model
+from sparseml.utils.fsdp.helpers import (
+    find_and_move_state_dicts_to_cpu,
+    is_fsdp_model,
+    unwrap_and_export_model,
+)
 
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -175,6 +179,15 @@ def one_shot(self, stage: Optional[str] = None):
                     output_dir=self._output_dir,
                     tokenizer=self.tokenizer,
                 )
+                # only allow the main process to move the state
+                # dicts to cpu
+                if self.trainer.accelerator.is_main_process:
+                    # assuming quantization is the last step
+                    # we no longer need the original model
+                    # and can safely delete it to save memory
+                    del self.trainer.model
+                    find_and_move_state_dicts_to_cpu(self._output_dir)
+
         else:
             save_model_and_recipe(
                 model=self.trainer.model,

diff --git a/src/sparseml/utils/fsdp/helpers.py b/src/sparseml/utils/fsdp/helpers.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import operator
+from pathlib import Path
 from typing import Optional, Union
 
 
@@ -25,6 +27,7 @@
 except ImportError:
     FullyShardedDataParallel = None
 
+import torch
 from torch.nn import Module
 
 from sparseml.core.model import ModifiableModel
@@ -39,8 +42,11 @@
     "unwrap_and_export_model",
     "save_pretrained_fsdp",
     "get_fsdp_parent",
+    "find_and_move_state_dicts_to_cpu",
 ]
 
+_LOGGER = logging.getLogger(__name__)
+
 
 def is_fsdp_model(model: Module) -> bool:
     """
@@ -113,6 +119,27 @@ def unwrap_and_export_model(model, accelerator, output_dir, tokenizer):
         )
 
 
+def find_and_move_state_dicts_to_cpu(output_dir: str):
+    """
+    Looks for state dicts in the output directory and overwrites them
+    with cpu state dicts.
+
+    This is needed for quantized models trained with FSDP, as the saved
+    state dict contains device information, which can cause issues when
+    loading the model using transformers AutoModel.from_pretrained(...)
+    if that device information is not removed.
+
+    Assumes the state dicts are named pytorch_model*.bin. The search is
+    recursive, so matching files in sub-directories are rewritten as well.
+
+    :param output_dir: directory to search for saved state dict files
+    """
+
+    for model_file in Path(output_dir).rglob("pytorch_model*.bin"):
+        # map_location remaps every tensor storage to cpu while loading, so
+        # the checkpoint is never materialized on the device it was saved
+        # from, and tensors nested inside containers are covered as well
+        loaded_dict = torch.load(model_file, map_location="cpu")
+
+        # overwrite the original file in place with the cpu-only version
+        torch.save(loaded_dict, model_file)
+        _LOGGER.info(f"Moved state dict {model_file} to cpu")
+
+
 def save_pretrained_fsdp(model, accelerator, output_dir, save_safetensors: bool = True):
     full_state_dict_config = FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
     """