
Commit ba3ac9f

updating flatten/unflatten functions (#3282)
1 parent 9266734

File tree

2 files changed, +35 -21 lines changed

test/prototype/safetensors/test_safetensors_support.py

Lines changed: 6 additions & 0 deletions
@@ -66,6 +66,12 @@ def test_safetensors(self, config, act_pre_scale=False):
 
         with tempfile.NamedTemporaryFile() as f:
             tensors_data_dict, metadata = flatten_tensor_state_dict(model.state_dict())
+
+            for key in tensors_data_dict.keys():
+                assert key.startswith("0._weight_") or key.startswith("0.bias"), (
+                    f"Unexpected key format: {key}"
+                )
+
             save_file(tensors_data_dict, f.name, metadata=metadata)
             tensors_data_dict, metadata = load_data(file_path=f.name, device="cuda")
             reconstructed_dict = unflatten_tensor_state_dict(
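
The assertion above pins down the new key scheme: tensor-subclass attributes are flattened to "{module_fqn}._{weight_name}_{attr}", while plain tensors such as "0.bias" keep their original state-dict name. A minimal sketch of that scheme (the helper flatten_key is illustrative only, not part of this commit):

# Hypothetical helper mirroring the prefix construction this commit introduces.
def flatten_key(tensor_name: str, attr: str) -> str:
    module_fqn, weight_name = tensor_name.rsplit(".", 1)
    return f"{module_fqn}._{weight_name}_{attr}"

assert flatten_key("0.weight", "qdata") == "0._weight_qdata"
assert flatten_key("0.weight", "scale") == "0._weight_scale"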

torchao/prototype/safetensors/safetensors_support.py

Lines changed: 29 additions & 21 deletions
@@ -24,9 +24,9 @@ def unflatten_tensor_state_dict(
 
     For example, given a previously flattened tensors_data_dict and metadata:
     tensors_data_dict = {
-        '0.weight:qdata': torch.Tensor(...),
-        '0.weight:scale': torch.Tensor(...),
-        '0.bias:_data': torch.Tensor(...),
+        '0._weight_qdata': torch.Tensor(...),
+        '0._weight_scale': torch.Tensor(...),
+        '0.bias': torch.Tensor(...),
     }
     metadata = {
         '0.weight': {
@@ -53,7 +53,7 @@ def unflatten_tensor_state_dict(
     }
 
     Args:
-        tensors_data_dict: a dictionary from "tensor_name:tensor_data_attribute_name" to flattened torch.Tensor data for tensor subclass instance
+        tensors_data_dict: a dictionary from "{tensor_name}_{tensor_data_attribute_name}" to flattened torch.Tensor data for tensor subclass instance
         metadata: a dictionary from "tensor_name" to another dictionary that contains type and attributes for tensor subclass instance
 
     Returns:
@@ -68,23 +68,30 @@ def unflatten_tensor_state_dict(
     result = {}
 
     for tensor_name in tensor_names:
+        module_fqn, weight_name = tensor_name.rsplit(".", 1)
+
+        prefix = f"{module_fqn}._{weight_name}_"
         tensor_tensors = {}
         for key, value in combined_data.items():
-            if key.startswith(f"{tensor_name}:"):
+            if key.startswith(prefix):
                 # Remove the prefix
-                tensor_tensors[key[len(tensor_name) + 1 :]] = value
+                tensor_tensors[key[len(prefix) :]] = value
 
         tensor_metadata = json.loads(metadata.get(tensor_name))
         tensor_type = tensor_metadata.get("_type")
 
         if tensor_type in ALLOWED_TENSORS_SUBCLASSES:
+            if not tensor_tensors:
+                # we allow the option of loading in state_dict info for a single tensor
+                # if tensor state dict info is not loaded in yet, we wait for it to be provided
+                # in a future call
+                continue
             tensor_metadata["_data"].update(tensor_tensors)
             result[tensor_name] = object_from_dict(tensor_metadata)
         elif tensor_type == torch.Tensor.__name__:
-            result[tensor_name] = tensor_tensors["_data"]
+            result[tensor_name] = tensors_data_dict[tensor_name]
         else:
             raise ValueError(f"Unsupported tensor type: {tensor_type}")
-
     return result
 
 
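To make the new prefix handling concrete, here is a small self-contained sketch (data values are invented placeholders, not real tensors) of how unflatten_tensor_state_dict now regroups a flattened dict per tensor:

# Illustrative only: mirrors the prefix-stripping loop above with made-up data.
combined_data = {
    "0._weight_qdata": "qdata tensor",
    "0._weight_scale": "scale tensor",
    "0.bias": "bias tensor",
}

tensor_name = "0.weight"
module_fqn, weight_name = tensor_name.rsplit(".", 1)
prefix = f"{module_fqn}._{weight_name}_"  # "0._weight_"

tensor_tensors = {
    key[len(prefix):]: value
    for key, value in combined_data.items()
    if key.startswith(prefix)
}
assert tensor_tensors == {"qdata": "qdata tensor", "scale": "scale tensor"}

# If tensor_tensors were empty, the subclass branch now `continue`s and waits
# for the tensor's data to be provided in a future call instead of failing.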

@@ -108,9 +115,9 @@ def flatten_tensor_state_dict(
 
     We flatten this to:
     tensors_data = {
-        '0.weight:qdata': torch.Tensor(...),
-        '0.weight:scale': torch.Tensor(...),
-        '0.bias:_data': torch.Tensor(...),
+        '0._weight_qdata': torch.Tensor(...),
+        '0._weight_scale': torch.Tensor(...),
+        '0.bias': torch.Tensor(...),
     }
     metadata = {
         '0.weight': {
@@ -152,22 +159,23 @@ def flatten_tensor_state_dict(
                 tensor_dict[tensor_data_name] = getattr(tensor, tensor_data_name)
 
             tensor_metadata = json.dumps(tensor, cls=TensorSubclassAttributeJSONEncoder)
+
+            # Clone tensors to avoid memory sharing issues
+            tensors_dict_to_save = {
+                f"{tensor_name.rsplit('.', 1)[0]}._{tensor_name.rsplit('.', 1)[1]}_{key}": (
+                    value.detach().clone() if isinstance(value, torch.Tensor) else value
+                )
+                for key, value in tensor_dict.items()
+            }
+
         elif type(tensor) is torch.Tensor:
-            tensor_dict = {"_data": tensor}
             tensor_metadata = json.dumps({"_type": torch.Tensor.__name__})
+            tensors_dict_to_save = {tensor_name: tensor}
         else:
             raise ValueError(f"Unsupported tensor type: {type(tensor)}")
 
-        # Clone tensors to avoid memory sharing issues
-        prefixed_tensors_dict = {
-            f"{tensor_name}:{key}": (
-                value.detach().clone() if isinstance(value, torch.Tensor) else value
-            )
-            for key, value in tensor_dict.items()
-        }
-
         metadata[tensor_name] = tensor_metadata
-        tensors_data_dict.update(prefixed_tensors_dict)
+        tensors_data_dict.update(tensors_dict_to_save)
 
     metadata["tensor_names"] = json.dumps(list(tensors_dict.keys()))
     return tensors_data_dict, metadata
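
End to end, the two functions round-trip through safetensors exactly as the test exercises them. A condensed sketch (model, the quantization config, and load_data are assumed to be set up as in test_safetensors_support.py; argument order for unflatten follows the docstring above):

import tempfile

from safetensors.torch import save_file

with tempfile.NamedTemporaryFile() as f:
    # Flatten: plain torch.Tensors keyed by "{module_fqn}._{weight_name}_{attr}"
    # (or the original name for plain tensors), plus JSON metadata.
    tensors_data_dict, metadata = flatten_tensor_state_dict(model.state_dict())
    save_file(tensors_data_dict, f.name, metadata=metadata)

    # Reload and rebuild the tensor subclass instances from data + metadata.
    tensors_data_dict, metadata = load_data(file_path=f.name, device="cuda")
    reconstructed_dict = unflatten_tensor_state_dict(tensors_data_dict, metadata)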
