Add MTIA info to sharder #3032

Closed · wants to merge 1 commit

3 changes: 1 addition & 2 deletions torchrec/distributed/embedding_types.py

```diff
@@ -519,8 +519,7 @@ def storage_usage(
         storage_map = {
             "cuda": ParameterStorage.HBM,
             "cpu": ParameterStorage.DDR,
-            # TODO: Update it later. Setting for MTIA is same as CPU's for now.
-            "mtia": ParameterStorage.DDR,
+            "mtia": ParameterStorage.HBM,
         }
         return {
             storage_map[compute_device_type].value: get_tensor_size_bytes(tensor)
```
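
With this change, `storage_usage` reports MTIA embedding tables against HBM instead of DDR, matching CUDA. A minimal sketch of the resulting behavior (the standalone `storage_usage` below and its byte-count logic are illustrative stand-ins, not torchrec's actual `ParameterStorage` enum or `get_tensor_size_bytes` helper):

```python
import torch

# Illustrative stand-ins for torchrec's ParameterStorage enum values.
HBM, DDR = "hbm", "ddr"

def storage_usage(tensor: torch.Tensor, compute_device_type: str) -> dict:
    # After this PR, MTIA maps to HBM, same as CUDA.
    storage_map = {"cuda": HBM, "cpu": DDR, "mtia": HBM}
    return {storage_map[compute_device_type]: tensor.numel() * tensor.element_size()}

table = torch.empty(1024, 64, dtype=torch.float32)
assert storage_usage(table, "mtia") == {"hbm": 1024 * 64 * 4}  # charged to HBM now
```
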
4 changes: 3 additions & 1 deletion torchrec/distributed/planner/enumerators.py

```diff
@@ -80,7 +80,9 @@ def __init__(
         self._use_exact_enumerate_order: bool = (
             use_exact_enumerate_order if use_exact_enumerate_order else False
         )
-        memory_type = "hbm_cap" if topology.compute_device == "cuda" else "ddr_cap"
+        memory_type = (
+            "hbm_cap" if topology.compute_device in {"cuda", "mtia"} else "ddr_cap"
+        )
         self._device_memory_sizes: Optional[
             List[int]
         ] = (  # only used with custom topology where memory is different within a topology
```
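
The enumerator consequently sizes MTIA topologies by HBM capacity rather than DDR capacity. A hedged sketch of just the selection logic (a hypothetical free function, not the torchrec API):

```python
def memory_cap_attr(compute_device: str) -> str:
    # MTIA topologies are now sized by HBM capacity, like CUDA;
    # only CPU topologies fall back to DDR capacity.
    return "hbm_cap" if compute_device in {"cuda", "mtia"} else "ddr_cap"

assert memory_cap_attr("mtia") == "hbm_cap"
assert memory_cap_attr("cpu") == "ddr_cap"
```
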
4 changes: 2 additions & 2 deletions torchrec/distributed/planner/shard_estimators.py

```diff
@@ -1261,7 +1261,7 @@ def calculate_shard_storages(
                 count_ephemeral_storage_cost=count_ephemeral_storage_cost,
                 is_inference=is_inference,
             )
-            if compute_device == "cuda"
+            if compute_device in {"cuda", "mtia"}
             else 0
         )
         for input_size, output_size, hbm_specific_size in zip(
@@ -1273,7 +1273,7 @@
     ddr_sizes: List[int] = [
         (
             input_size + output_size + ddr_specific_size
-            if compute_device in {"cpu", "mtia"} and not is_inference
+            if compute_device == "cpu" and not is_inference
            else ddr_specific_size
         )
         for input_size, output_size, ddr_specific_size in zip(
```
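
Net effect in `calculate_shard_storages`: per-shard input/output costs for MTIA now land in the HBM bucket, and MTIA no longer inflates the DDR bucket. A deliberately simplified sketch (the real HBM term goes through an ephemeral-storage cost function; the flat sum below is an assumption for illustration):

```python
def shard_storage(
    compute_device: str,
    input_size: int,
    output_size: int,
    hbm_specific: int,
    ddr_specific: int,
    is_inference: bool = False,
) -> tuple:
    # HBM bucket: CUDA and (now) MTIA charge I/O costs to device memory.
    hbm = (
        input_size + output_size + hbm_specific
        if compute_device in {"cuda", "mtia"}
        else 0
    )
    # DDR bucket: only CPU training still charges I/O to host memory;
    # MTIA keeps just its table-specific DDR footprint.
    ddr = (
        input_size + output_size + ddr_specific
        if compute_device == "cpu" and not is_inference
        else ddr_specific
    )
    return hbm, ddr

# An MTIA shard's I/O cost now counts against HBM, not DDR.
assert shard_storage("mtia", input_size=100, output_size=50,
                     hbm_specific=1000, ddr_specific=0) == (1150, 0)
```
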
8 changes: 4 additions & 4 deletions torchrec/distributed/planner/storage_reservations.py

```diff
@@ -73,8 +73,8 @@ def _reserve_dense_storage(
         dense_tensor_size = dense_tensor_estimate
 
     dense_tensor_storage = Storage(
-        hbm=dense_tensor_size if topology.compute_device == "cuda" else 0,
-        ddr=dense_tensor_size if topology.compute_device in {"cpu", "mtia"} else 0,
+        hbm=dense_tensor_size if topology.compute_device in {"cuda", "mtia"} else 0,
+        ddr=dense_tensor_size if topology.compute_device == "cpu" else 0,
     )
 
     for device in topology.devices:
@@ -93,8 +93,8 @@ def _reserve_kjt_storage(
     kjt_size = math.ceil(sum(batch_inputs) * float(input_data_type_size)) * multiplier
 
     kjt_storage = Storage(
-        hbm=kjt_size if topology.compute_device == "cuda" else 0,
-        ddr=kjt_size if topology.compute_device in {"cpu", "mtia"} else 0,
+        hbm=kjt_size if topology.compute_device in {"cuda", "mtia"} else 0,
+        ddr=kjt_size if topology.compute_device == "cpu" else 0,
     )
 
     for device in topology.devices:
```
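
Dense-tensor and KJT reservations follow the same pattern: reserve against HBM for CUDA and MTIA, against DDR only for CPU. A sketch under the assumption that `Storage` behaves like a simple dataclass (a stand-in for the planner's `Storage` type):

```python
from dataclasses import dataclass

@dataclass
class Storage:  # stand-in for torchrec.distributed.planner.types.Storage
    hbm: int = 0
    ddr: int = 0

def reserve(size: int, compute_device: str) -> Storage:
    # CUDA and MTIA reservations count against HBM; only CPU against DDR.
    return Storage(
        hbm=size if compute_device in {"cuda", "mtia"} else 0,
        ddr=size if compute_device == "cpu" else 0,
    )

assert reserve(1 << 20, "mtia") == Storage(hbm=1 << 20, ddr=0)
assert reserve(1 << 20, "cpu") == Storage(hbm=0, ddr=1 << 20)
```
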
2 changes: 1 addition & 1 deletion torchrec/distributed/planner/types.py

```diff
@@ -284,7 +284,7 @@ def __init__(
         self._world_size = world_size
 
         hbm_per_device = [0] * world_size
-        if self._compute_device == "cuda":
+        if self._compute_device == "cuda" or self._compute_device == "mtia":
             hbm_per_device = [hbm_cap if hbm_cap else HBM_CAP] * world_size
         ddr_cap_per_rank = [ddr_cap if ddr_cap else DDR_CAP] * world_size
 
```
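
On the topology side, MTIA ranks now get a nonzero per-rank HBM budget. Sketch of the constructor logic, with `HBM_CAP` and `DDR_CAP` values assumed rather than taken from torchrec's constants:

```python
from typing import List, Optional, Tuple

HBM_CAP = 32 * 1024**3   # assumed default, not torchrec's actual constant
DDR_CAP = 128 * 1024**3  # assumed default

def per_rank_caps(
    compute_device: str,
    world_size: int,
    hbm_cap: Optional[int] = None,
    ddr_cap: Optional[int] = None,
) -> Tuple[List[int], List[int]]:
    hbm_per_device = [0] * world_size
    if compute_device == "cuda" or compute_device == "mtia":
        # MTIA ranks now receive an HBM budget, just like CUDA ranks.
        hbm_per_device = [hbm_cap if hbm_cap else HBM_CAP] * world_size
    ddr_cap_per_rank = [ddr_cap if ddr_cap else DDR_CAP] * world_size
    return hbm_per_device, ddr_cap_per_rank

hbm, ddr = per_rank_caps("mtia", world_size=4)
assert hbm == [HBM_CAP] * 4  # previously all zeros for MTIA
```
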
3 changes: 1 addition & 2 deletions torchrec/distributed/types.py

```diff
@@ -1197,8 +1197,7 @@ def storage_usage(
         storage_map = {
             "cuda": ParameterStorage.HBM,
             "cpu": ParameterStorage.DDR,
-            # TODO: Update it later. Setting for MTIA is same as CPU's for now.
-            "mtia": ParameterStorage.DDR,
+            "mtia": ParameterStorage.HBM,
         }
         return {storage_map[compute_device_type].value: get_tensor_size_bytes(tensor)}
 
```