
Commit b9c1e52

Alireza Tehrani authored and meta-codesync[bot] committed
Implement KV-ZCH Benchmark (#3540)
Summary:
Pull Request resolved: #3540

Implements KV-ZCH with the benchmarking platform. Several things were added to make it work with KV-ZCH:

- Added eviction policies.
- Added `KeyValueParams` to pass parameters into the TBE `fused_params`, which is then fed into `SSDTableBatchedEmbeddingBags`. See `_populate_ssd_tbe_params` in batched_embedding_kernel and `add_params_from_parameter_sharding` in distributed/utils.py.
- Added `CacheParams` creation to set `prefetch_pipeline=True`, due to the warning below.

NOTE: The `prefetch_pipeline` attribute of `CacheParams` is set to True because of the following complaint without it: {F1983388476,width=300,height=200}

Update on November 11, 2025:

- The line `pipeline.progress(iter(bench_inputs))` is commented out in `benchmark_train_pipeline.py` due to a conflict with `pipeline.reset()`, which causes an error on the forward pass when using `pipeline="prefetch"` with KV-ZCH.

Reviewed By: TroyGarden

Differential Revision: D86677315

fbshipit-source-id: e5d9ca737c59a589fde5d0e33b27fc9874d18b80
1 parent 2e5701c commit b9c1e52

File tree

4 files changed: +167 −5

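The `prefetch_pipeline` note in the commit message corresponds to a single field on the planner constraint for the virtual table. Below is a minimal, hedged sketch of such a constraint, using the table name and option values from the benchmark YAML later in this diff; it is illustrative only, not the literal code added by this commit.

```python
from torchrec.distributed.planner.types import CacheParams, ParameterConstraints

# Hedged sketch: planner constraint for a KV-ZCH (DRAM virtual table) embedding table.
# Without cache_params.prefetch_pipeline=True, the prefetch train pipeline emits the
# complaint referenced in the commit message.
constraints = {
    "FP16_table": ParameterConstraints(
        sharding_types=["row_wise"],             # KV-ZCH virtual tables: row_wise only
        compute_kernels=["dram_virtual_table"],  # or "ssd_virtual_table"
        cache_params=CacheParams(prefetch_pipeline=True),
    )
}
```

In the benchmark itself these values come from the YAML `PlannerConfig.additional_constraints` section rather than being constructed by hand.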

torchrec/distributed/benchmark/benchmark_train_pipeline.py

Lines changed: 2 additions & 1 deletion
@@ -196,7 +196,8 @@ def _func_to_benchmark(
             opt=optimizer,
             device=ctx.device,
         )
-        pipeline.progress(iter(bench_inputs))  # warmup
+        # Commented out due to potential conflict with pipeline.reset()
+        # pipeline.progress(iter(bench_inputs))  # warmup

         run_option.name = (
             type(pipeline).__name__ if run_option.name == "" else run_option.name
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+# This is a very basic KV-ZCH (ZCH v.Next) benchmark configuration
+# For guidelines, see document `ZCH v.Next Onboarding Guidelines`
+# KV-ZCH parameters have comments next to them below.
+# Runs on 2 ranks, showing traces with reasonable workloads
+RunOptions:
+  world_size: 2
+  num_batches: 10
+  num_benchmarks: 1
+  num_profiles: 1
+  sharding_type: table_wise
+  profile_dir: "."
+  name: "sparsenn_prefetch_kvzch_dram"
+PipelineConfig:
+  pipeline: "prefetch"
+ModelInputConfig:
+  feature_pooling_avg: 30
+EmbeddingTablesConfig:
+  num_unweighted_features: 10
+  num_weighted_features: 10
+  embedding_feature_dim: 256
+  additional_tables:
+    - - name: FP16_table
+        embedding_dim: 512
+        num_embeddings: 100_000 # Both feature hashsize and virtual table size
+        feature_names: ["additional_0_0"]
+        data_type: FP16
+        total_num_buckets: 100 # num_embedding should be divisible by total_num_buckets
+        location: "DRAM_VIRTUAL_TABLE" # See sparsenn.configs::LocationType,
+                                       # either SSD_VIRTUAL_TABLE, DRAM_VIRTUAL_TABLE
+        # weight_init_max: 10 # Controls initial Embedding table values
+        # weight_init_min: -10 # Controls initial Embedding table values
+        # virtual_table_eviction_policy: # If want eviction policy
+        #   CountBasedEvictionPolicy:
+        #     training_id_eviction_trigger_count: 10000
+        #     eviction_threshold: 15
+        #     decay_rate: 0.99
+
+      - name: large_table
+        embedding_dim: 2048
+        num_embeddings: 1_000_000
+        feature_names: ["additional_0_1"]
+    - []
+    - - name: skipped_table
+        embedding_dim: 128
+        num_embeddings: 100_000
+        feature_names: ["additional_2_1"]
+PlannerConfig:
+  additional_constraints:
+    large_table:
+      sharding_types: [column_wise]
+    FP16_table:
+      sharding_types: [row_wise] # KV-ZCH virtual tables currently only support row_wise sharding
+      compute_kernels: [dram_virtual_table] # Either ['ssd_virtual_table', 'dram_virtual_table'], must match above
+      cache_params:
+        prefetch_pipeline: True # Required for SSD/DRAM virtual tables
+      key_value_params:
+        max_l1_cache_size: 1250 # in MB, check warnings in log to see if it is actually used.
+        l2_cache_size: 64 # in GB
+        gather_ssd_cache_stats: False
+        ssd_rocksdb_shards: 32
+        # Only use if `virtual_table_eviction_policy` is set above.
+        # kvzch_tbe_config: # See fbgemm_gpu/split_table_batched_embeddings_ops_common.py::KVZCHEvictionTBEConfig
+        #   kvzch_eviction_trigger_mode: 2 # 0:disabled, 1:iteration, 2:mem_util, 3:manual, 4:id_count, 5:free_mem
+        #   eviction_free_mem_threshold_gb: 200 # Minimum free memory in GB before eviction
+        #   eviction_free_mem_check_interval_batch: 1000 # Batches between free memory checks
+        #   threshold_calculation_bucket_stride: 0.2 # Feature score bucket width
+        #   threshold_calculation_bucket_num: 1000000 # Total number of feature score buckets
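If the commented `virtual_table_eviction_policy` block above is uncommented, the helper added in `table_config.py` (shown later in this diff) instantiates the named policy class with the nested YAML keys as keyword arguments. A hedged sketch of the resulting object, assuming those keys map one-to-one onto the `CountBasedEvictionPolicy` constructor as the helper's `**policy_params` call implies:

```python
from torchrec.modules.embedding_configs import CountBasedEvictionPolicy

# Values copied from the commented YAML block above; the kwarg names are
# assumed to match the CountBasedEvictionPolicy fields in this codebase.
policy = CountBasedEvictionPolicy(
    training_id_eviction_trigger_count=10_000,
    eviction_threshold=15,
    decay_rate=0.99,
)
# table_config.py attaches this object to the table dict as
# virtual_table_eviction_policy before constructing the EmbeddingBagConfig.
```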

torchrec/distributed/test_utils/sharding_config.py

Lines changed: 23 additions & 2 deletions
@@ -9,14 +9,16 @@
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Tuple, Union

+from fbgemm_gpu.split_table_batched_embeddings_ops_common import KVZCHTBEConfig
+
 from torchrec.distributed.comm import get_local_size

 from torchrec.distributed.embedding_types import EmbeddingComputeKernel
 from torchrec.distributed.planner import EmbeddingShardingPlanner, Topology
 from torchrec.distributed.planner.constants import POOLING_FACTOR
 from torchrec.distributed.planner.planners import HeteroEmbeddingShardingPlanner
-from torchrec.distributed.planner.types import ParameterConstraints
-from torchrec.distributed.types import ShardingType
+from torchrec.distributed.planner.types import CacheParams, ParameterConstraints
+from torchrec.distributed.types import KeyValueParams, ShardingType
 from torchrec.modules.embedding_configs import EmbeddingBagConfig, EmbeddingConfig


@@ -64,6 +66,25 @@ def table_to_constraint(
     else:
         kwargs = default_kwargs | kwargs

+    # (KVZCH) Convert key_value_params dict to KeyValueParams object if present
+    if "key_value_params" in kwargs:
+        key_value_params = kwargs["key_value_params"]
+        # If eviction policy is set then construct object
+        if (
+            isinstance(key_value_params, dict)
+            and "kvzch_tbe_config" in key_value_params
+        ):
+            key_value_params["kvzch_tbe_config"] = KVZCHTBEConfig(
+                **key_value_params["kvzch_tbe_config"]
+            )
+        # pyre-ignore[6,32]
+        kwargs["key_value_params"] = KeyValueParams(**key_value_params)
+
+    # Convert cache_params dict to CacheParams object if present
+    if "cache_params" in kwargs:
+        # pyre-ignore[6,32]
+        kwargs["cache_params"] = CacheParams(**kwargs["cache_params"])
+
     constraint = ParameterConstraints(**kwargs)  # pyre-ignore [6]
     return table.name, constraint
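For context, here is a hedged, standalone sketch of what this conversion does to the `key_value_params` block of the `FP16_table` constraint in the benchmark YAML above (values copied from that YAML; the real code path goes through `table_to_constraint`):

```python
from fbgemm_gpu.split_table_batched_embeddings_ops_common import KVZCHTBEConfig
from torchrec.distributed.types import KeyValueParams

# key_value_params as parsed from the FP16_table block of the YAML config.
key_value_params = {
    "max_l1_cache_size": 1250,  # MB
    "l2_cache_size": 64,  # GB
    "gather_ssd_cache_stats": False,
    "ssd_rocksdb_shards": 32,
    # Uncommenting the YAML's kvzch_tbe_config block would add a nested dict here.
}

# Same conversion as above: an optional nested kvzch_tbe_config dict becomes a
# KVZCHTBEConfig, then the whole dict becomes a KeyValueParams object, which the
# sharder later folds into the TBE fused_params.
if "kvzch_tbe_config" in key_value_params:
    key_value_params["kvzch_tbe_config"] = KVZCHTBEConfig(
        **key_value_params["kvzch_tbe_config"]
    )
params = KeyValueParams(**key_value_params)
```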

torchrec/distributed/test_utils/table_config.py

Lines changed: 75 additions & 2 deletions
@@ -8,12 +8,77 @@
 # pyre-strict

 from dataclasses import dataclass, field
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional, Type
+
+from torchrec.modules.embedding_configs import (
+    CountBasedEvictionPolicy,
+    CountTimestampMixedEvictionPolicy,
+    EmbeddingBagConfig,
+    FeatureScoreBasedEvictionPolicy,
+    NoEvictionPolicy,
+    TimestampBasedEvictionPolicy,
+    VirtualTableEvictionPolicy,
+)

-from torchrec.modules.embedding_configs import EmbeddingBagConfig
 from torchrec.types import DataType


+def _return_correct_eviction_policy(
+    eviction_str: str,
+) -> Type[VirtualTableEvictionPolicy]:
+    if eviction_str == "CountBasedEvictionPolicy":
+        return CountBasedEvictionPolicy
+    if eviction_str == "TimestampBasedEvictionPolicy":
+        return TimestampBasedEvictionPolicy
+    if eviction_str == "CountTimestampMixedEvictionPolicy":
+        return CountTimestampMixedEvictionPolicy
+    if eviction_str == "FeatureScoreBasedEvictionPolicy":
+        return FeatureScoreBasedEvictionPolicy
+    raise ValueError(f"Could not recognize eviction_str in yaml file: {eviction_str}")
+
+
+def _process_virtual_table_config(config_dict: Dict[str, Any]) -> None:
+    """Converts YAML virtual table fields (location, eviction-policy) to EBC format."""
+    if "location" in config_dict:
+        # config_dict["location"] should match LocationType
+        config_dict["use_virtual_table"] = config_dict["location"] in [
+            "DRAM_VIRTUAL_TABLE",
+            "SSD_VIRTUAL_TABLE",
+        ]
+        del config_dict["location"]  # location not an attribute of EBC
+
+        if config_dict["use_virtual_table"]:
+            assert (
+                config_dict["total_num_buckets"] > 0
+            ), "Should be larger 0 when using SSD_VIRTUAL_TABLE or DRAM_VIRTUAL_TABLE"
+
+            assert (
+                config_dict["num_embeddings"] % config_dict["total_num_buckets"] == 0
+            ), (
+                f"num_embeddings ({config_dict['num_embeddings']}) must be divisible by "
+                f"total_num_buckets ({config_dict['total_num_buckets']})"
+            )
+
+    if "virtual_table_eviction_policy" in config_dict:
+        # Obtain what eviction strategy was chosen
+        eviction = config_dict["virtual_table_eviction_policy"]
+        policy_class_name = next(iter(eviction.keys()))
+        policy_params = eviction[policy_class_name]
+        eviction = _return_correct_eviction_policy(policy_class_name)(
+            **policy_params
+        )
+    else:
+        # Choose standard no eviction policy
+        eviction = NoEvictionPolicy()
+
+    # Initialize the eviction policy
+    data_type = config_dict["data_type"]
+    embedding_dim = config_dict["embedding_dim"]
+    eviction.init_metaheader_config(data_type, embedding_dim)
+
+    config_dict["virtual_table_eviction_policy"] = eviction
+
+
 @dataclass
 class EmbeddingTablesConfig:
     """
@@ -38,13 +103,18 @@ class EmbeddingTablesConfig:
     embedding_feature_dim: int = 128
     base_row_size: int = 100_000
     table_data_type: DataType = DataType.FP32
+    total_num_buckets: Optional[int] = None
     additional_tables: List[List[Dict[str, Any]]] = field(default_factory=list)

     def convert_to_ebconf(self, kwargs: Dict[str, Any]) -> EmbeddingBagConfig:
         if "data_type" in kwargs:
             kwargs["data_type"] = DataType[kwargs["data_type"]]
         else:
             kwargs["data_type"] = self.table_data_type
+
+        # Process configs for KV-ZCH/ZCH v.Next
+        _process_virtual_table_config(kwargs)
+
         return EmbeddingBagConfig(**kwargs)

     def generate_tables(
@@ -70,6 +140,7 @@ def generate_tables(
         two lists - the first for unweighted embedding tables and the second for
         weighted embedding tables.
         """
+
         unweighted_tables = [
             EmbeddingBagConfig(
                 num_embeddings=max(i + 1, 100) * self.base_row_size // 100,
@@ -90,6 +161,7 @@ def generate_tables(
             )
             for i in range(self.num_weighted_features)
         ]
+
         tables_list = []
         for idx, adts in enumerate(self.additional_tables):
             if idx == 0:
@@ -100,6 +172,7 @@ def generate_tables(
             tables = []
             for adt in adts:
                 tables.append(self.convert_to_ebconf(adt))
+            tables_list.append(tables)

         if len(tables_list) == 0:
             tables_list.append(unweighted_tables)
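To tie the pieces together, here is a hedged usage sketch of the new `_process_virtual_table_config` helper on a table dict shaped like the `FP16_table` entry from the YAML above, after `convert_to_ebconf` has already mapped `data_type` to the enum; the values are copied from that YAML and the observed behavior follows the helper's code shown in this diff.

```python
from torchrec.distributed.test_utils.table_config import _process_virtual_table_config
from torchrec.types import DataType

table = {
    "name": "FP16_table",
    "embedding_dim": 512,
    "num_embeddings": 100_000,
    "feature_names": ["additional_0_0"],
    "data_type": DataType.FP16,
    "total_num_buckets": 100,  # 100_000 / 100 = 1_000 rows per bucket
    "location": "DRAM_VIRTUAL_TABLE",
}

_process_virtual_table_config(table)

# "location" is replaced by use_virtual_table=True, and because no
# virtual_table_eviction_policy was given, a default NoEvictionPolicy is attached.
assert table["use_virtual_table"]
print(type(table["virtual_table_eviction_policy"]).__name__)  # NoEvictionPolicy
```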
