NOCOMMIT: log peak mem usage and swapouts to debug test-llava-runner

swolchok · swolchok · commit 0de4aedf4ef8 · 2025-02-06T10:42:34.000-08:00
Attempting to test my theory that the timestamp gaps in #8180 are caused by swapping.

ghstack-source-id: 0bc03bc
ghstack-comment-id: 2634969596
Pull Request resolved: #8192
diff --git a/examples/models/llama/source_transformation/quantized_kv_cache.py b/examples/models/llama/source_transformation/quantized_kv_cache.py
@@ -202,11 +202,15 @@ def replace_kv_cache_with_quantized_kv_cache(module):
         torch.ops.load_library(libs[0])
         op = torch.ops.quantized_decomposed.quantize_per_token.out
         assert op is not None
+    import gc
+    import resource
+
     # This is needed to ensure that custom ops are registered
     from executorch.extension.llm.custom_ops import custom_ops  # noqa: F401
 
+    rusage = resource.getrusage(resource.RUSAGE_SELF)
     logging.warning(
-        "Replacing KVCache with QuantizedKVCache. This modifies the model in place."
+        f"Replacing KVCache with QuantizedKVCache. This modifies the model in place. (HACK: rusage: {rusage} gc stats: {gc.get_stats()})"
     )
     for name, child in module.named_children():
         if isinstance(child, KVCache) or isinstance(child, CustomKVCache):
@@ -270,8 +274,12 @@ def replace_kv_cache_with_custom_kv_cache(module):
     This is because the custom op treats second dim as sequence dim.
     Future work: support [B, H, S, D]
     """
+    import gc
+    import resource
+
+    rusage = resource.getrusage(resource.RUSAGE_SELF)
     logging.warning(
-        "Replacing KVCache with CustomKVCache. This modifies the model in place."
+        f"Replacing KVCache with CustomKVCache. This modifies the model in place. (HACK: rusage: {rusage} gc stats: {gc.get_stats()})"
     )
     for name, child in module.named_children():
         if isinstance(child, KVCache):
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
@@ -156,7 +156,13 @@ def source_transform(
 
         if self.verbose:
             logging.info(f"Applied source transforms: {self.applied_source_transforms}")
-        logging.info(f"Model after source transforms: {self.model}")
+        import gc
+        import resource
+
+        rusage = resource.getrusage(resource.RUSAGE_SELF)
+        logging.info(
+            f"Model after source transforms: {self.model} (HACK: rusage: {rusage} gc_stats: {gc.get_stats()})"
+        )
         return self
 
     def _get_dynamic_shape(self) -> Any: