Allow for lazy-evaluation of arguments in LazyFormat (#1669)

plypaul · plypaul · commit c5d7f8f0debd · 2025-03-31T13:19:36.000-07:00
When creating a `LazyFormat` object, the keyword argument values may be
constructed from expensive methods. Similar to allowing a function to be
passed in for the `message` argument, this PR enables methods to be
passed in for the keyword argument values so that they can be lazily
evaluated.

There were some logging calls that were found in performance profiling
to be expensive because lazy-evaluation was not done, so this PR updates
a few of those call sites as well. An alternative approach would have
been to implement `MetricFlowPrettyFormattable` for those objects.
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
 
+import logging
 from functools import cached_property
 from typing import Callable, Union
 
 from typing_extensions import override
 
 from metricflow_semantics.mf_logging.pretty_print import mf_pformat_dict
 
+logger = logging.getLogger(__name__)
+
 
 class LazyFormat:
     """Lazily formats the given objects into a string representation for logging.
@@ -40,7 +43,8 @@ def __init__(self, message: Union[str, Callable[[], str]], **kwargs) -> None:  #
 
         Args:
             message: The message or a function that returns a message.
-            **kwargs: A dictionary of objects to format to text when `__str__` is called on this object.
+            **kwargs: A dictionary of objects to format to text when `__str__` is called on this object. To allow for
+            lazy evaluation of argument values, a callable that takes no arguments can be supplied for a key's value.
         """
         self._message = message
         self._kwargs = kwargs
@@ -52,7 +56,23 @@ def _str_value(self) -> str:
             message = self._message()
         else:
             message = self._message
-        return mf_pformat_dict(message, self._kwargs, preserve_raw_strings=True)
+
+        evaluated_args = {}
+        for arg_name, arg_value in self._kwargs.items():
+            if callable(arg_value):
+                try:
+                    evaluated_args[arg_name] = arg_value()
+                    continue
+                except Exception:
+                    logger.warning(
+                        f"Got an exception while evaluating {arg_name=} {arg_value=}. Since this is an error with "
+                        f"formatting log output, this should not result in system issues. However, the exception "
+                        f"indicates a bug with how the logging call is made and should be investigated.",
+                        exc_info=True,
+                    )
+
+            evaluated_args[arg_name] = arg_value
+        return mf_pformat_dict(message, evaluated_args, preserve_raw_strings=True)
 
     @override
     def __str__(self) -> str:
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py
@@ -9,6 +9,8 @@
 
 logger = logging.getLogger(__name__)
 
+_NOT_FORMATTED_ASSERTION_MESSAGE = "This should not have been formatted as a string."
+
 
 def test_log_kwargs() -> None:
     """Test that objects included via keyword args are formatted."""
@@ -36,12 +38,11 @@ def test_lambda() -> None:
 
 def test_lazy_object() -> None:
     """Test that formatting of objects are done lazily and not when the logging level is not appropriate."""
-    assertion_message = "This should not have been formatted as a string."
 
     class _NotFormattedObject:
         @override
         def __repr__(self) -> str:
-            raise AssertionError(assertion_message)
+            raise AssertionError(_NOT_FORMATTED_ASSERTION_MESSAGE)
 
     # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
     recorded_logger: logging.Logger
@@ -51,14 +52,35 @@ def __repr__(self) -> str:
 
 
 def test_lazy_lambda() -> None:
-    """Test that a lambda input is not evaluated when the logging level is not appropriate."""
-    assertion_message = "This should not have been formatted as a string."
+    """Test that a lambda message is not evaluated when the logging level is not appropriate."""
 
     def _should_not_be_called() -> str:
-        raise AssertionError(assertion_message)
+        raise AssertionError(_NOT_FORMATTED_ASSERTION_MESSAGE)
 
     # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
     recorded_logger: logging.Logger
     handler: RecordingLogHandler
     with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
         recorded_logger.debug(LazyFormat(lambda: f"{_should_not_be_called()}"))
+
+
+def test_lambda_argument() -> None:
+    """Tests that a callable that's supplied as an argument value is evaluated."""
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
+        recorded_logger.info(LazyFormat("Example message", arg_0=lambda: 1))
+        assert handler.get_last_message() == "Example message (arg_0=1)"
+
+
+def test_lazy_lambda_argument() -> None:
+    """Test that a lambda input is not evaluated when the logging level is not appropriate."""
+
+    def _should_not_be_called() -> str:
+        raise AssertionError(_NOT_FORMATTED_ASSERTION_MESSAGE)
+
+    # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat("Example message", arg_0=lambda: f"{_should_not_be_called()}"))
diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py
@@ -326,8 +326,8 @@ def optimize(self, dataflow_plan: DataflowPlan) -> DataflowPlan:  # noqa: D102
         logger.debug(
             LazyFormat(
                 "Optimized dataflow plan",
-                original_plan=dataflow_plan.sink_node.structure_text(),
-                optimized_plan=optimized_result.optimized_branch.structure_text(),
+                original_plan=dataflow_plan.sink_node.structure_text,
+                optimized_plan=optimized_result.optimized_branch.structure_text,
             )
         )
 
diff --git a/metricflow/sql/optimizer/column_pruner.py b/metricflow/sql/optimizer/column_pruner.py
@@ -114,7 +114,7 @@ def optimize(self, node: SqlPlanNode) -> SqlPlanNode:  # noqa: D102
             logger.error(
                 LazyFormat(
                     "The columns required at this node can't be determined, so skipping column pruning",
-                    node=node.structure_text(),
+                    node=node.structure_text,
                     required_select_columns=required_select_columns,
                 )
             )

Original file line number	Diff line number	Diff line change
`@@ -326,8 +326,8 @@ def optimize(self, dataflow_plan: DataflowPlan) -> DataflowPlan: # noqa: D102`
`326`	`326`	`logger.debug(`
`327`	`327`	`LazyFormat(`
`328`	`328`	`"Optimized dataflow plan",`
`329`		`- original_plan=dataflow_plan.sink_node.structure_text(),`
`330`		`- optimized_plan=optimized_result.optimized_branch.structure_text(),`
	`329`	`+ original_plan=dataflow_plan.sink_node.structure_text,`
	`330`	`+ optimized_plan=optimized_result.optimized_branch.structure_text,`
`331`	`331`	`)`
`332`	`332`	`)`
`333`	`333`
Original file line number	Diff line number	Diff line change
`@@ -114,7 +114,7 @@ def optimize(self, node: SqlPlanNode) -> SqlPlanNode: # noqa: D102`
`114`	`114`	`logger.error(`
`115`	`115`	`LazyFormat(`
`116`	`116`	`"The columns required at this node can't be determined, so skipping column pruning",`
`117`		`- node=node.structure_text(),`
	`117`	`+ node=node.structure_text,`
`118`	`118`	`required_select_columns=required_select_columns,`
`119`	`119`	`)`
`120`	`120`	`)`