Improve performance: add `Config.maintain_order` option so `unique`/`group_by` calls can skip order preservation

staadecker · staadecker · commit c05e42849c3b · 2025-07-14T08:46:36.000Z
diff --git a/benchmarks/src/facility_location/bm_pyoframe.py b/benchmarks/src/facility_location/bm_pyoframe.py
@@ -59,4 +59,4 @@ def build(self):
 
 
 if __name__ == "__main__":
-    Bench("gurobi", 5).run()
+    Bench("gurobi", 100).run()
diff --git a/benchmarks/src/utils.py b/benchmarks/src/utils.py
@@ -42,6 +42,8 @@ def __init__(self, *args, use_var_names=False, **kwargs):
             import pyoframe as pf
 
             pf.Config.print_uses_variable_names = False
+            pf.Config.maintain_order = False
+            pf.Config.disable_unmatched_checks = True
 
     def solve(self, model):
         if self.block_solver:
diff --git a/src/pyoframe/_arithmetic.py b/src/pyoframe/_arithmetic.py
@@ -235,7 +235,7 @@ def _add_expressions_core(*expressions: "Expression") -> "Expression":
             left, right = right, left
 
         def get_indices(expr):
-            return expr.data.select(dims).unique(maintain_order=True)
+            return expr.data.select(dims).unique(maintain_order=Config.maintain_order)
 
         left_data, right_data = left.data, right.data
 
@@ -343,7 +343,9 @@ def _add_dimension(self: "Expression", target: "Expression") -> "Expression":
             f"Dataframe has missing dimensions {missing_dims}. If this is intentional, use .add_dim()\n{self.data}"
         )
 
-    target_data = target.data.select(target_dims).unique(maintain_order=True)
+    target_data = target.data.select(target_dims).unique(
+        maintain_order=Config.maintain_order
+    )
 
     if not dims_in_common:
         return self._new(self.data.join(target_data, how="cross"))
@@ -365,7 +367,7 @@ def _sum_like_terms(df: pl.DataFrame) -> pl.DataFrame:
     """Combines terms with the same variables."""
     dims = [c for c in df.columns if c not in RESERVED_COL_KEYS]
     var_cols = [VAR_KEY] + ([QUAD_VAR_KEY] if QUAD_VAR_KEY in df.columns else [])
-    df = df.group_by(dims + var_cols, maintain_order=True).sum()
+    df = df.group_by(dims + var_cols, maintain_order=Config.maintain_order).sum()
     return df
 
 
@@ -426,7 +428,7 @@ def _simplify_expr_df(df: pl.DataFrame) -> pl.DataFrame:
     if len(df_filtered) < len(df):
         dims = [c for c in df.columns if c not in RESERVED_COL_KEYS]
         if dims:
-            dim_values = df.select(dims).unique(maintain_order=True)
+            dim_values = df.select(dims).unique(maintain_order=Config.maintain_order)
             df = (
                 dim_values.join(df_filtered, on=dims, how="left")
                 .with_columns(pl.col(COEF_KEY).fill_null(0))
diff --git a/src/pyoframe/constants.py b/src/pyoframe/constants.py
@@ -115,6 +115,11 @@ class Config(metaclass=_ConfigMeta):
     unexpected errors. Setting the tolerance to zero disables the check.
     """
 
+    maintain_order: bool = True
+    """
+    If True, performance and memory usage may worsen, but the order of terms within expressions will not change across runs.
+    """
+
     @classmethod
     def reset_defaults(cls):
         """
diff --git a/src/pyoframe/core.py b/src/pyoframe/core.py
@@ -317,7 +317,9 @@ def __add__(self, other):
         if isinstance(other, Set):
             try:
                 return self._new(
-                    pl.concat([self.data, other.data]).unique(maintain_order=True)
+                    pl.concat([self.data, other.data]).unique(
+                        maintain_order=Config.maintain_order
+                    )
                 )
             except pl.exceptions.ShapeError as e:
                 if "unable to vstack, column names don't match" in str(e):
@@ -347,7 +349,7 @@ def _set_to_polars(set: "SetTypes") -> pl.DataFrame:
             df = (
                 set.to_expr()
                 .data.drop(RESERVED_COL_KEYS, strict=False)
-                .unique(maintain_order=True)
+                .unique(maintain_order=Config.maintain_order)
             )
         elif isinstance(set, pd.Index):
             df = pl.from_pandas(pd.DataFrame(index=set).reset_index())
@@ -481,7 +483,10 @@ def sum(self, over: Union[str, Iterable[str]]):
 
         return self._new(
             self.data.drop(over)
-            .group_by(remaining_dims + self._variable_columns, maintain_order=True)
+            .group_by(
+                remaining_dims + self._variable_columns,
+                maintain_order=Config.maintain_order,
+            )
             .sum()
         )
 
@@ -650,7 +655,7 @@ def within(self, set: "SetTypes") -> Expression:
             "Cannot use .within() with an expression with no dimensions."
         )
         dims_in_common = [dim for dim in dims if dim in set_dims]
-        by_dims = df.select(dims_in_common).unique(maintain_order=True)
+        by_dims = df.select(dims_in_common).unique(maintain_order=Config.maintain_order)
         return self._new(self.data.join(by_dims, on=dims_in_common))
 
     @property
@@ -815,7 +820,7 @@ def _add_const(self, const: int | float) -> Expression:
         else:
             keys = (
                 data.select(dim)
-                .unique(maintain_order=True)
+                .unique(maintain_order=Config.maintain_order)
                 .with_columns(pl.lit(CONST_TERM).alias(VAR_KEY).cast(KEY_TYPE))
             )
             if self.is_quadratic:
@@ -841,7 +846,9 @@ def constant_terms(self):
         if self.is_quadratic:
             constant_terms = constant_terms.drop(QUAD_VAR_KEY)
         if dims is not None:
-            dims_df = self.data.select(dims).unique(maintain_order=True)
+            dims_df = self.data.select(dims).unique(
+                maintain_order=Config.maintain_order
+            )
             df = constant_terms.join(dims_df, on=dims, how="full", coalesce=True)
             return df.with_columns(pl.col(COEF_KEY).fill_null(0.0))
         else:
@@ -909,7 +916,7 @@ def evaluate(self) -> pl.DataFrame:
 
         dims = self.dimensions
         if dims is not None:
-            df = df.group_by(dims, maintain_order=True)
+            df = df.group_by(dims, maintain_order=Config.maintain_order)
         return df.sum()
 
     def to_poi(self) -> poi.ScalarAffineFunction | poi.ScalarQuadraticFunction:
@@ -970,7 +977,7 @@ def to_str_table(self, include_const_term=True):
         ).drop(COEF_KEY, VAR_KEY)
 
         if dimensions is not None:
-            data = data.group_by(dimensions, maintain_order=True).agg(
+            data = data.group_by(dimensions, maintain_order=Config.maintain_order).agg(
                 pl.col("expr").str.join(delimiter=" ")
             )
         else:
@@ -1311,9 +1318,9 @@ def _assign_ids(self):
                 .cast(KEY_TYPE)
             )
         else:
-            df = self.lhs.data.group_by(self.dimensions, maintain_order=True).agg(
-                *key_cols_polars
-            )
+            df = self.lhs.data.group_by(
+                self.dimensions, maintain_order=Config.maintain_order
+            ).agg(*key_cols_polars)
             if use_var_names:
                 df = (
                     concat_dimensions(df, prefix=self.name)
@@ -1896,7 +1903,11 @@ def next(self, dim: str, wrap_around: bool = False) -> Expression:
             [18:00,Toronto]: bat_charge[18:00,Toronto] + bat_flow[18:00,Toronto] - bat_charge[00:00,Toronto] = 0
         """
 
-        wrapped = self.data.select(dim).unique(maintain_order=True).sort(by=dim)
+        wrapped = (
+            self.data.select(dim)
+            .unique(maintain_order=Config.maintain_order)
+            .sort(by=dim)
+        )
         wrapped = wrapped.with_columns(pl.col(dim).shift(-1).alias("__next"))
         if wrap_around:
             wrapped = wrapped.with_columns(pl.col("__next").fill_null(pl.first(dim)))

Original file line number	Diff line number	Diff line change
`@@ -59,4 +59,4 @@ def build(self):`
`59`	`59`
`60`	`60`
`61`	`61`	`if __name__ == "__main__":`
`62`		`- Bench("gurobi", 5).run()`
	`62`	`+ Bench("gurobi", 100).run()`