Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT-#7141: Add an ability to use config variables with a context manager #7142

Merged
merged 9 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions docs/flow/modin/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,30 @@ API.
# Changing value of `NPartitions`
modin.config.NPartitions.put(16)
print(modin.config.NPartitions.get()) # prints '16'

One can also use config variables with a context manager in order to use
some config only for a certain part of the code:

.. code-block:: python

import modin.config as cfg

# Default value for this config is 'False'
print(cfg.RangePartitioning.get()) # False

# Set the config to 'True' inside of the context-manager
with cfg.context(RangePartitioning=True):
    print(cfg.RangePartitioning.get()) # True
    df.merge(...) # will use range-partitioning impl

# Once the context is over, the config gets back to its previous value
print(cfg.RangePartitioning.get()) # False

# You can also set multiple configs at once by passing several keyword arguments to 'cfg.context'
print(cfg.AsyncReadMode.get()) # False

with cfg.context(RangePartitioning=True, AsyncReadMode=True):
    print(cfg.RangePartitioning.get()) # True
    print(cfg.AsyncReadMode.get()) # True
print(cfg.RangePartitioning.get()) # False
print(cfg.AsyncReadMode.get()) # False
3 changes: 2 additions & 1 deletion modin/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,13 @@
TestReadFromSqlServer,
TrackFileLeaks,
)
from modin.config.pubsub import Parameter, ValueSource
from modin.config.pubsub import Parameter, ValueSource, context

__all__ = [
"EnvironmentVariable",
"Parameter",
"ValueSource",
"context",
# General settings
"IsDebug",
"Engine",
Expand Down
49 changes: 48 additions & 1 deletion modin/config/pubsub.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

"""Module houses ``Parameter`` class - base class for all configs."""

import contextlib
import warnings
from collections import defaultdict
from enum import IntEnum
Expand All @@ -21,6 +22,7 @@
Any,
Callable,
DefaultDict,
Iterator,
NamedTuple,
Optional,
Tuple,
Expand Down Expand Up @@ -449,4 +451,49 @@ def add_option(cls, choice: Any) -> Any:
raise TypeError("Cannot add a choice to a parameter where choices is None")


__all__ = ["Parameter"]
@contextlib.contextmanager
def context(**config: Any) -> Iterator[None]:
    """
    Set a value(s) for the specified config(s) from ``modin.config`` in the scope of the context.

    The previous value of every touched config is recorded before it is
    overridden and is put back when the context exits, whether the body
    finished normally or raised.

    Parameters
    ----------
    **config : dict[str, Any]
        Keyword describing a name of a config variable from ``modin.config`` as a key
        and a new value as a value.

    Raises
    ------
    AttributeError
        If a keyword does not name an attribute of ``modin.config``. Configs that
        were already overridden by this call are restored before the error propagates.

    Examples
    --------
    >>> RangePartitioning.get()
    False
    >>> with context(RangePartitioning=True):
    ...     print(RangePartitioning.get()) # True
    True
    >>> RangePartitioning.get()
    False
    >>> with context(RangePartitioning=True, AsyncReadMode=True):
    ...     print(RangePartitioning.get()) # True
    ...     print(AsyncReadMode.get()) # True
    True
    True
    >>> RangePartitioning.get()
    False
    >>> AsyncReadMode.get()
    False
    """
    # Imported inside the function: ``modin/config/__init__.py`` imports ``context``
    # from this module, so a module-level ``import modin.config`` here would
    # presumably be circular.
    # NOTE(review): keeping this helper in ``pubsub.py`` rather than moving it to
    # ``modin/config/__init__.py`` was discussed and accepted during review.
    import modin.config as cfg

    old_values = {}
    # The overrides are applied inside the ``try`` so that a failure part-way
    # through (unknown config name, value rejected by ``put``) still rolls back
    # the configs that were already changed.
    try:
        for name, val in config.items():
            var = getattr(cfg, name)
            old_values[var] = var.get()
            var.put(val)
        yield
    finally:
        for var, val in old_values.items():
            var.put(val)


__all__ = ["Parameter", "context"]
58 changes: 58 additions & 0 deletions modin/config/test/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,61 @@ def get_values():
finally:
deprecated_var.put(old_depr_val)
new_var.put(old_new_var)


@pytest.mark.parametrize(
    "modify_config",
    [{cfg.RangePartitioning: False, cfg.LazyExecution: "Auto"}],
    indirect=True,
)
def test_context_manager_update_config(modify_config):
    """Check that ``cfg.context`` applies overrides inside the block and restores them after it."""
    # NOTE(review): assumes the ``modify_config`` fixture puts the parametrized
    # starting values (RangePartitioning=False, LazyExecution="Auto") in place
    # before the test body runs - confirm against the fixture definition.

    def expect(range_partitioning, lazy_execution=None):
        # ``is`` keeps the identity check against the bool singletons;
        # ``LazyExecution`` is only checked when an expected value is given.
        assert cfg.RangePartitioning.get() is range_partitioning
        if lazy_execution is not None:
            assert cfg.LazyExecution.get() == lazy_execution

    # one parameter, single level
    expect(False)
    with cfg.context(RangePartitioning=True):
        expect(True)
    expect(False)

    # one parameter, three levels deep; every exit must restore the value
    # that was active one level up
    expect(False)
    with cfg.context(RangePartitioning=True):
        expect(True)
        with cfg.context(RangePartitioning=False):
            expect(False)
            with cfg.context(RangePartitioning=False):
                expect(False)
            expect(False)
        expect(True)
    expect(False)

    # two parameters, single level
    expect(False, "Auto")
    with cfg.context(RangePartitioning=True, LazyExecution="Off"):
        expect(True, "Off")
    expect(False, "Auto")

    # two parameters, nested; inner contexts override only a subset, so the
    # untouched parameter must keep the value set by the enclosing context
    expect(False, "Auto")
    with cfg.context(RangePartitioning=True, LazyExecution="Off"):
        expect(True, "Off")
        with cfg.context(RangePartitioning=False):
            expect(False, "Off")
            with cfg.context(LazyExecution="On"):
                expect(False, "On")
                with cfg.context(RangePartitioning=True, LazyExecution="Off"):
                    expect(True, "Off")
                expect(False, "On")
            expect(False, "Off")
        expect(True, "Off")
    expect(False, "Auto")
Loading