Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT-#7141: Add an ability to use config variables with a context manager #7142

Merged
merged 9 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/flow/modin/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,21 @@ API.
# Changing value of `NPartitions`
modin.config.NPartitions.put(16)
print(modin.config.NPartitions.get()) # prints '16'

Config variables can also be used as context managers in order to apply
a config value only to a certain part of the code:

.. code-block:: python

import modin.config as cfg

# Default value for this config is 'False'
print(cfg.RangePartitioning.get()) # False

# Set the config to 'True' inside of the context-manager
with cfg.RangePartitioning(True):
dchigarev marked this conversation as resolved.
Show resolved Hide resolved
print(cfg.RangePartitioning.get()) # True
df.merge(...) # will use range-partitioning impl

# Once the context is over, the config gets back to its previous value
print(cfg.RangePartitioning.get()) # False
84 changes: 42 additions & 42 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
)


class EnvironmentVariable(Parameter, type=str, abstract=True):
class EnvironmentVariable(Parameter, type=str, abstract=True): # noqa: PR01
dchigarev marked this conversation as resolved.
Show resolved Hide resolved
"""Base class for environment variables-based configuration."""

varname: Optional[str] = None
Expand Down Expand Up @@ -80,7 +80,7 @@ class EnvWithSibilings(
# 'type' is a mandatory parameter for '__init_subclasses__', so we have to pass something here,
# this doesn't force child classes to have 'str' type though, they actually can be any type
type=str,
):
): # noqa: PR01
"""Ensure values synchronization between sibling parameters."""

_update_sibling = True
Expand Down Expand Up @@ -165,13 +165,13 @@ def put(cls, value: Any) -> None:
cls._update_sibling = True


class IsDebug(EnvironmentVariable, type=bool):
class IsDebug(EnvironmentVariable, type=bool): # noqa: PR01
"""Force Modin engine to be "Python" unless specified by $MODIN_ENGINE."""

varname = "MODIN_DEBUG"


class Engine(EnvironmentVariable, type=str):
class Engine(EnvironmentVariable, type=str): # noqa: PR01
"""Distribution engine to run queries by."""

varname = "MODIN_ENGINE"
Expand Down Expand Up @@ -262,40 +262,40 @@ def add_option(cls, choice: Any) -> Any:
return choice


class StorageFormat(EnvironmentVariable, type=str):
class StorageFormat(EnvironmentVariable, type=str): # noqa: PR01
"""Engine to run on a single node of distribution."""

varname = "MODIN_STORAGE_FORMAT"
default = "Pandas"
choices = ("Pandas", "Hdk", "Cudf")


class IsExperimental(EnvironmentVariable, type=bool):
class IsExperimental(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether to turn on experimental features."""

varname = "MODIN_EXPERIMENTAL"


class IsRayCluster(EnvironmentVariable, type=bool):
class IsRayCluster(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether Modin is running on pre-initialized Ray cluster."""

varname = "MODIN_RAY_CLUSTER"


class RayRedisAddress(EnvironmentVariable, type=ExactStr):
class RayRedisAddress(EnvironmentVariable, type=ExactStr): # noqa: PR01
"""Redis address to connect to when running in Ray cluster."""

varname = "MODIN_REDIS_ADDRESS"


class RayRedisPassword(EnvironmentVariable, type=ExactStr):
class RayRedisPassword(EnvironmentVariable, type=ExactStr): # noqa: PR01
"""What password to use for connecting to Redis."""

varname = "MODIN_REDIS_PASSWORD"
default = secrets.token_hex(32)


class CpuCount(EnvironmentVariable, type=int):
class CpuCount(EnvironmentVariable, type=int): # noqa: PR01
"""How many CPU cores to use during initialization of the Modin engine."""

varname = "MODIN_CPUS"
Expand All @@ -314,13 +314,13 @@ def _get_default(cls) -> int:
return multiprocessing.cpu_count()


class GpuCount(EnvironmentVariable, type=int):
class GpuCount(EnvironmentVariable, type=int): # noqa: PR01
"""How many GPU devices to utilize across the whole distribution."""

varname = "MODIN_GPUS"


class Memory(EnvironmentVariable, type=int):
class Memory(EnvironmentVariable, type=int): # noqa: PR01
"""
How much memory (in bytes) to give to an execution engine.

Expand All @@ -333,7 +333,7 @@ class Memory(EnvironmentVariable, type=int):
varname = "MODIN_MEMORY"


class NPartitions(EnvironmentVariable, type=int):
class NPartitions(EnvironmentVariable, type=int): # noqa: PR01
"""How many partitions to use for a Modin DataFrame (along each axis)."""

varname = "MODIN_NPARTITIONS"
Expand Down Expand Up @@ -371,27 +371,27 @@ def _get_default(cls) -> int:
return CpuCount.get()


class HdkFragmentSize(EnvironmentVariable, type=int):
class HdkFragmentSize(EnvironmentVariable, type=int): # noqa: PR01
"""How big a fragment in HDK should be when creating a table (in rows)."""

varname = "MODIN_HDK_FRAGMENT_SIZE"


class DoUseCalcite(EnvironmentVariable, type=bool):
class DoUseCalcite(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether to use Calcite for HDK queries execution."""

varname = "MODIN_USE_CALCITE"
default = True


class TestDatasetSize(EnvironmentVariable, type=str):
class TestDatasetSize(EnvironmentVariable, type=str): # noqa: PR01
"""Dataset size for running some tests."""

varname = "MODIN_TEST_DATASET_SIZE"
choices = ("Small", "Normal", "Big")


class TrackFileLeaks(EnvironmentVariable, type=bool):
class TrackFileLeaks(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether to track for open file handles leakage during testing."""

varname = "MODIN_TEST_TRACK_FILE_LEAKS"
Expand All @@ -401,7 +401,7 @@ class TrackFileLeaks(EnvironmentVariable, type=bool):
default = sys.platform != "win32"


class AsvImplementation(EnvironmentVariable, type=ExactStr):
class AsvImplementation(EnvironmentVariable, type=ExactStr): # noqa: PR01
"""Allows selecting a library that we will use for testing performance."""

varname = "MODIN_ASV_USE_IMPL"
Expand All @@ -410,14 +410,14 @@ class AsvImplementation(EnvironmentVariable, type=ExactStr):
default = "modin"


class AsvDataSizeConfig(EnvironmentVariable, type=ExactStr):
class AsvDataSizeConfig(EnvironmentVariable, type=ExactStr): # noqa: PR01
"""Allows overriding the default size of data (shapes)."""

varname = "MODIN_ASV_DATASIZE_CONFIG"
default = None


class ProgressBar(EnvironmentVariable, type=bool):
class ProgressBar(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether or not to show the progress bar."""

varname = "MODIN_PROGRESS_BAR"
Expand Down Expand Up @@ -448,7 +448,7 @@ def put(cls, value: bool) -> None:
super().put(value)


class BenchmarkMode(EnvironmentVariable, type=bool):
class BenchmarkMode(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether or not to perform computations synchronously."""

varname = "MODIN_BENCHMARK_MODE"
Expand All @@ -469,7 +469,7 @@ def put(cls, value: bool) -> None:
super().put(value)


class LogMode(EnvironmentVariable, type=ExactStr):
class LogMode(EnvironmentVariable, type=ExactStr): # noqa: PR01
"""Set ``LogMode`` value if users want to opt-in."""

varname = "MODIN_LOG_MODE"
Expand All @@ -496,7 +496,7 @@ def enable_api_only(cls) -> None:
cls.put("enable_api_only")


class LogMemoryInterval(EnvironmentVariable, type=int):
class LogMemoryInterval(EnvironmentVariable, type=int): # noqa: PR01
"""Interval (in seconds) to profile memory utilization for logging."""

varname = "MODIN_LOG_MEMORY_INTERVAL"
Expand Down Expand Up @@ -530,7 +530,7 @@ def get(cls) -> int:
return log_memory_interval


class LogFileSize(EnvironmentVariable, type=int):
class LogFileSize(EnvironmentVariable, type=int): # noqa: PR01
"""Max size of logs (in MBs) to store per Modin job."""

varname = "MODIN_LOG_FILE_SIZE"
Expand Down Expand Up @@ -564,7 +564,7 @@ def get(cls) -> int:
return log_file_size


class PersistentPickle(EnvironmentVariable, type=bool):
class PersistentPickle(EnvironmentVariable, type=bool): # noqa: PR01
"""Whether serialization should be persistent."""

varname = "MODIN_PERSISTENT_PICKLE"
Expand All @@ -575,7 +575,7 @@ class PersistentPickle(EnvironmentVariable, type=bool):
default = False


class HdkLaunchParameters(EnvironmentVariable, type=dict):
class HdkLaunchParameters(EnvironmentVariable, type=dict): # noqa: PR01
"""
Additional command line options for the HDK engine.

Expand Down Expand Up @@ -638,7 +638,7 @@ def _get_default(cls) -> Any:
return default


class MinPartitionSize(EnvironmentVariable, type=int):
class MinPartitionSize(EnvironmentVariable, type=int): # noqa: PR01
"""
Minimum number of rows/columns in a single pandas partition split.

Expand Down Expand Up @@ -677,28 +677,28 @@ def get(cls) -> int:
return min_partition_size


class TestReadFromSqlServer(EnvironmentVariable, type=bool):
class TestReadFromSqlServer(EnvironmentVariable, type=bool): # noqa: PR01
"""Set to true to test reading from SQL server."""

varname = "MODIN_TEST_READ_FROM_SQL_SERVER"
default = False


class TestReadFromPostgres(EnvironmentVariable, type=bool):
class TestReadFromPostgres(EnvironmentVariable, type=bool): # noqa: PR01
"""Set to true to test reading from Postgres."""

varname = "MODIN_TEST_READ_FROM_POSTGRES"
default = False


class GithubCI(EnvironmentVariable, type=bool):
class GithubCI(EnvironmentVariable, type=bool): # noqa: PR01
"""Set to true when running Modin in GitHub CI."""

varname = "MODIN_GITHUB_CI"
default = False


class ModinNumpy(EnvWithSibilings, type=bool):
class ModinNumpy(EnvWithSibilings, type=bool): # noqa: PR01
"""Set to true to use Modin's implementation of NumPy API."""

varname = "MODIN_NUMPY"
Expand All @@ -710,7 +710,7 @@ def _sibling(cls) -> type[EnvWithSibilings]:
return ExperimentalNumPyAPI


class ExperimentalNumPyAPI(EnvWithSibilings, type=bool):
class ExperimentalNumPyAPI(EnvWithSibilings, type=bool): # noqa: PR01
"""
Set to true to use Modin's implementation of NumPy API.

Expand All @@ -733,7 +733,7 @@ def _sibling(cls) -> type[EnvWithSibilings]:
)


class RangePartitioningGroupby(EnvWithSibilings, type=bool):
class RangePartitioningGroupby(EnvWithSibilings, type=bool): # noqa: PR01
"""
Set to true to use Modin's range-partitioning group by implementation.

Expand All @@ -752,7 +752,7 @@ def _sibling(cls) -> type[EnvWithSibilings]:
return ExperimentalGroupbyImpl


class ExperimentalGroupbyImpl(EnvWithSibilings, type=bool):
class ExperimentalGroupbyImpl(EnvWithSibilings, type=bool): # noqa: PR01
"""
Set to true to use Modin's range-partitioning group by implementation.

Expand All @@ -775,7 +775,7 @@ def _sibling(cls) -> type[EnvWithSibilings]:
)


class RangePartitioning(EnvironmentVariable, type=bool):
class RangePartitioning(EnvironmentVariable, type=bool): # noqa: PR01
"""
Set to true to use Modin's range-partitioning implementation where possible.

Expand All @@ -787,21 +787,21 @@ class RangePartitioning(EnvironmentVariable, type=bool):
default = False


class CIAWSSecretAccessKey(EnvironmentVariable, type=str):
class CIAWSSecretAccessKey(EnvironmentVariable, type=str): # noqa: PR01
"""Set to AWS_SECRET_ACCESS_KEY when running mock S3 tests for Modin in GitHub CI."""

varname = "AWS_SECRET_ACCESS_KEY"
default = "foobar_secret"


class CIAWSAccessKeyID(EnvironmentVariable, type=str):
class CIAWSAccessKeyID(EnvironmentVariable, type=str): # noqa: PR01
"""Set to AWS_ACCESS_KEY_ID when running mock S3 tests for Modin in GitHub CI."""

varname = "AWS_ACCESS_KEY_ID"
default = "foobar_key"


class AsyncReadMode(EnvironmentVariable, type=bool):
class AsyncReadMode(EnvironmentVariable, type=bool): # noqa: PR01
"""
It does not wait for the end of reading information from the source.

Expand All @@ -825,15 +825,15 @@ class AsyncReadMode(EnvironmentVariable, type=bool):
default = False


class ReadSqlEngine(EnvironmentVariable, type=str):
class ReadSqlEngine(EnvironmentVariable, type=str): # noqa: PR01
"""Engine to run `read_sql`."""

varname = "MODIN_READ_SQL_ENGINE"
default = "Pandas"
choices = ("Pandas", "Connectorx")


class LazyExecution(EnvironmentVariable, type=str):
class LazyExecution(EnvironmentVariable, type=str): # noqa: PR01
"""
Lazy execution mode.

Expand All @@ -848,7 +848,7 @@ class LazyExecution(EnvironmentVariable, type=str):
default = "Auto"


class DocModule(EnvironmentVariable, type=ExactStr):
class DocModule(EnvironmentVariable, type=ExactStr): # noqa: PR01
"""
The module that will be used for docstrings.

Expand Down Expand Up @@ -890,7 +890,7 @@ def put(cls, value: str) -> None:
importlib.reload(pd)


class DaskThreadsPerWorker(EnvironmentVariable, type=int):
class DaskThreadsPerWorker(EnvironmentVariable, type=int): # noqa: PR01
"""Number of threads per Dask worker."""

varname = "MODIN_DASK_THREADS_PER_WORKER"
Expand Down
Loading
Loading