Skip to content

Commit

Permalink
[FEATURE][KED-2698] Set base and local envs via ConfigLoader an…
Browse files Browse the repository at this point in the history
…d `settings.py` (#1171)
  • Loading branch information
jiriklein authored Jul 12, 2021
1 parent 68b45d6 commit 41c2e0a
Show file tree
Hide file tree
Showing 17 changed files with 186 additions and 132 deletions.
6 changes: 6 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* Removed `cli.py` from the Kedro project template. By default, all CLI commands, including `kedro run`, are now defined on the Kedro framework side. These can be overridden in turn by a plugin or a `cli.py` file in your project. A packaged Kedro project will respect the same hierarchy when executed with `python -m my_package`.
* Merged `pandas.AppendableExcelDataSet` into `pandas.ExcelDataSet`.
* Added `save_args` to `feather.FeatherDataSet`.
* The default `kedro` environment names can now be set in `settings.py` via the `CONFIG_LOADER_ARGS` variable. The relevant keys are `base_env` and `default_run_env`, which default to `base` and `local` respectively.

## Breaking changes to the API
* Add namespace to parameters in a modular pipeline, which addresses [Issue 399](https://github.com/quantumblacklabs/kedro/issues/399)
Expand Down Expand Up @@ -38,6 +39,9 @@
* pandas.FeatherDataSet
* pandas.JSONDataSet
* pandas.ParquetDataSet
* The environment defaulting behaviour has been removed from `KedroContext` and is now implemented in a `ConfigLoader` class (or equivalent) with the `base_env` and `default_run_env` attributes.
* `ConfigLoader` and `TemplatedConfigLoader` argument `conf_root` has been renamed to `conf_source` to align the API.
* The `settings.py` setting `CONF_ROOT` has been renamed to `CONF_SOURCE` to align the API. The default value, `conf`, remains unchanged.

## Migration guide from Kedro 0.17.* to 0.18.*
* Please remove any existing `hook_impl` of the `register_config_loader` method from `ProjectHooks` (or custom alternatives).
Expand All @@ -50,6 +54,8 @@
* Edit any scripts containing `kedro pipeline package --version` to remove the `--version` option. If you wish to set a specific pipeline package version, set the `__version__` variable in the pipeline package's `__init__.py` file.
* If you had any `pandas.AppendableExcelDataSet` entries in your catalog, replace them with `pandas.ExcelDataSet`.
* If you were using `pandas~=1.2.0` and passing `storage_options` through `load_args` or `save_args`, please specify them under `fs_args` or via `credentials` instead.
* Update the `settings.py` setting `CONF_ROOT` to `CONF_SOURCE`.
* Update the keyword argument `conf_root` to `conf_source` when calling `ConfigLoader` or `TemplatedConfigLoader` directly.

# Upcoming Release 0.17.5

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
"requests.auth.AuthBase",
"google.oauth2.credentials.Credentials",
"Exception",
"CONF_ROOT",
"CONF_SOURCE",
"integer -- return number of occurrences of value",
"integer -- return first index of value.",
"kedro.extras.datasets.pandas.json_dataset.JSONDataSet",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@
# CONTEXT_CLASS = KedroContext

# Define the configuration folder. Defaults to `conf`
# CONF_ROOT = "conf"
# CONF_SOURCE = "conf"
42 changes: 26 additions & 16 deletions kedro/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ class BadConfigException(Exception):


class ConfigLoader:
"""Recursively scan directories (config paths) contained in ``conf_root`` for
"""Recursively scan directories (config paths) contained in ``conf_source`` for
configuration files with a ``yaml``, ``yml``, ``json``, ``ini``,
``pickle``, ``xml`` or ``properties`` extension, load them,
and return them in the form of a config dictionary.
The first processed config path is the ``base`` directory inside
``conf_root``. The optional ``env`` argument can be used to specify a
subdirectory of ``conf_root`` to process as a config path after ``base``.
``conf_source``. The optional ``env`` argument can be used to specify a
subdirectory of ``conf_source`` to process as a config path after ``base``.
When the same top-level key appears in any 2 config files located in
the same (sub)directory, a ``ValueError`` is raised.
Expand All @@ -76,7 +76,7 @@ class ConfigLoader:
(sub)directories, the last processed config path takes precedence
and overrides this key.
For example, if your ``conf_root`` looks like this:
For example, if your ``conf_source`` looks like this:
::
.
Expand Down Expand Up @@ -114,27 +114,37 @@ class ConfigLoader:
"""

def __init__(
self, conf_root: str, env: str = None, extra_params: Dict[str, Any] = None
self,
conf_source: str,
env: str = None,
extra_params: Dict[str, Any] = None,
**kwargs,
):
"""Instantiates a ``ConfigLoader``.
Args:
conf_root: Path to use as root directory for loading configuration.
conf_source: Path to use as root directory for loading configuration.
env: Environment that will take precedence over base.
extra_params: Extra parameters passed to a Kedro run.
"""
self.conf_paths = _remove_duplicates(
self._build_conf_paths(Path(conf_root), env)
)
self.conf_source = conf_source
self.env = env
self.logger = logging.getLogger(__name__)
self.extra_params = extra_params

@staticmethod
def _build_conf_paths(conf_root: Path, env: str = None) -> List[str]:
"""Builds list of paths to use for configuration."""
if not env:
return [str(conf_root / "base")]
return [str(conf_root / "base"), str(conf_root / env)]
self.base_env = kwargs.get("base_env") or "base"
self.default_run_env = kwargs.get("default_run_env") or "local"

@property
def conf_paths(self):
"""Property method to return deduplicated configuration paths."""
return _remove_duplicates(self._build_conf_paths())

def _build_conf_paths(self) -> Iterable[str]:
run_env = self.env or self.default_run_env
return [
str(Path(self.conf_source) / self.base_env),
str(Path(self.conf_source) / run_env),
]

@staticmethod
def _load_config_file(config_file: Path) -> Dict[str, Any]:
Expand Down
9 changes: 5 additions & 4 deletions kedro/config/templated_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,18 @@ class TemplatedConfigLoader(ConfigLoader):

def __init__(
self,
conf_root: str,
conf_source: str,
env: str = None,
extra_params: Dict[str, Any] = None,
*,
globals_pattern: Optional[str] = None,
globals_dict: Optional[Dict[str, Any]] = None
globals_dict: Optional[Dict[str, Any]] = None,
**kwargs,
):
"""Instantiates a ``TemplatedConfigLoader``.
Args:
conf_root: Path to use as root directory for loading configuration.
conf_source: Path to use as root directory for loading configuration.
env: Environment that will take precedence over base.
extra_params: Extra parameters passed to a Kedro run.
globals_pattern: Optional keyword-only argument specifying a glob
Expand All @@ -136,7 +137,7 @@ def __init__(
obtained from the globals_pattern. In case of duplicate keys, the
``globals_dict`` keys take precedence.
"""
super().__init__(conf_root, env, extra_params)
super().__init__(conf_source, env, extra_params, **kwargs)

self._arg_dict = super().get(globals_pattern) if globals_pattern else {}
globals_dict = deepcopy(globals_dict) or {}
Expand Down
4 changes: 2 additions & 2 deletions kedro/framework/cli/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
the `DataCatalog`.
The catalog configuration will be saved to
`<conf_root>/<env>/catalog/<pipeline_name>.yml` file.
`<conf_source>/<env>/catalog/<pipeline_name>.yml` file.
"""
env = env or "base"
session = _create_session(metadata.package_name, env=env)
Expand Down Expand Up @@ -175,7 +175,7 @@ def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
if missing_ds:
catalog_path = (
context.project_path
/ settings.CONF_ROOT
/ settings.CONF_SOURCE
/ env
/ "catalog"
/ f"{pipeline_name}.yml"
Expand Down
12 changes: 6 additions & 6 deletions kedro/framework/cli/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def create_pipeline(
): # pylint: disable=unused-argument
"""Create a new modular pipeline by providing a name."""
package_dir = metadata.source_dir / metadata.package_name
conf_root = settings.CONF_ROOT
project_conf_path = metadata.project_path / conf_root
conf_source = settings.CONF_SOURCE
project_conf_path = metadata.project_path / conf_source

env = env or "base"
if not skip_config and not (project_conf_path / env).exists():
Expand Down Expand Up @@ -167,8 +167,8 @@ def delete_pipeline(
): # pylint: disable=unused-argument
"""Delete a modular pipeline by providing a name."""
package_dir = metadata.source_dir / metadata.package_name
conf_root = settings.CONF_ROOT
project_conf_path = metadata.project_path / conf_root
conf_source = settings.CONF_SOURCE
project_conf_path = metadata.project_path / conf_source

env = env or "base"
if not (project_conf_path / env).exists():
Expand Down Expand Up @@ -674,8 +674,8 @@ def _get_pipeline_artifacts(
) -> PipelineArtifacts:
"""From existing project, returns in order: source_path, tests_path, config_paths"""
package_dir = project_metadata.source_dir / project_metadata.package_name
conf_root = settings.CONF_ROOT
project_conf_path = project_metadata.project_path / conf_root
conf_source = settings.CONF_SOURCE
project_conf_path = project_metadata.project_path / conf_source
artifacts = PipelineArtifacts(
package_dir / "pipelines" / pipeline_name,
package_dir.parent / "tests" / "pipelines" / pipeline_name,
Expand Down
27 changes: 14 additions & 13 deletions kedro/framework/context/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import logging
from copy import deepcopy
from pathlib import Path, PurePosixPath, PureWindowsPath
from typing import Any, Dict, Iterable, Union
from typing import Any, Dict, Iterable, Optional, Union
from urllib.parse import urlparse
from warnings import warn

Expand Down Expand Up @@ -197,8 +197,8 @@ class KedroContext:
Kedro's main functionality.
"""

_CONF_ROOT = "conf"
"""CONF_ROOT: Name of root directory containing project configuration.
_CONF_SOURCE = "conf"
"""CONF_SOURCE: Name of root directory containing project configuration.
Default name is "conf"."""

def __init__(
Expand Down Expand Up @@ -229,32 +229,32 @@ def __init__(
self._project_path = Path(project_path).expanduser().resolve()
self._package_name = package_name

self._env = env or "local"
self._env = env
self._extra_params = deepcopy(extra_params)

@property # type: ignore
@_deprecate(version="0.18.0")
def CONF_ROOT(self) -> str: # pylint: disable=invalid-name
"""Deprecated in favour of settings.CONF_ROOT
def CONF_SOURCE(self) -> str: # pylint: disable=invalid-name
"""Deprecated in favour of settings.CONF_SOURCE
Returns:
The root directory of the configuration directory of the project.
Raises:
DeprecationWarning
"""
return self._CONF_ROOT
return self._CONF_SOURCE

@CONF_ROOT.setter # type: ignore
@CONF_SOURCE.setter # type: ignore
@_deprecate(version="0.18.0")
def CONF_ROOT(self, value: str) -> None: # pylint: disable=invalid-name
"""Deprecated in favour of settings.CONF_ROOT
def CONF_SOURCE(self, value: str) -> None: # pylint: disable=invalid-name
"""Deprecated in favour of settings.CONF_SOURCE
Raises:
DeprecationWarning
"""
self._CONF_ROOT = value # pylint: disable=invalid-name
self._CONF_SOURCE = value # pylint: disable=invalid-name

@property # type: ignore
def env(self) -> str:
def env(self) -> Optional[str]:
"""Property for the current Kedro environment.
Returns:
Expand Down Expand Up @@ -425,11 +425,12 @@ def _get_config_loader(self) -> ConfigLoader:
"""
try:
return settings.CONFIG_LOADER_CLASS(
conf_root=str(self.project_path / settings.CONF_ROOT),
conf_source=str(self.project_path / settings.CONF_SOURCE),
env=self.env,
extra_params=self._extra_params,
**settings.CONFIG_LOADER_ARGS,
)

except TypeError as exc:
raise KedroContextError(
f"Expected an instance of `ConfigLoader`, "
Expand Down
4 changes: 2 additions & 2 deletions kedro/framework/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class _ProjectSettings(LazySettings):
Use Dynaconf's LazySettings as base.
"""

_CONF_ROOT = Validator("CONF_ROOT", default="conf")
_CONF_SOURCE = Validator("CONF_SOURCE", default="conf")
_HOOKS = Validator("HOOKS", default=tuple())
_CONTEXT_CLASS = Validator(
"CONTEXT_CLASS",
Expand All @@ -95,7 +95,7 @@ def __init__(self, *args, **kwargs):

kwargs.update(
validators=[
self._CONF_ROOT,
self._CONF_SOURCE,
self._HOOKS,
self._CONTEXT_CLASS,
self._SESSION_STORE_CLASS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,18 @@
# CONTEXT_CLASS = KedroContext

# Define the configuration folder. Defaults to `conf`
# CONF_ROOT = "conf"
# CONF_SOURCE = "conf"

# Select the project ConfigLoader class here.
# Defaults to kedro.config.ConfigLoader
# Define the config loader. Defaults to ConfigLoader.
# from kedro.config import TemplatedConfigLoader
# CONFIG_LOADER_CLASS = TemplatedConfigLoader

# Define keyword arguments to be passed to `CONFIG_LOADER_CLASS` constructor
# Define keyword arguments to be passed to `CONFIG_LOADER_CLASS` constructor.
# These kwargs depend on the `ConfigLoader` class implementation.
# CONFIG_LOADER_ARGS = {
# "globals_pattern": "*globals.yml",
# "base_env": "base",
# "default_run_env": "local",
# }
Loading

0 comments on commit 41c2e0a

Please sign in to comment.