Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate using project directories instead of project instances #654

Merged
merged 21 commits into from
Jan 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
6d10402
Simplify migration logic by inlining functions.
vyasr Nov 14, 2021
33de14c
Add isolated functions for handling v1 config loading during migration.
vyasr Nov 14, 2021
d2538fc
Modify migration logic to operate on directory rather than project.
vyasr Dec 5, 2021
0d3811b
Require valid config for instantiating projects.
vyasr Dec 11, 2021
3b6d685
Update main APIs and rewrite all migration logic in terms of root dir…
vyasr Dec 11, 2021
d7e0fa2
Address questions on config load order.
vyasr Dec 11, 2021
e30b0ce
Add a proper deprecation for 0->1 migration.
vyasr Dec 11, 2021
12d7cc7
Undo copyright change.
vyasr Dec 11, 2021
4ab9155
Document the behavior of config loaders and add fallback for schema v…
vyasr Dec 11, 2021
822aa46
Update changelog.
vyasr Dec 11, 2021
51b2b7a
Address all PR comments except the loaders.
vyasr Jan 3, 2022
f881494
Fix versioned config loading function to return mutable config and ma…
vyasr Jan 3, 2022
b020896
Switch config writing to generate config using versioned loader.
vyasr Jan 3, 2022
883d15c
Remove backwards compat layer for v0 to v1 migration.
vyasr Jan 3, 2022
4aa459e
Address PR comments.
vyasr Jan 4, 2022
85b7e83
Merge branch 'master' into refactor/migration_root_dir
vyasr Jan 4, 2022
d94fc34
Test implicit schema version 0.
vyasr Jan 4, 2022
a53a098
Change apply_migrations to a pure function instead of a generator and…
vyasr Jan 4, 2022
ca12869
Address some more PR reviews.
vyasr Jan 18, 2022
22ab249
Fix default schema version.
vyasr Jan 27, 2022
096daba
Merge branch 'master' into refactor/migration_root_dir
vyasr Jan 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ Added
- Official support for Python 3.10 (#631).
- Benchmarks can be run using the ``asv`` (airspeed velocity) tool (#629).

Changed
+++++++

- Schema migration is now performed on directories rather than signac projects and supports a wider range of schemas (#654).

Deprecated
++++++++++

Expand Down
15 changes: 12 additions & 3 deletions signac/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,13 +771,15 @@ def main_update_cache(args):
# UNCOMMENT THE FOLLOWING BLOCK WHEN THE FIRST MIGRATION IS INTRODUCED.
# def main_migrate(args):
# "Migrate the project's schema to the current schema version."
# from .contrib.migration import apply_migrations
# from .contrib.migration import apply_migrations, _get_config_schema_version
# from packaging import version
# from .version import SCHEMA_VERSION
# project = get_project(_ignore_schema_version=True)
#
# root = args.root_directory if args.root_directory else os.getcwd()
#
# schema_version = version.parse(SCHEMA_VERSION)
# config_schema_version = version.parse(project.config['schema_version'])
# config_schema_version = _get_config_schema_version(root, schema_version)
#
# if config_schema_version > schema_version:
# _print_err(
Expand All @@ -792,7 +794,7 @@ def main_update_cache(args):
# "Do you want to migrate this project's schema version from '{}' to '{}'? "
# "WARNING: THIS PROCESS IS IRREVERSIBLE!".format(
# config_schema_version, schema_version), 'no'):
# apply_migrations(project)
# apply_migrations(root)
#
#
def verify_config(cfg, preserve_errors=True):
Expand Down Expand Up @@ -2025,6 +2027,13 @@ def main():
# 'migrate',
# description="Irreversibly migrate this project's schema version to the "
# "supported version.")
# parser_migrate.add_argument(
# "-r",
# "--root-directory",
# type=str,
# default='',
# help="The path to the project.",
# )
# parser_migrate.set_defaults(func=main_migrate)

# This is a hack, as argparse itself does not
Expand Down
165 changes: 101 additions & 64 deletions signac/contrib/migration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,102 +5,139 @@

import os
import sys
from contextlib import contextmanager

from filelock import FileLock
from packaging import version

from ...common.config import get_config
from ...version import SCHEMA_VERSION, __version__
from .v0_to_v1 import migrate_v0_to_v1
from .v0_to_v1 import _load_config_v1, _migrate_v0_to_v1

FN_MIGRATION_LOCKFILE = ".SIGNAC_PROJECT_MIGRATION_LOCK"


MIGRATIONS = {
("0", "1"): migrate_v0_to_v1,
# Config loaders must be functions with the signature
# def config_loader(root_directory: str) -> MutableMapping
# When a new schema version is introduced, a corresponding loader only needs to
# be added if the old loader will no longer function. This dictionary must
# contain all unique loaders for schema versions that are supported as starting
# points for migration. The resulting MutableMapping config objects must be
# writeable, i.e. it must be possible to persist in-memory changes from these
# objects to the underlying config files.
_CONFIG_LOADERS = {
"1": _load_config_v1,
}


def _reload_project_config(project):
project_reloaded = project.get_project(
root=project.root_directory(), search=False, _ignore_schema_version=True
)
project._config = project_reloaded._config
_MIGRATIONS = {
("0", "1"): _migrate_v0_to_v1,
}


def _update_project_config(project, **kwargs):
"""Update the project configuration."""
for fn in ("signac.rc", ".signacrc"):
config = get_config(project.fn(fn))
if "project" in config:
break
else:
raise RuntimeError("Unable to determine project configuration file.")
config.update(kwargs)
config.write()
_reload_project_config(project)
_PARSED_SCHEMA_VERSION = version.parse(SCHEMA_VERSION)

_VERSION_LIST = list(reversed(sorted(version.parse(v) for v in _CONFIG_LOADERS.keys())))

@contextmanager
def _lock_for_migration(project):
lock = FileLock(project.fn(FN_MIGRATION_LOCKFILE))
try:
with lock:
yield
finally:

def _get_config_schema_version(root_directory, version_guess):
    """Determine the schema version stored in a project's config file.

    The config is loaded with the first loader from ``_CONFIG_LOADERS`` that
    succeeds. The loader matching ``version_guess`` is tried first when one is
    registered, followed by the loaders listed in ``_VERSION_LIST``.

    Parameters
    ----------
    root_directory : str
        The path to the project whose config should be inspected.
    version_guess : packaging.version.Version
        The schema version the config is expected to have.

    Returns
    -------
    packaging.version.Version
        The parsed ``schema_version`` entry of the config, or version 0 if
        the config has no such entry.

    Raises
    ------
    RuntimeError
        If none of the candidate loaders is able to load the config.
    """
    versions = _VERSION_LIST
    # _CONFIG_LOADERS is keyed by version *strings*, so the parsed guess has to
    # be converted with ``.public`` before testing membership. Testing the
    # Version object itself never matches a string key.
    if version_guess.public in _CONFIG_LOADERS:
        versions = [version_guess] + versions
    for guess in versions:
        try:
            # Note: We could consider using a different component as the key
            # for _CONFIG_LOADERS, but since this is an internal detail it's
            # not terribly consequential.
            config = _CONFIG_LOADERS[guess.public](root_directory)
        except Exception:
            # Deliberately best-effort: a loader that cannot handle this
            # config simply means the next candidate should be tried.
            continue
        else:
            break
    else:
        raise RuntimeError("Unable to load config file.")

    try:
        return version.parse(config["schema_version"])
    except KeyError:
        # The default schema version is version 0.
        return version.parse("0")


def _collect_migrations(project):
schema_version = version.parse(SCHEMA_VERSION)

def get_config_schema_version():
return version.parse(project._config["schema_version"])
def _collect_migrations(root_directory):
schema_version = _PARSED_SCHEMA_VERSION

if get_config_schema_version() > schema_version:
current_schema_version = _get_config_schema_version(
root_directory, _PARSED_SCHEMA_VERSION
)
if current_schema_version > schema_version:
# Project config schema version is newer and therefore not supported.
raise RuntimeError(
"The signac schema version used by this project is {}, but signac {} "
"only supports up to schema version {}. Try updating signac.".format(
get_config_schema_version(), __version__, SCHEMA_VERSION
)
"The signac schema version used by this project is "
f"{current_schema_version}, but signac {__version__} only "
f"supports up to schema version {SCHEMA_VERSION}. Try updating "
"signac."
)

while get_config_schema_version() < schema_version:
for (origin, destination), migration in MIGRATIONS.items():
if version.parse(origin) == get_config_schema_version():
guess = current_schema_version
while _get_config_schema_version(root_directory, guess) < schema_version:
for (origin, destination), migration in _MIGRATIONS.items():
if version.parse(origin) == _get_config_schema_version(
root_directory, guess
):
yield (origin, destination), migration
guess = version.parse(destination)
break
else:
raise RuntimeError(
"The signac schema version used by this project is {}, but signac {} "
"uses schema version {} and does not know how to migrate.".format(
get_config_schema_version(), __version__, schema_version
)
"The signac schema version used by this project is "
f"{_get_config_schema_version(root_directory, guess)}, but "
f"signac {__version__} uses schema version {schema_version} "
"and does not know how to migrate."
)


def apply_migrations(project):
"""Apply migrations to a project."""
with _lock_for_migration(project):
for (origin, destination), migrate in _collect_migrations(project):
try:
print(
f"Applying migration for version {origin} to {destination}... ",
end="",
file=sys.stderr,
)
migrate(project)
except Exception as e:
raise RuntimeError(f"Failed to apply migration {destination}.") from e
else:
_update_project_config(project, schema_version=destination)
print("OK", file=sys.stderr)
yield origin, destination
def apply_migrations(root_directory):
    """Apply migrations to a project.

    This function identifies and performs all the necessary schema migrations
    to bring a project up to date with the current schema version of signac.
    The calling code does not require prior knowledge of the schema version of
    the project, and the function is idempotent when applied to projects that
    already have an up-to-date schema.

    Parameters
    ----------
    root_directory : str
        The path to the project to migrate.

    Raises
    ------
    RuntimeError
        If a migration step fails; the underlying exception is chained as
        the cause.
    """
    # Construct the lock *before* entering the try block. If construction
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # touch an unbound ``lock`` (which would mask the real error).
    lock = FileLock(os.path.join(root_directory, FN_MIGRATION_LOCKFILE))
    try:
        with lock:
            for (origin, destination), migrate in _collect_migrations(root_directory):
                try:
                    print(
                        f"Applying migration for version {origin} to {destination}... ",
                        end="",
                        file=sys.stderr,
                    )
                    migrate(root_directory)
                except Exception as e:
                    raise RuntimeError(
                        f"Failed to apply migration {destination}."
                    ) from e
                else:
                    # Record the new schema version using the loader of the
                    # destination version, then persist it to disk so the
                    # next step sees the updated version.
                    config = _CONFIG_LOADERS[version.parse(destination).public](
                        root_directory
                    )
                    config["schema_version"] = destination
                    config.write()

                    print("OK", file=sys.stderr)
    finally:
        # Remove the lock file; it may not exist if the lock was never
        # acquired.
        try:
            os.unlink(lock.lock_file)
        except FileNotFoundError:
            pass


__all__ = [
Expand Down
26 changes: 24 additions & 2 deletions signac/contrib/migration/v0_to_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,30 @@
This migration is a null-migration that serves as a template
for future migrations and testing purposes.
"""
import os

from signac.common import configobj

def migrate_v0_to_v1(project):
# A minimal v1 config.
_cfg = """
schema_version = string(default='0')
project = string()
workspace_dir = string(default='workspace')
"""


def _load_config_v1(root_directory):
    """Load the ``signac.rc`` config of a project using the v1 schema.

    Parameters
    ----------
    root_directory : str
        The path to the project whose config file should be loaded.

    Returns
    -------
    configobj.ConfigObj
        The config read from ``signac.rc`` in ``root_directory``.

    Raises
    ------
    RuntimeError
        If the config does not validate against the minimal v1 config spec.
    """
    config_path = os.path.join(root_directory, "signac.rc")
    config = configobj.ConfigObj(config_path, configspec=_cfg.split("\n"))
    # ``validate`` returns exactly ``True`` only when every check passes.
    is_valid = config.validate(configobj.validate.Validator())
    if is_valid is True:
        return config
    raise RuntimeError(
        "This project's config file is not compatible with signac's v1 schema."
    )


def _migrate_v0_to_v1(root_directory):
"""Migrate from schema version 0 to version 1."""
pass # nothing to do here, serves purely as an example
pass
9 changes: 3 additions & 6 deletions signac/contrib/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def __setitem__(self, key, value):
"Modifying the project configuration after project "
"initialization is deprecated as of version 1.3 and "
"will be removed in version 2.0.",
DeprecationWarning,
FutureWarning,
)

assert version.parse(__version__) < version.parse("2.0")
Expand Down Expand Up @@ -230,8 +230,6 @@ class Project:
The project configuration to use. By default, it loads the first signac
project configuration found while searching upward from the current
working directory (Default value = None).
_ignore_schema_version : bool
(Default value = False).

"""

Expand All @@ -253,7 +251,7 @@ class Project:

_use_pandas_for_html_repr = True # toggle use of pandas for html repr

def __init__(self, config=None, _ignore_schema_version=False):
def __init__(self, config=None):
if config is None:
config = load_config()
self._config = _ProjectConfig(
Expand All @@ -276,8 +274,7 @@ def __init__(self, config=None, _ignore_schema_version=False):
)

# Ensure that the project's data schema is supported.
if not _ignore_schema_version:
self._check_schema_compatibility()
self._check_schema_compatibility()

# Prepare project document
self._document = None
Expand Down
41 changes: 31 additions & 10 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -2478,6 +2478,9 @@ def test_get_job_symlink_other_project(self):

class TestProjectSchema(TestProjectBase):
def test_project_schema_versions(self):
from signac.contrib.migration import apply_migrations

# Ensure that project initialization fails on an unsupported version.
impossibly_high_schema_version = "9999"
assert version.parse(self.project.config["schema_version"]) < version.parse(
impossibly_high_schema_version
Expand All @@ -2490,19 +2493,37 @@ def test_project_schema_versions(self):
name=str(self.project), root=self.project.root_directory()
)

def test_project_schema_version_migration(self):
# Ensure that migration fails on an unsupported version.
with pytest.raises(RuntimeError):
apply_migrations(self.project.root_directory())

# Ensure that migration fails on an invalid version.
invalid_schema_version = "0.5"
config = get_config(self.project.fn("signac.rc"))
config["schema_version"] = invalid_schema_version
config.write()
with pytest.raises(RuntimeError):
apply_migrations(self.project.root_directory())

@pytest.mark.parametrize("implicit_version", [True, False])
def test_project_schema_version_migration(self, implicit_version):
from signac.contrib.migration import apply_migrations

apply_migrations(self.project)
self.project._config["schema_version"] = "0"
assert self.project._config["schema_version"] == "0"
config = get_config(self.project.fn("signac.rc"))
if implicit_version:
del config["schema_version"]
assert "schema_version" not in config
else:
config["schema_version"] = "0"
assert config["schema_version"] == "0"
config.write()
err = io.StringIO()
with redirect_stderr(err):
for origin, destination in apply_migrations(self.project):
assert self.project._config["schema_version"] == destination
project = signac.get_project(root=self.project.root_directory())
assert project._config["schema_version"] == destination
assert self.project._config["schema_version"] == "1"
apply_migrations(self.project.root_directory())
config = get_config(self.project.fn("signac.rc"))
assert config["schema_version"] == "1"
project = signac.get_project(root=self.project.root_directory())
assert project.config["schema_version"] == "1"
assert "OK" in err.getvalue()
assert "0 to 1" in err.getvalue()

Expand All @@ -2516,7 +2537,7 @@ def test_no_migration(self):
# 2. Either update or remove this unit test.
from signac.contrib.migration import _collect_migrations

migrations = list(_collect_migrations(self.project))
migrations = list(_collect_migrations(self.project.root_directory()))
assert len(migrations) == 0


Expand Down