Skip to content

Commit

Permalink
SqliteDosStorage: Make the migrator compatible with SQLite (aiidate…
Browse files Browse the repository at this point in the history
…am#6429)

The majority of the `SqliteDosStorage` piggy-backs off of the
`PsqlDosStorage` plugin. It also uses the `PsqlDosMigrator` as-is to
perform the database migrations. This is not safe however, as PostgreSQL
and SQLite do not have exactly the same syntax.

An example is the `main_0002` revision which was added to drop the
hashes of certain nodes. This uses the `#-` operator which is JSONB
specific syntax of PostgreSQL and is not supported by SQLite. Since this
migration was added before the `SqliteDosStorage` plugin was added, this
has never caused any problems as all profiles would be new, would not have
any nodes and therefore the SQL code of the migration would not actually
be executed.

In preparation for any future migrations that may need to be added, the
`SqliteDosStorage` now uses the `SqliteDosMigrator`. This subclasses the
`PsqlDosMigrator` as it can still use most of the functionality, but it
changes a few critical things. Most notably, the schema versions are now
kept in a location of their own and are no longer borrowed from the
`core.psql_dos` plugin.

The initial version `main_0001_initial.py` is taken from the migration
`main_0000_initial.py` of the `core.sqlite_zip` storage plugin. The only
difference is that UUID fields are declared as `String(32)` instead of
`CHAR(32)`. The SQLAlchemy models that are automatically generated for
SQLite from the PostgreSQL-based models actually use the latter type.
See `aiida.storage.sqlite_zip.models:pg_to_sqlite`.
  • Loading branch information
sphuber authored Jul 5, 2024
1 parent f992443 commit 6196dcd
Show file tree
Hide file tree
Showing 13 changed files with 1,360 additions and 23 deletions.
8 changes: 8 additions & 0 deletions src/aiida/storage/migrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Module with common resources related to storage migrations."""

# Message template used to report a mismatch between the schema version found in the database and the one required
# by the code. It is filled in through ``str.format`` and expects the fields ``schema_version_database``,
# ``schema_version_code`` and ``profile_name``.
TEMPLATE_INVALID_SCHEMA_VERSION = """
Database schema version `{schema_version_database}` is incompatible with the required schema version `{schema_version_code}`.
To migrate the database schema version to the current one, run the following command:
verdi -p {profile_name} storage migrate
""" # noqa: E501
8 changes: 1 addition & 7 deletions src/aiida/storage/psql_dos/migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from aiida.common import exceptions
from aiida.manage.configuration.profile import Profile
from aiida.storage.log import MIGRATE_LOGGER
from aiida.storage.migrations import TEMPLATE_INVALID_SCHEMA_VERSION
from aiida.storage.psql_dos.models.settings import DbSetting
from aiida.storage.psql_dos.utils import create_sqlalchemy_engine

Expand All @@ -46,13 +47,6 @@
verdi -p {profile_name} storage migrate
"""

TEMPLATE_INVALID_SCHEMA_VERSION = """
Database schema version `{schema_version_database}` is incompatible with the required schema version `{schema_version_code}`.
To migrate the database schema version to the current one, run the following command:
verdi -p {profile_name} storage migrate
""" # noqa: E501

ALEMBIC_REL_PATH = 'migrations'

REPOSITORY_UUID_KEY = 'repository|uuid'
Expand Down
123 changes: 108 additions & 15 deletions src/aiida/storage/sqlite_dos/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,36 @@

from __future__ import annotations

import pathlib
from functools import cached_property, lru_cache
from pathlib import Path
from shutil import rmtree
from typing import TYPE_CHECKING, Optional
from uuid import uuid4

from alembic.config import Config
from disk_objectstore import Container, backup_utils
from pydantic import BaseModel, Field, field_validator
from sqlalchemy import insert
from sqlalchemy import insert, inspect, select
from sqlalchemy.orm import scoped_session, sessionmaker

from aiida.common import exceptions
from aiida.common.log import AIIDA_LOGGER
from aiida.manage import Profile
from aiida.manage.configuration.profile import Profile
from aiida.manage.configuration.settings import AIIDA_CONFIG_FOLDER
from aiida.orm.implementation import BackendEntity
from aiida.storage.log import MIGRATE_LOGGER
from aiida.storage.psql_dos.models.settings import DbSetting
from aiida.storage.sqlite_zip import models, orm
from aiida.storage.sqlite_zip.migrator import get_schema_version_head
from aiida.storage.sqlite_zip.utils import create_sqla_engine

from ..migrations import TEMPLATE_INVALID_SCHEMA_VERSION
from ..psql_dos import PsqlDosBackend
from ..psql_dos.migrator import REPOSITORY_UUID_KEY, PsqlDosMigrator
from ..psql_dos.migrator import PsqlDosMigrator

if TYPE_CHECKING:
from disk_objectstore import Container

from aiida.orm.entities import EntityTypes
from aiida.repository.backend import DiskObjectStoreRepositoryBackend

Expand All @@ -45,15 +50,26 @@
FILENAME_CONTAINER = 'container'


# Location of the alembic migration scripts, relative to the directory that contains this module.
ALEMBIC_REL_PATH = 'migrations'

# Key of the ``DbSetting`` row whose value is the UUID of the repository associated with the database.
REPOSITORY_UUID_KEY = 'repository|uuid'


class SqliteDosMigrator(PsqlDosMigrator):
"""Storage implementation using Sqlite database and disk-objectstore container.
"""Class for validating and migrating `sqlite_dos` storage instances.
This storage backend is not recommended for use in production. The sqlite database is not the most performant and it
does not support all the ``QueryBuilder`` functionality that is supported by the ``core.psql_dos`` storage backend.
This storage is ideally suited for use cases that want to test or demo AiiDA as it requires no server but just a
folder on the local filesystem.
.. important:: This class should only be accessed via the storage backend class (apart from for test purposes)
The class subclasses the ``PsqlDosMigrator``. It essentially changes two things in the implementation:
* Changes the path to the migration version files. This allows custom migrations to be written for SQLite-based
storage plugins, which is necessary since the PSQL-based migrations may use syntax that is not compatible.
* The logic for validating the storage is significantly simplified since the SQLite-based storage plugins do not
have to take legacy Django-based implementations into account.
"""

alembic_version_tbl_name = 'alembic_version'

def __init__(self, profile: Profile) -> None:
filepath_database = Path(profile.storage_config['filepath']) / FILENAME_DATABASE
filepath_database.touch()
Expand Down Expand Up @@ -91,6 +107,86 @@ def initialise_database(self) -> None:
context.stamp(context.script, 'main@head') # type: ignore[arg-type]
self.connection.commit()

def get_schema_version_profile(self) -> Optional[str]:  # type: ignore[override]
    """Return the schema version that is currently stamped in the database of this profile.

    The value is the current revision as reported by the alembic migration context.

    :returns: The current revision, or ``None`` if the migration context reports no revision.
    """
    with self._migration_context() as migration_context:
        current_revision = migration_context.get_current_revision()
    return current_revision

@staticmethod
def _alembic_config():
    """Build the Alembic ``Config`` that points to the migrations of this storage plugin.

    The ``script_location`` option is set to the ``ALEMBIC_REL_PATH`` folder that lives next to this module.

    :returns: The configured Alembic ``Config`` instance.
    """
    script_location = pathlib.Path(__file__).resolve().parent / ALEMBIC_REL_PATH
    alembic_config = Config()
    alembic_config.set_main_option('script_location', str(script_location))
    return alembic_config

def validate_storage(self) -> None:
    """Validate that the storage for this profile is consistent and compatible with the code.

    The following checks are performed, in order:

    1. The database contains the alembic version table, i.e. it has a known schema version.
    2. The database schema is at the head version, i.e. it is compatible with the code API.
    3. The repository ID is equal to the repository UUID that is stored in the database.

    :raises: :class:`aiida.common.exceptions.UnreachableStorage` if the storage cannot be connected to
    :raises: :class:`aiida.common.exceptions.IncompatibleStorageSchema`
        if the storage is not compatible with the code API.
    :raises: :class:`aiida.common.exceptions.CorruptStorage`
        if the repository ID is not equal to the UUID set in the database.
    """
    # Without the alembic version table there is no schema version to compare against.
    inspector = inspect(self.connection)
    if not inspector.has_table(self.alembic_version_tbl_name):
        raise exceptions.IncompatibleStorageSchema('The database has no known version.')

    # The version stamped in the database has to match the head version known to the code.
    head_version = self.get_schema_version_head()
    profile_version = self.get_schema_version_profile()
    if profile_version != head_version:
        message = TEMPLATE_INVALID_SCHEMA_VERSION.format(
            schema_version_database=profile_version,
            schema_version_code=head_version,
            profile_name=self.profile.name,
        )
        raise exceptions.IncompatibleStorageSchema(message)

    # The UUID of the disk-objectstore container has to match the one recorded in the database, which confirms
    # that this container is indeed the one associated with this database.
    container_uuid = self.get_repository_uuid()
    query = select(DbSetting.val).where(DbSetting.key == REPOSITORY_UUID_KEY)
    stored_uuid = self.connection.execute(query).scalar_one_or_none()

    if stored_uuid is None:
        raise exceptions.CorruptStorage('The database has no repository UUID set.')

    if stored_uuid != container_uuid:
        raise exceptions.CorruptStorage(
            f'The database has a repository UUID configured to {stored_uuid} '
            f"but the disk-objectstore's is {container_uuid}."
        )

@property
def is_database_initialised(self) -> bool:
    """Tell whether the database has been initialised.

    The database counts as initialised as soon as it contains the table in which alembic stores the schema version.

    :returns: ``True`` if the database is initialised, ``False`` otherwise.
    """
    inspector = inspect(self.connection)
    return inspector.has_table(self.alembic_version_tbl_name)

def migrate(self) -> None:
    """Bring the storage of this profile up to the head version of the main branch.

    :raises: :class:`~aiida.common.exceptions.UnreachableStorage` if the storage cannot be accessed.
    :raises: :class:`~aiida.common.exceptions.StorageMigrationError` if the storage is not initialised.
    """
    # A database without the alembic version table was never initialised, so there is nothing to migrate from.
    if not self.is_database_initialised:
        raise exceptions.StorageMigrationError('storage is uninitialised, cannot migrate.')

    MIGRATE_LOGGER.report('Migrating to the head of the main branch')
    self.migrate_up('main@head')
    self.connection.commit()
self.connection.commit()


class SqliteDosStorage(PsqlDosBackend):
"""A lightweight storage that is easy to install.
Expand Down Expand Up @@ -178,12 +274,9 @@ def get_repository(self) -> 'DiskObjectStoreRepositoryBackend':
return DiskObjectStoreRepositoryBackend(container=self.get_container())

@classmethod
def version_head(cls) -> str:
return get_schema_version_head()

@classmethod
def version_profile(cls, profile: Profile) -> str | None:
return get_schema_version_head()
def version_profile(cls, profile: Profile) -> Optional[str]:
with cls.migrator_context(profile) as migrator:
return migrator.get_schema_version_profile()

def query(self) -> orm.SqliteQueryBuilder:
return orm.SqliteQueryBuilder(self)
Expand Down
54 changes: 54 additions & 0 deletions src/aiida/storage/sqlite_dos/migrations/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Environment configuration to be used by alembic to perform database migrations."""

from alembic import context


def run_migrations_online():
    """Run the migrations in 'online' mode.

    The connection and the AiiDA profile are expected to have been stored in the attributes of the alembic config.
    They are used, together with the optional ``on_version_apply`` callback, to configure the migration context
    before the migrations are run.

    :raises: :class:`aiida.common.exceptions.ConfigurationError` if the config attributes do not provide a
        connection or a profile.
    """
    from aiida.storage.sqlite_zip.models import SqliteBase

    attributes = context.config.attributes
    connection = attributes.get('connection', None)
    aiida_profile = attributes.get('aiida_profile', None)
    on_version_apply = attributes.get('on_version_apply', None)

    # Determine which required attribute is missing, if any; the connection is checked first.
    error_message = None
    if connection is None:
        error_message = 'An initialized connection is expected for the AiiDA online migrations.'
    elif aiida_profile is None:
        error_message = 'An aiida_profile is expected for the AiiDA online migrations.'

    if error_message is not None:
        # The exception class is only imported when it is actually needed.
        from aiida.common.exceptions import ConfigurationError

        raise ConfigurationError(error_message)

    context.configure(
        connection=connection,
        target_metadata=SqliteBase.metadata,
        transaction_per_migration=True,
        aiida_profile=aiida_profile,
        on_version_apply=on_version_apply,
    )
    context.run_migrations()


try:
    # Only online migrations are implemented; offline mode is rejected explicitly.
    if not context.is_offline_mode():
        run_migrations_online()
    else:
        raise NotImplementedError('This feature is not currently supported.')
except NameError:
    # This will occur in an environment that is just compiling the documentation
    pass
Loading

0 comments on commit 6196dcd

Please sign in to comment.