openscm · znicholls · May 10, 2025 · May 9, 2025 · May 9, 2025 · May 10, 2025
diff --git a/changelog/19.breaking.md b/changelog/19.breaking.md
@@ -0,0 +1,3 @@
+- Required `db_dir` to be passed when initialising [pandas_openscm.db.reader.OpenSCMDBReader][]. This is required to support portable databases
+- Renamed `out_column_type` to `out_columns_type` in [pandas_openscm.io.load_timeseries_csv][] for consistency with the rest of the API
+- Bumped the minimum supported version of [filelock](https://py-filelock.readthedocs.io/) to 3.12.3, as only this version and later handle automatic creation of directories for the lock
diff --git a/changelog/19.feature.md b/changelog/19.feature.md
@@ -0,0 +1,3 @@
+- Made the database portable by only storing relative paths in the file map. This allows the database to be converted to an archive with [pandas_openscm.db.OpenSCMDB.to_gzipped_tar_archive][] and then unpacked elsewhere with [pandas_openscm.db.OpenSCMDB.from_gzipped_tar_archive][]
+- Added [pandas_openscm.db.path_handling][] to clarify how we handle paths internally to support portability
+- Added support for specifying the name of the output columns via [pandas_openscm.db.OpenSCMDB.load][], [pandas_openscm.db.reader.OpenSCMDBReader.load][] and [pandas_openscm.io.load_timeseries_csv][]
diff --git a/changelog/19.improvement.md b/changelog/19.improvement.md
@@ -0,0 +1,2 @@
+- Added the explicit [pandas_openscm.db.backends][] module to handle the backends we support more clearly
+- Added [pandas_openscm.db.backends.DataBackendOptions.guess_backend][] and [pandas_openscm.db.backends.IndexBackendOptions.guess_backend][] to allow for more convenient inference of the backend to use with different files
diff --git a/changelog/19.trivial.md b/changelog/19.trivial.md
@@ -0,0 +1 @@
+Moved DATA_BACKENDS and INDEX_BACKENDS to [pandas_openscm.db.backends][], out of the top level [pandas_openscm.db][] module
diff --git a/docs/how-to-guides/how-to-use-openscmdb.py b/docs/how-to-guides/how-to-use-openscmdb.py
@@ -26,6 +26,7 @@
 import concurrent.futures
 import contextlib
 import itertools
+import tarfile
 import tempfile
 import traceback
 from functools import partial
@@ -200,6 +201,82 @@
 # %% [markdown]
 # ## Advanced topics
 
+# %% [markdown]
+# ### Sharing the database
+#
+# If you need to share a database,
+# you can zip it and pass it to someone else.
+
+# %% [markdown]
+# We start by putting some data in a database.
+
+# %%
+top_level_dir = Path(tempfile.mkdtemp())
+
+# %%
+db_start = OpenSCMDB(
+    db_dir=top_level_dir / "start",
+    backend_data=DATA_BACKENDS.get_instance("csv"),
+    backend_index=INDEX_BACKENDS.get_instance("csv"),
+)
+db_start.save(df_timeseries_like)
+
+# %% [markdown]
+# Then we create a gzipped tar archive of our database.
+
+# %%
+gzipped = top_level_dir / "db_archive.tar.gz"
+db_start.to_gzipped_tar_archive(gzipped)
+
+# %% [markdown]
+# To demonstrate that this does not rely on the original data,
+# we delete the original database.
+
+# %%
+db_start.delete()
+
+# %% [markdown]
+# We can inspect the tar file's contents.
+
+# %%
+with tarfile.open(gzipped) as tar:
+    print(f"{tar.getmembers()=}")
+
+# %% [markdown]
+# A new database can be initialised from the gzipped tar archive.
+
+# %%
+db_moved = OpenSCMDB.from_gzipped_tar_archive(
+    gzipped,
+    db_dir=top_level_dir / "moved",
+)
+db_moved
+
+# %% [markdown]
+# As above, we remove the archive
+# to demonstrate that there is no reliance on it
+# for the following operations.
+
+# %%
+gzipped.unlink()
+
+# %% [markdown]
+# You can then use this database like normal,
+# but now from the new location
+# (whether on your machine or someone else's).
+
+# %%
+db_moved.load()
+
+# %%
+db_moved.load(pix.isin(unit="J"))
+
+# %% [markdown]
+# We clean up the files before moving onto the next demonstration.
+
+# %%
+db_moved.delete()
+
 # %% [markdown]
 # ### Grouping data
 #

diff --git a/pyproject.toml b/pyproject.toml
@@ -41,7 +41,7 @@ Issues = "https://github.com/openscm/pandas-openscm/issues"
 
 [project.optional-dependencies]
 db = [
-    "filelock>=3.0.0",
+    "filelock>=3.12.3",
 ]
 db-full = [
     "netcdf4>=1.7.2",

diff --git a/src/pandas_openscm/db/__init__.py b/src/pandas_openscm/db/__init__.py
@@ -4,134 +4,14 @@
 
 from __future__ import annotations
 
-from attrs import frozen
-
+from pandas_openscm.db.backends import DATA_BACKENDS, INDEX_BACKENDS
 from pandas_openscm.db.csv import CSVDataBackend, CSVIndexBackend
 from pandas_openscm.db.feather import FeatherDataBackend, FeatherIndexBackend
 from pandas_openscm.db.in_memory import InMemoryDataBackend, InMemoryIndexBackend
 from pandas_openscm.db.interfaces import OpenSCMDBDataBackend, OpenSCMDBIndexBackend
 from pandas_openscm.db.netcdf import netCDFDataBackend, netCDFIndexBackend
 from pandas_openscm.db.openscm_db import AlreadyInDBError, EmptyDBError, OpenSCMDB
 
-
-@frozen
-class DataBackendOptions:
-    """A collection of data back-end options"""
-
-    options: tuple[  # type hint doesn't work properly, but ok
-        tuple[str, type[OpenSCMDBDataBackend]], ...
-    ]
-    """
-    Options
-
-    The first element of each option is the option's short name.
-    The second element is the class that matches that option.
-    """
-
-    def get_instance(self, option: str) -> OpenSCMDBDataBackend:
-        """
-        Get an instance of one of the options
-
-        Parameters
-        ----------
-        option
-            Option for which to get a data back-end instance
-
-        Returns
-        -------
-        :
-            Initialised instance
-
-        Raises
-        ------
-        KeyError
-            The option is not supported
-        """
-        for short_name, option_cls in self.options:
-            if short_name == option:
-                return option_cls()
-
-        msg = (
-            f"{option=} is not supported. "
-            f"Available options: {tuple(v[1] for v in self.options)}"
-        )
-        raise KeyError(msg)
-
-
-DATA_BACKENDS = DataBackendOptions(
-    (  # type: ignore # using class with protocol doesn't work properly
-        ("csv", CSVDataBackend),
-        ("feather", FeatherDataBackend),
-        ("in_memory", InMemoryDataBackend),
-        ("netCDF", netCDFDataBackend),
-        # Other options to consider:
-        #
-        # - pretty netCDF, where we try and save the data with dimensions where possible
-        #
-        # - HDF5: https://pandas.pydata.org/docs/user_guide/io.html#hdf5-pytables
-        # - sqllite
-    )
-)
-"""Inbuilt data back-ends"""
-
-
-@frozen
-class IndexBackendOptions:
-    """A collection of index back-end options"""
-
-    options: tuple[tuple[str, type[OpenSCMDBIndexBackend]], ...]
-    """
-    Options
-
-    The first element of each option is the option's short name.
-    The second element is the class that matches that option.
-    """
-
-    def get_instance(self, option: str) -> OpenSCMDBIndexBackend:
-        """
-        Get an instance of one of the options
-
-        Parameters
-        ----------
-        option
-            Option for which to get a index back-end instance
-
-        Returns
-        -------
-        :
-            Initialised instance
-
-        Raises
-        ------
-        KeyError
-            The option is not supported
-        """
-        for short_name, option_cls in self.options:
-            if short_name == option:
-                return option_cls()
-
-        msg = (
-            f"{option=} is not supported. "
-            f"Available options: {tuple(v[1] for v in self.options)}"
-        )
-        raise KeyError(msg)
-
-
-INDEX_BACKENDS = IndexBackendOptions(
-    (  # type: ignore # using class with protocol doesn't work properly
-        ("csv", CSVIndexBackend),
-        ("feather", FeatherIndexBackend),
-        ("in_memory", InMemoryIndexBackend),
-        ("netCDF", netCDFIndexBackend),
-        # Other options to consider:
-        #
-        # - HDF5: https://pandas.pydata.org/docs/user_guide/io.html#hdf5-pytables
-        # - sqllite
-    )
-)
-"""Inbuilt index back-ends"""
-
-
 __all__ = [
     "DATA_BACKENDS",
     "INDEX_BACKENDS",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		- Added the explicit [pandas_openscm.db.backends][] module to handle the backends we support more clearly
		- Added [pandas_openscm.db.backends.DataBackendOptions.guess_backend][] and [pandas_openscm.db.backends.IndexBackendOptions.guess_backend][] to allow for more convenient inference of the backend to use with different files
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Moved DATA_BACKENDS and INDEX_BACKENDS to [pandas_openscm.db.backends][], out of the top level [pandas_openscm.db][] module