Skip to content

Commit 5614d80

Browse files
authored
Add unified CEBRA encoder: pytorch implementation (#251)
* start tests * remove print statements * first passing test * move functionality to base file in solver and separate in functions * add test_select_model for multisession * remove float16 * Improve modularity remove duplicate code and todos * Add tests to solver * Fix save/load * Fix extra docs errors * Add review updates * apply ruff auto-fixes * fix linting errors * Run isort, ruff, yapf * Fix gaussian mixture dataset import * Fix all tests but xcebra tests * Fix pytorch API usage example * Make xCEBRA compatible with the batched inference & padding in solver * Add some tests on transform() with xCEBRA * Add some docstrings and typings and clean unnecessary changes * Implement review comments * Fix sklearn test * Initial pass at integrating unifiedCEBRA * Add name in NOTE * Implement reviews on tests and typing * Fix import errors * Add select_model to aux solvers * Fix tests * Add mask tests * Fix docs error * Remove masking init() * Remove shuffled neurons in unified dataset * Remove extra datasets * Add tests on the private functions in base solver * Update tests and duplicate code based on review * Fix quantized_embedding_norm undefined when `normalize=False` (#249) * Fix tests * Adapt unified code to get_model method * Update mask.py add headers to new files * Update masking.py - header * Update test_data_masking.py - header * Implement review comments and fix typos * Fix docs errors * Remove np.int typing error * Fix docstring warning * Fix indentation docstrings * Implement review comments * Fix circular import and abstract method * Add maskedmixin to __all__ * Implement extra review comments * Change masking kwargs as tuple and not dict in sklearn impl * Add integrations/decoders.py * Fix typo * minor simplification in solver --------- Note, some comments in this PR overlap with #168 and #225 which were developed in parallel.
1 parent 7ae5e1e commit 5614d80

22 files changed

+1951
-206
lines changed

cebra/data/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,4 @@
5151
from cebra.data.multiobjective import *
5252
from cebra.data.datasets import *
5353
from cebra.data.helper import *
54+
from cebra.data.masking import *

cebra/data/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import torch
2828

2929
import cebra.data.assets as cebra_data_assets
30+
import cebra.data.masking as cebra_data_masking
3031
import cebra.distributions
3132
import cebra.io
3233
from cebra.data.datatypes import Batch
@@ -36,7 +37,7 @@
3637
__all__ = ["Dataset", "Loader"]
3738

3839

39-
class Dataset(abc.ABC, cebra.io.HasDevice):
40+
class Dataset(abc.ABC, cebra.io.HasDevice, cebra_data_masking.MaskedMixin):
4041
"""Abstract base class for implementing a dataset.
4142
4243
The class attributes provide information about the shape of the data when

cebra/data/datasets.py

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
import numpy.typing as npt
2929
import torch
3030

31+
import cebra
3132
import cebra.data as cebra_data
33+
import cebra.data.masking as cebra_data_masking
3234
import cebra.helper as cebra_helper
3335
import cebra.io as cebra_io
3436
from cebra.data.datatypes import Batch
@@ -304,7 +306,7 @@ def _iter_property(self, attr):
304306

305307

306308
# TODO(stes): This should be a single session dataset?
307-
class DatasetxCEBRA(cebra_io.HasDevice):
309+
class DatasetxCEBRA(cebra_io.HasDevice, cebra_data_masking.MaskedMixin):
308310
"""Dataset class for xCEBRA models.
309311
310312
This class handles neural data and associated labels for xCEBRA models, providing
@@ -435,3 +437,95 @@ def load_batch_contrastive(self, index: BatchIndex) -> Batch:
435437
positive=[self[idx] for idx in index.positive],
436438
negative=self[index.negative],
437439
)
440+
441+
442+
class UnifiedDataset(DatasetCollection):
    """Multi session dataset made up of a list of datasets, considered as a unique session.

    Considering the sessions as a unique session, or pseudo-session, is used to later train a single
    model for all the sessions, even if they originally contain a variable number of neurons.
    To do that, we sample ref/pos/neg for each session and concatenate them along the neurons axis.

    For instance, for a batch size ``batch_size``, we sample ``(batch_size, num_neurons(session), offset)`` for
    each type of samples (ref/pos/neg) and then concatenate so that the final :py:class:`cebra.data.datatypes.Batch`
    is of shape ``(batch_size, total_num_neurons, offset)``, where ``total_num_neurons`` is the sum of all the
    ``num_neurons(session)``.
    """

    def __init__(self, *datasets: cebra_data.SingleSessionDataset):
        super().__init__(*datasets)

    @property
    def input_dimension(self) -> int:
        """Returns the sum of the input dimension for each session."""
        # Builtin ``sum`` keeps the return value a plain ``int``; ``np.sum``
        # would return a numpy scalar, which does not match the annotation.
        return sum(
            self.get_input_dimension(session_id)
            for session_id in range(self.num_sessions))

    def _get_batches(self, index) -> List[Batch]:
        """Sample one :py:class:`cebra.data.datatypes.Batch` per session.

        Args:
            index: A multi-session index whose ``reference``, ``positive`` and
                ``negative`` attributes each hold one index tensor per session.

        Returns:
            A list with one :py:class:`cebra.data.datatypes.Batch` per session,
            each of shape ``(batch_size, num_neurons(session), offset)``.
        """
        return [
            cebra_data.Batch(
                reference=self.get_session(session_id)[
                    index.reference[session_id]],
                positive=self.get_session(session_id)[
                    index.positive[session_id]],
                negative=self.get_session(session_id)[
                    index.negative[session_id]],
            ) for session_id in range(self.num_sessions)
        ]

    def configure_for(self, model: "cebra.models.Model"):
        """Configure the dataset offset for the provided model.

        Call this function before indexing the dataset. This sets the
        :py:attr:`~.Dataset.offset` attribute of the dataset.

        Args:
            model: The model to configure the dataset for.
        """
        # Every session is configured with the same (single, unified) model.
        for session in self.iter_sessions():
            session.configure_for(model)

    def load_batch(self, index: BatchIndex) -> Batch:
        """Return the data at the specified index location.

        Concatenate the batches sampled for each session on the
        number-of-neurons axis.

        Args:
            index: The indices to sample reference, positive and negative
                samples from, with one entry per session. Each per-session
                batch is of shape ``(batch_size, num_neurons(session), offset)``.

        Returns:
            A :py:class:`cebra.data.datatypes.Batch`, of shape ``(batch_size, total_num_neurons, offset)``, where
            ``total_num_neurons`` is the sum of all the ``num_neurons(session)``.
        """
        batches = self._get_batches(index)

        def _concat(attr: str) -> torch.Tensor:
            # Concatenate per-session samples along the neurons axis (dim=1).
            return torch.cat([getattr(batch, attr) for batch in batches],
                             dim=1)

        if hasattr(self, "apply_mask"):
            # If the dataset has a mask, apply it to the data.
            return cebra_data.Batch(
                reference=self.apply_mask(_concat("reference")),
                positive=self.apply_mask(_concat("positive")),
                negative=self.apply_mask(_concat("negative")),
            )
        return cebra_data.Batch(
            reference=_concat("reference"),
            positive=_concat("positive"),
            negative=_concat("negative"),
        )

    def __getitem__(self, args) -> List[Batch]:
        """Return a set of samples from the given session at the given index.

        Args:
            args: A ``(session_id, index)`` tuple selecting the session and
                the sample indices within that session.
        """
        session_id, index = args
        return self.get_session(session_id).__getitem__(index)

0 commit comments

Comments
 (0)