lycosystem · noemibuehrer · Oct 23, 2025 · Jun 30, 2025 · Jun 30, 2025 · Jul 24, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,4 +1,4 @@
-name: tests
+name: Tests
 
 on:
   push:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,25 @@
 
 All notable changes to this project will be documented in this file.
 
+## [1.3.7] - 2025-10-23
+
+### Bug Fixes
+
+- Make models compatible with new data format.
+- Correct array shape mismatch in `draw_patients`.
+
+### Miscellaneous Tasks
+
+- Update codecov badge.
+- Mention wiki in contribution guide.
+- Update email addresses.
+
+### Change
+
+- Adjustments to allow bilateral mixture model.
+- Make compatible with new data format (again).
+- Change dataframe indexing to conform with new standard.
+
 ## [1.3.6] - 2025-06-30
 
 ### Miscellaneous Tasks
@@ -948,6 +967,7 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 - fix pyproject.toml typo
 - add pre-commit hook to check commit msg
 
+[1.3.7]: https://github.com/lycosystem/lymph/compare/1.3.6...1.3.7
 [1.3.6]: https://github.com/lycosystem/lymph/compare/1.3.5...1.3.6
 [1.3.5]: https://github.com/lycosystem/lymph/compare/1.3.4...1.3.5
 [1.3.4]: https://github.com/lycosystem/lymph/compare/1.3.3...1.3.4

diff --git a/CITATION.cff b/CITATION.cff
@@ -8,7 +8,7 @@ type: software
 authors:
   - given-names: Roman
     family-names: Ludwig
-    email: roman.ludwig@usz.ch
+    email: gygqdstu3@mozmail.com
     affiliation: University Hospital Zurich
     orcid: 'https://orcid.org/0000-0001-9434-328X'
   - given-names: Bertrand

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -2,7 +2,8 @@
 
 First, thanks for considering to help out on this project!
 
-Before you start coding away and fixing some of the [:link:issues], let me give you an introduction of the standards and best-practices that we have tried to follow so far:
+> [!WARNING]
+> Before you start coding away and fixing some of the [:link:issues], please familiarize yourself with the conventions we try to adhere to. These are explained both in our [:link:organization-wide wiki] and in this document. The wiki is more general and probably also more up-to-date, so start there.
 
 ## Git
 
@@ -123,9 +124,10 @@ For more complicated stuff, we used [:link:pytest] and [:link:hypothesis] to cov
 
 ## Questions?
 
-If there is still something unclear, feel free to reach out to me: [roman.ludwig@usz.ch](mailto:roman.ludwig@usz.ch).
+If there is still something unclear, feel free to reach out to us: [yoel.perezhaas@usz.ch](mailto:yoel.perezhaas@usz.ch) or [noemi.buehrer@usz.ch](mailto:noemi.buehrer@usz.ch).
 
 [:link:issues]: https://github.com/lycosystem/lymph/issues
+[:link:organization-wide wiki]: https://github.com/lycosystem/.github/wiki
 [:link:git]: https://git-scm.com
 [:link:GitHub]: https://github.com
 [:link:git-flow]: https://nvie.com/posts/a-successful-git-branching-model/

diff --git a/README.rst b/README.rst
@@ -10,10 +10,10 @@
     :target: https://github.com/lycosystem/lymph/actions
 .. image:: https://github.com/lycosystem/lymph/actions/workflows/release.yml/badge.svg?style=flat
     :target: https://pypi.org/project/lymph-model/
-.. image:: https://codecov.io/gh/lycosystem/lymph/branch/main/graph/badge.svg?token=LPXQPK5K78
-    :target: https://codecov.io/gh/lycosystem/lymph
 .. image:: https://readthedocs.org/projects/lymph-model/badge
     :target: https://lymph-model.readthedocs.io
+.. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/lycosystem/lymph/python-coverage-comment-action-data/endpoint.json
+    :target: https://htmlpreview.github.io/?https://github.com/lycosystem/lymph/blob/python-coverage-comment-action-data/htmlcov/index.html
 
 
 A Python package for statistical modelling of lymphatic metastatic spread in head & neck squamous cell carcinoma (HNSCC).

diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,11 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "lymph-model"
 description = "Package for statistical modelling of lymphatic metastatic spread."
-authors = [{ name = "Roman Ludwig", email = "roman.ludwig@usz.ch" }]
+authors = [
+    { name = "Roman Ludwig", email = "gygqdstu3@mozmail.com" },
+    { name = "Yoel Pérez Haas", email = "yoel.perezhaas@usz.ch" },
+    { name = "Noemi Bührer", email = "noemi.buehrer@usz.ch" },
+]
 readme = "README.rst"
 requires-python = ">=3.10"
 keywords = ["cancer", "metastasis", "lymphatic progression", "model"]
@@ -98,8 +102,10 @@ select = [
 ignore = ["B028"]
 
 [tool.ruff.lint.per-file-ignores]
-"test/*" = [
+"tests/*" = [
+    "D102",
     "S101",
+    "RET503",
 ]
 
 

diff --git a/src/lymph/__init__.py b/src/lymph/__init__.py
@@ -15,7 +15,7 @@
 __version__ = version
 __description__ = "Package for statistical modelling of lymphatic metastatic spread."
 __author__ = "Roman Ludwig"
-__email__ = "roman.ludwig@usz.ch"
+__email__ = "gygqdstu3@mozmail.com"
 __uri__ = "https://github.com/lycosystem/lymph"
 
 # nopycln: file

diff --git a/src/lymph/models/bilateral.py b/src/lymph/models/bilateral.py
@@ -404,6 +404,15 @@ def load_patient_data(
         """
         self.ipsi.load_patient_data(patient_data, "ipsi", mapping)
         self.contra.load_patient_data(patient_data, "contra", mapping)
+        # Keep all columns except '_model'
+        # From '_model' only keep those with first subheader '#' or 'core'
+        cols = [col for col in self.ipsi.patient_data.columns if col[0] != "_model"]
+        cols += [
+            col
+            for col in self.ipsi.patient_data.columns
+            if col[0] == "_model" and (col[1] == "#" or col[1] == "core")
+        ]
+        self.patient_data = self.ipsi.patient_data[cols]
 
     def state_dist(
         self,
@@ -472,10 +481,15 @@ def patient_likelihoods(
         mode: Literal["HMM", "BN"] = "HMM",
     ) -> np.ndarray:
         """Compute the likelihood of each patient individually."""
-        joint_state_dist = self.state_dist(t_stage=t_stage, mode=mode)
-        return matrix.fast_trace(
-            self.ipsi.diagnosis_matrix(t_stage),
-            joint_state_dist @ self.contra.diagnosis_matrix(t_stage).T,
+        if mode == "HMM":
+            joint_state_dist = self.state_dist(t_stage=t_stage, mode=mode)
+            return matrix.fast_trace(
+                self.ipsi.diagnosis_matrix(t_stage),
+                joint_state_dist @ self.contra.diagnosis_matrix(t_stage).T,
+            )
+        raise NotImplementedError(
+            f"Mode '{mode}' not implemented for patient likelihoods. "
+            "Only 'HMM' is supported.",
         )
 
     def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float:
@@ -730,6 +744,6 @@ def draw_patients(
         dataset = pd.DataFrame(drawn_obs, columns=multi_cols)
         dataset = dataset.reorder_levels(order=[1, 0, 2], axis="columns")
         dataset = dataset.sort_index(axis="columns", level=0)
-        dataset[("tumor", "1", "t_stage")] = drawn_t_stages
+        dataset[("tumor", "core", "t_stage")] = drawn_t_stages
 
         return dataset
diff --git a/src/lymph/models/hpv.py b/src/lymph/models/hpv.py
@@ -17,6 +17,10 @@
 logger = logging.getLogger(__name__)
 
 
+RAW_HPV_COL_OLD = ("patient", "#", "hpv_status")  # legacy raw HPV status column
+RAW_HPV_COL_NEW = ("patient", "core", "hpv_status")  # new raw HPV status column
+
+
 def select_hpv_model(method):
     """Decorate methods that simply delegate to the `hpv` or `nohpv` model."""
 
@@ -299,8 +303,9 @@ def load_patient_data(
         method of both the HPV+ and the HPV- model.
         """
         # TODO: What about patients with unknown HPV status?
-        is_hpv_pos = patient_data["patient", "#", "hpv_status"] == True  # noqa: E712
-        is_hpv_neg = patient_data["patient", "#", "hpv_status"] == False  # noqa: E712
+        hpv_data = utils.get_item(patient_data, [RAW_HPV_COL_NEW, RAW_HPV_COL_OLD])
+        is_hpv_pos = hpv_data == True  # noqa: E712
+        is_hpv_neg = hpv_data == False  # noqa: E712
 
         hpv_patient_data = patient_data.loc[is_hpv_pos]
         nohpv_patient_data = patient_data.loc[is_hpv_neg]

diff --git a/src/lymph/models/midline.py b/src/lymph/models/midline.py
@@ -16,8 +16,13 @@
 logger = logging.getLogger(__name__)
 
 
-EXT_COL = ("tumor", "1", "extension")
-CENTRAL_COL = ("tumor", "1", "central")
+EXT_COL_OLD = ("tumor", "1", "extension")  # legacy raw midline-extension column
+CENT_COL_OLD = ("tumor", "1", "central")  # legacy raw central-tumor column
+EXT_COL_NEW = ("tumor", "core", "extension")  # new raw midline-extension column
+MAP_EXT_COL = ("_model", "core", "extension")  # extension flag copied into model data
+MAP_CENT_COL = ("_model", "core", "central")  # NOTE(review): not used in visible hunks
+CENTRAL_COL_NEW = ("tumor", "core", "central")  # new raw central-tumor column
+MAP_T_COL = ("_model", "core", "t_stage")  # T-stage column read by `patient_likelihoods`
 
 
 class Midline(
@@ -505,10 +510,10 @@ def load_patient_data(
         This amounts to sorting the patients into three bins:
 
         1. Patients whose tumor is clearly lateralized, meaning the column
-           ``("tumor", "1", "extension")`` reports ``False``. These get assigned to
+           ``("tumor", "core", "extension")`` reports ``False``. These get assigned to
            the :py:attr:`.noext` attribute.
         2. Those with a central tumor, indicated by ``True`` in the column
-           ``("tumor", "1", "central")``. If the :py:attr:`.use_central` attribute is
+           ``("tumor", "core", "central")``. If the :py:attr:`.use_central` attribute is
            set to ``True``, these patients are assigned to the :py:attr:`.central`
            model. Otherwise, they are assigned to the :py:attr:`.ext` model.
         3. The rest, which amounts to patients whose tumor extends over the mid-sagittal
@@ -519,13 +524,20 @@ def load_patient_data(
         the respective models.
         """
         # pylint: disable=singleton-comparison
-        is_lateralized = patient_data[EXT_COL] == False  # noqa: E712
-        has_extension = patient_data[EXT_COL] == True  # noqa: E712
-        is_unknown = patient_data[EXT_COL].isna()
+        midext_data = utils.get_item(patient_data, [EXT_COL_NEW, EXT_COL_OLD])
+        # first load complete data into noext to assign the loaded dataset to self
+        self.noext.load_patient_data(patient_data, mapping)
+        main_data = self.noext.patient_data.copy()
+        main_data[MAP_EXT_COL] = midext_data
+        self.patient_data = main_data
+        is_lateralized = midext_data == False  # noqa: E712
+        has_extension = midext_data == True  # noqa: E712
+        is_unknown = midext_data.isna()
         self.noext.load_patient_data(patient_data[is_lateralized], mapping)
 
         if self.use_central:
-            is_central = patient_data[CENTRAL_COL] == True  # noqa: E712
+            central_data = utils.get_item(patient_data, [CENTRAL_COL_NEW, CENT_COL_OLD])
+            is_central = central_data == True  # noqa: E712
             has_extension = has_extension & ~is_central
             self.central.load_patient_data(patient_data[is_central], mapping)
 
@@ -669,6 +681,56 @@ def obs_dist(
         ]
         return np.stack(obs_dist)
 
+    def patient_likelihoods(
+        self,
+        t_stage: str | None = None,
+        mode: Literal["HMM", "BN"] = "HMM",
+    ) -> np.ndarray:
+        """Compute the likelihood of each patient individually.
+
+        Patients are restricted to the given ``t_stage``, or to all T-stages in
+        ``self.t_stages`` when it is ``None``. Only ``mode="HMM"`` is implemented;
+        any other value raises a ``NotImplementedError``.
+        """
+        if mode != "HMM":
+            raise NotImplementedError("Only HMM mode is supported as of now.")
+        ipsi_dist_evo = self.ext.ipsi.state_dist_evo()
+        num_states = ipsi_dist_evo.shape[1]
+        contra_dist_evo = {}
+        contra_dist_evo["noext"], contra_dist_evo["ext"] = self.contra_state_dist_evo()
+        t_stages = self.t_stages if t_stage is None else [t_stage]
+        all_data = self.patient_data
+        patient_data = all_data.loc[all_data[MAP_T_COL].isin(t_stages)]
+        patient_llhs = np.zeros(len(patient_data))
+        for stage in t_stages:
+            t_idx = patient_data[MAP_T_COL] == stage
+            diag_time_matrix = np.diag(self.get_distribution(stage).pmf)
+            marg_joint_state_dist = np.zeros(shape=(num_states, num_states))
+            # see the `Bilateral` model for why this is done in this way.
+            for case in ["ext", "noext"]:
+                ext_idx = patient_data[MAP_EXT_COL] == (case == "ext")
+                joint_dist = ipsi_dist_evo.T @ diag_time_matrix @ contra_dist_evo[case]
+                marg_joint_state_dist += joint_dist
+                _model = getattr(self, case)
+                llhs = matrix.fast_trace(
+                    _model.ipsi.diagnosis_matrix(stage),
+                    joint_dist @ _model.contra.diagnosis_matrix(stage).T,
+                )
+                patient_llhs[t_idx & ext_idx] = llhs
+            is_unknown = t_idx & patient_data[MAP_EXT_COL].isna()
+            try:
+                marg_patient_llhs = matrix.fast_trace(
+                    self.unknown.ipsi.diagnosis_matrix(stage),
+                    marg_joint_state_dist
+                    @ self.unknown.contra.diagnosis_matrix(stage).T,
+                )
+                patient_llhs[is_unknown] = marg_patient_llhs
+            except AttributeError:
+                # an AttributeError is raised both when the model has no `unknown`
+                # attribute and when no data is loaded in the `unknown` model.
+                pass
+        return patient_llhs
+
     def _hmm_likelihood(
         self,
         log: bool = True,
@@ -959,7 +1021,10 @@ def draw_patients(
             )
 
         ipsi_evo = self.ext.ipsi.state_dist_evo()
-        drawn_diags = np.empty(shape=(num, len(self.ext.ipsi.obs_list)))
+        drawn_diags = np.empty(
+            shape=(num, self.ext.ipsi.obs_list.shape[1] * 2),
+            dtype=bool,
+        )
         for case in ["ext", "noext"]:
             case_model = getattr(self, case)
             drawn_ipsi_diags = utils.draw_diagnosis(
@@ -996,8 +1061,8 @@ def draw_patients(
         dataset = pd.DataFrame(drawn_diags, columns=multi_cols)
         dataset = dataset.reorder_levels(order=[1, 0, 2], axis="columns")
         dataset = dataset.sort_index(axis="columns", level=0)
-        dataset["tumor", "1", "t_stage"] = drawn_t_stages
-        dataset["tumor", "1", "extension"] = drawn_midexts
-        dataset["patient", "#", "diagnosis_time"] = drawn_diag_times
+        dataset["tumor", "core", "t_stage"] = drawn_t_stages
+        dataset["tumor", "core", "extension"] = drawn_midexts
+        dataset["patient", "core", "diagnosis_time"] = drawn_diag_times
 
         return dataset
diff --git a/src/lymph/models/unilateral.py b/src/lymph/models/unilateral.py
@@ -18,15 +18,17 @@
     draw_diagnosis,  # noqa: F401
     early_late_mapping,
     flatten,
+    get_item,
     get_params_from,
     set_params_for,
 )
 
 warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
 
 
-MAP_T_COL = ("_model", "#", "t_stage")
-RAW_T_COL = ("tumor", "1", "t_stage")
+MAP_T_COL = ("_model", "core", "t_stage")  # mapped T-stage column
+RAW_T_COL_OLD = ("tumor", "1", "t_stage")  # legacy raw T-stage column
+RAW_T_COL_NEW = ("tumor", "core", "t_stage")  # new raw T-stage column
 
 
 class Unilateral(
@@ -548,7 +550,11 @@ def load_patient_data(
 
                 patient_data["_model", modality, lnl] = column
 
-        patient_data[MAP_T_COL] = patient_data[RAW_T_COL].map(mapping)
+        patient_data[MAP_T_COL] = get_item(
+            mapping=patient_data,
+            keys=[RAW_T_COL_NEW, RAW_T_COL_OLD],
+        ).map(mapping)
+
         self._patient_data = patient_data
         self._cache_version += 1
 
@@ -570,7 +576,7 @@ def patient_data(self) -> pd.DataFrame:
         each of the LNLs in the list :py:attr:`.graph.Representation.lnls`.
 
         It also contains information on the patient's T-stage under the header
-        ``("_model", "#", "t_stage")``.
+        ``("_model", "core", "t_stage")``.
 
         Additionally, it holds the data encodings and probability of diagnosis given the
         hidden states for each patient under the headers ``("_model", "_encoding",
@@ -673,6 +679,19 @@ def obs_dist(
 
         return given_state_dist @ self.observation_matrix()
 
+    def patient_likelihoods(
+        self,
+        t_stage: str,
+        mode: Literal["HMM", "BN"] = "HMM",
+    ) -> np.ndarray:
+        """Compute the likelihood of each patient individually (HMM mode only)."""
+        if mode == "HMM":
+            return self.state_dist(t_stage) @ self.diagnosis_matrix(t_stage).T
+        raise NotImplementedError(
+            f"Mode '{mode}' not implemented for patient likelihoods. "
+            "Only 'HMM' is supported.",
+        )
+
     def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float:
         """Compute the BN likelihood, using the stored params."""
         state_dist = self.state_dist(mode="BN")
@@ -959,6 +978,6 @@ def draw_patients(
         multi_cols = pd.MultiIndex.from_product([modality_names, ["ipsi"], lnl_names])
 
         dataset = pd.DataFrame(drawn_obs, columns=multi_cols)
-        dataset[(RAW_T_COL)] = drawn_t_stages
+        dataset[RAW_T_COL_NEW] = drawn_t_stages
 
         return dataset