Fix flatten_timepoint_specific_output_overrides (#239)

dweindl · web-flow · commit 179dc8282aea · 2023-12-18T15:34:41.000+01:00
Another follow-up fix for this function.

Fixes a `TypeError` raised when `re.sub` is applied to a numeric (non-string) value:

```
    petab.flatten_timepoint_specific_output_overrides(problem)
build/venv/lib/python3.9/site-packages/petab/core.py:290: in flatten_timepoint_specific_output_overrides
    observable[target] = re.sub(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

pattern = 'observableParameter([0-9]+)_obs_a'
repl = 'observableParameter\\1_obs_a__10__c0', string = 1, count = 0, flags = 0

    def sub(pattern, repl, string, count=0, flags=0):
        """Return the string obtained by replacing the leftmost
        non-overlapping occurrences of the pattern in string by the
        replacement repl.  repl can be either a string or a callable;
        if a string, backslash escapes in it are processed.  If it is
        a callable, it's passed the Match object and must return
        a replacement string to be used."""
>       return _compile(pattern, flags).sub(repl, string, count)
E       TypeError: cannot use a string pattern on a bytes-like object

/opt/hostedtoolcache/Python/3.9.18/x64/lib/python3.9/re.py:210: TypeError
```
diff --git a/petab/core.py b/petab/core.py
@@ -17,6 +17,7 @@
 
 import numpy as np
 import pandas as pd
+from pandas.api.types import is_string_dtype
 
 from . import yaml
 from .C import *  # noqa: F403
@@ -282,6 +283,10 @@ def flatten_timepoint_specific_output_overrides(
             if field not in measurements:
                 continue
 
+            if not is_string_dtype(type(observable[target])):
+                # if not a string, we don't have to substitute anything
+                continue
+
             hyperparameter_replacement_id = get_hyperparameter_replacement_id(
                 hyperparameter_type=hyperparameter_type,
                 observable_replacement_id=observable_replacement_id,
diff --git a/tests/test_petab.py b/tests/test_petab.py
@@ -349,12 +349,14 @@ def test_flatten_timepoint_specific_output_overrides():
     """Test flatten_timepoint_specific_output_overrides"""
     observable_df = pd.DataFrame(
         data={
-            OBSERVABLE_ID: ["obs1"],
+            OBSERVABLE_ID: ["obs1", "obs2"],
             OBSERVABLE_FORMULA: [
-                "observableParameter1_obs1 + observableParameter2_obs1"
+                "observableParameter1_obs1 + observableParameter2_obs1",
+                "x",
             ],
             NOISE_FORMULA: [
-                "(observableParameter1_obs1 + observableParameter2_obs1) * noiseParameter1_obs1"
+                "(observableParameter1_obs1 + observableParameter2_obs1) * noiseParameter1_obs1",
+                1,
             ],
         }
     )
@@ -366,11 +368,17 @@ def test_flatten_timepoint_specific_output_overrides():
     obs1_2_2_1 = "obs1__obsParOverride2_1_0__noiseParOverride2__condition1"
     observable_df_expected = pd.DataFrame(
         data={
-            OBSERVABLE_ID: [obs1_1_1_1, obs1_2_1_1, obs1_2_2_1],
+            OBSERVABLE_ID: [
+                obs1_1_1_1,
+                obs1_2_1_1,
+                obs1_2_2_1,
+                "obs2__condition1",
+            ],
             OBSERVABLE_FORMULA: [
                 f"observableParameter1_{obs1_1_1_1} + observableParameter2_{obs1_1_1_1}",
                 f"observableParameter1_{obs1_2_1_1} + observableParameter2_{obs1_2_1_1}",
                 f"observableParameter1_{obs1_2_2_1} + observableParameter2_{obs1_2_2_1}",
+                "x",
             ],
             NOISE_FORMULA: [
                 f"(observableParameter1_{obs1_1_1_1} + observableParameter2_{obs1_1_1_1})"
@@ -379,6 +387,7 @@ def test_flatten_timepoint_specific_output_overrides():
                 f" * noiseParameter1_{obs1_2_1_1}",
                 f"(observableParameter1_{obs1_2_2_1} + observableParameter2_{obs1_2_2_1})"
                 f" * noiseParameter1_{obs1_2_2_1}",
+                1,
             ],
         }
     )
@@ -387,54 +396,66 @@ def test_flatten_timepoint_specific_output_overrides():
     # Measurement table with timepoint-specific overrides
     measurement_df = pd.DataFrame(
         data={
-            OBSERVABLE_ID: ["obs1", "obs1", "obs1", "obs1"],
+            OBSERVABLE_ID: ["obs1", "obs1", "obs1", "obs1", "obs2"],
             SIMULATION_CONDITION_ID: [
                 "condition1",
                 "condition1",
                 "condition1",
                 "condition1",
+                "condition1",
             ],
-            PREEQUILIBRATION_CONDITION_ID: ["", "", "", ""],
-            TIME: [1.0, 1.0, 2.0, 2.0],
-            MEASUREMENT: [0.1] * 4,
+            PREEQUILIBRATION_CONDITION_ID: ["", "", "", "", ""],
+            TIME: [1.0, 1.0, 2.0, 2.0, 3.0],
+            MEASUREMENT: [0.1] * 5,
             OBSERVABLE_PARAMETERS: [
                 "obsParOverride1;1.0",
                 "obsParOverride2;1.0",
                 "obsParOverride2;1.0",
                 "obsParOverride2;1.0",
+                "",
             ],
             NOISE_PARAMETERS: [
                 "noiseParOverride1",
                 "noiseParOverride1",
                 "noiseParOverride2",
                 "noiseParOverride2",
+                "",
             ],
         }
     )
 
     measurement_df_expected = pd.DataFrame(
         data={
-            OBSERVABLE_ID: [obs1_1_1_1, obs1_2_1_1, obs1_2_2_1, obs1_2_2_1],
+            OBSERVABLE_ID: [
+                obs1_1_1_1,
+                obs1_2_1_1,
+                obs1_2_2_1,
+                obs1_2_2_1,
+                "obs2__condition1",
+            ],
             SIMULATION_CONDITION_ID: [
                 "condition1",
                 "condition1",
                 "condition1",
                 "condition1",
+                "condition1",
             ],
-            PREEQUILIBRATION_CONDITION_ID: ["", "", "", ""],
-            TIME: [1.0, 1.0, 2.0, 2.0],
-            MEASUREMENT: [0.1] * 4,
+            PREEQUILIBRATION_CONDITION_ID: ["", "", "", "", ""],
+            TIME: [1.0, 1.0, 2.0, 2.0, 3.0],
+            MEASUREMENT: [0.1] * 5,
             OBSERVABLE_PARAMETERS: [
                 "obsParOverride1;1.0",
                 "obsParOverride2;1.0",
                 "obsParOverride2;1.0",
                 "obsParOverride2;1.0",
+                "",
             ],
             NOISE_PARAMETERS: [
                 "noiseParOverride1",
                 "noiseParOverride1",
                 "noiseParOverride2",
                 "noiseParOverride2",
+                "",
             ],
         }
     )
@@ -483,7 +504,9 @@ def test_flatten_timepoint_specific_output_overrides():
         petab_problem=unflattened_problem,
     )
     # The unflattened simulation dataframe has the original observable IDs.
-    assert (unflattened_simulation_df[OBSERVABLE_ID] == "obs1").all()
+    assert (
+        unflattened_simulation_df[OBSERVABLE_ID] == ["obs1"] * 4 + ["obs2"]
+    ).all()
 
 
 def test_flatten_timepoint_specific_output_overrides_special_cases():