Skip to content

Commit 6a433e0

Browse files
dweindl and dilpath authored
Support for petab v2 experiments (#332)
Add basic support for PEtab version 2 experiments (see also PEtab-dev/PEtab#586, and PEtab-dev/PEtab#581). Follow-up to #334. Partially supersedes #263, which was started before petab.v1/petab.v2 were introduced and before PEtab-dev/PEtab#586. * updates the required fields in the measurement table * updates some validation functions to not expect the old `simulationConditionId`s (but does not do full validation yet) * extends PEtab v1 up-conversion to create a new experiment table. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com>
1 parent 1d3fda1 commit 6a433e0

File tree

8 files changed

+207
-92
lines changed

8 files changed

+207
-92
lines changed

petab/v1/calculate.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ def calculate_residuals_for_table(
9797
Calculate residuals for a single measurement table.
9898
For the arguments, see `calculate_residuals`.
9999
"""
100+
# below, we rely on a unique index
101+
measurement_df = measurement_df.reset_index(drop=True)
102+
100103
# create residual df as copy of measurement df, change column
101104
residual_df = measurement_df.copy(deep=True).rename(
102105
columns={MEASUREMENT: RESIDUAL}
@@ -120,6 +123,10 @@ def calculate_residuals_for_table(
120123
for col in compared_cols
121124
]
122125
mask = reduce(lambda x, y: x & y, masks)
126+
if mask.sum() == 0:
127+
raise ValueError(
128+
f"Could not find simulation for measurement {row}."
129+
)
123130
simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
124131
if scale:
125132
# apply scaling

petab/v1/problem.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,8 +1149,8 @@ def add_measurement(
11491149
sim_cond_id: str,
11501150
time: float,
11511151
measurement: float,
1152-
observable_parameters: Sequence[str] = None,
1153-
noise_parameters: Sequence[str] = None,
1152+
observable_parameters: Sequence[str | float] = None,
1153+
noise_parameters: Sequence[str | float] = None,
11541154
preeq_cond_id: str = None,
11551155
):
11561156
"""Add a measurement to the problem.
@@ -1172,11 +1172,11 @@ def add_measurement(
11721172
}
11731173
if observable_parameters is not None:
11741174
record[OBSERVABLE_PARAMETERS] = [
1175-
PARAMETER_SEPARATOR.join(observable_parameters)
1175+
PARAMETER_SEPARATOR.join(map(str, observable_parameters))
11761176
]
11771177
if noise_parameters is not None:
11781178
record[NOISE_PARAMETERS] = [
1179-
PARAMETER_SEPARATOR.join(noise_parameters)
1179+
PARAMETER_SEPARATOR.join(map(str, noise_parameters))
11801180
]
11811181
if preeq_cond_id is not None:
11821182
record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id]

petab/v2/C.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,6 @@
1313
#: Experiment ID column in the measurement table
1414
EXPERIMENT_ID = "experimentId"
1515

16-
# TODO: remove
17-
#: Preequilibration condition ID column in the measurement table
18-
PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
19-
20-
# TODO: remove
21-
#: Simulation condition ID column in the measurement table
22-
SIMULATION_CONDITION_ID = "simulationConditionId"
23-
2416
#: Measurement value column in the measurement table
2517
MEASUREMENT = "measurement"
2618

@@ -30,6 +22,9 @@
3022
#: Time value that indicates steady-state measurements
3123
TIME_STEADY_STATE = _math.inf
3224

25+
#: Time value that indicates pre-equilibration in the experiments table
26+
TIME_PREEQUILIBRATION = -_math.inf
27+
3328
#: Observable parameters column in the measurement table
3429
OBSERVABLE_PARAMETERS = "observableParameters"
3530

@@ -45,17 +40,13 @@
4540
#: Mandatory columns of measurement table
4641
MEASUREMENT_DF_REQUIRED_COLS = [
4742
OBSERVABLE_ID,
48-
# TODO: add
49-
# EXPERIMENT_ID,
50-
SIMULATION_CONDITION_ID,
43+
EXPERIMENT_ID,
5144
MEASUREMENT,
5245
TIME,
5346
]
5447

5548
#: Optional columns of measurement table
5649
MEASUREMENT_DF_OPTIONAL_COLS = [
57-
# TODO: remove
58-
PREEQUILIBRATION_CONDITION_ID,
5950
OBSERVABLE_PARAMETERS,
6051
NOISE_PARAMETERS,
6152
DATASET_ID,

petab/v2/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727

2828
# import after v1
2929
from ..version import __version__ # noqa: F401, E402
30-
from . import models # noqa: F401, E402
30+
from . import ( # noqa: F401, E402
31+
C, # noqa: F401, E402
32+
models, # noqa: F401, E402
33+
)
3134
from .conditions import * # noqa: F403, F401, E402
3235
from .experiments import ( # noqa: F401, E402
3336
get_experiment_df,

petab/v2/lint.py

Lines changed: 85 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from .. import v2
1616
from ..v1.lint import (
1717
_check_df,
18+
assert_measured_observables_defined,
19+
assert_measurements_not_null,
20+
assert_measurements_numeric,
1821
assert_model_parameters_in_condition_or_parameter_table,
1922
assert_no_leading_trailing_whitespace,
2023
assert_parameter_bounds_are_numeric,
@@ -23,13 +26,16 @@
2326
assert_parameter_prior_parameters_are_valid,
2427
assert_parameter_prior_type_is_valid,
2528
assert_parameter_scale_is_valid,
29+
assert_unique_observable_ids,
2630
assert_unique_parameter_ids,
2731
check_ids,
28-
check_measurement_df,
2932
check_observable_df,
3033
check_parameter_bounds,
3134
)
32-
from ..v1.measurements import split_parameter_replacement_list
35+
from ..v1.measurements import (
36+
assert_overrides_match_parameter_count,
37+
split_parameter_replacement_list,
38+
)
3339
from ..v1.observables import get_output_parameters, get_placeholders
3440
from ..v1.visualize.lint import validate_visualization_df
3541
from ..v2.C import *
@@ -102,6 +108,23 @@ class ValidationError(ValidationIssue):
102108
level: ValidationIssueSeverity = field(
103109
default=ValidationIssueSeverity.ERROR, init=False
104110
)
111+
task: str | None = None
112+
113+
def __post_init__(self):
114+
if self.task is None:
115+
self.task = self._get_task_name()
116+
117+
def _get_task_name(self):
118+
"""Get the name of the ValidationTask that raised this error."""
119+
import inspect
120+
121+
# walk up the stack until we find the ValidationTask.run method
122+
for frame_info in inspect.stack():
123+
frame = frame_info.frame
124+
if "self" in frame.f_locals:
125+
task = frame.f_locals["self"]
126+
if isinstance(task, ValidationTask):
127+
return task.__class__.__name__
105128

106129

107130
class ValidationResultList(list[ValidationIssue]):
@@ -237,8 +260,51 @@ def run(self, problem: Problem) -> ValidationIssue | None:
237260
if problem.measurement_df is None:
238261
return
239262

263+
df = problem.measurement_df
240264
try:
241-
check_measurement_df(problem.measurement_df, problem.observable_df)
265+
_check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
266+
267+
for column_name in MEASUREMENT_DF_REQUIRED_COLS:
268+
if not np.issubdtype(df[column_name].dtype, np.number):
269+
assert_no_leading_trailing_whitespace(
270+
df[column_name].values, column_name
271+
)
272+
273+
for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
274+
if column_name in df and not np.issubdtype(
275+
df[column_name].dtype, np.number
276+
):
277+
assert_no_leading_trailing_whitespace(
278+
df[column_name].values, column_name
279+
)
280+
281+
if problem.observable_df is not None:
282+
assert_measured_observables_defined(df, problem.observable_df)
283+
assert_overrides_match_parameter_count(
284+
df, problem.observable_df
285+
)
286+
287+
if OBSERVABLE_TRANSFORMATION in problem.observable_df:
288+
# Check for positivity of measurements in case of
289+
# log-transformation
290+
assert_unique_observable_ids(problem.observable_df)
291+
# If the above is not checked, in the following loop
292+
# trafo may become a pandas Series
293+
for measurement, obs_id in zip(
294+
df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
295+
):
296+
trafo = problem.observable_df.loc[
297+
obs_id, OBSERVABLE_TRANSFORMATION
298+
]
299+
if measurement <= 0.0 and trafo in [LOG, LOG10]:
300+
raise ValueError(
301+
"Measurements with observable "
302+
f"transformation {trafo} must be "
303+
f"positive, but {measurement} <= 0."
304+
)
305+
306+
assert_measurements_not_null(df)
307+
assert_measurements_numeric(df)
242308
except AssertionError as e:
243309
return ValidationError(str(e))
244310

@@ -247,46 +313,20 @@ def run(self, problem: Problem) -> ValidationIssue | None:
247313
# condition table should be an error if the measurement table refers
248314
# to conditions
249315

250-
# check that measured experiments/conditions exist
251-
# TODO: fully switch to experiment table and remove this:
252-
if SIMULATION_CONDITION_ID in problem.measurement_df:
253-
if problem.condition_df is None:
254-
return
255-
used_conditions = set(
256-
problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values
257-
)
258-
if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
259-
used_conditions |= set(
260-
problem.measurement_df[PREEQUILIBRATION_CONDITION_ID]
261-
.dropna()
262-
.values
263-
)
264-
available_conditions = set(
265-
problem.condition_df[CONDITION_ID].unique()
266-
)
267-
if missing_conditions := (used_conditions - available_conditions):
268-
return ValidationError(
269-
"Measurement table references conditions that "
270-
"are not specified in the condition table: "
271-
+ str(missing_conditions)
272-
)
273-
elif EXPERIMENT_ID in problem.measurement_df:
274-
if problem.experiment_df is None:
275-
return
276-
used_experiments = set(
277-
problem.measurement_df[EXPERIMENT_ID].values
278-
)
279-
available_experiments = set(
280-
problem.condition_df[CONDITION_ID].unique()
316+
# check that measured experiments
317+
if problem.experiment_df is None:
318+
return
319+
320+
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
321+
available_experiments = set(
322+
problem.experiment_df[EXPERIMENT_ID].unique()
323+
)
324+
if missing_experiments := (used_experiments - available_experiments):
325+
raise AssertionError(
326+
"Measurement table references experiments that "
327+
"are not specified in the experiments table: "
328+
+ str(missing_experiments)
281329
)
282-
if missing_experiments := (
283-
used_experiments - available_experiments
284-
):
285-
raise AssertionError(
286-
"Measurement table references experiments that "
287-
"are not specified in the experiments table: "
288-
+ str(missing_experiments)
289-
)
290330

291331

292332
class CheckConditionTable(ValidationTask):
@@ -486,7 +526,7 @@ def run(self, problem: Problem) -> ValidationIssue | None:
486526
)
487527

488528
required_conditions = problem.experiment_df[CONDITION_ID].unique()
489-
existing_conditions = problem.condition_df.index
529+
existing_conditions = problem.condition_df[CONDITION_ID].unique()
490530

491531
missing_conditions = set(required_conditions) - set(
492532
existing_conditions
@@ -771,7 +811,8 @@ def append_overrides(overrides):
771811
)
772812

773813
# parameters that are overridden via the condition table are not allowed
774-
parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
814+
if problem.condition_df is not None:
815+
parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
775816

776817
return parameter_ids
777818

0 commit comments

Comments (0)