Skip to content

Commit d2715e9

Browse files
committed
petab1->2: create experiment df
1 parent 1d3fda1 commit d2715e9

File tree

7 files changed

+173
-72
lines changed

7 files changed

+173
-72
lines changed

petab/v2/C.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,6 @@
1313
#: Experiment ID column in the measurement table
1414
EXPERIMENT_ID = "experimentId"
1515

16-
# TODO: remove
17-
#: Preequilibration condition ID column in the measurement table
18-
PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
19-
20-
# TODO: remove
21-
#: Simulation condition ID column in the measurement table
22-
SIMULATION_CONDITION_ID = "simulationConditionId"
23-
2416
#: Measurement value column in the measurement table
2517
MEASUREMENT = "measurement"
2618

@@ -45,17 +37,13 @@
4537
#: Mandatory columns of measurement table
4638
MEASUREMENT_DF_REQUIRED_COLS = [
4739
OBSERVABLE_ID,
48-
# TODO: add
49-
# EXPERIMENT_ID,
50-
SIMULATION_CONDITION_ID,
40+
EXPERIMENT_ID,
5141
MEASUREMENT,
5242
TIME,
5343
]
5444

5545
#: Optional columns of measurement table
5646
MEASUREMENT_DF_OPTIONAL_COLS = [
57-
# TODO: remove
58-
PREEQUILIBRATION_CONDITION_ID,
5947
OBSERVABLE_PARAMETERS,
6048
NOISE_PARAMETERS,
6149
DATASET_ID,

petab/v2/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727

2828
# import after v1
2929
from ..version import __version__ # noqa: F401, E402
30-
from . import models # noqa: F401, E402
30+
from . import ( # noqa: F401, E402
31+
C, # noqa: F401, E402
32+
models, # noqa: F401, E402
33+
)
3134
from .conditions import * # noqa: F403, F401, E402
3235
from .experiments import ( # noqa: F401, E402
3336
get_experiment_df,

petab/v2/lint.py

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from .. import v2
1616
from ..v1.lint import (
1717
_check_df,
18+
assert_measured_observables_defined,
19+
assert_measurements_not_null,
20+
assert_measurements_numeric,
1821
assert_model_parameters_in_condition_or_parameter_table,
1922
assert_no_leading_trailing_whitespace,
2023
assert_parameter_bounds_are_numeric,
@@ -23,13 +26,16 @@
2326
assert_parameter_prior_parameters_are_valid,
2427
assert_parameter_prior_type_is_valid,
2528
assert_parameter_scale_is_valid,
29+
assert_unique_observable_ids,
2630
assert_unique_parameter_ids,
2731
check_ids,
28-
check_measurement_df,
2932
check_observable_df,
3033
check_parameter_bounds,
3134
)
32-
from ..v1.measurements import split_parameter_replacement_list
35+
from ..v1.measurements import (
36+
assert_overrides_match_parameter_count,
37+
split_parameter_replacement_list,
38+
)
3339
from ..v1.observables import get_output_parameters, get_placeholders
3440
from ..v1.visualize.lint import validate_visualization_df
3541
from ..v2.C import *
@@ -237,8 +243,51 @@ def run(self, problem: Problem) -> ValidationIssue | None:
237243
if problem.measurement_df is None:
238244
return
239245

246+
df = problem.measurement_df
240247
try:
241-
check_measurement_df(problem.measurement_df, problem.observable_df)
248+
_check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
249+
250+
for column_name in MEASUREMENT_DF_REQUIRED_COLS:
251+
if not np.issubdtype(df[column_name].dtype, np.number):
252+
assert_no_leading_trailing_whitespace(
253+
df[column_name].values, column_name
254+
)
255+
256+
for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
257+
if column_name in df and not np.issubdtype(
258+
df[column_name].dtype, np.number
259+
):
260+
assert_no_leading_trailing_whitespace(
261+
df[column_name].values, column_name
262+
)
263+
264+
if problem.observable_df is not None:
265+
assert_measured_observables_defined(df, problem.observable_df)
266+
assert_overrides_match_parameter_count(
267+
df, problem.observable_df
268+
)
269+
270+
if OBSERVABLE_TRANSFORMATION in problem.observable_df:
271+
# Check for positivity of measurements in case of
272+
# log-transformation
273+
assert_unique_observable_ids(problem.observable_df)
274+
# If the above is not checked, in the following loop
275+
# trafo may become a pandas Series
276+
for measurement, obs_id in zip(
277+
df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
278+
):
279+
trafo = problem.observable_df.loc[
280+
obs_id, OBSERVABLE_TRANSFORMATION
281+
]
282+
if measurement <= 0.0 and trafo in [LOG, LOG10]:
283+
raise ValueError(
284+
"Measurements with observable "
285+
f"transformation {trafo} must be "
286+
f"positive, but {measurement} <= 0."
287+
)
288+
289+
assert_measurements_not_null(df)
290+
assert_measurements_numeric(df)
242291
except AssertionError as e:
243292
return ValidationError(str(e))
244293

@@ -247,46 +296,21 @@ def run(self, problem: Problem) -> ValidationIssue | None:
247296
# condition table should be an error if the measurement table refers
248297
# to conditions
249298

250-
# check that measured experiments/conditions exist
299+
# check that all measured experiments are specified in the experiment table
251300
# TODO: fully switch to experiment table and remove this:
252-
if SIMULATION_CONDITION_ID in problem.measurement_df:
253-
if problem.condition_df is None:
254-
return
255-
used_conditions = set(
256-
problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values
257-
)
258-
if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
259-
used_conditions |= set(
260-
problem.measurement_df[PREEQUILIBRATION_CONDITION_ID]
261-
.dropna()
262-
.values
263-
)
264-
available_conditions = set(
265-
problem.condition_df[CONDITION_ID].unique()
266-
)
267-
if missing_conditions := (used_conditions - available_conditions):
268-
return ValidationError(
269-
"Measurement table references conditions that "
270-
"are not specified in the condition table: "
271-
+ str(missing_conditions)
272-
)
273-
elif EXPERIMENT_ID in problem.measurement_df:
274-
if problem.experiment_df is None:
275-
return
276-
used_experiments = set(
277-
problem.measurement_df[EXPERIMENT_ID].values
278-
)
279-
available_experiments = set(
280-
problem.condition_df[CONDITION_ID].unique()
301+
302+
if problem.experiment_df is None:
303+
return
304+
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
305+
available_experiments = set(
306+
problem.condition_df[CONDITION_ID].unique()
307+
)
308+
if missing_experiments := (used_experiments - available_experiments):
309+
raise AssertionError(
310+
"Measurement table references experiments that "
311+
"are not specified in the experiments table: "
312+
+ str(missing_experiments)
281313
)
282-
if missing_experiments := (
283-
used_experiments - available_experiments
284-
):
285-
raise AssertionError(
286-
"Measurement table references experiments that "
287-
"are not specified in the experiments table: "
288-
+ str(missing_experiments)
289-
)
290314

291315

292316
class CheckConditionTable(ValidationTask):

petab/v2/petab1to2.py

Lines changed: 88 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from itertools import chain
55
from pathlib import Path
66
from urllib.parse import urlparse
7+
from uuid import uuid4
78

8-
import numpy as np
99
import pandas as pd
1010
from pandas.io.common import get_handle, is_url
1111

@@ -98,10 +98,81 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
9898
condition_df = v1v2_condition_df(condition_df, petab_problem.model)
9999
v2.write_condition_df(condition_df, get_dest_path(condition_file))
100100

101+
# records for the experiment table to be created
102+
experiments = []
103+
104+
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
105+
# TODO: can a condition ID be used as an experiment ID if there
106+
# would be only a single condition in the experiment?
107+
if not sim_cond_id and not preeq_cond_id:
108+
return ""
109+
if preeq_cond_id:
110+
preeq_cond_id = f"{preeq_cond_id}_"
111+
exp_id = f"experiment_{preeq_cond_id}{sim_cond_id}"
112+
if exp_id in experiments: # noqa: B023
113+
i = 1
114+
while f"{exp_id}_{i}" in experiments: # noqa: B023
115+
i += 1
116+
exp_id = f"{exp_id}_{i}"
117+
return exp_id
118+
119+
measured_experiments = (
120+
petab_problem.get_simulation_conditions_from_measurement_df()
121+
)
122+
for (
123+
_,
124+
row,
125+
) in measured_experiments.iterrows():
126+
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
127+
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
128+
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
129+
if preeq_cond_id:
130+
experiments.append(
131+
{
132+
v2.C.EXPERIMENT_ID: exp_id,
133+
v2.C.CONDITION_ID: preeq_cond_id,
134+
v2.C.TIME: float("-inf"),
135+
}
136+
)
137+
experiments.append(
138+
{
139+
v2.C.EXPERIMENT_ID: exp_id,
140+
v2.C.CONDITION_ID: sim_cond_id,
141+
v2.C.TIME: 0,
142+
}
143+
)
144+
if experiments:
145+
exp_table_path = output_dir / "experiments.tsv"
146+
if exp_table_path.exists():
147+
raise ValueError(
148+
f"Experiment table file {exp_table_path} already exists."
149+
)
150+
problem_config[v2.C.EXPERIMENT_FILES] = [exp_table_path.name]
151+
v2.write_experiment_df(
152+
v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path
153+
)
154+
101155
for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []):
102156
measurement_df = v1.get_measurement_df(
103157
get_src_path(measurement_file)
104158
)
159+
# if there is already an experiment ID column, we rename it
160+
if v2.C.EXPERIMENT_ID in measurement_df.columns:
161+
measurement_df.rename(
162+
columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"},
163+
inplace=True,
164+
)
165+
# add pre-eq condition id if not present or convert to string
166+
# for simplicity
167+
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
168+
measurement_df[
169+
v1.C.PREEQUILIBRATION_CONDITION_ID
170+
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
171+
str
172+
)
173+
else:
174+
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
175+
105176
if (
106177
petab_problem.condition_df is not None
107178
and len(
@@ -112,18 +183,29 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
112183
):
113184
# can't have "empty" conditions with no overrides in v2
114185
# TODO: this needs to be done condition wise
115-
measurement_df[v2.C.SIMULATION_CONDITION_ID] = np.nan
186+
measurement_df[v1.C.SIMULATION_CONDITION_ID] = ""
116187
if (
117188
v1.C.PREEQUILIBRATION_CONDITION_ID
118189
in measurement_df.columns
119190
):
120-
measurement_df[v2.C.PREEQUILIBRATION_CONDITION_ID] = np.nan
191+
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
192+
# map (simulation, preequilibration) condition IDs to experiment IDs
193+
measurement_df.insert(
194+
0,
195+
v2.C.EXPERIMENT_ID,
196+
measurement_df.apply(
197+
lambda row: create_experiment_id(
198+
row[v1.C.SIMULATION_CONDITION_ID],
199+
row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""),
200+
),
201+
axis=1,
202+
),
203+
)
204+
del measurement_df[v1.C.SIMULATION_CONDITION_ID]
205+
del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID]
121206
v2.write_measurement_df(
122207
measurement_df, get_dest_path(measurement_file)
123208
)
124-
# TODO: Measurements: preequilibration to experiments/timecourses once
125-
# finalized
126-
...
127209

128210
# validate updated Problem
129211
validation_issues = v2.lint_problem(new_yaml_file)

petab/v2/problem.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -908,27 +908,25 @@ def add_parameter(
908908
def add_measurement(
909909
self,
910910
obs_id: str,
911-
sim_cond_id: str,
911+
experiment_id: str,
912912
time: float,
913913
measurement: float,
914914
observable_parameters: Sequence[str] = None,
915915
noise_parameters: Sequence[str] = None,
916-
preeq_cond_id: str = None,
917916
):
918917
"""Add a measurement to the problem.
919918
920919
Arguments:
921920
obs_id: The observable ID
922-
sim_cond_id: The simulation condition ID
921+
experiment_id: The experiment ID
923922
time: The measurement time
924923
measurement: The measurement value
925924
observable_parameters: The observable parameters
926925
noise_parameters: The noise parameters
927-
preeq_cond_id: The pre-equilibration condition ID
928926
"""
929927
record = {
930928
OBSERVABLE_ID: [obs_id],
931-
SIMULATION_CONDITION_ID: [sim_cond_id],
929+
EXPERIMENT_ID: [experiment_id],
932930
TIME: [time],
933931
MEASUREMENT: [measurement],
934932
}
@@ -940,8 +938,6 @@ def add_measurement(
940938
record[NOISE_PARAMETERS] = [
941939
PARAMETER_SEPARATOR.join(noise_parameters)
942940
]
943-
if preeq_cond_id is not None:
944-
record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id]
945941

946942
tmp_df = pd.DataFrame(record)
947943
self.measurement_df = (

tests/v1/test_petab.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,14 @@ def petab_problem():
4444
petab_problem = petab.Problem()
4545
petab_problem.add_measurement(
4646
obs_id="obs1",
47-
sim_cond_id="condition1",
47+
experiment_id="experiment1",
4848
time=1.0,
4949
measurement=0.1,
5050
noise_parameters=["p3", "p4"],
5151
)
5252
petab_problem.add_measurement(
5353
obs_id="obs2",
54-
sim_cond_id="condition2",
54+
experiment_id="experiment2",
5555
time=1.0,
5656
measurement=0.2,
5757
observable_parameters=["p1", "p2"],
@@ -63,6 +63,9 @@ def petab_problem():
6363
"condition2", fixedParameter1=2.0, name="Condition 2"
6464
)
6565

66+
petab_problem.add_experiment("experiment1", 0, "condition1")
67+
petab_problem.add_experiment("experiment2", 0, "condition2")
68+
6669
petab_problem.add_parameter("dynamicParameter1", estimate=1)
6770
petab_problem.add_parameter("dynamicParameter2", estimate=0, name="...")
6871

@@ -87,6 +90,11 @@ def petab_problem():
8790
petab_problem.condition_df, condition_file_name
8891
)
8992

93+
experiment_file_name = Path(temp_dir, "experiments.tsv")
94+
petab.write_experiment_df(
95+
petab_problem.experiment_df, experiment_file_name
96+
)
97+
9098
parameter_file_name = Path(temp_dir, "parameters.tsv")
9199
petab.write_parameter_df(
92100
petab_problem.parameter_df, parameter_file_name

0 commit comments

Comments
 (0)