Skip to content

Commit

Permalink
Merge branch 'develop' into pl_visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
plakrisenko committed Mar 23, 2020
2 parents c7bb05e + e713f8b commit f9295c5
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 24 deletions.
75 changes: 57 additions & 18 deletions petab/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,33 +116,61 @@ def flatten_timepoint_specific_output_overrides(
petab_problem:
PEtab problem to work on
"""
measurement_df = petab_problem.measurement_df

# remember if columns exist
has_obs_par = OBSERVABLE_PARAMETERS in measurement_df
has_noise_par = NOISE_PARAMETERS in measurement_df
has_preeq = PREEQUILIBRATION_CONDITION_ID in measurement_df

# fill in optional columns to avoid special cases later
if not has_obs_par \
or np.all(measurement_df[OBSERVABLE_PARAMETERS].isnull()):
measurement_df[OBSERVABLE_PARAMETERS] = ''
if not has_noise_par \
or np.all(measurement_df[NOISE_PARAMETERS].isnull()):
measurement_df[NOISE_PARAMETERS] = ''
if not has_preeq \
or np.all(measurement_df[PREEQUILIBRATION_CONDITION_ID].isnull()):
measurement_df[PREEQUILIBRATION_CONDITION_ID] = ''
# convert to str row by row
for irow, row in measurement_df.iterrows():
if is_empty(row[OBSERVABLE_PARAMETERS]):
measurement_df.at[irow, OBSERVABLE_PARAMETERS] = ''
if is_empty(row[NOISE_PARAMETERS]):
measurement_df.at[irow, NOISE_PARAMETERS] = ''
if is_empty(row[PREEQUILIBRATION_CONDITION_ID]):
measurement_df.at[irow, PREEQUILIBRATION_CONDITION_ID] = ''

# Create empty df -> to be filled with replicate-specific observables
df_new = pd.DataFrame()

# Get observableId, preequilibrationConditionId
# and simulationConditionId columns in measurement df
df = petab_problem.measurement_df[
[OBSERVABLE_ID,
PREEQUILIBRATION_CONDITION_ID,
cols = get_notnull_columns(
measurement_df,
[OBSERVABLE_ID, PREEQUILIBRATION_CONDITION_ID,
SIMULATION_CONDITION_ID]
]
)
df = measurement_df[cols]

# Get unique combinations of observableId, preequilibrationConditionId
# and simulationConditionId
df_unique_values = df.drop_duplicates()

# replaced observables: new ID => old ID
replacements = dict()

# Loop over each unique combination
for nrow in range(len(df_unique_values.index)):
df = petab_problem.measurement_df.loc[
(petab_problem.measurement_df[OBSERVABLE_ID] ==
df_unique_values.loc[nrow, OBSERVABLE_ID])
& (petab_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID] <=
df_unique_values.loc[nrow, PREEQUILIBRATION_CONDITION_ID])
& (petab_problem.measurement_df[SIMULATION_CONDITION_ID] <=
df_unique_values.loc[nrow, SIMULATION_CONDITION_ID])
]
for irow in df_unique_values.index:
df = measurement_df.loc[
(measurement_df[OBSERVABLE_ID] ==
df_unique_values.loc[irow, OBSERVABLE_ID])
& (measurement_df[PREEQUILIBRATION_CONDITION_ID] ==
df_unique_values.loc[irow, PREEQUILIBRATION_CONDITION_ID])
& (measurement_df[SIMULATION_CONDITION_ID] ==
df_unique_values.loc[irow, SIMULATION_CONDITION_ID])
]

# Get list of unique observable parameters
unique_sc = df[OBSERVABLE_PARAMETERS].unique()
Expand All @@ -156,8 +184,8 @@ def flatten_timepoint_specific_output_overrides(
# and unique_sc[j] in their corresponding column
# (full-string matches are denoted by zero)
idxs = (
df[NOISE_PARAMETERS].str.find(cur_noise) +
df[OBSERVABLE_PARAMETERS].str.find(cur_sc)
df[NOISE_PARAMETERS].astype(str).str.find(cur_noise) +
df[OBSERVABLE_PARAMETERS].astype(str).str.find(cur_sc)
)
tmp_ = df.loc[idxs == 0, OBSERVABLE_ID]
# Create replicate-specific observable name
Expand All @@ -168,7 +196,7 @@ def flatten_timepoint_specific_output_overrides(
while (df[OBSERVABLE_ID].str.find(
tmp.to_string()
) == 0).any():
tmp = tmp_ + counter*"_" + str(i_noise + i_sc + 1)
tmp = tmp_ + counter * "_" + str(i_noise + i_sc + 1)
counter += 1
if not tmp_.empty and not tmp_.empty:
replacements[tmp.values[0]] = tmp_.values[0]
Expand All @@ -179,21 +207,32 @@ def flatten_timepoint_specific_output_overrides(
# (for continuation of the loop)
df.loc[idxs == 0, OBSERVABLE_ID] = tmp

# remove previously non-existent columns again
if not has_obs_par:
df_new.drop(columns=OBSERVABLE_PARAMETERS, inplace=True)
if not has_noise_par:
df_new.drop(columns=NOISE_PARAMETERS, inplace=True)
if not has_preeq:
df_new.drop(columns=PREEQUILIBRATION_CONDITION_ID, inplace=True)

# Update/Redefine measurement df with replicate-specific observables
petab_problem.measurement_df = df_new

observable_df = petab_problem.observable_df

# Update observables table
for replacement, replacee in replacements.items():
new_obs = petab_problem.observable_df.loc[replacee].copy()
new_obs = observable_df.loc[replacee].copy()
new_obs.name = replacement
new_obs[OBSERVABLE_FORMULA] = new_obs[OBSERVABLE_FORMULA].replace(
replacee, replacement)
new_obs[NOISE_FORMULA] = new_obs[NOISE_FORMULA].replace(
replacee, replacement)
petab_problem.observable_df = petab_problem.observable_df.append(
observable_df = observable_df.append(
new_obs
)

petab_problem.observable_df = observable_df
petab_problem.observable_df.drop(index=set(replacements.values()),
inplace=True)

Expand Down
4 changes: 4 additions & 0 deletions petab/petab_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ properties:
type: array
description: List of PEtab observable files.

items:
type: string
description: PEtab observable file name or URL.

visualization_files:
type: array
description: List of PEtab visualization files.
Expand Down
10 changes: 9 additions & 1 deletion petab/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ def to_files(self,
measurement_file: Optional[str] = None,
parameter_file: Optional[str] = None,
visualization_file: Optional[str] = None,
observable_file: Optional[str] = None) -> None:
observable_file: Optional[str] = None,
yaml_file: Optional[str] = None) -> None:
"""
Write PEtab tables to files for this problem
Expand All @@ -282,6 +283,7 @@ def to_files(self,
parameter_file: Parameter table destination
visualization_file: Visualization table destination
observable_file: Observables table destination
yaml_file: YAML file destination
Raises:
ValueError: If a destination was provided for a non-existing
Expand Down Expand Up @@ -333,6 +335,12 @@ def error(name: str) -> ValueError:
else:
raise error("visualization")

if yaml_file:
yaml.create_problem_yaml(sbml_file, condition_file,
measurement_file, parameter_file,
observable_file, yaml_file,
visualization_file)

def get_optimization_parameters(self):
"""
Return list of optimization parameter IDs.
Expand Down
50 changes: 47 additions & 3 deletions petab/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

import os

from typing import Any, Dict, Union, Optional
from typing import Any, Dict, Union, Optional, List

import jsonschema
import yaml
from .C import * # noqa: F403


SCHEMA = os.path.join(os.path.abspath(os.path.dirname(__file__)),
"petab_schema.yaml")

Expand Down Expand Up @@ -159,4 +158,49 @@ def write_yaml(yaml_config: Dict[str, Any], filename: str) -> None:
"""

with open(filename, 'w') as outfile:
yaml.dump(yaml_config, outfile, default_flow_style=False)
yaml.dump(yaml_config, outfile, default_flow_style=False,
sort_keys=False)


def create_problem_yaml(sbml_files: Union[str, List[str]],
                        condition_files: Union[str, List[str]],
                        measurement_files: Union[str, List[str]],
                        parameter_file: str,
                        observable_files: Union[str, List[str]],
                        yaml_file: str,
                        visualization_files: Optional[Union[str, List[str]]]
                        = None) -> None:
    """
    Assemble a default YAML configuration for one PEtab problem and write it

    Every ``*_files`` argument accepts either a single path given as a
    string or a list of paths; a single string is wrapped into a
    one-element list before being stored.

    Arguments:
        sbml_files: Path of SBML model file or list of such
        condition_files: Path of condition file or list of such
        measurement_files: Path of measurement file or list of such
        parameter_file: Path of parameter file
        observable_files: Path of observable file or list of such
        yaml_file: Path to which YAML file should be written
        visualization_files: Optional path to visualization file or list
            of such; the entry is left out of the YAML when this is None
    """
    def as_list(entry):
        # Only bare strings are wrapped; lists (and None) pass through as-is
        return [entry] if isinstance(entry, str) else entry

    # Insertion order is kept deliberately: it determines the layout of the
    # generated file.
    problem_entry = {
        CONDITION_FILES: as_list(condition_files),
        MEASUREMENT_FILES: as_list(measurement_files),
        SBML_FILES: as_list(sbml_files),
        OBSERVABLE_FILES: as_list(observable_files),
    }

    visualization_entry = as_list(visualization_files)
    if visualization_entry is not None:
        problem_entry['visualization_files'] = visualization_entry

    write_yaml(
        {
            PARAMETER_FILE: parameter_file,
            FORMAT_VERSION: 1,
            PROBLEMS: [problem_entry],
        },
        yaml_file,
    )
76 changes: 75 additions & 1 deletion tests/test_petab.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,79 @@ def test_flatten_timepoint_specific_output_overrides():
assert petab.lint_problem(problem) is False


def test_flatten_timepoint_specific_output_overrides_special_cases():
    """Check flatten_timepoint_specific_output_overrides on reduced tables

    Special cases covered here:
    * no preequilibration
    * no observable parameters
    """
    def make_measurement_table(observable_ids):
        # Input and expected tables differ only in their observable IDs;
        # the column order must stay fixed for the DataFrame.equals check.
        return pd.DataFrame(data={
            OBSERVABLE_ID: observable_ids,
            SIMULATION_CONDITION_ID: ['condition1'] * 4,
            TIME: [1.0, 1.0, 2.0, 2.0],
            MEASUREMENT: [np.nan] * 4,
            NOISE_PARAMETERS: [
                'noiseParOverride1', 'noiseParOverride1',
                'noiseParOverride2', 'noiseParOverride2',
            ],
        })

    # Observable table before flattening: one observable only
    observable_df = pd.DataFrame(data={
        OBSERVABLE_ID: ['obs1'],
        OBSERVABLE_FORMULA: ['species1'],
        NOISE_FORMULA: ['noiseParameter1_obs1'],
    }).set_index(OBSERVABLE_ID)

    # Observable table expected afterwards: one entry per noise override
    observable_df_expected = pd.DataFrame(data={
        OBSERVABLE_ID: ['obs1_1', 'obs1_2'],
        OBSERVABLE_FORMULA: ['species1', 'species1'],
        NOISE_FORMULA: ['noiseParameter1_obs1_1',
                        'noiseParameter1_obs1_2'],
    }).set_index(OBSERVABLE_ID)

    # Measurement table with timepoint-specific overrides
    measurement_df = make_measurement_table(
        ['obs1', 'obs1', 'obs1', 'obs1'])
    measurement_df_expected = make_measurement_table(
        ['obs1_1', 'obs1_1', 'obs1_2', 'obs1_2'])

    problem = petab.Problem(measurement_df=measurement_df,
                            observable_df=observable_df)

    assert petab.lint_problem(problem) is False

    # The input must actually contain timepoint-specific overrides
    has_overrides = \
        petab.lint.measurement_table_has_timepoint_specific_mappings
    assert has_overrides(measurement_df) is True

    petab.flatten_timepoint_specific_output_overrides(problem)

    # Timepoint-specific overrides should be gone now
    assert has_overrides(problem.measurement_df) is False

    assert problem.observable_df.equals(observable_df_expected) is True
    assert problem.measurement_df.equals(measurement_df_expected) is True

    assert petab.lint_problem(problem) is False


def test_concat_measurements():
a = pd.DataFrame({MEASUREMENT: [1.0]})
b = pd.DataFrame({TIME: [1.0]})
Expand Down Expand Up @@ -458,7 +531,8 @@ def test_to_files(petab_problem): # pylint: disable=W0621
measurement_file=measurement_file,
parameter_file=parameter_file,
visualization_file=None,
observable_file=observable_file)
observable_file=observable_file,
yaml_file=None)

# exemplarily load some
parameter_df = petab.get_parameter_df(parameter_file)
Expand Down
36 changes: 35 additions & 1 deletion tests/test_yaml.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""Test for petab.yaml"""
import tempfile

import pytest
from petab.yaml import validate
from petab.yaml import validate, create_problem_yaml

from jsonschema.exceptions import ValidationError

Expand All @@ -18,3 +19,36 @@ def test_validate():
# should be well-formed
file_ = "doc/example/example_Fujita/Fujita.yaml"
validate(file_)


def test_create_problem_yaml():
    """Check that create_problem_yaml writes a YAML file passing validate

    Two scenarios are exercised: every table given as a single path, and
    every table given as a list of two paths (without visualization files).
    """
    # Local import: only needed to close the descriptors from mkstemp
    import os

    with tempfile.TemporaryDirectory() as folder:
        def new_file() -> str:
            # mkstemp hands back an *open* OS-level descriptor together
            # with the path. Only the path is needed here, so close the
            # descriptor right away; otherwise it leaks and, on Windows,
            # can keep the temporary directory from being removed.
            handle, path = tempfile.mkstemp(dir=folder)
            os.close(handle)
            return path

        # test with single problem files
        # create target files
        sbml_file = new_file()
        condition_file = new_file()
        measurement_file = new_file()
        parameter_file = new_file()
        observable_file = new_file()
        yaml_file = new_file()
        visualization_file = new_file()
        create_problem_yaml(sbml_file, condition_file, measurement_file,
                            parameter_file, observable_file, yaml_file,
                            visualization_file)
        validate(yaml_file)

        # test for list of files
        # create additional target files
        sbml_file2 = new_file()
        condition_file2 = new_file()
        measurement_file2 = new_file()
        observable_file2 = new_file()
        yaml_file2 = new_file()

        sbml_files = [sbml_file, sbml_file2]
        condition_files = [condition_file, condition_file2]
        measurement_files = [measurement_file, measurement_file2]
        observable_files = [observable_file, observable_file2]
        create_problem_yaml(sbml_files, condition_files, measurement_files,
                            parameter_file, observable_files, yaml_file2)
        validate(yaml_file2)

0 comments on commit f9295c5

Please sign in to comment.