Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/modules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ API Reference
petab.sampling
petab.sbml
petab.simulate
petab.simplify
petab.visualize
petab.yaml
105 changes: 105 additions & 0 deletions petab/simplify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""Functionality for simplifying PEtab problems"""
from math import nan

import pandas as pd

import petab
from . import Problem
from .C import * # noqa: F403
from .lint import lint_problem

__all__ = [
"remove_nan_measurements",
"remove_unused_observables",
"remove_unused_conditions",
"simplify_problem",
"condition_parameters_to_parameter_table",
]


def remove_nan_measurements(problem: Problem):
    """Discard all measurement-table rows whose measurement value is NaN.

    The measurement table of ``problem`` is replaced by the filtered table,
    which gets a fresh default index (the previous index is dropped).

    :param problem: The problem whose measurement table is filtered in place.
    """
    has_value = problem.measurement_df[MEASUREMENT].notna()
    problem.measurement_df = \
        problem.measurement_df[has_value].reset_index(drop=True)


def remove_unused_observables(problem: Problem):
    """Restrict the observable table to observables that were measured.

    A row of the observable table is kept only if its index value occurs in
    the observable ID column of the measurement table.

    :param problem: The problem whose observable table is filtered in place.
    """
    observables = problem.observable_df
    used_ids = set(problem.measurement_df[OBSERVABLE_ID].unique())
    is_used = observables.index.isin(used_ids)
    problem.observable_df = observables[is_used]


def remove_unused_conditions(problem: Problem):
    """Restrict the condition table to conditions referenced by measurements.

    A condition counts as referenced if it appears in the simulation
    condition column of the measurement table or, when that column is
    present, in the preequilibration condition column.

    :param problem: The problem whose condition table is filtered in place.
    """
    measurements = problem.measurement_df
    used_ids = set(measurements[SIMULATION_CONDITION_ID].unique())
    # the preequilibration column is only consulted if the table has it
    if PREEQUILIBRATION_CONDITION_ID in measurements:
        used_ids.update(measurements[PREEQUILIBRATION_CONDITION_ID].unique())

    is_used = problem.condition_df.index.isin(used_ids)
    problem.condition_df = problem.condition_df[is_used]


def simplify_problem(problem: Problem):
    """Simplify a PEtab problem in place.

    Runs, in this order: :func:`remove_unused_observables`,
    :func:`remove_unused_conditions` and
    :func:`condition_parameters_to_parameter_table`.
    NaN measurements are not removed here; call
    :func:`remove_nan_measurements` separately if that is wanted.

    :param problem: The problem to simplify; its tables are modified.
    :raises ValueError: If ``lint_problem`` reports a truthy result for the
        supplied problem.
    :raises AssertionError: If ``lint_problem`` reports a truthy result for
        the simplified problem.
    """
    if lint_problem(problem):
        raise ValueError("Invalid PEtab problem supplied.")

    simplifications = (
        remove_unused_observables,
        remove_unused_conditions,
        condition_parameters_to_parameter_table,
    )
    for simplify in simplifications:
        simplify(problem)

    # the simplifications themselves must not invalidate the problem
    if lint_problem(problem):
        raise AssertionError("Invalid PEtab problem generated.")


def condition_parameters_to_parameter_table(problem: Problem):
    """Move parameters from the condition table to the parameters table, if
    the same parameter value is used for all conditions.

    A condition-table column is moved if it is not the condition name
    column, does not refer to a state variable of the model, holds a single
    unique value across all conditions, and that value is numeric (i.e. not
    a parametric override given as a string).

    Each moved parameter is appended to the parameter table as a
    non-estimated parameter on linear scale, with NaN bounds and the shared
    value as nominal value, and its column is dropped from the condition
    table. If the problem has no parameter table yet, one is created.

    Nothing is changed if the condition table is missing or empty, if the
    problem has no model, or if no column qualifies.

    :param problem: The problem to modify in place.
    """
    if problem.condition_df is None or problem.condition_df.empty \
            or problem.model is None:
        return

    replacements = {}
    for parameter_id in problem.condition_df:
        if parameter_id == CONDITION_NAME:
            continue

        if problem.model.is_state_variable(parameter_id):
            # initial states can't go into the parameters table
            continue

        series = problem.condition_df[parameter_id]
        # The condition table's index holds condition IDs, not integer
        # positions, so the first entry must be fetched positionally.
        # `series[0]` relied on a deprecated label-to-position fallback.
        first_value = series.iloc[0]
        value = petab.to_float_if_float(first_value)

        # same value for all conditions and no parametric overrides (str)?
        if isinstance(value, float) and len(series.unique()) == 1:
            replacements[parameter_id] = first_value

    if not replacements:
        return

    rows = [
        {
            PARAMETER_ID: parameter_id,
            PARAMETER_SCALE: LIN,
            LOWER_BOUND: nan,
            UPPER_BOUND: nan,
            NOMINAL_VALUE: value,
            ESTIMATE: 0
        }
        for parameter_id, value in replacements.items()
    ]
    rows = pd.DataFrame(rows)
    rows.set_index(PARAMETER_ID, inplace=True)

    if problem.parameter_df is None:
        problem.parameter_df = rows
    else:
        problem.parameter_df = pd.concat([problem.parameter_df, rows])

    problem.condition_df = \
        problem.condition_df.drop(columns=list(replacements))
151 changes: 151 additions & 0 deletions tests/test_simplify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Tests for petab.simplify.*"""
from math import nan

import pandas as pd
import pytest
import simplesbml
from pandas.testing import *

from petab import Problem
from petab.C import * # noqa: F403
from petab.models.sbml_model import SbmlModel
from petab.simplify import *


@pytest.fixture
def problem() -> Problem:
    """Provide a small PEtab problem with redundant table entries.

    The problem has three observables (``obs_unused`` has no measurement),
    three conditions (``condition_unused`` has no measurement) and a
    condition-table column ``same_value_for_all_conditions`` that holds the
    same value for every condition. No parameter table is supplied.
    """
    ss_model = simplesbml.SbmlModel()
    for parameter_id in ("some_parameter", "same_value_for_all_conditions"):
        ss_model.addParameter(parameter_id, val=1.0)

    # Review note: `get_observable_df` could be used here instead; the
    # author found building the table by hand more explicit (undecided).
    observable_df = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_unused", "obs_used_2"],
            OBSERVABLE_FORMULA: [1.0, 2.0, 3.0],
            NOISE_FORMULA: [1.0, 2.0, 3.0],
        }
    ).set_index(OBSERVABLE_ID)

    # Review note: `get_condition_df` could be used here instead.
    conditions_df = pd.DataFrame(
        {
            CONDITION_ID: ["condition_used_1",
                           "condition_unused",
                           "condition_used_2"],
            "some_parameter": [1.0, 2.0, 3.0],
            "same_value_for_all_conditions": [4.0] * 3,
        }
    ).set_index(CONDITION_ID)

    measurement_df = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_used_2", "obs_used"],
            MEASUREMENT: [1.0, 1.5, 2.0],
            SIMULATION_CONDITION_ID: ["condition_used_1",
                                      "condition_used_1",
                                      "condition_used_2"],
            TIME: [1.0] * 3,
        }
    )

    sbml_model = SbmlModel(sbml_model=ss_model.getModel())
    yield Problem(
        model=sbml_model,
        condition_df=conditions_df,
        observable_df=observable_df,
        measurement_df=measurement_df,
    )


def test_remove_nan_measurements(problem):
    """Rows with NaN measurements are dropped and the index is renumbered."""
    # replace the fixture's measurements by a table containing one NaN row
    problem.measurement_df = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_with_nan", "obs_used"],
            MEASUREMENT: [1.0, nan, 2.0],
            SIMULATION_CONDITION_ID: ["condition_used_1",
                                      "condition_used_1",
                                      "condition_used_2"],
            TIME: [1.0] * 3,
        }
    )
    without_nan = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used"] * 2,
            MEASUREMENT: [1.0, 2.0],
            SIMULATION_CONDITION_ID:
                ["condition_used_1", "condition_used_2"],
            TIME: [1.0] * 2,
        }
    )
    # sanity check: the input must actually differ from the expectation
    assert not problem.measurement_df.equals(without_nan)

    remove_nan_measurements(problem)

    assert_frame_equal(problem.measurement_df, without_nan)


def test_remove_unused_observables(problem):
    """Only observables that occur in the measurement table are kept."""
    only_measured = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_used_2"],
            OBSERVABLE_FORMULA: [1.0, 3.0],
            NOISE_FORMULA: [1.0, 3.0],
        }
    ).set_index(OBSERVABLE_ID)
    # sanity check: the fixture must contain something to remove
    assert not problem.observable_df.equals(only_measured)

    remove_unused_observables(problem)

    assert_frame_equal(problem.observable_df, only_measured)


def test_remove_unused_conditions(problem):
    """Only conditions that occur in the measurement table are kept."""
    only_measured = pd.DataFrame(
        {
            CONDITION_ID: ["condition_used_1",
                           "condition_used_2"],
            "some_parameter": [1.0, 3.0],
            "same_value_for_all_conditions": [4.0] * 2,
        }
    ).set_index(CONDITION_ID)
    # sanity check: the fixture must contain something to remove
    assert not problem.condition_df.equals(only_measured)

    remove_unused_conditions(problem)

    assert_frame_equal(problem.condition_df, only_measured)


def test_condition_parameters_to_parameter_table(problem):
    """A column that is constant over all conditions moves to the parameter
    table, while a varying column stays in the condition table."""
    remaining_conditions = pd.DataFrame(
        {
            CONDITION_ID: ["condition_used_1",
                           "condition_unused",
                           "condition_used_2"],
            "some_parameter": [1.0, 2.0, 3.0],
        }
    ).set_index(CONDITION_ID)
    new_parameters = pd.DataFrame(
        {
            PARAMETER_ID: ["same_value_for_all_conditions"],
            PARAMETER_SCALE: [LIN],
            LOWER_BOUND: [nan],
            UPPER_BOUND: [nan],
            NOMINAL_VALUE: [4.0],
            ESTIMATE: [0],
        }
    ).set_index(PARAMETER_ID)

    # preconditions: there is a column to move and no parameter table yet
    assert not problem.condition_df.equals(remaining_conditions)
    assert problem.parameter_df is None

    condition_parameters_to_parameter_table(problem)

    assert_frame_equal(problem.condition_df, remaining_conditions)
    assert_frame_equal(problem.parameter_df, new_parameters)


def test_simplify_problem(problem):
    """Simplifying the fixture problem completes without raising.

    No explicit assertions are needed: ``simplify_problem`` itself raises
    if the input or the simplified problem fails linting.
    """
    simplify_problem(problem)