Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PySBModel for handling of PySB models #145

Merged
merged 26 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions petab/C.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,12 @@
#:
SBML_FILES = 'sbml_files'
#:
MODEL_FILES = 'model_files'
#:
MODEL_LOCATION = 'location'
#:
MODEL_LANGUAGE = 'language'
#:
CONDITION_FILES = 'condition_files'
#:
MEASUREMENT_FILES = 'measurement_files'
Expand All @@ -271,9 +277,19 @@
#:
VISUALIZATION_FILES = 'visualization_files'
#:
MAPPING_FILES = 'mapping_files'
#:
EXTENSIONS = 'extensions'


# MAPPING
#: Mapping table column holding the PEtab entity ID (used as the table index)
PETAB_ENTITY_ID = 'petabEntityId'
#: Mapping table column holding the ID of the model entity that is mapped to
MODEL_ENTITY_ID = 'modelEntityId'
dweindl marked this conversation as resolved.
Show resolved Hide resolved
#: Columns that must be present in a PEtab mapping table
MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID]

# MORE

#:
Expand Down
1 change: 1 addition & 0 deletions petab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@
from .yaml import * # noqa: F403, F401, E402
from .version import __version__ # noqa: F401, E402
from .format_version import __format_version__ # noqa: F401, E402
from .mapping import * # noqa: F403, F401, E402
104 changes: 84 additions & 20 deletions petab/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,16 @@ def assert_no_leading_trailing_whitespace(
def check_condition_df(
df: pd.DataFrame,
model: Optional[Model] = None,
observable_df: Optional[pd.DataFrame] = None
observable_df: Optional[pd.DataFrame] = None,
mapping_df: Optional[pd.DataFrame] = None,
) -> None:
"""Run sanity checks on PEtab condition table

Arguments:
df: PEtab condition DataFrame
model: Model for additional checking of parameter IDs
observable_df: PEtab observables DataFrame
mapping_df: PEtab mapping DataFrame

Raises:
AssertionError: in case of problems
Expand Down Expand Up @@ -123,7 +125,10 @@ def check_condition_df(
allowed_cols = set(model.get_valid_ids_for_condition_table())
if observable_df is not None:
allowed_cols |= set(petab.get_output_parameters(
model=model, observable_df=observable_df))
model=model, observable_df=observable_df, mapping_df=mapping_df
))
if mapping_df is not None:
allowed_cols |= set(mapping_df.index.values)
for column_name in df.columns:
if column_name != CONDITION_NAME \
and column_name not in allowed_cols:
Expand Down Expand Up @@ -185,20 +190,22 @@ def check_parameter_df(
model: Optional[Model] = None,
observable_df: Optional[pd.DataFrame] = None,
measurement_df: Optional[pd.DataFrame] = None,
condition_df: Optional[pd.DataFrame] = None) -> None:
condition_df: Optional[pd.DataFrame] = None,
mapping_df: Optional[pd.DataFrame] = None,
) -> None:
"""Run sanity checks on PEtab parameter table

Arguments:
df: PEtab condition DataFrame
df: PEtab parameter DataFrame
model: Model for additional checking of parameter IDs
observable_df: PEtab observable table for additional checks
measurement_df: PEtab measurement table for additional checks
condition_df: PEtab condition table for additional checks
mapping_df: PEtab mapping table for additional checks

Raises:
AssertionError: in case of problems
"""

_check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")

if df.index.name != PARAMETER_ID:
Expand Down Expand Up @@ -244,7 +251,8 @@ def check_parameter_df(
if model and measurement_df is not None \
and condition_df is not None:
assert_all_parameters_present_in_parameter_df(
df, model, observable_df, measurement_df, condition_df)
df, model, observable_df, measurement_df, condition_df, mapping_df
)


def check_observable_df(observable_df: pd.DataFrame) -> None:
Expand Down Expand Up @@ -305,7 +313,9 @@ def assert_all_parameters_present_in_parameter_df(
model: Model,
observable_df: pd.DataFrame,
measurement_df: pd.DataFrame,
condition_df: pd.DataFrame) -> None:
condition_df: pd.DataFrame,
mapping_df: pd.DataFrame = None,
) -> None:
"""Ensure all required parameters are contained in the parameter table
with no additional ones

Expand All @@ -315,26 +325,48 @@ def assert_all_parameters_present_in_parameter_df(
observable_df: PEtab observable table
measurement_df: PEtab measurement table
condition_df: PEtab condition table
mapping_df: PEtab mapping table for additional checks

Raises:
AssertionError: in case of problems
"""

required = parameters.get_required_parameters_for_parameter_table(
model=model, condition_df=condition_df,
observable_df=observable_df, measurement_df=measurement_df)
observable_df=observable_df, measurement_df=measurement_df,
mapping_df=mapping_df
)

allowed = parameters.get_valid_parameters_for_parameter_table(
model=model, condition_df=condition_df,
observable_df=observable_df, measurement_df=measurement_df)
observable_df=observable_df, measurement_df=measurement_df,
mapping_df=mapping_df,
)

actual = set(parameter_df.index)

missing = required - actual
extraneous = actual - allowed

# missing parameters might be present under a different name based on
# the mapping table
if missing and mapping_df is not None:
model_to_petab_mapping = {}
for map_from, map_to in zip(mapping_df.index.values,
mapping_df[MODEL_ENTITY_ID]):
if map_to in model_to_petab_mapping:
model_to_petab_mapping[map_to].append(map_from)
else:
model_to_petab_mapping[map_to] = [map_from]
missing = {
missing_id
for missing_id in missing
if missing_id not in model_to_petab_mapping
or all(mapping_parameter not in actual
for mapping_parameter in model_to_petab_mapping[missing_id])
}

if missing:
raise AssertionError('Missing parameter(s) in parameter table: '
raise AssertionError('Missing parameter(s) in the model or the '
'parameters table: '
+ str(missing))

if extraneous:
Expand Down Expand Up @@ -794,8 +826,12 @@ def lint_problem(problem: 'petab.Problem') -> bool:
if problem.condition_df is not None:
logger.info("Checking condition table...")
try:
check_condition_df(problem.condition_df, problem.model,
problem.observable_df)
check_condition_df(
problem.condition_df,
model=problem.model,
observable_df=problem.observable_df,
mapping_df=problem.mapping_df
)
except AssertionError as e:
logger.error(e)
errors_occurred = True
Expand All @@ -821,9 +857,11 @@ def lint_problem(problem: 'petab.Problem') -> bool:
if problem.parameter_df is not None:
logger.info("Checking parameter table...")
try:
check_parameter_df(problem.parameter_df, problem.model,
problem.observable_df,
problem.measurement_df, problem.condition_df)
check_parameter_df(
problem.parameter_df, problem.model, problem.observable_df,
problem.measurement_df, problem.condition_df,
problem.mapping_df
)
except AssertionError as e:
logger.error(e)
errors_occurred = True
Expand All @@ -836,7 +874,8 @@ def lint_problem(problem: 'petab.Problem') -> bool:
assert_model_parameters_in_condition_or_parameter_table(
problem.model,
problem.condition_df,
problem.parameter_df
problem.parameter_df,
problem.mapping_df,
)
except AssertionError as e:
logger.error(e)
Expand Down Expand Up @@ -865,7 +904,11 @@ def lint_problem(problem: 'petab.Problem') -> bool:
def assert_model_parameters_in_condition_or_parameter_table(
model: Model,
condition_df: pd.DataFrame,
parameter_df: pd.DataFrame) -> None:
parameter_df: pd.DataFrame,
mapping_df: pd.DataFrame = None,
observable_df: pd.DataFrame = None,
measurement_df: pd.DataFrame = None,
) -> None:
"""Model parameters that are rule targets must not be present in the
parameter table. Other parameters must only be present in either in
parameter table or condition table columns. Check that.
Expand All @@ -874,13 +917,34 @@ def assert_model_parameters_in_condition_or_parameter_table(
parameter_df: PEtab parameter DataFrame
model: PEtab model
condition_df: PEtab condition table
mapping_df: PEtab mapping table
observable_df: PEtab observable table
measurement_df: PEtab measurement table

Raises:
AssertionError: in case of problems
"""
allowed_in_condition_cols = set(model.get_valid_ids_for_condition_table())
if mapping_df is not None:
allowed_in_condition_cols |= {
from_id
for from_id, to_id in zip(mapping_df.index.values,
mapping_df[MODEL_ENTITY_ID])
# mapping table entities mapping to already allowed parameters
if to_id in allowed_in_condition_cols
# mapping table entities mapping to species
or model.is_state_variable(to_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these state variables be provided by model.get_valid_ids_for_condition_table() instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. Needs to be changed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm. The problem here is that the list of allowed species could be very long (potentially infinite) for rule-based models. But it doesn't make sense to not include them in model.get_valid_ids_for_condition_table.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, no, it makes sense the way it is. For SBML, to_id in allowed_in_condition_cols will always do the job. For PySB, the species IDs themselves are not allowed in the condition table, because they don't match our PEtab identifier requirements. That's why we have the mapping table. For this case, we need the model.is_state_variable(to_id) part.

}

allowed_in_parameter_table = \
set(model.get_valid_parameters_for_parameter_table())
parameters.get_valid_parameters_for_parameter_table(
model=model,
condition_df=condition_df,
observable_df=observable_df,
measurement_df=measurement_df,
mapping_df=mapping_df,
)

entities_in_condition_table = set(condition_df.columns) - {CONDITION_NAME}
entities_in_parameter_table = set(parameter_df.index.values)

Expand Down
111 changes: 111 additions & 0 deletions petab/mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Functionality related to the PEtab entity mapping table"""
from pathlib import Path
from typing import Union, Optional
from .models import Model
import pandas as pd

from . import lint
from .C import * # noqa: F403

# Names exported by ``from .mapping import *``.
# NOTE(review): resolve_mapping is defined in this module but not listed
# here, so it is not picked up by star-imports -- confirm this is intended.
__all__ = [
'get_mapping_df',
'write_mapping_df',
'check_mapping_df',
]


def get_mapping_df(
        mapping_file: Union[None, str, Path, pd.DataFrame]
) -> Optional[pd.DataFrame]:
    """
    Read the provided mapping file into a ``pandas.DataFrame``.

    The returned table is indexed by the ``PETAB_ENTITY_ID`` column. A
    DataFrame passed as ``mapping_file`` is not modified; a new DataFrame
    is returned instead.

    Arguments:
        mapping_file:
            Name of the tab-separated file to read from, an already loaded
            ``pandas.DataFrame``, or ``None``.

    Returns:
        Mapping DataFrame indexed by ``PETAB_ENTITY_ID``, or ``None`` if
        ``mapping_file`` is ``None``.

    Raises:
        KeyError: if one of the columns listed in
            ``MAPPING_DF_REQUIRED_COLS`` is missing.
    """
    if mapping_file is None:
        return None

    if isinstance(mapping_file, (str, Path)):
        mapping_file = pd.read_csv(mapping_file, sep='\t',
                                   float_precision='round_trip')

    if not isinstance(mapping_file.index, pd.RangeIndex):
        # Move a pre-existing index (e.g. of an already indexed mapping
        # table) back into the columns. Deliberately not done in place, so
        # that the caller's DataFrame is left untouched.
        mapping_file = mapping_file.reset_index()

    for col in MAPPING_DF_REQUIRED_COLS:
        if col not in mapping_file.columns:
            # Report the column that is actually missing
            raise KeyError(
                f"Mapping table missing mandatory field {col}.")

        lint.assert_no_leading_trailing_whitespace(
            mapping_file[col].values, col)

    # Returns a new DataFrame rather than re-indexing the input in place
    return mapping_file.set_index([PETAB_ENTITY_ID])


def write_mapping_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
    """Store a PEtab mapping table as a tab-separated file.

    Arguments:
        df: PEtab mapping table
        filename: Path of the file to write to
    """
    # Pass the table through get_mapping_df first, then write it out
    # including its index.
    get_mapping_df(df).to_csv(filename, sep='\t', index=True)


def check_mapping_df(
        df: pd.DataFrame,
        model: Optional[Model] = None,
) -> None:
    """Validate a PEtab mapping table.

    Arguments:
        df: PEtab mapping DataFrame, expected to be indexed by
            ``PETAB_ENTITY_ID``
        model: Optional model; if provided, every model entity ID in the
            table must be known to it

    Raises:
        AssertionError: in case of problems
    """
    # The first entry of MAPPING_DF_REQUIRED_COLS is the index column and
    # is verified separately below, so only the remaining ones are passed on.
    non_index_columns = MAPPING_DF_REQUIRED_COLS[1:]
    lint._check_df(df, non_index_columns, "mapping")

    index_name = df.index.name
    if index_name != PETAB_ENTITY_ID:
        raise AssertionError(
            f"Mapping table has wrong index {index_name}. "
            f"Expected {PETAB_ENTITY_ID}.")

    lint.check_ids(df.index.values, kind=PETAB_ENTITY_ID)

    if not model:
        # No model available: nothing further to cross-check
        return

    for mapped_id in df[MODEL_ENTITY_ID]:
        if model.has_entity_with_id(mapped_id):
            continue
        raise AssertionError(
            "Mapping table maps to unknown "
            f"model entity ID {mapped_id}."
        )


def resolve_mapping(mapping_df: Optional[pd.DataFrame],
                    element: str) -> str:
    """Translate an element ID via the mapping table, if possible.

    Arguments:
        mapping_df: Mapping table, or ``None`` if there is none
        element: Element to resolve

    Returns:
        The ``MODEL_ENTITY_ID`` entry for ``element`` if ``element`` is in
        the index of the mapping table, otherwise ``element`` unchanged.
    """
    has_mapping = mapping_df is not None and element in mapping_df.index
    if has_mapping:
        return mapping_df.loc[element, MODEL_ENTITY_ID]
    # No mapping table or no entry for this element
    return element
6 changes: 5 additions & 1 deletion petab/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Identifiers of the individual model types
MODEL_TYPE_SBML = 'sbml'
MODEL_TYPE_PYSB = 'pysb'

# All model type identifiers known to this package. Assigned once; an
# earlier assignment containing only MODEL_TYPE_SBML was immediately
# overwritten by this one and has been dropped.
known_model_types = {
    MODEL_TYPE_SBML,
    MODEL_TYPE_PYSB,
}
Comment on lines 1 to +7
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use ModelType.SBML/PYSB enum

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would keep that for later.


from .model import Model # noqa F401
Loading