Skip to content

Commit

Permalink
Add validation for visualization files (#184)
Browse files Browse the repository at this point in the history
Some basic validation of visualization files. Closes #8, closes #1.
  • Loading branch information
dweindl authored Dec 8, 2022
1 parent a0817db commit 7231eba
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 1 deletion.
6 changes: 6 additions & 0 deletions petab/C.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,12 @@
#: Supported plot types
PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT]

# NOTE(review): ``order`` is not listed here, although the visualization
# linter has a dedicated check for ``xScale == 'order'`` in combination with
# line plots -- confirm whether it should be part of this list.
#: Supported xScales (the visualization linter checks the ``xScale`` column
#: of the visualization table against this list)
X_SCALES = [LIN, LOG, LOG10]

#: Supported yScales (the visualization linter checks the ``yScale`` column
#: of the visualization table against this list)
Y_SCALES = [LIN, LOG, LOG10]


#:
MEAN_AND_SD = 'MeanAndSD'
Expand Down
7 changes: 7 additions & 0 deletions petab/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,13 @@ def lint_problem(problem: 'petab.Problem') -> bool:
logger.error(e)
errors_occurred = True

if problem.visualization_df is not None:
logger.info("Checking visualization table...")
from petab.visualize.lint import validate_visualization_df
errors_occurred |= validate_visualization_df(problem)
else:
logger.warning("Visualization table not available. Skipping.")

if errors_occurred:
logger.error('Not OK')
elif problem.measurement_df is None or problem.condition_df is None \
Expand Down
9 changes: 8 additions & 1 deletion petab/petablint.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ def parse_cli_args():
help='Conditions table')
parser.add_argument('-p', '--parameters', dest='parameter_file_name',
help='Parameter table')
parser.add_argument('--vis', '--visualizations',
dest='visualization_file_name',
help='Visualization table')

group = parser.add_mutually_exclusive_group()
group.add_argument('-y', '--yaml', dest='yaml_file_name',
Expand Down Expand Up @@ -109,14 +112,18 @@ def main():
logger.debug(f'\tMeasurement table: {args.measurement_file_name}')
if args.parameter_file_name:
logger.debug(f'\tParameter table: {args.parameter_file_name}')
if args.visualization_file_name:
logger.debug('\tVisualization table: '
f'{args.visualization_file_name}')

try:
problem = petab.Problem.from_files(
sbml_file=args.sbml_file_name,
condition_file=args.condition_file_name,
measurement_file=args.measurement_file_name,
parameter_file=args.parameter_file_name,
observable_files=args.observable_file_name
observable_files=args.observable_file_name,
visualization_files=args.visualization_file_name,
)
except FileNotFoundError as e:
logger.error(e)
Expand Down
130 changes: 130 additions & 0 deletions petab/visualize/lint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""Validation of PEtab visualization files"""
import logging

import pandas as pd

from .. import C, Problem
from ..C import VISUALIZATION_DF_REQUIRED_COLS


logger = logging.getLogger(__name__)


def validate_visualization_df(
        problem: Problem
) -> bool:
    """Validate visualization table

    Checks the visualization table of the given problem for missing required
    columns, unknown plot types and axis scales, ``xValues`` that are
    ambiguous or missing from the condition table, and ``yValues`` that are
    not listed in the observable table. Every problem found is reported via
    ``logger.error``; no exception is raised for invalid tables.

    The checks are performed on a copy of the table in which unspecified
    optional columns have been added; the table stored in ``problem`` is not
    modified.

    Arguments:
        problem: The PEtab problem containing a visualization table

    Returns:
        ``True`` if errors occurred, ``False`` otherwise
    """
    vis_df = problem.visualization_df
    if vis_df is None or vis_df.empty:
        # Nothing to validate
        return False

    errors = False

    if missing_req_cols := (set(VISUALIZATION_DF_REQUIRED_COLS)
                            - set(vis_df.columns)):
        logger.error(f"Missing required columns {missing_req_cols} "
                     "in visualization table.")
        errors = True

    # Set all unspecified optional values to their defaults to simplify
    # validation
    vis_df = vis_df.copy()
    _apply_defaults(vis_df)

    if unknown_types := (set(vis_df[C.PLOT_TYPE_SIMULATION].unique())
                         - set(C.PLOT_TYPES_SIMULATION)):
        logger.error(f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. "
                     f"Must be one of {C.PLOT_TYPES_SIMULATION}")
        errors = True

    if unknown_types := (set(vis_df[C.PLOT_TYPE_DATA].unique())
                         - set(C.PLOT_TYPES_DATA)):
        logger.error(f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. "
                     f"Must be one of {C.PLOT_TYPES_DATA}")
        errors = True

    # `order` is permitted for line plots (verified by the dedicated check
    # below), so it must not be rejected as an unknown scale here.
    allowed_x_scales = list(C.X_SCALES)
    if 'order' not in allowed_x_scales:
        allowed_x_scales.append('order')

    if unknown_scale := (set(vis_df[C.X_SCALE].unique())
                         - set(allowed_x_scales)):
        logger.error(f"Unknown {C.X_SCALE}: {unknown_scale}. "
                     f"Must be one of {allowed_x_scales}")
        errors = True

    if any(
            (vis_df[C.X_SCALE] == 'order')
            & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT)
    ):
        logger.error(f"{C.X_SCALE}=order is only allowed with "
                     f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}.")
        errors = True

    if unknown_scale := (set(vis_df[C.Y_SCALE].unique())
                         - set(C.Y_SCALES)):
        logger.error(f"Unknown {C.Y_SCALE}: {unknown_scale}. "
                     f"Must be one of {C.Y_SCALES}")
        errors = True

    if problem.condition_df is not None:
        # Collect the *values* of the xValues column once. Note that
        # `x in series` would test the index labels of the Series, not its
        # values.
        x_values = set(vis_df[C.X_VALUES].unique())

        # check for ambiguous values
        reserved_names = {C.TIME, "condition"}
        # sorted() only makes the order of the log messages deterministic
        for reserved_name in sorted(reserved_names):
            # `name in DataFrame` tests the column labels
            if reserved_name in problem.condition_df \
                    and reserved_name in x_values:
                logger.error(f"Ambiguous value for `{C.X_VALUES}`: "
                             f"`{reserved_name}` has a special meaning as "
                             f"`{C.X_VALUES}`, but there exists also a model "
                             "entity with that name.")
                errors = True

        # check xValues exist in condition table
        for xvalue in x_values - reserved_names:
            if xvalue not in problem.condition_df:
                logger.error(f"{C.X_VALUES} was set to `{xvalue}`, but no "
                             "such column exists in the conditions table.")
                errors = True

    if problem.observable_df is not None:
        # yValues must be an observable
        # NOTE(review): unspecified yValues (None/NaN) are reported here as
        # well -- confirm that this is intended.
        for yvalue in vis_df[C.Y_VALUES].unique():
            if yvalue not in problem.observable_df.index:
                logger.error(
                    f"{C.Y_VALUES} was set to `{yvalue}`, but no such "
                    "observable exists in the observables table."
                )
                errors = True

    return errors


def _apply_defaults(vis_df: pd.DataFrame) -> None:
    """
    Set default values.

    Adds default values to the given visualization table where no value was
    specified. Missing columns are created, and missing entries in existing
    columns are filled. The table is modified in place.

    Arguments:
        vis_df: The visualization table to complete.
    """
    def set_default(column: str, value):
        """Create ``column`` with ``value`` or fill its missing entries."""
        if column not in vis_df:
            vis_df[column] = value
        elif value is not None:
            # `fillna` returns a new Series instead of modifying the column,
            # so the result has to be assigned back; otherwise the defaults
            # would be silently discarded.
            # `None` is skipped because there is nothing to fill in.
            vis_df[column] = vis_df[column].fillna(value)

    # The order matters: the label and legend defaults below are taken from
    # columns that were completed by earlier calls.
    set_default(C.PLOT_NAME, "")
    set_default(C.PLOT_TYPE_SIMULATION, C.LINE_PLOT)
    set_default(C.PLOT_TYPE_DATA, C.MEAN_AND_SD)
    set_default(C.DATASET_ID, None)
    set_default(C.X_VALUES, C.TIME)
    set_default(C.X_OFFSET, 0)
    set_default(C.X_LABEL, vis_df[C.X_VALUES])
    set_default(C.X_SCALE, C.LIN)
    set_default(C.Y_VALUES, None)
    set_default(C.Y_OFFSET, 0)
    set_default(C.Y_LABEL, vis_df[C.Y_VALUES])
    set_default(C.Y_SCALE, C.LIN)
    set_default(C.LEGEND_ENTRY, vis_df[C.DATASET_ID])
30 changes: 30 additions & 0 deletions tests/test_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import matplotlib.pyplot as plt
import pytest

import petab
from petab.C import *
from petab.visualize import plot_with_vis_spec, plot_without_vis_spec
from petab.visualize.plotting import VisSpecParser
from petab.visualize.lint import validate_visualization_df

# Avoid errors when plotting without X server
plt.switch_backend('agg')
Expand Down Expand Up @@ -135,6 +137,12 @@ def test_visualization_with_vis_and_sim(data_file_Isensee,
condition_file_Isensee,
vis_spec_file_Isensee,
simulation_file_Isensee):
validate_visualization_df(
petab.Problem(
condition_df=petab.get_condition_df(condition_file_Isensee),
visualization_df=petab.get_visualization_df(vis_spec_file_Isensee),
)
)
plot_with_vis_spec(vis_spec_file_Isensee, condition_file_Isensee,
data_file_Isensee, simulation_file_Isensee)

Expand Down Expand Up @@ -366,3 +374,25 @@ def test_cli():
"-o", temp_dir
]
subprocess.run(args, check=True)


@pytest.mark.parametrize(
    "vis_file",
    [
        "vis_spec_file_Isensee",
        "vis_spec_file_Isensee_replicates",
        "vis_spec_file_Isensee_scatterplot",
        "visu_file_Fujita_wo_dsid_wo_yvalues",
        "visu_file_Fujita_all_obs_with_diff_settings",
        "visu_file_Fujita_empty",
        "visu_file_Fujita_minimal",
        "visu_file_Fujita_replicates",
        "visu_file_Fujita_small",
    ],
)
def test_validate(vis_file, request):
    """Each listed visualization test file must validate without errors."""
    # `vis_file` is the name of a fixture; resolve it to the fixture's value
    vis_file_path = request.getfixturevalue(vis_file)
    visualization_df = petab.get_visualization_df(vis_file_path)
    problem = petab.Problem(visualization_df=visualization_df)

    # The validator returns `True` if errors occurred
    assert validate_visualization_df(problem) is False

0 comments on commit 7231eba

Please sign in to comment.