Add validation for visualization files (#184)

dweindl · web-flow · commit 7231ebaa14f7 · 2022-12-08T13:47:43.000+01:00
Some basic validation of visualization files. Closes #8, closes #1.
diff --git a/petab/C.py b/petab/C.py
@@ -234,6 +234,12 @@
 #: Supported plot types
 PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT]
 
+#: Supported xScales
+X_SCALES = [LIN, LOG, LOG10]
+
+#: Supported yScales
+Y_SCALES = [LIN, LOG, LOG10]
+
 
 #:
 MEAN_AND_SD = 'MeanAndSD'
diff --git a/petab/lint.py b/petab/lint.py
@@ -842,6 +842,13 @@ def lint_problem(problem: 'petab.Problem') -> bool:
             logger.error(e)
             errors_occurred = True
 
+    if problem.visualization_df is not None:
+        logger.info("Checking visualization table...")
+        from petab.visualize.lint import validate_visualization_df
+        errors_occurred |= validate_visualization_df(problem)
+    else:
+        logger.warning("Visualization table not available. Skipping.")
+
     if errors_occurred:
         logger.error('Not OK')
     elif problem.measurement_df is None or problem.condition_df is None \
diff --git a/petab/petablint.py b/petab/petablint.py
@@ -50,6 +50,9 @@ def parse_cli_args():
                         help='Conditions table')
     parser.add_argument('-p', '--parameters', dest='parameter_file_name',
                         help='Parameter table')
+    parser.add_argument('--vis', '--visualizations',
+                        dest='visualization_file_name',
+                        help='Visualization table')
 
     group = parser.add_mutually_exclusive_group()
     group.add_argument('-y', '--yaml', dest='yaml_file_name',
@@ -109,14 +112,18 @@ def main():
             logger.debug(f'\tMeasurement table: {args.measurement_file_name}')
         if args.parameter_file_name:
             logger.debug(f'\tParameter table: {args.parameter_file_name}')
+        if args.visualization_file_name:
+            logger.debug('\tVisualization table: '
+                         f'{args.visualization_file_name}')
 
         try:
             problem = petab.Problem.from_files(
                 sbml_file=args.sbml_file_name,
                 condition_file=args.condition_file_name,
                 measurement_file=args.measurement_file_name,
                 parameter_file=args.parameter_file_name,
-                observable_files=args.observable_file_name
+                observable_files=args.observable_file_name,
+                visualization_files=args.visualization_file_name,
             )
         except FileNotFoundError as e:
             logger.error(e)
diff --git a/petab/visualize/lint.py b/petab/visualize/lint.py
@@ -0,0 +1,130 @@
+"""Validation of PEtab visualization files"""
+import logging
+
+import pandas as pd
+
+from .. import C, Problem
+from ..C import VISUALIZATION_DF_REQUIRED_COLS
+
+
+logger = logging.getLogger(__name__)
+
+
+def validate_visualization_df(
+        problem: Problem
+) -> bool:
+    """Validate visualization table
+
+    Arguments:
+        problem: The PEtab problem containing a visualization table
+
+    Returns:
+        ``True`` if errors occurred, ``False`` otherwise
+    """
+    vis_df = problem.visualization_df
+    if vis_df is None or vis_df.empty:
+        return False
+
+    errors = False
+
+    if missing_req_cols := (set(VISUALIZATION_DF_REQUIRED_COLS)
+                            - set(vis_df.columns)):
+        logger.error(f"Missing required columns {missing_req_cols} "
+                     "in visualization table.")
+        errors = True
+
+    # Set all unspecified optional values to their defaults to simplify
+    # validation
+    vis_df = vis_df.copy()
+    _apply_defaults(vis_df)
+
+    if unknown_types := (set(vis_df[C.PLOT_TYPE_SIMULATION].unique())
+                         - set(C.PLOT_TYPES_SIMULATION)):
+        logger.error(f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. "
+                     f"Must be one of {C.PLOT_TYPES_SIMULATION}")
+        errors = True
+
+    if unknown_types := (set(vis_df[C.PLOT_TYPE_DATA].unique())
+                         - set(C.PLOT_TYPES_DATA)):
+        logger.error(f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. "
+                     f"Must be one of {C.PLOT_TYPES_DATA}")
+        errors = True
+
+    if unknown_scale := (set(vis_df[C.X_SCALE].unique())
+                         - set(C.X_SCALES)):
+        logger.error(f"Unknown {C.X_SCALE}: {unknown_scale}. "
+                     f"Must be one of {C.X_SCALES}")
+        errors = True
+
+    if any(
+            (vis_df[C.X_SCALE] == 'order')
+            & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT)
+    ):
+        logger.error(f"{C.X_SCALE}=order is only allowed with "
+                     f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}.")
+        errors = True
+
+    if unknown_scale := (set(vis_df[C.Y_SCALE].unique())
+                         - set(C.Y_SCALES)):
+        logger.error(f"Unknown {C.Y_SCALE}: {unknown_scale}. "
+                     f"Must be one of {C.Y_SCALES}")
+        errors = True
+
+    if problem.condition_df is not None:
+        # check for ambiguous values
+        reserved_names = {C.TIME, "condition"}
+        for reserved_name in reserved_names:
+            if reserved_name in problem.condition_df \
+                    and reserved_name in vis_df[C.X_VALUES]:
+                logger.error(f"Ambiguous value for `{C.X_VALUES}`: "
+                             f"`{reserved_name}` has a special meaning as "
+                             f"`{C.X_VALUES}`, but there exists also a model "
+                             "entity with that name.")
+                errors = True
+
+        # check xValues exist in condition table
+        for xvalue in set(vis_df[C.X_VALUES].unique()) - reserved_names:
+            if xvalue not in problem.condition_df:
+                logger.error(f"{C.X_VALUES} was set to `{xvalue}`, but no "
+                             "such column exists in the conditions table.")
+                errors = True
+
+        if problem.observable_df is not None:
+            # yValues must be an observable
+            for yvalue in vis_df[C.Y_VALUES].unique():
+                if yvalue not in problem.observable_df.index:
+                    logger.error(
+                        f"{C.Y_VALUES} was set to `{yvalue}`, but no such "
+                        "observable exists in the observables table."
+                    )
+                    errors = True
+
+    return errors
+
+
+def _apply_defaults(vis_df: pd.DataFrame):
+    """
+    Set default values.
+
+    Adds default values to the given visualization table where no value was
+    specified.
+    """
+    def set_default(column: str, value):
+        if column not in vis_df:
+            vis_df[column] = value
+        elif value is not None:
+            vis_df[column].fillna(value)
+
+    set_default(C.PLOT_NAME, "")
+    set_default(C.PLOT_TYPE_SIMULATION, C.LINE_PLOT)
+    set_default(C.PLOT_TYPE_DATA, C.MEAN_AND_SD)
+    set_default(C.DATASET_ID, None)
+    set_default(C.X_VALUES, C.TIME)
+    set_default(C.X_OFFSET, 0)
+    set_default(C.X_LABEL, vis_df[C.X_VALUES])
+    set_default(C.X_SCALE, C.LIN)
+    set_default(C.Y_VALUES, None)
+    set_default(C.Y_OFFSET, 0)
+    set_default(C.Y_LABEL, vis_df[C.Y_VALUES])
+    set_default(C.Y_SCALE, C.LIN)
+    set_default(C.LEGEND_ENTRY, vis_df[C.DATASET_ID])
diff --git a/tests/test_visualization.py b/tests/test_visualization.py
@@ -6,9 +6,11 @@
 import matplotlib.pyplot as plt
 import pytest
 
+import petab
 from petab.C import *
 from petab.visualize import plot_with_vis_spec, plot_without_vis_spec
 from petab.visualize.plotting import VisSpecParser
+from petab.visualize.lint import validate_visualization_df
 
 # Avoid errors when plotting without X server
 plt.switch_backend('agg')
@@ -135,6 +137,12 @@ def test_visualization_with_vis_and_sim(data_file_Isensee,
                                         condition_file_Isensee,
                                         vis_spec_file_Isensee,
                                         simulation_file_Isensee):
+    validate_visualization_df(
+        petab.Problem(
+            condition_df=petab.get_condition_df(condition_file_Isensee),
+            visualization_df=petab.get_visualization_df(vis_spec_file_Isensee),
+        )
+    )
     plot_with_vis_spec(vis_spec_file_Isensee, condition_file_Isensee,
                        data_file_Isensee, simulation_file_Isensee)
 
@@ -366,3 +374,25 @@ def test_cli():
             "-o", temp_dir
         ]
         subprocess.run(args, check=True)
+
+
+@pytest.mark.parametrize(
+    "vis_file",
+    (
+            "vis_spec_file_Isensee",
+            "vis_spec_file_Isensee_replicates",
+            "vis_spec_file_Isensee_scatterplot",
+            "visu_file_Fujita_wo_dsid_wo_yvalues",
+            "visu_file_Fujita_all_obs_with_diff_settings",
+            "visu_file_Fujita_empty",
+            "visu_file_Fujita_minimal",
+            "visu_file_Fujita_replicates",
+            "visu_file_Fujita_small",
+    )
+)
+def test_validate(vis_file, request):
+    """Check that all test files pass validation."""
+    vis_file = request.getfixturevalue(vis_file)
+    assert False is validate_visualization_df(
+        petab.Problem(visualization_df=petab.get_visualization_df(vis_file))
+    )