Merge branch 'develop' into pl_visualization

PEtab-dev · Mar 23, 2020 · f9295c5 · f9295c5
2 parents c7bb05e + e713f8b
commit f9295c5
Show file tree

Hide file tree

Showing 6 changed files with 227 additions and 24 deletions.
diff --git a/petab/core.py b/petab/core.py
@@ -116,33 +116,61 @@ def flatten_timepoint_specific_output_overrides(
         petab_problem:
             PEtab problem to work on
     """
+    measurement_df = petab_problem.measurement_df
+
+    # remember if columns exist
+    has_obs_par = OBSERVABLE_PARAMETERS in measurement_df
+    has_noise_par = NOISE_PARAMETERS in measurement_df
+    has_preeq = PREEQUILIBRATION_CONDITION_ID in measurement_df
+
+    # fill in optional columns to avoid special cases later
+    if not has_obs_par \
+            or np.all(measurement_df[OBSERVABLE_PARAMETERS].isnull()):
+        measurement_df[OBSERVABLE_PARAMETERS] = ''
+    if not has_noise_par \
+            or np.all(measurement_df[NOISE_PARAMETERS].isnull()):
+        measurement_df[NOISE_PARAMETERS] = ''
+    if not has_preeq \
+            or np.all(measurement_df[PREEQUILIBRATION_CONDITION_ID].isnull()):
+        measurement_df[PREEQUILIBRATION_CONDITION_ID] = ''
+    # convert to str row by row
+    for irow, row in measurement_df.iterrows():
+        if is_empty(row[OBSERVABLE_PARAMETERS]):
+            measurement_df.at[irow, OBSERVABLE_PARAMETERS] = ''
+        if is_empty(row[NOISE_PARAMETERS]):
+            measurement_df.at[irow, NOISE_PARAMETERS] = ''
+        if is_empty(row[PREEQUILIBRATION_CONDITION_ID]):
+            measurement_df.at[irow, PREEQUILIBRATION_CONDITION_ID] = ''
 
     # Create empty df -> to be filled with replicate-specific observables
     df_new = pd.DataFrame()
 
     # Get observableId, preequilibrationConditionId
     # and simulationConditionId columns in measurement df
-    df = petab_problem.measurement_df[
-        [OBSERVABLE_ID,
-         PREEQUILIBRATION_CONDITION_ID,
+    cols = get_notnull_columns(
+        measurement_df,
+        [OBSERVABLE_ID, PREEQUILIBRATION_CONDITION_ID,
          SIMULATION_CONDITION_ID]
-    ]
+    )
+    df = measurement_df[cols]
+
     # Get unique combinations of observableId, preequilibrationConditionId
     # and simulationConditionId
     df_unique_values = df.drop_duplicates()
 
     # replaced observables: new ID => old ID
     replacements = dict()
+
     # Loop over each unique combination
-    for nrow in range(len(df_unique_values.index)):
-        df = petab_problem.measurement_df.loc[
-            (petab_problem.measurement_df[OBSERVABLE_ID] ==
-             df_unique_values.loc[nrow, OBSERVABLE_ID])
-            & (petab_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID] <=
-               df_unique_values.loc[nrow, PREEQUILIBRATION_CONDITION_ID])
-            & (petab_problem.measurement_df[SIMULATION_CONDITION_ID] <=
-               df_unique_values.loc[nrow, SIMULATION_CONDITION_ID])
-        ]
+    for irow in df_unique_values.index:
+        df = measurement_df.loc[
+            (measurement_df[OBSERVABLE_ID] ==
+             df_unique_values.loc[irow, OBSERVABLE_ID])
+            & (measurement_df[PREEQUILIBRATION_CONDITION_ID] ==
+               df_unique_values.loc[irow, PREEQUILIBRATION_CONDITION_ID])
+            & (measurement_df[SIMULATION_CONDITION_ID] ==
+               df_unique_values.loc[irow, SIMULATION_CONDITION_ID])
+            ]
 
         # Get list of unique observable parameters
         unique_sc = df[OBSERVABLE_PARAMETERS].unique()
@@ -156,8 +184,8 @@ def flatten_timepoint_specific_output_overrides(
                 # and unique_sc[j] in their corresponding column
                 # (full-string matches are denoted by zero)
                 idxs = (
-                    df[NOISE_PARAMETERS].str.find(cur_noise) +
-                    df[OBSERVABLE_PARAMETERS].str.find(cur_sc)
+                    df[NOISE_PARAMETERS].astype(str).str.find(cur_noise) +
+                    df[OBSERVABLE_PARAMETERS].astype(str).str.find(cur_sc)
                 )
                 tmp_ = df.loc[idxs == 0, OBSERVABLE_ID]
                 # Create replicate-specific observable name
@@ -168,7 +196,7 @@ def flatten_timepoint_specific_output_overrides(
                 while (df[OBSERVABLE_ID].str.find(
                         tmp.to_string()
                 ) == 0).any():
-                    tmp = tmp_ + counter*"_" + str(i_noise + i_sc + 1)
+                    tmp = tmp_ + counter * "_" + str(i_noise + i_sc + 1)
                     counter += 1
                 if not tmp_.empty and not tmp_.empty:
                     replacements[tmp.values[0]] = tmp_.values[0]
@@ -179,21 +207,32 @@ def flatten_timepoint_specific_output_overrides(
                 # (for continuation of the loop)
                 df.loc[idxs == 0, OBSERVABLE_ID] = tmp
 
+    # remove previously non-existent columns again
+    if not has_obs_par:
+        df_new.drop(columns=OBSERVABLE_PARAMETERS, inplace=True)
+    if not has_noise_par:
+        df_new.drop(columns=NOISE_PARAMETERS, inplace=True)
+    if not has_preeq:
+        df_new.drop(columns=PREEQUILIBRATION_CONDITION_ID, inplace=True)
+
     # Update/Redefine measurement df with replicate-specific observables
     petab_problem.measurement_df = df_new
 
+    observable_df = petab_problem.observable_df
+
     # Update observables table
     for replacement, replacee in replacements.items():
-        new_obs = petab_problem.observable_df.loc[replacee].copy()
+        new_obs = observable_df.loc[replacee].copy()
         new_obs.name = replacement
         new_obs[OBSERVABLE_FORMULA] = new_obs[OBSERVABLE_FORMULA].replace(
             replacee, replacement)
         new_obs[NOISE_FORMULA] = new_obs[NOISE_FORMULA].replace(
             replacee, replacement)
-        petab_problem.observable_df = petab_problem.observable_df.append(
+        observable_df = observable_df.append(
             new_obs
         )
 
+    petab_problem.observable_df = observable_df
     petab_problem.observable_df.drop(index=set(replacements.values()),
                                      inplace=True)
 

diff --git a/petab/petab_schema.yaml b/petab/petab_schema.yaml
@@ -59,6 +59,10 @@ properties:
           type: array
           description: List of PEtab observable files.
 
+          items:
+            type: string
+            description: PEtab observable file name or URL.
+
         visualization_files:
           type: array
           description: List of PEtab visualization files.

diff --git a/petab/problem.py b/petab/problem.py
@@ -265,7 +265,8 @@ def to_files(self,
                  measurement_file: Optional[str] = None,
                  parameter_file: Optional[str] = None,
                  visualization_file: Optional[str] = None,
-                 observable_file: Optional[str] = None) -> None:
+                 observable_file: Optional[str] = None,
+                 yaml_file: Optional[str] = None) -> None:
         """
         Write PEtab tables to files for this problem
 
@@ -282,6 +283,7 @@ def to_files(self,
             parameter_file: Parameter table destination
             visualization_file: Visualization table destination
             observable_file: Observables table destination
+            yaml_file: YAML file destination
 
         Raises:
             ValueError: If a destination was provided for a non-existing
@@ -333,6 +335,12 @@ def error(name: str) -> ValueError:
             else:
                 raise error("visualization")
 
+        if yaml_file:
+            yaml.create_problem_yaml(sbml_file, condition_file,
+                                     measurement_file, parameter_file,
+                                     observable_file, yaml_file,
+                                     visualization_file)
+
     def get_optimization_parameters(self):
         """
         Return list of optimization parameter IDs.

diff --git a/petab/yaml.py b/petab/yaml.py
@@ -2,13 +2,12 @@
 
 import os
 
-from typing import Any, Dict, Union, Optional
+from typing import Any, Dict, Union, Optional, List
 
 import jsonschema
 import yaml
 from .C import *  # noqa: F403
 
-
 SCHEMA = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                       "petab_schema.yaml")
 
@@ -159,4 +158,49 @@ def write_yaml(yaml_config: Dict[str, Any], filename: str) -> None:
     """
 
     with open(filename, 'w') as outfile:
-        yaml.dump(yaml_config, outfile, default_flow_style=False)
+        yaml.dump(yaml_config, outfile, default_flow_style=False,
+                  sort_keys=False)
+
+
+def create_problem_yaml(sbml_files: Union[str, List[str]],
+                        condition_files: Union[str, List[str]],
+                        measurement_files: Union[str, List[str]],
+                        parameter_file: str,
+                        observable_files: Union[str, List[str]],
+                        yaml_file: str,
+                        visualization_files: Optional[Union[str, List[str]]]
+                        = None) -> None:
+    """
+    Create and write default YAML file for a single PEtab problem
+
+    Arguments:
+        sbml_files: Path of SBML model file or list of such
+        condition_files: Path of condition file or list of such
+        measurement_files: Path of measurement file or list of such
+        parameter_file: Path of parameter file
+        observable_files: Path of observable file or list of such
+        yaml_file: Path to which YAML file should be written
+        visualization_files: Optional path to visualization file or list of
+            such
+    """
+    if isinstance(sbml_files, str):
+        sbml_files = [sbml_files]
+    if isinstance(condition_files, str):
+        condition_files = [condition_files]
+    if isinstance(measurement_files, str):
+        measurement_files = [measurement_files]
+    if isinstance(observable_files, str):
+        observable_files = [observable_files]
+    if isinstance(visualization_files, str):
+        visualization_files = [visualization_files]
+
+    problem_dic = {CONDITION_FILES: condition_files,
+                   MEASUREMENT_FILES: measurement_files,
+                   SBML_FILES: sbml_files,
+                   OBSERVABLE_FILES: observable_files}
+    if visualization_files is not None:
+        problem_dic.update({'visualization_files': visualization_files})
+    yaml_dic = {PARAMETER_FILE: parameter_file,
+                FORMAT_VERSION: 1,
+                PROBLEMS: [problem_dic]}
+    write_yaml(yaml_dic, yaml_file)
diff --git a/tests/test_petab.py b/tests/test_petab.py
@@ -370,6 +370,79 @@ def test_flatten_timepoint_specific_output_overrides():
     assert petab.lint_problem(problem) is False
 
 
+def test_flatten_timepoint_specific_output_overrides_special_cases():
+    """Test flatten_timepoint_specific_output_overrides
+    for special cases:
+    * no preequilibration
+    * no observable parameters
+    """
+    observable_df = pd.DataFrame(data={
+        OBSERVABLE_ID: ['obs1'],
+        OBSERVABLE_FORMULA: ['species1'],
+        NOISE_FORMULA: ['noiseParameter1_obs1']
+    })
+    observable_df.set_index(OBSERVABLE_ID, inplace=True)
+
+    observable_df_expected = pd.DataFrame(data={
+        OBSERVABLE_ID: ['obs1_1', 'obs1_2'],
+        OBSERVABLE_FORMULA: [
+            'species1',
+            'species1'],
+        NOISE_FORMULA: ['noiseParameter1_obs1_1',
+                        'noiseParameter1_obs1_2']
+    })
+    observable_df_expected.set_index(OBSERVABLE_ID, inplace=True)
+
+    # Measurement table with timepoint-specific overrides
+    measurement_df = pd.DataFrame(data={
+        OBSERVABLE_ID:
+            ['obs1', 'obs1', 'obs1', 'obs1'],
+        SIMULATION_CONDITION_ID:
+            ['condition1', 'condition1', 'condition1', 'condition1'],
+        TIME:
+            [1.0, 1.0, 2.0, 2.0],
+        MEASUREMENT:
+            [np.nan] * 4,
+        NOISE_PARAMETERS:
+            ['noiseParOverride1', 'noiseParOverride1',
+             'noiseParOverride2', 'noiseParOverride2'],
+    })
+
+    measurement_df_expected = pd.DataFrame(data={
+        OBSERVABLE_ID:
+            ['obs1_1', 'obs1_1', 'obs1_2', 'obs1_2'],
+        SIMULATION_CONDITION_ID:
+            ['condition1', 'condition1', 'condition1', 'condition1'],
+        TIME:
+            [1.0, 1.0, 2.0, 2.0],
+        MEASUREMENT:
+            [np.nan] * 4,
+        NOISE_PARAMETERS:
+            ['noiseParOverride1', 'noiseParOverride1',
+             'noiseParOverride2', 'noiseParOverride2'],
+    })
+
+    problem = petab.Problem(measurement_df=measurement_df,
+                            observable_df=observable_df)
+
+    assert petab.lint_problem(problem) is False
+
+    # Ensure having timepoint-specific overrides
+    assert petab.lint.measurement_table_has_timepoint_specific_mappings(
+        measurement_df) is True
+
+    petab.flatten_timepoint_specific_output_overrides(problem)
+
+    # Timepoint-specific overrides should be gone now
+    assert petab.lint.measurement_table_has_timepoint_specific_mappings(
+        problem.measurement_df) is False
+
+    assert problem.observable_df.equals(observable_df_expected) is True
+    assert problem.measurement_df.equals(measurement_df_expected) is True
+
+    assert petab.lint_problem(problem) is False
+
+
 def test_concat_measurements():
     a = pd.DataFrame({MEASUREMENT: [1.0]})
     b = pd.DataFrame({TIME: [1.0]})
@@ -458,7 +531,8 @@ def test_to_files(petab_problem):  # pylint: disable=W0621
             measurement_file=measurement_file,
             parameter_file=parameter_file,
             visualization_file=None,
-            observable_file=observable_file)
+            observable_file=observable_file,
+            yaml_file=None)
 
         # exemplarily load some
         parameter_df = petab.get_parameter_df(parameter_file)

diff --git a/tests/test_yaml.py b/tests/test_yaml.py
@@ -1,7 +1,8 @@
 """Test for petab.yaml"""
+import tempfile
 
 import pytest
-from petab.yaml import validate
+from petab.yaml import validate, create_problem_yaml
 
 from jsonschema.exceptions import ValidationError
 
@@ -18,3 +19,36 @@ def test_validate():
     # should be well-formed
     file_ = "doc/example/example_Fujita/Fujita.yaml"
     validate(file_)
+
+
+def test_create_problem_yaml():
+    with tempfile.TemporaryDirectory() as folder:
+        # test with single problem files
+        # create target files
+        sbml_file = tempfile.mkstemp(dir=folder)[1]
+        condition_file = tempfile.mkstemp(dir=folder)[1]
+        measurement_file = tempfile.mkstemp(dir=folder)[1]
+        parameter_file = tempfile.mkstemp(dir=folder)[1]
+        observable_file = tempfile.mkstemp(dir=folder)[1]
+        yaml_file = tempfile.mkstemp(dir=folder)[1]
+        visualization_file = tempfile.mkstemp(dir=folder)[1]
+        create_problem_yaml(sbml_file, condition_file, measurement_file,
+                            parameter_file, observable_file, yaml_file,
+                            visualization_file)
+        validate(yaml_file)
+
+        # test for list of files
+        # create additional target files
+        sbml_file2 = tempfile.mkstemp(dir=folder)[1]
+        condition_file2 = tempfile.mkstemp(dir=folder)[1]
+        measurement_file2 = tempfile.mkstemp(dir=folder)[1]
+        observable_file2 = tempfile.mkstemp(dir=folder)[1]
+        yaml_file2 = tempfile.mkstemp(dir=folder)[1]
+
+        sbml_files = [sbml_file, sbml_file2]
+        condition_files = [condition_file, condition_file2]
+        measurement_files = [measurement_file, measurement_file2]
+        observable_files = [observable_file, observable_file2]
+        create_problem_yaml(sbml_files, condition_files, measurement_files,
+                            parameter_file, observable_files, yaml_file2)
+        validate(yaml_file2)