From 0f5b073052162ccc253d10202898fa8799c01896 Mon Sep 17 00:00:00 2001
From: ErikaDudkin <49193148+erikadudki@users.noreply.github.com>
Date: Fri, 24 Apr 2020 07:48:50 -0700
Subject: [PATCH 01/14] Visualization column XValue not mandatory anymore
 (#429)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* xValues defaults to time, or to dose response (DR) if there is only one timepoint

* separate plots by observables, plotIds

Co-Authored-By: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com>

* Update petab/visualize/helper_functions.py

Co-Authored-By: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com>

* incorporated pr comments

Co-authored-by: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com>
---
 petab/visualize/helper_functions.py | 66 ++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 6 deletions(-)

diff --git a/petab/visualize/helper_functions.py b/petab/visualize/helper_functions.py
index 79a29e96..77d8e728 100644
--- a/petab/visualize/helper_functions.py
+++ b/petab/visualize/helper_functions.py
@@ -370,7 +370,7 @@ def get_default_vis_specs(
         dataset_label_column = dataset_id_column
 
     # get number of plots and create plotId-lists
-    plot_id_list = ['plot%s' % str(ind + 1) for ind, inner_list in enumerate(
+    plot_id_list = [f'plot{ind+1}' for ind, inner_list in enumerate(
         dataset_id_list) for _ in inner_list]
 
     # create dataframe
@@ -442,8 +442,21 @@ def get_vis_spec_dependent_columns_dict(
         yvalues_column = ['']*len(dataset_id_column)
 
     # get number of plots and create plotId-lists
-    plot_id_column = ['plot%s' % str(ind + 1) for ind, inner_list in enumerate(
-        dataset_id_list) for _ in inner_list]
+    if group_by == 'observable':
+        obs_uni = list(np.unique(exp_data[OBSERVABLE_ID]))
+        # copy of dataset ids, for later replacing with plot ids
+        plot_id_column = dataset_id_column.copy()
+        for i_obs, obs_id in enumerate(obs_uni):
+            # dataset ids whose name contains the observable id
+            matching = {s for s in dataset_id_column if obs_id in s}
+            # swap whole entries; substring replace would mangle longer ids
+            plot_id_column = [
+                f'plot{i_obs + 1}' if sub in matching else sub
+                for sub in plot_id_column]
+    else:
+        # get number of plots and create plotId-lists
+        plot_id_column = ['plot%s' % str(ind + 1) for ind, inner_list in
+                          enumerate(dataset_id_list) for _ in inner_list]
 
     columns_dict = {PLOT_ID: plot_id_column,
                     DATASET_ID: dataset_id_column,
@@ -473,6 +486,23 @@ def expand_vis_spec_settings(vis_spec, columns_dict):
                     column_entries.append(
                         vis_spec[select_conditions].loc[:, column].values[0])
             else:
+                # get unique plotIDs from visspecfile
+                vis_plotid_u = vis_spec[PLOT_ID].unique()
+                auto_plotid_u = list(set(columns_dict[PLOT_ID]))
+                # if number of plotIds does not coincide (automatically
+                # generated plotIds according to observable grouping, vs
+                # plotIds specified in the visu_Spec)
+                if len(vis_plotid_u) != len(auto_plotid_u):
+                    # which items are not in visu_plotId:
+                    del_plotid = \
+                        list(set(columns_dict[PLOT_ID]) - set(vis_plotid_u))
+                    # replace auto-generated plotIds absent from the visu file
+                    # by its first plotId; compare whole ids, not substrings
+                    for d_i in del_plotid:
+                        columns_dict[PLOT_ID] = [
+                            vis_plotid_u[0] if sub == d_i else sub
+                            for sub in columns_dict[PLOT_ID]]
+
                 for plot_id in columns_dict[PLOT_ID]:
                     select_conditions = vis_spec[PLOT_ID] == plot_id
                     column_entries.append(
@@ -545,11 +575,13 @@ def create_or_update_vis_spec(
     vis_spec[PLOT_TYPE_DATA] = plotted_noise
 
     # check columns, and add non-mandatory default columns
-    vis_spec = check_ex_visu_columns(vis_spec)
+    vis_spec = check_ex_visu_columns(vis_spec, exp_data, exp_conditions)
     return exp_data, vis_spec
 
 
-def check_ex_visu_columns(vis_spec: pd.DataFrame) -> pd.DataFrame:
+def check_ex_visu_columns(vis_spec: pd.DataFrame,
+                          exp_data: pd.DataFrame,
+                          exp_conditions: pd.DataFrame) -> pd.DataFrame:
     """
     Check the columns in Visu_Spec file, if non-mandotory columns does not
     exist, create default columns
@@ -564,7 +596,29 @@ def check_ex_visu_columns(vis_spec: pd.DataFrame) -> pd.DataFrame:
     if PLOT_TYPE_DATA not in vis_spec.columns:
         vis_spec[PLOT_TYPE_DATA] = MEAN_AND_SD
     if X_VALUES not in vis_spec.columns:
-        vis_spec[X_VALUES] = 'time'
+        # check if time is constant in expdata (if yes, plot dose response)
+        # otherwise plot time series
+        uni_time = pd.unique(exp_data[TIME])
+        if len(uni_time) > 1:
+            vis_spec[X_VALUES] = 'time'
+        elif len(uni_time) == 1:
+            if np.isin(exp_conditions.columns.values, 'conditionName').any():
+                conds = exp_conditions.columns.drop('conditionName')
+            else:
+                conds = exp_conditions.columns
+            # default: first dose-response condition (first from condition
+            # table) is plotted
+            # TODO: expand to automatic plotting of all conditions
+            vis_spec[X_VALUES] = conds[0]
+            vis_spec[X_LABEL] = conds[0]
+            warnings.warn(
+                '\n First dose-response condition is plotted. \n Check which '
+                'condition you want to plot \n and possibly enter it into the '
+                'column *xValues* \n in the visualization table.')
+        else:
+            raise NotImplementedError(
+                'Strange Error. There is no time defined in the measurement '
+                'table?')
     if X_OFFSET not in vis_spec.columns:
         vis_spec[X_OFFSET] = 0
     if X_LABEL not in vis_spec.columns:

From b050558bfa74e4d43714f8badfa6bea572da7a96 Mon Sep 17 00:00:00 2001
From: LeonardSchmiester <leonard.schmiester@helmholtz-muenchen.de>
Date: Fri, 1 May 2020 10:33:27 +0200
Subject: [PATCH 02/14] Use core.is_empty instead of np.isnan. Closes #433
 (#434)

---
 petab/parameter_mapping.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/petab/parameter_mapping.py b/petab/parameter_mapping.py
index 6950a56e..91d6b87c 100644
--- a/petab/parameter_mapping.py
+++ b/petab/parameter_mapping.py
@@ -520,12 +520,12 @@ def merge_preeq_and_sim_pars_condition(
         par_preeq = condition_map_preeq[par_id]
         par_sim = condition_map_sim[par_id]
         if par_preeq != par_sim \
-                and not (np.isnan(par_sim) and np.isnan(par_preeq)):
+                and not (core.is_empty(par_sim) and core.is_empty(par_preeq)):
             # both identical or both nan is okay
-            if np.isnan(par_sim):
+            if core.is_empty(par_sim):
                 # unmapped for simulation
                 condition_map_sim[par_id] = par_preeq
-            elif np.isnan(par_preeq):
+            elif core.is_empty(par_preeq):
                 # unmapped for preeq is okay
                 pass
             else:
@@ -540,10 +540,10 @@ def merge_preeq_and_sim_pars_condition(
 
         if scale_preeq != scale_sim:
             # both identical is okay
-            if np.isnan(par_sim):
+            if core.is_empty(par_sim):
                 # unmapped for simulation
                 condition_scale_map_sim[par_id] = scale_preeq
-            elif np.isnan(par_preeq):
+            elif core.is_empty(par_preeq):
                 # unmapped for preeq is okay
                 pass
             else:

From a8d5927beefaebe33e401b085c6076838d5cfc0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?=
 <31767307+yannikschaelte@users.noreply.github.com>
Date: Mon, 11 May 2020 10:01:28 +0200
Subject: [PATCH 03/14] Update tests (#435)

* use codecov action

* tidy up

* try multiple python versions

* restrict libcombine version

* fix version inc
---
 .github/workflows/ci_tests.yml | 18 +++++++++++++-----
 setup.py                       |  2 +-
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml
index 0a5a804d..b253eb7c 100644
--- a/.github/workflows/ci_tests.yml
+++ b/.github/workflows/ci_tests.yml
@@ -1,4 +1,4 @@
-name: CI tests
+name: CI
 
 # trigger
 on: [push]
@@ -8,30 +8,38 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6]
+        python-version: [3.8]
 
     steps:
     - name: Check out repository
       uses: actions/checkout@v2
+
     - name: Prepare python ${{ matrix.python-version }}
       uses: actions/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Cache pip
+
+    - name: Cache
       uses: actions/cache@v1
       with:
         path: ~/.cache/pip
         key: ${{ runner.os }}-${{ hashFiles('**/.ci_pip_reqs.txt') }}-${{ hashFiles('**/setup.py') }}
         restore-keys: |
           ${{ runner.os }}-
+
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install -r .ci_pip_reqs.txt
         pip install -e .[reports,combine]
+
     - name: Run tests
       run: |
-        pytest --cov
+        pytest --cov --cov-report=xml tests
         python -m flake8 --exclude=build,doc,example,tmp --extend-ignore=F403,F405
+
     - name: Coverage
-      run: codecov -t ${{ secrets.CODECOV_TOKEN }}
+      uses: codecov/codecov-action@v1
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+        file: ./coverage.xml
diff --git a/setup.py b/setup.py
index 6138785a..04f02764 100644
--- a/setup.py
+++ b/setup.py
@@ -65,5 +65,5 @@ def absolute_links(txt):
       python_requires='>=3.6',
       entry_points=ENTRY_POINTS,
       extras_require={'reports': ['Jinja2'],
-                      'combine': ['python-libcombine']},
+                      'combine': ['python-libcombine==0.2.3_1']},
       )

From fb4534867e60d5b82803bd5c9963744cda2b0eb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?=
 <31767307+yannikschaelte@users.noreply.github.com>
Date: Mon, 11 May 2020 15:33:13 +0200
Subject: [PATCH 04/14] update libcombine version (#437)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 04f02764..123dbf2d 100644
--- a/setup.py
+++ b/setup.py
@@ -65,5 +65,5 @@ def absolute_links(txt):
       python_requires='>=3.6',
       entry_points=ENTRY_POINTS,
       extras_require={'reports': ['Jinja2'],
-                      'combine': ['python-libcombine==0.2.3_1']},
+                      'combine': ['python-libcombine>=0.2.6']},
       )

From 49cfc71255f6694332579623647c1c0605b82b93 Mon Sep 17 00:00:00 2001
From: ErikaDudkin <49193148+erikadudki@users.noreply.github.com>
Date: Tue, 12 May 2020 07:14:37 -0700
Subject: [PATCH 05/14] add sorting of indices of dataframes for the correct
 sorting of x-values (#430)

* add sorting of indices of dataframes for the correct sorting of x-values

* added sorting also for the time case, where conditions is a numpy array and has to be handled differently

* Update petab/visualize/plotting_config.py
---
 petab/visualize/plotting_config.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/petab/visualize/plotting_config.py b/petab/visualize/plotting_config.py
index fc5e0558..25042b0b 100644
--- a/petab/visualize/plotting_config.py
+++ b/petab/visualize/plotting_config.py
@@ -97,6 +97,18 @@ def plot_lowlevel(plot_spec: pd.Series,
 
         # construct errorbar-plots: noise specified above
         else:
+            # sort index for the case that indices of conditions and
+            # measurements differ; if indep_var='time', conditions is a numpy
+            # array, for indep_var=observable it's a Series
+            if isinstance(conditions, np.ndarray):
+                conditions.sort()
+            elif isinstance(conditions, pd.core.series.Series):
+                conditions.sort_index(inplace=True)
+            else:
+                raise ValueError('Strange: conditions object is neither numpy'
+                                 ' nor series...')
+            ms.sort_index(inplace=True)
+            # sorts according to ascending order of conditions
             scond, smean, snoise = \
                 zip(*sorted(zip(conditions, ms['mean'], ms[noise_col])))
             p = ax.errorbar(

From c34e7a9df599db2a21546351dd2e16361ac59f98 Mon Sep 17 00:00:00 2001
From: Jakob Vanhoefer <38346459+jvanhoefer@users.noreply.github.com>
Date: Sat, 30 May 2020 15:49:18 +0200
Subject: [PATCH 06/14] fix broken parameter table Fujita (#440)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com>
---
 doc/example/example_Fujita/Fujita_parameters_scaling.tsv | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/example/example_Fujita/Fujita_parameters_scaling.tsv b/doc/example/example_Fujita/Fujita_parameters_scaling.tsv
index 180651cd..2254fbf8 100644
--- a/doc/example/example_Fujita/Fujita_parameters_scaling.tsv
+++ b/doc/example/example_Fujita/Fujita_parameters_scaling.tsv
@@ -18,4 +18,3 @@ reaction_9_k1	reaction_{9,k1}	log10	1E-08	100000000	0.028510798479438	1
 scaling_pAkt_tot	scaling_{pAkt}_tot	log10	1E-08	100000000	41.377103160384	1				
 scaling_pEGFR_tot	scaling_{pEGFR}_tot	log10	1E-08	100000000	5.64785460492811E-08	1				
 scaling_pS6_tot	scaling_{pS6}_tot	log10	1E-08	100000000	78521.9513232784	1				
->>>>>>> develop

From dc253fee4314ba4d208420d4b4ea7b596498cb91 Mon Sep 17 00:00:00 2001
From: lcontento <lcontento@users.noreply.github.com>
Date: Tue, 9 Jun 2020 17:59:27 +0200
Subject: [PATCH 07/14] Make float parsing from CSV round-trip (#444)

* Make float parsing from CSV round-trip

Change the float parsing behaviour of `pandas.read_csv` to be the same
as in Python (e.g., in Python `float(0.999) == 0.999`).

* Fix flake8 warnings failing CI
---
 petab/__init__.py     | 30 +++++++++++++++---------------
 petab/conditions.py   |  3 ++-
 petab/core.py         |  8 +++++---
 petab/measurements.py |  3 ++-
 petab/observables.py  |  3 ++-
 petab/parameters.py   |  6 ++++--
 petab/sbml.py         |  2 +-
 7 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/petab/__init__.py b/petab/__init__.py
index 2f9d1c99..4246e68e 100644
--- a/petab/__init__.py
+++ b/petab/__init__.py
@@ -9,18 +9,18 @@
 
 ENV_NUM_THREADS = "PETAB_NUM_THREADS"
 
-from .calculate import *  # noqa: F403, F401
-from .composite_problem import *  # noqa: F403, F401
-from .conditions import *  # noqa: F403, F401
-from .core import *  # noqa: F403, F401
-from .lint import *  # noqa: F403, F401
-from .measurements import *  # noqa: F403, F401
-from .observables import *  # noqa: F403, F401
-from .parameter_mapping import *  # noqa: F403, F401
-from .parameters import *  # noqa: F403, F401
-from .problem import *  # noqa: F403, F401
-from .sampling import *  # noqa: F403, F401
-from .sbml import *  # noqa: F403, F401
-from .yaml import *  # noqa: F403, F401
-from .version import __version__  # noqa: F401
-from .format_version import __format_version__  # noqa: F401
+from .calculate import *  # noqa: F403, F401, E402
+from .composite_problem import *  # noqa: F403, F401, E402
+from .conditions import *  # noqa: F403, F401, E402
+from .core import *  # noqa: F403, F401, E402
+from .lint import *  # noqa: F403, F401, E402
+from .measurements import *  # noqa: F403, F401, E402
+from .observables import *  # noqa: F403, F401, E402
+from .parameter_mapping import *  # noqa: F403, F401, E402
+from .parameters import *  # noqa: F403, F401, E402
+from .problem import *  # noqa: F403, F401, E402
+from .sampling import *  # noqa: F403, F401, E402
+from .sbml import *  # noqa: F403, F401, E402
+from .yaml import *  # noqa: F403, F401, E402
+from .version import __version__  # noqa: F401, E402
+from .format_version import __format_version__  # noqa: F401, E402
diff --git a/petab/conditions.py b/petab/conditions.py
index 43a15a76..ffaca423 100644
--- a/petab/conditions.py
+++ b/petab/conditions.py
@@ -23,7 +23,8 @@ def get_condition_df(
         return condition_file
 
     if isinstance(condition_file, str):
-        condition_file = pd.read_csv(condition_file, sep='\t')
+        condition_file = pd.read_csv(condition_file, sep='\t',
+                                     float_precision='round_trip')
 
     lint.assert_no_leading_trailing_whitespace(
         condition_file.columns.values, "condition")
diff --git a/petab/core.py b/petab/core.py
index 695fe281..83e7f818 100644
--- a/petab/core.py
+++ b/petab/core.py
@@ -23,7 +23,8 @@ def get_simulation_df(simulation_file: str) -> pd.DataFrame:
     Returns:
         Simulation DataFrame
     """
-    return pd.read_csv(simulation_file, sep="\t", index_col=None)
+    return pd.read_csv(simulation_file, sep="\t", index_col=None,
+                       float_precision='round_trip')
 
 
 def write_simulation_df(df: pd.DataFrame, filename: str) -> None:
@@ -47,7 +48,8 @@ def get_visualization_df(visualization_file: str) -> pd.DataFrame:
         Visualization DataFrame
     """
     try:
-        vis_spec = pd.read_csv(visualization_file, sep="\t", index_col=None)
+        vis_spec = pd.read_csv(visualization_file, sep="\t", index_col=None,
+                               float_precision='round_trip')
     except pd.errors.EmptyDataError:
         warn("Visualization table is empty. Defaults will be used. "
              "Refer to the documentation for details.")
@@ -370,7 +372,7 @@ def _add_file_metadata(location: str, description: str = ""):
         )
         _add_file_metadata(
             location=parameter_subset_file,
-            description=f"PEtab parameter file"
+            description="PEtab parameter file"
         )
 
     for problem in yaml_config[PROBLEMS]:
diff --git a/petab/measurements.py b/petab/measurements.py
index 154c0bbb..8bc0f9a7 100644
--- a/petab/measurements.py
+++ b/petab/measurements.py
@@ -30,7 +30,8 @@ def get_measurement_df(
         return measurement_file
 
     if isinstance(measurement_file, str):
-        measurement_file = pd.read_csv(measurement_file, sep='\t')
+        measurement_file = pd.read_csv(measurement_file, sep='\t',
+                                       float_precision='round_trip')
 
     lint.assert_no_leading_trailing_whitespace(
         measurement_file.columns.values, MEASUREMENT)
diff --git a/petab/observables.py b/petab/observables.py
index 81ab7d47..19975c93 100644
--- a/petab/observables.py
+++ b/petab/observables.py
@@ -28,7 +28,8 @@ def get_observable_df(
         return observable_file
 
     if isinstance(observable_file, str):
-        observable_file = pd.read_csv(observable_file, sep='\t')
+        observable_file = pd.read_csv(observable_file, sep='\t',
+                                      float_precision='round_trip')
 
     lint.assert_no_leading_trailing_whitespace(
         observable_file.columns.values, "observable")
diff --git a/petab/parameters.py b/petab/parameters.py
index f7aaad51..d1213d18 100644
--- a/petab/parameters.py
+++ b/petab/parameters.py
@@ -33,10 +33,12 @@ def get_parameter_df(
         parameter_df = parameter_file
 
     if isinstance(parameter_file, str):
-        parameter_df = pd.read_csv(parameter_file, sep='\t')
+        parameter_df = pd.read_csv(parameter_file, sep='\t',
+                                   float_precision='round_trip')
 
     if isinstance(parameter_file, list):
-        parameter_df = pd.concat([pd.read_csv(subset_file, sep='\t')
+        parameter_df = pd.concat([pd.read_csv(subset_file, sep='\t',
+                                              float_precision='round_trip')
                                   for subset_file in parameter_file])
         # Remove identical parameter definitions
         parameter_df.drop_duplicates(inplace=True, ignore_index=True)
diff --git a/petab/sbml.py b/petab/sbml.py
index be21ae93..6ded75c0 100644
--- a/petab/sbml.py
+++ b/petab/sbml.py
@@ -368,7 +368,7 @@ def get_sigmas(sbml_model: libsbml.Model, remove: bool = False) -> dict:
         remove=remove
     )
     # set correct observable name
-    sigmas = {re.sub(f'^sigma_', 'observable_', key): value['formula']
+    sigmas = {re.sub('^sigma_', 'observable_', key): value['formula']
               for key, value in sigmas.items()}
     return sigmas
 

From 843edd3a2a76f55bc8771de9d9886d9857b6e806 Mon Sep 17 00:00:00 2001
From: lcontento <lcontento@users.noreply.github.com>
Date: Wed, 17 Jun 2020 17:39:27 +0200
Subject: [PATCH 08/14] Allow model time in observable formulas (#445)

---
 doc/documentation_data_format.md | 33 ++++++++++++++++----------------
 petab/observables.py             |  2 +-
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/doc/documentation_data_format.md b/doc/documentation_data_format.md
index 576a54e8..9300f868 100644
--- a/doc/documentation_data_format.md
+++ b/doc/documentation_data_format.md
@@ -79,7 +79,7 @@ This is specified as a tab-separated value file in the following way:
 |... | ... | ... | ... |...| ...|
 
 Row- and column-ordering are arbitrary, although specifying `conditionId`
-first may improve human readability. 
+first may improve human readability.
 
 Additional columns are *not* allowed.
 
@@ -102,7 +102,7 @@ Additional columns are *not* allowed.
   Values for these condition parameters may be provided either as numeric
   values, or as IDs defined in the SBML model, the parameter table or both.
 
-  - `${parameterId}` 
+  - `${parameterId}`
 
     The values will override any parameter values specified in the model.
 
@@ -114,7 +114,7 @@ Additional columns are *not* allowed.
     condition. If `NaN` is provided for a condition, the result of the
     preequilibration (or initial concentration/amount from the SBML model, if
     no preequilibration is defined) is used.
-  
+
   - `${compartmentId}`
 
     If a compartment ID is provided, it is interpreted as the initial
@@ -141,7 +141,7 @@ order:
 |... | [parameterId&#124;NUMERIC[;parameterId&#124;NUMERIC][...]] | [parameterId&#124;NUMERIC[;parameterId&#124;NUMERIC][...]]
 |...|...|...|
 
-Additional (non-standard) columns may be added. If the additional plotting 
+Additional (non-standard) columns may be added. If the additional plotting
 functionality of PEtab should be used, such columns could be
 
 | ... | [datasetId] | [replicateId]  |
@@ -149,9 +149,9 @@ functionality of PEtab should be used, such columns could be
 |... | [datasetId] | [replicateId] |
 |...|...|...|
 
-where `datasetId` is a necessary column to use particular plotting 
-functionality, and `replicateId` is optional, which can be used to group 
-replicates and plot error bars. 
+where `datasetId` is a necessary column to use particular plotting
+functionality, and `replicateId` is optional, which can be used to group
+replicates and plot error bars.
 
 
 ### Detailed field description
@@ -244,7 +244,7 @@ The observable table has the following columns:
 | observableId | [observableName] | observableFormula | [observableTransformation] | noiseFormula | [noiseDistribution] |
 | --- | --- | --- | --- | --- | --- |
 | STRING | [STRING] | STRING | [lin(default)&#124;log&#124;log10] |  STRING&#124;NUMBER | [laplace&#124;normal] |
-| e.g. | | | | | | 
+| e.g. | | | | | |
 | relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1  * (protein1 + phospho_protein1 ) | lin | noiseParameter1_relativeTotalProtein1  | normal |
 | ... |  ... | ... | ... | ... |
 
@@ -264,8 +264,9 @@ The observable table has the following columns:
 * `observableFormula` [STRING]
 
   Observation function as plain text formula expression.
-  May contain any symbol defined in the SBML model or parameter table. In the
-  simplest case just an SBML species ID or an `AssignmentRule` target.
+  May contain any symbol defined in the SBML model (including model time `time`)
+  or parameter table. In the simplest case just an SBML species ID
+  or an `AssignmentRule` target.
 
   May introduce new parameters of the form `observableParameter${n}_${observableId}`,
   which are overridden by `observableParameters` in the measurement table
@@ -306,7 +307,7 @@ The observable table has the following columns:
 - `noiseDistribution` [STRING: 'normal' or 'laplace', OPTIONAL]
 
   Assumed noise distribution for the given measurement. Only normally or
-  Laplace distributed noise is currently allowed (log-normal and 
+  Laplace distributed noise is currently allowed (log-normal and
   log-laplace are obtained by setting `observableTransformation` to `log`).
   Defaults to `normal`. If `normal`, the specified `noiseParameters` will be
   interpreted as standard deviation (*not* variance).
@@ -407,9 +408,9 @@ Additional columns may be added.
   Prior parameters used for sampling of initial points for optimization,
   separated by a semicolon. Defaults to `lowerBound;upperBound`.
 
-  So far, only numeric values will be supported, no parameter names. 
+  So far, only numeric values will be supported, no parameter names.
   Parameters for the different prior types are:
-  
+
     - uniform: lower bound; upper bound
     - normal: mean; standard deviation (**not** variance)
     - laplace: location; scale
@@ -433,8 +434,8 @@ Additional columns may be added.
 ## Visualization table
 
 A tab-separated value file containing the specification of the visualization
-routines which come with the PEtab repository. Plots are in general 
-collections of different datasets as specified using their `datasetId` (if 
+routines which come with the PEtab repository. Plots are in general
+collections of different datasets as specified using their `datasetId` (if
 provided) inside the measurement table.
 
 Expected to have the following columns in any (but preferably this)
@@ -489,7 +490,7 @@ order:
 
 - `xValues` [STRING, OPTIONAL]
 
-  The independent variable, which will be plotted on the x-axis. Can be 
+  The independent variable, which will be plotted on the x-axis. Can be
   `time` (default, for time resolved data), or it can be `parameterOrStateId`
   for dose-response plots. The corresponding numeric values will be shown on
   the x-axis.
diff --git a/petab/observables.py b/petab/observables.py
index 19975c93..7187397c 100644
--- a/petab/observables.py
+++ b/petab/observables.py
@@ -81,7 +81,7 @@ def get_output_parameters(observable_df: pd.DataFrame,
                            key=lambda symbol: symbol.name)
         for free_sym in free_syms:
             sym = str(free_sym)
-            if sbml_model.getElementBySId(sym) is None:
+            if sbml_model.getElementBySId(sym) is None and sym != 'time':
                 output_parameters[sym] = None
 
     return list(output_parameters.keys())

From 9cbf54eecaa4243f3f18acc1798dc74856b8f6a1 Mon Sep 17 00:00:00 2001
From: lcontento <lcontento@users.noreply.github.com>
Date: Wed, 1 Jul 2020 10:36:52 +0200
Subject: [PATCH 09/14] Lint: detect duplicated observable IDs (#446)

* Lint: detect duplicated observable IDs

* Lint: duplicated IDs: better naming and messages
---
 petab/lint.py      | 37 +++++++++++++++++++++++++++++++++----
 tests/test_lint.py | 18 ++++++++++++++++++
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/petab/lint.py b/petab/lint.py
index 39effc44..e51f68cc 100644
--- a/petab/lint.py
+++ b/petab/lint.py
@@ -5,6 +5,7 @@
 import numbers
 import re
 from typing import Optional, Iterable
+from collections import Counter
 
 import libsbml
 import numpy as np
@@ -133,6 +134,9 @@ def check_measurement_df(df: pd.DataFrame,
         if OBSERVABLE_TRANSFORMATION in observable_df:
             # Check for positivity of measurements in case of
             #  log-transformation
+            assert_unique_observable_ids(observable_df)
+            # If the above is not checked, in the following loop
+            # trafo may become a pandas Series
             for measurement, obs_id in zip(df[MEASUREMENT], df[OBSERVABLE_ID]):
                 trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION]
                 if measurement <= 0.0 and trafo in [LOG, LOG10]:
@@ -202,7 +206,7 @@ def check_parameter_df(
     assert_parameter_scale_is_valid(df)
     assert_parameter_bounds_are_numeric(df)
     assert_parameter_estimate_is_boolean(df)
-    assert_parameter_id_is_unique(df)
+    assert_unique_parameter_ids(df)
     check_parameter_bounds(df)
     assert_parameter_prior_type_is_valid(df)
 
@@ -238,6 +242,7 @@ def check_observable_df(observable_df: pd.DataFrame) -> None:
                 observable_df[column_name].values, column_name)
 
     assert_noise_distributions_valid(observable_df)
+    assert_unique_observable_ids(observable_df)
 
     # Check that formulas are parsable
     for row in observable_df.itertuples():
@@ -359,7 +364,7 @@ def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None:
             raise AssertionError(f"Empty {PARAMETER_ID} found.")
 
 
-def assert_parameter_id_is_unique(parameter_df: pd.DataFrame) -> None:
+def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None:
     """
     Check if the parameterId column of the parameter table is unique.
 
@@ -369,9 +374,11 @@ def assert_parameter_id_is_unique(parameter_df: pd.DataFrame) -> None:
     Raises:
         AssertionError: in case of problems
     """
-    if len(parameter_df.index) != len(set(parameter_df.index)):
+    non_unique_ids = get_non_unique(parameter_df.index)
+    if len(non_unique_ids) > 0:
         raise AssertionError(
-            f"{PARAMETER_ID} column in parameter table is not unique.")
+            f"Non-unique values found in the {PARAMETER_ID} column"
+            " of the parameter table: " + str(non_unique_ids))
 
 
 def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None:
@@ -621,6 +628,28 @@ def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None:
                     f"table: {distr}.")
 
 
+def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None:
+    """
+    Ensure that no observable ID occurs more than once in the table index.
+
+    Arguments:
+        observable_df: PEtab observable DataFrame, indexed by observable ID
+
+    Raises:
+        AssertionError: if any ID is duplicated; the message lists them
+    """
+    duplicates = get_non_unique(observable_df.index)
+    if duplicates:
+        raise AssertionError(
+            f"Non-unique values found in the {OBSERVABLE_ID} column"
+            " of the observable table: " + str(duplicates))
+
+
+def get_non_unique(values):
+    """Return the list of values that occur more than once in *values*."""
+    return [val for val, n_seen in Counter(values).items() if n_seen > 1]
+
+
 def lint_problem(problem: 'petab.Problem') -> bool:
     """Run PEtab validation on problem
 
diff --git a/tests/test_lint.py b/tests/test_lint.py
index 6bb03712..0e2031b2 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -417,3 +417,21 @@ def test_check_parameter_df():
     del parameter_df[NOMINAL_VALUE]
     with pytest.raises(AssertionError):
         lint.check_parameter_df(df=parameter_df)
+
+
+def test_check_observable_df():
+    """Check that we correctly detect errors in the observable table"""
+
+    observable_df = pd.DataFrame(data={
+        OBSERVABLE_ID: ['obs1', 'obs2'],
+        OBSERVABLE_FORMULA: ['x1', 'x2'],
+        NOISE_FORMULA: ['sigma1', 'sigma2']
+    }).set_index(OBSERVABLE_ID)
+
+    lint.check_observable_df(observable_df)
+
+    # Check that duplicate observable IDs in the index are detected
+    bad_observable_df = observable_df.copy()
+    bad_observable_df.index = ['obs1', 'obs1']
+    with pytest.raises(AssertionError):
+        lint.check_observable_df(bad_observable_df)

From 4b031e82127dc687895402318a78c08f3ec0209b Mon Sep 17 00:00:00 2001
From: Polina Lakrisenko <p.lakrisenko@gmail.com>
Date: Thu, 2 Jul 2020 15:34:32 +0200
Subject: [PATCH 10/14] default value for the column x_label in vis_spec (#431)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com>
Co-authored-by: Daniel Weindl <dweindl@users.noreply.github.com>
---
 petab/visualize/helper_functions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/petab/visualize/helper_functions.py b/petab/visualize/helper_functions.py
index 77d8e728..be57c4cf 100644
--- a/petab/visualize/helper_functions.py
+++ b/petab/visualize/helper_functions.py
@@ -623,6 +623,7 @@ def check_ex_visu_columns(vis_spec: pd.DataFrame,
         vis_spec[X_OFFSET] = 0
     if X_LABEL not in vis_spec.columns:
         vis_spec[X_LABEL] = 'time'
+        vis_spec.loc[vis_spec[X_VALUES] != 'time', X_LABEL] = 'condition'
     if X_SCALE not in vis_spec.columns:
         vis_spec[X_SCALE] = LIN
     if Y_VALUES not in vis_spec.columns:

From a5dd618fad688e0e49fbe53b31a272477718d97d Mon Sep 17 00:00:00 2001
From: Daniel Weindl <daniel.weindl@helmholtz-muenchen.de>
Date: Sun, 19 Jul 2020 21:42:13 +0200
Subject: [PATCH 11/14] Fix calculate_residuals and calculate_llh

don't try to compare missing columns. Closes #450.
---
 petab/calculate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/petab/calculate.py b/petab/calculate.py
index 6e74b186..e9193bf6 100644
--- a/petab/calculate.py
+++ b/petab/calculate.py
@@ -82,6 +82,7 @@ def calculate_residuals_for_table(
     compared_cols = set(MEASUREMENT_DF_COLS)
     compared_cols -= {MEASUREMENT}
     compared_cols &= set(measurement_df.columns)
+    compared_cols &= set(simulation_df.columns)
 
     # compute noise formulas for observables
     noise_formulas = get_symbolic_noise_formulas(observable_df)
@@ -290,6 +291,7 @@ def calculate_llh_for_table(
     compared_cols = set(MEASUREMENT_DF_COLS)
     compared_cols -= {MEASUREMENT}
     compared_cols &= set(measurement_df.columns)
+    compared_cols &= set(simulation_df.columns)
 
     # compute noise formulas for observables
     noise_formulas = get_symbolic_noise_formulas(observable_df)

From 12bcd8a87e0f1cb392d9b9228432abfaed1197a1 Mon Sep 17 00:00:00 2001
From: Daniel Weindl <dweindl@users.noreply.github.com>
Date: Tue, 21 Jul 2020 10:58:43 +0200
Subject: [PATCH 12/14] Fix documentation for prior distribution (Closes #448)
 (#449)

---
 doc/documentation_data_format.md | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/doc/documentation_data_format.md b/doc/documentation_data_format.md
index 9300f868..88edfc4f 100644
--- a/doc/documentation_data_format.md
+++ b/doc/documentation_data_format.md
@@ -334,10 +334,17 @@ it *may* include:
 
 One row per parameter with arbitrary order of rows and columns:
 
-| parameterId | [parameterName] | parameterScale | lowerBound  |upperBound | nominalValue | estimate | [priorType] | [priorParameters] |
-|---|---|---|---|---|---|---|---|---|
-|STRING|[STRING]|log10&#124;lin&#124;log|NUMERIC|NUMERIC|NUMERIC|0&#124;1|*see below*|*see below*
-|...|...|...|...|...|...|...|...|...|
+| parameterId | [parameterName] | parameterScale | lowerBound  |upperBound | nominalValue | estimate | ... |
+|---|---|---|---|---|---|---|---|
+|STRING|[STRING]|log10&#124;lin&#124;log|NUMERIC|NUMERIC|NUMERIC|0&#124;1| ... |
+|...|...|...|...|...|...|...|...|
+
+*(wrapped for readability)*
+
+| ... | \[initializationPriorType\] | \[initializationPriorParameters\] | \[objectivePriorType\] | \[objectivePriorParameters\] |
+|---|---|---|---|---|
+| ... | *see below* | *see below* | *see below* | *see below* |
+|...|...|...|...|...|
 
 Additional columns may be added.
 

From e5786616ede0d7e1296ecdfb18750276643f537b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?=
 <31767307+yannikschaelte@users.noreply.github.com>
Date: Thu, 23 Jul 2020 13:12:54 +0200
Subject: [PATCH 13/14] Write format specification in rst + add noise model
 description (#452)

* cont

* cont

* cont

* add noise formulas

* fix formatting

* fixup
---
 doc/documentation_data_format.md  | 577 -------------------------
 doc/documentation_data_format.rst | 689 ++++++++++++++++++++++++++++++
 doc/index.rst                     |   2 +-
 3 files changed, 690 insertions(+), 578 deletions(-)
 delete mode 100644 doc/documentation_data_format.md
 create mode 100644 doc/documentation_data_format.rst

diff --git a/doc/documentation_data_format.md b/doc/documentation_data_format.md
deleted file mode 100644
index 88edfc4f..00000000
--- a/doc/documentation_data_format.md
+++ /dev/null
@@ -1,577 +0,0 @@
-# PEtab data format specification
-
-
-## Format version: 1
-
-This document explains the PEtab data format.
-
-
-## Purpose
-
-Providing a standardized way for specifying parameter estimation problems in
-systems biology, especially for the case of Ordinary Differential Equation
-(ODE) models.
-
-
-## Overview
-
-The PEtab data format specifies a parameter estimation problem using a number
-of text-based files ([Systems Biology Markup Language (SBML)](http://sbml.org)
-and
-[Tab-Separated Values (TSV)](https://www.iana.org/assignments/media-types/text/tab-separated-values)),
-i.e.
-
-- An SBML model [SBML]
-
-- A measurement file to fit the model to [TSV]
-
-- A condition file specifying model inputs and condition-specific parameters
-  [TSV]
-
-- An observable file specifying the observation model [TSV]
-
-- A parameter file specifying optimization parameters and related information
-  [TSV]
-
-- (optional) A simulation file, which has the same format as the measurement
-  file, but contains model simulations [TSV]
-
-- (optional) A visualization file, which contains specifications how the data
-  and/or simulations should be plotted by the visualization routines [TSV]
-
-![Files constituting a PEtab problem](gfx/petab_files.png)
-
-The following sections will describe the minimum requirements of those
-components in the core standard, which should provide all information for
-defining the parameter estimation problem.
-
-Extensions of this format (e.g. additional columns in the measurement table)
-are possible and intended. However, while those columns may provide extra
-information for example for plotting, downstream analysis, or for more
-efficient parameter estimation, they should not affect the optimization
-problem as such.
-
-**General remarks**
-- All model entities, column names and row names are case-sensitive
-- All identifiers must consist only of upper and lower case letters, digits and
-  underscores, and must not start with a digit.
-- Fields in "[]" are optional and may be left empty.
-
-
-## SBML model definition
-
-The model must be specified as valid SBML. There are no further restrictions.
-
-## Condition table
-
-The condition table specifies parameters, or initial values of species and
-compartments for specific simulation conditions (generally corresponding to
-different experimental conditions).
-
-This is specified as a tab-separated value file in the following way:
-
-| conditionId | [conditionName] | parameterOrSpeciesOrCompartmentId1 | ... | parameterOrSpeciesOrCompartmentId${n} |
-|---|---|---|---|---|
-| STRING | [STRING] | NUMERIC&#124;STRING | ... | NUMERIC&#124;STRING |
-| e.g. | | | | |
-| conditionId1 | [conditionName1] | 0.42 | ...| parameterId|
-| conditionId2 | ... | ... | ...| ...|
-|... | ... | ... | ... |...| ...|
-
-Row- and column-ordering are arbitrary, although specifying `conditionId`
-first may improve human readability.
-
-Additional columns are *not* allowed.
-
-### Detailed field description
-
-- `conditionId` [STRING, NOT NULL]
-
-  Unique identifier for the simulation/experimental condition, to be referenced
-  by the measurement table described below.
-
-- `conditionName` [STRING, OPTIONAL]
-
-  Condition names are arbitrary strings to describe the given condition.
-  They may be used for reporting or visualization.
-
-- `${parameterOrSpeciesOrCompartmentId1}`
-
-  Further columns may be global parameter IDs, IDs of species or compartments
-  as defined in the SBML model. Only one column is allowed per ID.
-  Values for these condition parameters may be provided either as numeric
-  values, or as IDs defined in the SBML model, the parameter table or both.
-
-  - `${parameterId}`
-
-    The values will override any parameter values specified in the model.
-
-  - `${speciesId}`
-
-    If a species ID is provided, it is interpreted as the initial
-    concentration/amount of that species and will override the initial
-    concentration/amount given in the SBML model or given by a preequilibration
-    condition. If `NaN` is provided for a condition, the result of the
-    preequilibration (or initial concentration/amount from the SBML model, if
-    no preequilibration is defined) is used.
-
-  - `${compartmentId}`
-
-    If a compartment ID is provided, it is interpreted as the initial
-    compartment size.
-
-
-## Measurement table
-
-A tab-separated values files containing all measurements to be used for
-model training or validation.
-
-Expected to have the following named columns in any (but preferably this)
-order:
-
-| observableId | [preequilibrationConditionId] | simulationConditionId | measurement | time |
-|---|---|---|---|---|
-| observableId | [conditionId] | conditionId | NUMERIC | NUMERIC&#124;inf |
-|...|...|...|...|...|
-
-*(wrapped for readability)*
-
-| ... | [observableParameters] | [noiseParameters]
-|---|---|---|
-|... | [parameterId&#124;NUMERIC[;parameterId&#124;NUMERIC][...]] | [parameterId&#124;NUMERIC[;parameterId&#124;NUMERIC][...]]
-|...|...|...|
-
-Additional (non-standard) columns may be added. If the additional plotting
-functionality of PEtab should be used, such columns could be
-
-| ... | [datasetId] | [replicateId]  |
-|---|---|---|
-|... | [datasetId] | [replicateId] |
-|...|...|...|
-
-where `datasetId` is a necessary column to use particular plotting
-functionality, and `replicateId` is optional, which can be used to group
-replicates and plot error bars.
-
-
-### Detailed field description
-
-- `observableId` [STRING, NOT NULL, REFERENCES(observables.observableID)]
-
-  Observable ID as defined in the observables table described below.
-
-- `preequilibrationConditionId` [STRING OR NULL,
-REFERENCES(conditionsTable.conditionID), OPTIONAL]
-
-  The `conditionId` to be used for preequilibration. E.g. for drug
-  treatments, the model would be preequilibrated with the no-drug condition.
-  Empty for no preequilibration.
-
-- `simulationConditionId` [STRING, NOT NULL,
-REFERENCES(conditionsTable.conditionID)]
-
-  `conditionId` as provided in the condition table, specifying the
-condition-specific parameters used for simulation.
-
-- `measurement` [NUMERIC, NOT NULL]
-
-  The measured value in the same units/scale as the model output.
-
-- `time` [NUMERIC OR STRING, NOT NULL]
-
-  Time point of the measurement in the time unit specified in the SBML model,
-numeric value or `inf` (lower-case) for steady-state measurements.
-
-- `observableParameters` [NUMERIC, STRING OR NULL, OPTIONAL]
-
-  This field allows overriding or introducing condition-specific versions of
-  output parameters defined in the observation model. The model can define
-  observables (see below) containing place-holder parameters which can be
-  replaced by condition-specific dynamic or constant parameters. Placeholder
-  parameters must be named `observableParameter${n}_${observableId}`
-  with `n` ranging from 1 (not 0) to the number of placeholders for the given
-  observable, without gaps.
-  If the observable specified under `observableId` contains no placeholders,
-  this field must be empty. If it contains `n > 0` placeholders, this field
-  must hold `n` semicolon-separated numeric values or parameter names. No
-  trailing semicolon must be added.
-
-  Different lines for the same `observableId` may specify different
-  parameters. This may be used to account for condition-specific or
-  batch-specific parameters. This will translate into an extended optimization
-  parameter vector.
-
-  All placeholders defined in the observation model must be overwritten here.
-  If there are no placeholders used, this column may be omitted.
-
-- `noiseParameters` [NUMERIC, STRING OR NULL, OPTIONAL]
-
-  The measurement standard deviation or `NaN` if the corresponding sigma is a
-  model parameter.
-
-  Numeric values or parameter names are allowed. Same rules apply as for
-  `observableParameters` in the previous point.
-
-- `datasetId` [STRING, OPTIONAL]
-
-  The datasetId is used to group certain measurements to datasets. This is
-  typically the case for data points which belong to the same observable,
-  the same simulation and preequilibration condition, the same noise model,
-  the same observable transformation and the same observable parameters.
-  This grouping makes it possible to use the plotting routines which are
-  provided in the PEtab repository.
-
-- `replicateId` [STRING, OPTIONAL]
-
-  The replicateId can be used to discern replicates with the same
-  `datasetId`, which is helpful for plotting e.g. error bars.
-
-
-## Observables table
-
-Parameter estimation requires linking experimental observations to the model
-of interest. Therefore, one needs to define observables (model outputs) and
-respective noise models, which represent the measurement process.
-Since parameter estimation is beyond the scope of SBML, there exists no
-standard way to specify observables (model outputs) and respective noise
-models. Therefore, in PEtab observables are specified in a separate table
-as described in the following. This allows for a clear separation of the
-observation model and the underlying dynamic model, which allows, in  most
-cases, to reuse any existing SBML model without modifications.
-
-The observable table has the following columns:
-
-| observableId | [observableName] | observableFormula | [observableTransformation] | noiseFormula | [noiseDistribution] |
-| --- | --- | --- | --- | --- | --- |
-| STRING | [STRING] | STRING | [lin(default)&#124;log&#124;log10] |  STRING&#124;NUMBER | [laplace&#124;normal] |
-| e.g. | | | | | |
-| relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1  * (protein1 + phospho_protein1 ) | lin | noiseParameter1_relativeTotalProtein1  | normal |
-| ... |  ... | ... | ... | ... |
-
-
-### Detailed field description:
-
-* `observableId` [STRING]
-
-  Any identifier which would be a valid identifier in SBML. This is referenced
-  by the `observableId` column in the measurement table. Must be different from
-  any existing model entity or parameter introduced elsewhere.
-
-* [`observableName`] [STRING, OPTIONAL]
-
-  Name of the observable. Only used for output, not for identification.
-
-* `observableFormula` [STRING]
-
-  Observation function as plain text formula expression.
-  May contain any symbol defined in the SBML model (including model time `time`)
-  or parameter table. In the simplest case just an SBML species ID
-  or an `AssignmentRule` target.
-
-  May introduce new parameters of the form `observableParameter${n}_${observableId}`,
-  which are overridden by `observableParameters` in the measurement table
-  (see description there).
-
-- `observableTransformation` [STRING, OPTIONAL]
-
-  Transformation of the observable and measurement for computing the objective
-  function. Must be one of `lin`, `log` or `log10`. Defaults to `lin`.
-  The measurements and model outputs are both assumed to be provided in linear
-  space.
-
-* `noiseFormula` [NUMERIC|STRING]
-
-  Measurement noise can be specified as a numerical value which will
-  default to a Gaussian noise model if not specified differently in
-  `noiseDistribution` with standard deviation as provided here. In this case,
-  the same standard deviation is assumed for all measurements for the given
-  observable.
-
-  Alternatively, some formula expression can be provided to specify
-  more complex noise models. A noise model which accounts for relative and
-  absolute contributions could, e.g., be defined as
-  ```
-  noiseParameter1_observable_pErk + noiseParameter2_observable_pErk*pErk
-  ```
-  with `noiseParameter1_observable_pErk` denoting the absolute and
-  `noiseParameter2_observable_pErk` the relative contribution for the
-  observable `observable_pErk` corresponding to species `pErk`.
-  IDs of noise parameters
-  that need to have different values for different measurements have the
-  structure: `noiseParameter${indexOfNoiseParameter}_${observableId}`
-  to facilitate automatic recognition. The specific values or parameters are
-  assigned in the `noiseParameters` field of the *measurement table*
-  (see above). Any parameters named `noiseParameter${1..n}_${observableId}`
-  *must* be overwritten in the measurement table.
-
-- `noiseDistribution` [STRING: 'normal' or 'laplace', OPTIONAL]
-
-  Assumed noise distribution for the given measurement. Only normally or
-  Laplace distributed noise is currently allowed (log-normal and
-  log-laplace are obtained by setting `observableTransformation` to `log`).
-  Defaults to `normal`. If `normal`, the specified `noiseParameters` will be
-  interpreted as standard deviation (*not* variance).
-
-
-## Parameter table
-
-A tab-separated value text file containing information on model parameters.
-
-This table *must* include the following parameters:
-- Named parameter overrides introduced in the *conditions table*,
-  unless defined in the SBML model
-- Named parameter overrides introduced in the *measurement table*
-
-and *must not* include:
-- Placeholder parameters (see `observableParameters` and `noiseParameters`
-  above)
-- Parameters included as column names in the *condition table*
-- Parameters that are AssignmentRule targets in the SBML model
-
-it *may* include:
-- Any SBML model parameter that was not excluded above
-- Named parameter overrides introduced in the *conditions table*
-
-One row per parameter with arbitrary order of rows and columns:
-
-| parameterId | [parameterName] | parameterScale | lowerBound  |upperBound | nominalValue | estimate | ... |
-|---|---|---|---|---|---|---|---|
-|STRING|[STRING]|log10&#124;lin&#124;log|NUMERIC|NUMERIC|NUMERIC|0&#124;1| ... |
-|...|...|...|...|...|...|...|...|
-
-*(wrapped for readability)*
-
-| ... | \[initializationPriorType\] | \[initializationPriorParameters\] | \[objectivePriorType\] | \[objectivePriorParameters\] |
-|---|---|---|---|---|
-| ... | *see below* | *see below* | *see below* | *see below* |
-|...|...|...|...|...|
-
-Additional columns may be added.
-
-
-### Detailed field description:
-
-- `parameterId` [STRING, NOT NULL]
-
-  The `parameterId` of the parameter described in this row. This has to match
-  the ID of a parameter specified in the SBML model, a parameter introduced
-  as override in the condition table, or a parameter occurring in the
-  `observableParameters` or `noiseParameters` column of the measurement table
-  (see above).
-
-- `parameterName` [STRING, OPTIONAL]
-
-  Parameter name to be used e.g. for plotting etc. Can be chosen freely. May
-  or may not coincide with the SBML parameter name.
-
-- `parameterScale` [lin|log|log10]
-
-  Scale of the parameter to be used during parameter estimation.
-
-- `lowerBound` [NUMERIC]
-
-  Lower bound of the parameter used for optimization.
-  Optional, if `estimate==0`.
-  Must be provided in linear space, independent of `parameterScale`.
-
-- `upperBound` [NUMERIC]
-
-  Upper bound of the parameter used for optimization.
-  Optional, if `estimate==0`.
-  Must be provided in linear space, independent of `parameterScale`.
-
-- `nominalValue` [NUMERIC]
-
-  Some parameter value to be used if
-  the parameter is not subject to estimation (see `estimate` below).
-  Must be provided in linear space, independent of `parameterScale`.
-  Optional, unless `estimate==0`.
-
-- `estimate` [BOOL 0|1]
-
-  1 or 0, depending on, if the parameter is estimated (1) or set to a fixed
-  value(0) (see `nominalValue`).
-
-- `initializationPriorType` [STRING, OPTIONAL]
-
-  Prior types used for sampling of initial points for optimization. Sampled
-  points are clipped to lie inside the parameter boundaries specified by
-  `lowerBound` and `upperBound`. Defaults to `parameterScaleUniform`.
-
-  Possible prior types are:
-
-    - *uniform*: flat prior on linear parameters
-    - *normal*: Gaussian prior on linear parameters
-    - *laplace*: Laplace prior on linear parameters
-    - *logNormal*: exponentiated Gaussian prior on linear parameters
-    - *logLaplace*: exponentiated Laplace prior on linear parameters
-    - *parameterScaleUniform* (default): Flat prior on original parameter
-      scale (equivalent to "no prior")
-    - *parameterScaleNormal*: Gaussian prior on original parameter scale
-    - *parameterScaleLaplace*: Laplace prior on original parameter scale
-
-- `initializationPriorParameters` [STRING, OPTIONAL]
-
-  Prior parameters used for sampling of initial points for optimization,
-  separated by a semicolon. Defaults to `lowerBound;upperBound`.
-
-  So far, only numeric values will be supported, no parameter names.
-  Parameters for the different prior types are:
-
-    - uniform: lower bound; upper bound
-    - normal: mean; standard deviation (**not** variance)
-    - laplace: location; scale
-    - logNormal: parameters of corresp. normal distribution (see: normal)
-    - logLaplace: parameters of corresp. Laplace distribution (see: laplace)
-    - parameterScaleUniform: lower bound; upper bound
-    - parameterScaleNormal: mean; standard deviation (**not** variance)
-    - parameterScaleLaplace: location; scale
-
-- `objectivePriorType` [STRING, OPTIONAL]
-
-  Prior types used for the objective function during optimization or sampling.
-  For possible values, see `initializationPriorType`.
-
-- `objectivePriorParameters` [STRING, OPTIONAL]
-
-  Prior parameters used for the objective function during optimization.
-  For more detailed documentation, see `initializationPriorParameters`.   
-
-
-## Visualization table
-
-A tab-separated value file containing the specification of the visualization
-routines which come with the PEtab repository. Plots are in general
-collections of different datasets as specified using their `datasetId` (if
-provided) inside the measurement table.
-
-Expected to have the following columns in any (but preferably this)
-order:
-
-| plotId | [plotName] | [plotTypeSimulation] | [plotTypeData] |
-|---|---|---|---|
-| STRING | [STRING] | [LinePlot(default)&#124;BarPlot&#124;ScatterPlot] | [MeanAndSD(default)&#124;MeanAndSEM&#124;replicate;provided] |
-|...|...|...|...|
-
-*(wrapped for readability)*
-
-| ... | [datasetId] | [xValues] | [xOffset] | [xLabel] | [xScale] |
-|---|---|---|---|---|---|
-|... | [datasetId] | [time(default)&#124;parameterOrStateId] | [NUMERIC] | [STRING] | [lin&#124;log&#124;log10&#124;order] |
-|...|...|...|...|...|
-
-
-*(wrapped for readability)*
-
-| ... | [yValues] | [yOffset] | [yLabel] | [yScale] | [legendEntry] |
-|---|---|---|---|---|---|
-|... |  [observableId] | [NUMERIC] | [STRING] | [lin&#124;log&#124;log10] | [STRING] |
-|...|...|...|...|...|...|
-
-
-### Detailed field description:
-
-- `plotId` [STRING, NOT NULL]
-
-  An ID which corresponds to a specific plot. All datasets with the same
-  plotId will be plotted into the same axes object.
-
-- `plotName` [STRING, OPTIONAL]
-
-  A name for the specific plot.
-
-- `plotTypeSimulation` [STRING, OPTIONAL]
-
-  The type of the corresponding plot, can be `LinePlot`, `BarPlot` and `ScatterPlot`. Default is `LinePlot`.
-
-- `plotTypeData` [STRING, OPTIONAL]
-
-  The type how replicates should be handled, can be `MeanAndSD`,
-  `MeanAndSEM`, `replicate` (for plotting all replicates separately), or
-  `provided` (if numeric values for the noise level are provided in the
-  measurement table). Default is `MeanAndSD`.
-
-- `datasetId` [STRING, NOT NULL, REFERENCES(measurementTable.datasetId), OPTIONAL]
-
-  The datasets which should be grouped into one plot.
-
-- `xValues` [STRING, OPTIONAL]
-
-  The independent variable, which will be plotted on the x-axis. Can be
-  `time` (default, for time resolved data), or it can be `parameterOrStateId`
-  for dose-response plots. The corresponding numeric values will be shown on
-  the x-axis.
-
-- `xOffset` [NUMERIC, OPTIONAL]
-
-  Possible data-offsets for the independent variable (default is `0`).
-
-- `xLabel` [STRING, OPTIONAL]
-
-  Label for the x-axis. Defaults to the entry in `xValues`.
-
-- `xScale` [STRING, OPTIONAL]
-
-  Scale of the independent variable, can be `lin`, `log`, `log10` or `order`.
-  The `order` value should be used if values of the independent variable are
-  ordinal. This value can only be used in combination with `LinePlot` value for
-  the `plotTypeSimulation` column. In this case, points on x axis will be
-  placed equidistantly from each other. Default is `lin`.
-
-- `yValues` [observableId, REFERENCES(measurementTable.observableId), OPTIONAL]
-
-  The observable which should be plotted on the y-axis.
-
-- `yOffset` [NUMERIC, OPTIONAL]
-
-  Possible data-offsets for the observable (default is `0`).
-
-- `yLabel` [STRING, OPTIONAL]
-
-  Label for the y-axis. Defaults to the entry in `yValues`.
-
-- `yScale` [STRING, OPTIONAL]
-
-  Scale of the observable, can be `lin`, `log`, or `log10`. Default is `lin`.
-
-- `legendEntry` [STRING, OPTIONAL]
-
-  The name that should be displayed for the corresponding dataset in the
-  legend and which defaults to the value in `datasetId`.
-
-
-### Extensions
-
-Additional columns, such as `Color`, etc. may be specified.
-
-### Examples
-
-Examples of the visualization table can be found in the
-[Benchmark model collection](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/).
-For example, for
-[Chen_MSB2009](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/tree/master/Benchmark-Models/Chen_MSB2009)
-model.
-
-## YAML file for grouping files
-
-To link the SBML model, measurement table, condition table, etc. in an
-unambiguous way, we use a [YAML](https://yaml.org/) file.
-
-This file also allows specifying a PEtab version (as the format is not unlikely
-to change in the future).
-
-Furthermore, this can be used to describe parameter estimation problems
-comprising multiple models (more details below).
-
-The format is described in the schema
-[../petab/petab_schema.yaml](_static/petab_schema.yaml), which allows for
-easy validation.
-
-
-### Parameter estimation problems combining multiple models
-
-Parameter estimation problems can comprise multiple models. For now, PEtab
-allows to specify multiple SBML models with corresponding condition and
-measurement tables, and one joint parameter table. This means that the parameter
-namespace is global. Therefore, parameters with the same ID in different models
-will be considered identical.
diff --git a/doc/documentation_data_format.rst b/doc/documentation_data_format.rst
new file mode 100644
index 00000000..237806dc
--- /dev/null
+++ b/doc/documentation_data_format.rst
@@ -0,0 +1,689 @@
+PEtab data format specification
+===============================
+
+
+Format version: 1
+-----------------
+
+This document explains the PEtab data format.
+
+
+Purpose
+-------
+
+Providing a standardized way for specifying parameter estimation problems in
+systems biology, especially for the case of Ordinary Differential Equation
+(ODE) models.
+
+
+Overview
+---------
+
+The PEtab data format specifies a parameter estimation problem using a number
+of text-based files (`Systems Biology Markup Language (SBML) <http://sbml.org>`_
+and
+`Tab-Separated Values (TSV) <https://www.iana.org/assignments/media-types/text/tab-separated-values>`_),
+i.e.
+
+- An SBML model [SBML]
+
+- A measurement file to fit the model to [TSV]
+
+- A condition file specifying model inputs and condition-specific parameters
+  [TSV]
+
+- An observable file specifying the observation model [TSV]
+
+- A parameter file specifying optimization parameters and related information
+  [TSV]
+
+- (optional) A simulation file, which has the same format as the measurement
+  file, but contains model simulations [TSV]
+
+- (optional) A visualization file, which contains specifications how the data
+  and/or simulations should be plotted by the visualization routines [TSV]
+
+.. image:: gfx/petab_files.png
+   :alt: Files constituting a PEtab problem
+
+The following sections will describe the minimum requirements of those
+components in the core standard, which should provide all information for
+defining the parameter estimation problem.
+
+Extensions of this format (e.g. additional columns in the measurement table)
+are possible and intended. However, while those columns may provide extra
+information for example for plotting, downstream analysis, or for more
+efficient parameter estimation, they should not affect the optimization
+problem as such.
+
+**General remarks**
+
+- All model entities, column names and row names are case-sensitive
+- All identifiers must consist only of upper and lower case letters, digits and
+  underscores, and must not start with a digit.
+- Fields in "[]" are optional and may be left empty.
+
+
+SBML model definition
+---------------------
+
+The model must be specified as valid SBML. There are no further restrictions.
+
+
+Condition table
+---------------
+
+The condition table specifies parameters, or initial values of species and
+compartments for specific simulation conditions (generally corresponding to
+different experimental conditions).
+
+This is specified as a tab-separated value file in the following way:
+
++--------------+------------------+------------------------------------+-----+---------------------------------------+
+| conditionId  | [conditionName]  | parameterOrSpeciesOrCompartmentId1 | ... | parameterOrSpeciesOrCompartmentId${n} |
++==============+==================+====================================+=====+=======================================+
+| STRING       | [STRING]         | NUMERIC\|STRING                    | ... | NUMERIC\|STRING                       |
++--------------+------------------+------------------------------------+-----+---------------------------------------+
+| e.g.         |                  |                                    |     |                                       |
++--------------+------------------+------------------------------------+-----+---------------------------------------+
+| conditionId1 | [conditionName1] | 0.42                               | ... | parameterId                           |
++--------------+------------------+------------------------------------+-----+---------------------------------------+
+| conditionId2 | ...              | ...                                | ... | ...                                   |
++--------------+------------------+------------------------------------+-----+---------------------------------------+
+|...           | ...              | ...                                | ... |...                                    |
++--------------+------------------+------------------------------------+-----+---------------------------------------+
+
+Row- and column-ordering are arbitrary, although specifying ``conditionId``
+first may improve human readability.
+
+Additional columns are *not* allowed.
+
+
+Detailed field description
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- ``conditionId`` [STRING, NOT NULL]
+
+  Unique identifier for the simulation/experimental condition, to be referenced
+  by the measurement table described below.
+
+- ``conditionName`` [STRING, OPTIONAL]
+
+  Condition names are arbitrary strings to describe the given condition.
+  They may be used for reporting or visualization.
+
+- ``${parameterOrSpeciesOrCompartmentId1}``
+
+  Further columns may be global parameter IDs, IDs of species or compartments
+  as defined in the SBML model. Only one column is allowed per ID.
+  Values for these condition parameters may be provided either as numeric
+  values, or as IDs defined in the SBML model, the parameter table or both.
+
+  - ``${parameterId}``
+
+    The values will override any parameter values specified in the model.
+
+  - ``${speciesId}``
+
+    If a species ID is provided, it is interpreted as the initial
+    concentration/amount of that species and will override the initial
+    concentration/amount given in the SBML model or given by a preequilibration
+    condition. If ``NaN`` is provided for a condition, the result of the
+    preequilibration (or initial concentration/amount from the SBML model, if
+    no preequilibration is defined) is used.
+
+  - ``${compartmentId}``
+
+    If a compartment ID is provided, it is interpreted as the initial
+    compartment size.
+
+
+Measurement table
+-----------------
+
+A tab-separated values file containing all measurements to be used for
+model training or validation.
+
+Expected to have the following named columns in any (but preferably this)
+order:
+
++--------------+-------------------------------+-----------------------+-------------+--------------+
+| observableId | [preequilibrationConditionId] | simulationConditionId | measurement | time         |
++==============+===============================+=======================+=============+==============+
+| observableId | [conditionId]                 | conditionId           | NUMERIC     | NUMERIC\|inf |
++--------------+-------------------------------+-----------------------+-------------+--------------+
+| ...          | ...                           | ...                   | ...         | ...          |
++--------------+-------------------------------+-----------------------+-------------+--------------+
+
+*(wrapped for readability)*
+
++-----+----------------------------------------------------+----------------------------------------------------+
+| ... | [observableParameters]                             | [noiseParameters]                                  |
++=====+====================================================+====================================================+
+| ... | [parameterId\|NUMERIC[;parameterId\|NUMERIC][...]] | [parameterId\|NUMERIC[;parameterId\|NUMERIC][...]] |
++-----+----------------------------------------------------+----------------------------------------------------+
+| ... | ...                                                | ...                                                |
++-----+----------------------------------------------------+----------------------------------------------------+
+
+Additional (non-standard) columns may be added. If the additional plotting
+functionality of PEtab should be used, such columns could be
+
++-----+-------------+---------------+
+| ... | [datasetId] | [replicateId] |
++=====+=============+===============+
+| ... | [datasetId] | [replicateId] |
++-----+-------------+---------------+
+| ... | ...         | ...           |
++-----+-------------+---------------+
+
+where ``datasetId`` is a necessary column to use particular plotting
+functionality, and ``replicateId`` is optional, which can be used to group
+replicates and plot error bars.
+
+
+Detailed field description
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- ``observableId`` [STRING, NOT NULL, REFERENCES(observables.observableID)]
+
+  Observable ID as defined in the observables table described below.
+
+- ``preequilibrationConditionId`` [STRING OR NULL, REFERENCES(conditionsTable.conditionID), OPTIONAL]
+
+  The ``conditionId`` to be used for preequilibration. E.g. for drug
+  treatments, the model would be preequilibrated with the no-drug condition.
+  Empty for no preequilibration.
+
+- ``simulationConditionId`` [STRING, NOT NULL, REFERENCES(conditionsTable.conditionID)]
+
+  ``conditionId`` as provided in the condition table, specifying the condition-specific parameters used for simulation.
+
+- ``measurement`` [NUMERIC, NOT NULL]
+
+  The measured value in the same units/scale as the model output.
+
+- ``time`` [NUMERIC OR STRING, NOT NULL]
+
+  Time point of the measurement in the time unit specified in the SBML model, numeric value or ``inf`` (lower-case) for steady-state measurements.
+
+- ``observableParameters`` [NUMERIC, STRING OR NULL, OPTIONAL]
+
+  This field allows overriding or introducing condition-specific versions of
+  output parameters defined in the observation model. The model can define
+  observables (see below) containing place-holder parameters which can be
+  replaced by condition-specific dynamic or constant parameters. Placeholder
+  parameters must be named ``observableParameter${n}_${observableId}``
+  with ``n`` ranging from 1 (not 0) to the number of placeholders for the given
+  observable, without gaps.
+  If the observable specified under ``observableId`` contains no placeholders,
+  this field must be empty. If it contains ``n > 0`` placeholders, this field
+  must hold ``n`` semicolon-separated numeric values or parameter names. No
+  trailing semicolon must be added.
+
+  Different lines for the same ``observableId`` may specify different
+  parameters. This may be used to account for condition-specific or
+  batch-specific parameters. This will translate into an extended optimization
+  parameter vector.
+
+  All placeholders defined in the observation model must be overwritten here.
+  If there are no placeholders used, this column may be omitted.
+
+- ``noiseParameters`` [NUMERIC, STRING OR NULL, OPTIONAL]
+
+  The measurement standard deviation or ``NaN`` if the corresponding sigma is a
+  model parameter.
+
+  Numeric values or parameter names are allowed. Same rules apply as for
+  ``observableParameters`` in the previous point.
+
+- ``datasetId`` [STRING, OPTIONAL]
+
+  The datasetId is used to group certain measurements to datasets. This is
+  typically the case for data points which belong to the same observable,
+  the same simulation and preequilibration condition, the same noise model,
+  the same observable transformation and the same observable parameters.
+  This grouping makes it possible to use the plotting routines which are
+  provided in the PEtab repository.
+
+- ``replicateId`` [STRING, OPTIONAL]
+
+  The replicateId can be used to discern replicates with the same
+  ``datasetId``, which is helpful for plotting e.g. error bars.
+
+
+Observables table
+-----------------
+
+Parameter estimation requires linking experimental observations to the model
+of interest. Therefore, one needs to define observables (model outputs) and
+respective noise models, which represent the measurement process.
+Since parameter estimation is beyond the scope of SBML, there exists no
+standard way to specify observables (model outputs) and respective noise
+models. Therefore, in PEtab observables are specified in a separate table
+as described in the following. This allows for a clear separation of the
+observation model and the underlying dynamic model, which, in most cases,
+allows reusing any existing SBML model without modifications.
+
+The observable table has the following columns:
+
++-----------------------+--------------------------------+-----------------------------------------------------------------------------+
+| observableId          | [observableName]               | observableFormula                                                           |
++=======================+================================+=============================================================================+
+| STRING                | [STRING]                       | STRING                                                                      |
++-----------------------+--------------------------------+-----------------------------------------------------------------------------+
+| e.g.                  |                                |                                                                             |
++-----------------------+--------------------------------+-----------------------------------------------------------------------------+
+| relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1 * (protein1 + phospho_protein1 ) |
++-----------------------+--------------------------------+-----------------------------------------------------------------------------+
+| ...                   | ...                            | ...                                                                         |
++-----------------------+--------------------------------+-----------------------------------------------------------------------------+
+
+*(wrapped for readability)*
+
++-----+----------------------------+---------------------------------------+-----------------------+
+| ... | [observableTransformation] | noiseFormula                          | [noiseDistribution]   |
++=====+============================+=======================================+=======================+
+| ... | [lin(default)\|log\|log10] | STRING\|NUMBER                        | [laplace\|normal]     |
++-----+----------------------------+---------------------------------------+-----------------------+
+| ... | e.g.                       |                                       |                       |
++-----+----------------------------+---------------------------------------+-----------------------+
+| ... | lin                        | noiseParameter1_relativeTotalProtein1 | normal                |
++-----+----------------------------+---------------------------------------+-----------------------+
+| ... | ...                        | ...                                   | ...                   |
++-----+----------------------------+---------------------------------------+-----------------------+
+
+
+Detailed field description
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* ``observableId`` [STRING]
+
+  Any identifier which would be a valid identifier in SBML. This is referenced
+  by the ``observableId`` column in the measurement table. Must be different from
+  any existing model entity or parameter introduced elsewhere.
+
+* [``observableName``] [STRING, OPTIONAL]
+
+  Name of the observable. Only used for output, not for identification.
+
+* ``observableFormula`` [STRING]
+
+  Observation function as plain text formula expression.
+  May contain any symbol defined in the SBML model (including model time ``time``)
+  or parameter table. In the simplest case just an SBML species ID
+  or an ``AssignmentRule`` target.
+
+  May introduce new parameters of the form ``observableParameter${n}_${observableId}``,
+  which are overridden by ``observableParameters`` in the measurement table
+  (see description there).
+
+- ``observableTransformation`` [STRING, OPTIONAL]
+
+  Transformation of the observable and measurement for computing the objective
+  function. Must be one of ``lin``, ``log`` or ``log10``. Defaults to ``lin``.
+  The measurements and model outputs are both assumed to be provided in linear
+  space.
+
+* ``noiseFormula`` [NUMERIC|STRING]
+
+  Measurement noise can be specified as a numerical value which will
+  default to a Gaussian noise model if not specified differently in
+  ``noiseDistribution`` with standard deviation as provided here. In this case,
+  the same standard deviation is assumed for all measurements for the given
+  observable.
+
+  Alternatively, some formula expression can be provided to specify
+  more complex noise models. A noise model which accounts for relative and
+  absolute contributions could, e.g., be defined as::
+
+    noiseParameter1_observable_pErk + noiseParameter2_observable_pErk*pErk
+
+  with ``noiseParameter1_observable_pErk`` denoting the absolute and
+  ``noiseParameter2_observable_pErk`` the relative contribution for the
+  observable ``observable_pErk`` corresponding to species ``pErk``.
+  IDs of noise parameters
+  that need to have different values for different measurements have the
+  structure: ``noiseParameter${indexOfNoiseParameter}_${observableId}``
+  to facilitate automatic recognition. The specific values or parameters are
+  assigned in the ``noiseParameters`` field of the *measurement table*
+  (see above). Any parameters named ``noiseParameter${1..n}_${observableId}``
+  *must* be overwritten in the measurement table.
+
+- ``noiseDistribution`` [STRING: 'normal' or 'laplace', OPTIONAL]
+
+  Assumed noise distribution for the given measurement. Only normally or
+  Laplace distributed noise is currently allowed (log-normal and
+  log-Laplace are obtained by setting ``observableTransformation`` to ``log``, similarly for ``log10``).
+  Defaults to ``normal``. If ``normal``, the specified ``noiseParameters`` will be
+  interpreted as standard deviation (*not* variance). If ``laplace`` is specified, the specified ``noiseParameters`` will be interpreted as the scale, or diversity, parameter.
+
+
+Noise distributions
+~~~~~~~~~~~~~~~~~~~
+
+For ``noiseDistribution``, ``normal`` and ``laplace`` are supported. For ``observableTransformation``, ``lin``, ``log`` and ``log10`` are supported. Denote by :math:`y` the simulation, :math:`m` the measurement, and :math:`\sigma` the standard deviation of a normal, or the scale parameter of a laplace model, as given via the ``noiseFormula`` field. Then we have the following effective noise distributions.
+
+- Normal distribution:
+
+  .. math::
+     \pi(m|y,\sigma) = \frac{1}{\sqrt{2\pi}\sigma}\exp\left(-\frac{(m-y)^2}{2\sigma^2}\right)
+
+- Log-normal distribution (i.e. log(m) is normally distributed):
+
+  .. math::
+     \pi(m|y,\sigma) = \frac{1}{\sqrt{2\pi}\sigma m}\exp\left(-\frac{(\log m - \log y)^2}{2\sigma^2}\right)
+
+- Log10-normal distribution (i.e. log10(m) is normally distributed):
+
+  .. math::
+     \pi(m|y,\sigma) = \frac{1}{\sqrt{2\pi}\sigma m \log(10)}\exp\left(-\frac{(\log_{10} m - \log_{10} y)^2}{2\sigma^2}\right)
+
+- Laplace distribution:
+
+  .. math::
+     \pi(m|y,\sigma) = \frac{1}{2\sigma}\exp\left(-\frac{|m-y|}{\sigma}\right)
+
+- Log-Laplace distribution (i.e. log(m) is Laplace distributed):
+
+  .. math::
+     \pi(m|y,\sigma) = \frac{1}{2\sigma m}\exp\left(-\frac{|\log m - \log y|}{\sigma}\right)
+
+- Log10-Laplace distribution (i.e. log10(m) is Laplace distributed):
+
+  .. math::
+     \pi(m|y,\sigma) = \frac{1}{2\sigma m \log(10)}\exp\left(-\frac{|\log_{10} m - \log_{10} y|}{\sigma}\right)
+
+
+The distributions above are for a single data point. For a collection :math:`D=\{m_i\}_i` of data points and corresponding simulations :math:`Y=\{y_i\}_i` and noise parameters :math:`\Sigma=\{\sigma_i\}_i`, the current specification assumes independence, i.e. the full distribution is
+
+.. math::
+   \pi(D|Y,\Sigma) = \prod_i\pi(m_i|y_i,\sigma_i)
+
+
+Parameter table
+---------------
+
+A tab-separated value text file containing information on model parameters.
+
+This table *must* include the following parameters:
+
+- Named parameter overrides introduced in the *conditions table*,
+  unless defined in the SBML model
+- Named parameter overrides introduced in the *measurement table*
+
+and *must not* include:
+
+- Placeholder parameters (see ``observableParameters`` and ``noiseParameters``
+  above)
+- Parameters included as column names in the *condition table*
+- Parameters that are AssignmentRule targets in the SBML model
+
+it *may* include:
+
+- Any SBML model parameter that was not excluded above
+- Named parameter overrides introduced in the *conditions table*
+
+One row per parameter with arbitrary order of rows and columns:
+
++-------------+-----------------+-------------------------+-------------+------------+--------------+----------+-----+
+| parameterId | [parameterName] | parameterScale          | lowerBound  | upperBound | nominalValue | estimate | ... |
++=============+=================+=========================+=============+============+==============+==========+=====+
+|STRING       | [STRING]        | log10\|lin\|log         | NUMERIC     | NUMERIC    | NUMERIC      | 0\|1     | ... |
++-------------+-----------------+-------------------------+-------------+------------+--------------+----------+-----+
+| ...         | ...             | ...                     | ...         | ...        | ...          | ...      | ... |
++-------------+-----------------+-------------------------+-------------+------------+--------------+----------+-----+
+
+*(wrapped for readability)*
+
++-----+---------------------------+---------------------------------+----------------------+----------------------------+
+| ... | [initializationPriorType] | [initializationPriorParameters] | [objectivePriorType] | [objectivePriorParameters] |
++=====+===========================+=================================+======================+============================+
+| ... | *see below*               | *see below*                     | *see below*          | *see below*                |
++-----+---------------------------+---------------------------------+----------------------+----------------------------+
+| ... | ...                       | ...                             | ...                  | ...                        |
++-----+---------------------------+---------------------------------+----------------------+----------------------------+
+
+Additional columns may be added.
+
+
+Detailed field description
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- ``parameterId`` [STRING, NOT NULL]
+
+  The ``parameterId`` of the parameter described in this row. This has to match
+  the ID of a parameter specified in the SBML model, a parameter introduced
+  as override in the condition table, or a parameter occurring in the
+  ``observableParameters`` or ``noiseParameters`` column of the measurement table
+  (see above).
+
+- ``parameterName`` [STRING, OPTIONAL]
+
+  Parameter name to be used e.g. for plotting etc. Can be chosen freely. May
+  or may not coincide with the SBML parameter name.
+
+- ``parameterScale`` [lin|log|log10]
+
+  Scale of the parameter to be used during parameter estimation.
+
+- ``lowerBound`` [NUMERIC]
+
+  Lower bound of the parameter used for optimization.
+  Optional, if ``estimate==0``.
+  Must be provided in linear space, independent of ``parameterScale``.
+
+- ``upperBound`` [NUMERIC]
+
+  Upper bound of the parameter used for optimization.
+  Optional, if ``estimate==0``.
+  Must be provided in linear space, independent of ``parameterScale``.
+
+- ``nominalValue`` [NUMERIC]
+
+  Some parameter value to be used if
+  the parameter is not subject to estimation (see ``estimate`` below).
+  Must be provided in linear space, independent of ``parameterScale``.
+  Optional, unless ``estimate==0``.
+
+- ``estimate`` [BOOL 0|1]
+
+  1 or 0, depending on whether the parameter is estimated (1) or set to a
+  fixed value (0) (see ``nominalValue``).
+
+- ``initializationPriorType`` [STRING, OPTIONAL]
+
+  Prior types used for sampling of initial points for optimization. Sampled
+  points are clipped to lie inside the parameter boundaries specified by
+  ``lowerBound`` and ``upperBound``. Defaults to ``parameterScaleUniform``.
+
+  Possible prior types are:
+
+    - *uniform*: flat prior on linear parameters
+    - *normal*: Gaussian prior on linear parameters
+    - *laplace*: Laplace prior on linear parameters
+    - *logNormal*: exponentiated Gaussian prior on linear parameters
+    - *logLaplace*: exponentiated Laplace prior on linear parameters
+    - *parameterScaleUniform* (default): Flat prior on original parameter
+      scale (equivalent to "no prior")
+    - *parameterScaleNormal*: Gaussian prior on original parameter scale
+    - *parameterScaleLaplace*: Laplace prior on original parameter scale
+
+- ``initializationPriorParameters`` [STRING, OPTIONAL]
+
+  Prior parameters used for sampling of initial points for optimization,
+  separated by a semicolon. Defaults to ``lowerBound;upperBound``.
+
+  So far, only numeric values will be supported, no parameter names.
+  Parameters for the different prior types are:
+
+    - uniform: lower bound; upper bound
+    - normal: mean; standard deviation (**not** variance)
+    - laplace: location; scale
+    - logNormal: parameters of corresp. normal distribution (see: normal)
+    - logLaplace: parameters of corresp. Laplace distribution (see: laplace)
+    - parameterScaleUniform: lower bound; upper bound
+    - parameterScaleNormal: mean; standard deviation (**not** variance)
+    - parameterScaleLaplace: location; scale
+
+- ``objectivePriorType`` [STRING, OPTIONAL]
+
+  Prior types used for the objective function during optimization or sampling.
+  For possible values, see ``initializationPriorType``.
+
+- ``objectivePriorParameters`` [STRING, OPTIONAL]
+
+  Prior parameters used for the objective function during optimization.
+  For more detailed documentation, see ``initializationPriorParameters``.
+
+
+Visualization table
+-------------------
+
+A tab-separated value file containing the specification of the visualization
+routines which come with the PEtab repository. Plots are in general
+collections of different datasets as specified using their ``datasetId`` (if
+provided) inside the measurement table.
+
+Expected to have the following columns in any (but preferably this)
+order:
+
++--------+------------+-------------------------------------------+------------------------------------------------------+
+| plotId | [plotName] | [plotTypeSimulation]                      | [plotTypeData]                                       |
++========+============+===========================================+======================================================+
+| STRING | [STRING]   | [LinePlot(default)\|BarPlot\|ScatterPlot] | [MeanAndSD(default)\|MeanAndSEM\|replicate;provided] |
++--------+------------+-------------------------------------------+------------------------------------------------------+
+| ...    | ...        | ...                                       | ...                                                  |
++--------+------------+-------------------------------------------+------------------------------------------------------+
+
+*(wrapped for readability)*
+
++-----+-------------+-------------------------------------+-----------+----------+--------------------------+
+| ... | [datasetId] | [xValues]                           | [xOffset] | [xLabel] | [xScale]                 |
++=====+=============+=====================================+===========+==========+==========================+
+| ... | [datasetId] | [time(default)\|parameterOrStateId] | [NUMERIC] | [STRING] | [lin\|log\|log10\|order] |
++-----+-------------+-------------------------------------+-----------+----------+--------------------------+
+| ... | ...         | ...                                 | ...       | ...      | ...                      |
++-----+-------------+-------------------------------------+-----------+----------+--------------------------+
+
+*(wrapped for readability)*
+
++-----+----------------+-----------+----------+-------------------+---------------+
+| ... | [yValues]      | [yOffset] | [yLabel] | [yScale]          | [legendEntry] |
++=====+================+===========+==========+===================+===============+
+| ... | [observableId] | [NUMERIC] | [STRING] | [lin\|log\|log10] | [STRING]      |
++-----+----------------+-----------+----------+-------------------+---------------+
+| ... | ...            | ...       | ...      | ...               | ...           |
++-----+----------------+-----------+----------+-------------------+---------------+
+
+
+Detailed field description
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- ``plotId`` [STRING, NOT NULL]
+
+  An ID which corresponds to a specific plot. All datasets with the same
+  plotId will be plotted into the same axes object.
+
+- ``plotName`` [STRING, OPTIONAL]
+
+  A name for the specific plot.
+
+- ``plotTypeSimulation`` [STRING, OPTIONAL]
+
+  The type of the corresponding plot, can be ``LinePlot``, ``BarPlot`` or ``ScatterPlot``. Default is ``LinePlot``.
+
+- ``plotTypeData`` [STRING, OPTIONAL]
+
+  How replicates should be handled; can be ``MeanAndSD``,
+  ``MeanAndSEM``, ``replicate`` (for plotting all replicates separately), or
+  ``provided`` (if numeric values for the noise level are provided in the
+  measurement table). Default is ``MeanAndSD``.
+
+- ``datasetId`` [STRING, NOT NULL, REFERENCES(measurementTable.datasetId), OPTIONAL]
+
+  The datasets which should be grouped into one plot.
+
+- ``xValues`` [STRING, OPTIONAL]
+
+  The independent variable, which will be plotted on the x-axis. Can be
+  ``time`` (default, for time resolved data), or it can be ``parameterOrStateId``
+  for dose-response plots. The corresponding numeric values will be shown on
+  the x-axis.
+
+- ``xOffset`` [NUMERIC, OPTIONAL]
+
+  Possible data-offsets for the independent variable (default is ``0``).
+
+- ``xLabel`` [STRING, OPTIONAL]
+
+  Label for the x-axis. Defaults to the entry in ``xValues``.
+
+- ``xScale`` [STRING, OPTIONAL]
+
+  Scale of the independent variable, can be ``lin``, ``log``, ``log10`` or ``order``.
+  The ``order`` value should be used if values of the independent variable are
+  ordinal. This value can only be used in combination with ``LinePlot`` value for
+  the ``plotTypeSimulation`` column. In this case, points on x axis will be
+  placed equidistantly from each other. Default is ``lin``.
+
+- ``yValues`` [observableId, REFERENCES(measurementTable.observableId), OPTIONAL]
+
+  The observable which should be plotted on the y-axis.
+
+- ``yOffset`` [NUMERIC, OPTIONAL]
+
+  Possible data-offsets for the observable (default is ``0``).
+
+- ``yLabel`` [STRING, OPTIONAL]
+
+  Label for the y-axis. Defaults to the entry in ``yValues``.
+
+- ``yScale`` [STRING, OPTIONAL]
+
+  Scale of the observable, can be ``lin``, ``log``, or ``log10``. Default is ``lin``.
+
+- ``legendEntry`` [STRING, OPTIONAL]
+
+  The name that should be displayed for the corresponding dataset in the
+  legend and which defaults to the value in ``datasetId``.
+
+
+Extensions
+~~~~~~~~~~
+
+Additional columns, such as ``Color``, etc. may be specified.
+
+
+Examples
+~~~~~~~~
+
+Examples of the visualization table can be found in the
+`Benchmark model collection <https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/>`_, for example in the `Chen_MSB2009 <https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/tree/master/Benchmark-Models/Chen_MSB2009>`_
+model.
+
+
+YAML file for grouping files
+----------------------------
+
+To link the SBML model, measurement table, condition table, etc. in an
+unambiguous way, we use a `YAML <https://yaml.org/>`_ file.
+
+This file also allows specifying a PEtab version (as the format is not unlikely
+to change in the future).
+
+Furthermore, this can be used to describe parameter estimation problems
+comprising multiple models (more details below).
+
+The format is described in the schema
+`../petab/petab_schema.yaml <_static/petab_schema.yaml>`_, which allows for
+easy validation.
+
+
+Parameter estimation problems combining multiple models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Parameter estimation problems can comprise multiple models. For now, PEtab
+allows specifying multiple SBML models with corresponding condition and
+measurement tables, and one joint parameter table. This means that the parameter
+namespace is global. Therefore, parameters with the same ID in different models
+will be considered identical.
diff --git a/doc/index.rst b/doc/index.rst
index cfd82352..c49ad3b2 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -5,7 +5,7 @@
    :maxdepth: 3
    :caption: Data format
 
-   Data format <documentation_data_format.md>
+   Data format <documentation_data_format.rst>
 
 .. toctree::
    :maxdepth: 3

From 68167b60065ba32b4f24d7d0f05515bdf77dbf4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?=
 <31767307+yannikschaelte@users.noreply.github.com>
Date: Thu, 23 Jul 2020 17:05:21 +0200
Subject: [PATCH 14/14] update changelog, version, fix readme (#453)

---
 CHANGELOG.md     | 26 ++++++++++++++++++++++++++
 README.md        |  2 +-
 petab/version.py |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 502b241d..9c363068 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,32 @@
 
 ## 0.1 series
 
+
+### 0.1.8
+
+Library:
+
+* Use ``core.is_empty`` to check for empty values (#434)
+* Move tests to Python 3.8 (#435)
+* Update to libcombine 0.2.6 (#437)
+* Make float parsing from CSV round-trip (#444)
+* Lint: Allow model time in observable formulas (#445)
+* Lint: Detect duplicated observable ids (#446)
+* Fix likelihood calculation with missing values (#451)
+
+Documentation:
+
+* Move format documentation to reStructuredText format (#452)
+* Document all noise distributions and observable scales (#452)
+* Fix documentation for prior distribution (#449)
+
+Visualization:
+
+* Make XValue column non-mandatory (#429)
+* Apply correct condition sorting (#430)
+* Apply correct default x label (#431)
+
+
 ### 0.1.7
 
 Documentation:
diff --git a/README.md b/README.md
index 522f2518..3696f1a7 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@ Legend:
 ## Using PEtab
 
 If you would like to use PEtab yourself, please have a look at 
-[doc/documentation_data_format.md](doc/documentation_data_format.md) or at
+[doc/documentation_data_format.rst](doc/documentation_data_format.rst) or at
 the example models provided in the 
 [benchmark collection](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab).
 
diff --git a/petab/version.py b/petab/version.py
index 5e78dc76..3a17f754 100644
--- a/petab/version.py
+++ b/petab/version.py
@@ -1,2 +1,2 @@
 """PEtab library version"""
-__version__ = '0.1.7'
+__version__ = '0.1.8'