From 0f5b073052162ccc253d10202898fa8799c01896 Mon Sep 17 00:00:00 2001 From: ErikaDudkin <49193148+erikadudki@users.noreply.github.com> Date: Fri, 24 Apr 2020 07:48:50 -0700 Subject: [PATCH 01/14] Visualization column XValue not mandatory anymore (#429) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * xValues default is time or DR if only one timepoint * separate plots by observables, plotIds Co-Authored-By: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com> * Update petab/visualize/helper_functions.py Co-Authored-By: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com> * incorporated pr comments Co-authored-by: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com> --- petab/visualize/helper_functions.py | 66 ++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/petab/visualize/helper_functions.py b/petab/visualize/helper_functions.py index 79a29e96..77d8e728 100644 --- a/petab/visualize/helper_functions.py +++ b/petab/visualize/helper_functions.py @@ -370,7 +370,7 @@ def get_default_vis_specs( dataset_label_column = dataset_id_column # get number of plots and create plotId-lists - plot_id_list = ['plot%s' % str(ind + 1) for ind, inner_list in enumerate( + plot_id_list = [f'plot{ind+1}' for ind, inner_list in enumerate( dataset_id_list) for _ in inner_list] # create dataframe @@ -442,8 +442,21 @@ def get_vis_spec_dependent_columns_dict( yvalues_column = ['']*len(dataset_id_column) # get number of plots and create plotId-lists - plot_id_column = ['plot%s' % str(ind + 1) for ind, inner_list in enumerate( - dataset_id_list) for _ in inner_list] + if group_by == 'observable': + obs_uni = list(np.unique(exp_data[OBSERVABLE_ID])) + # copy of dataset ids, for later replacing with plot ids + plot_id_column = dataset_id_column.copy() + for i_obs in range(0, len(obs_uni)): + # get dataset_ids which include observable name + matching = [s for s in 
dataset_id_column if obs_uni[i_obs] in s] + # replace the dataset ids with plot id with grouping of observables + for m_i in matching: + plot_id_column = [sub.replace(m_i, 'plot%s' % str(i_obs + 1)) + for sub in plot_id_column] + else: + # get number of plots and create plotId-lists + plot_id_column = ['plot%s' % str(ind + 1) for ind, inner_list in + enumerate(dataset_id_list) for _ in inner_list] columns_dict = {PLOT_ID: plot_id_column, DATASET_ID: dataset_id_column, @@ -473,6 +486,23 @@ def expand_vis_spec_settings(vis_spec, columns_dict): column_entries.append( vis_spec[select_conditions].loc[:, column].values[0]) else: + # get unique plotIDs from visspecfile + vis_plotid_u = vis_spec[PLOT_ID].unique() + auto_plotid_u = list(set(columns_dict[PLOT_ID])) + # if number of plotIds does not coincide (autmatically + # generated plotIds according to observable grouping, vs + # plotIds specified in the visu_Spec) + if len(vis_plotid_u) is not len(auto_plotid_u): + # which items are not in visu_plotId: + del_plotid = \ + list(set(columns_dict[PLOT_ID]) - set(vis_plotid_u)) + # replace automatically generated plotIds with 'plot1' from + # visu file + for d_i in del_plotid: + columns_dict[PLOT_ID] = [ + sub.replace(d_i, vis_plotid_u[0]) + for sub in columns_dict[PLOT_ID]] + for plot_id in columns_dict[PLOT_ID]: select_conditions = vis_spec[PLOT_ID] == plot_id column_entries.append( @@ -545,11 +575,13 @@ def create_or_update_vis_spec( vis_spec[PLOT_TYPE_DATA] = plotted_noise # check columns, and add non-mandatory default columns - vis_spec = check_ex_visu_columns(vis_spec) + vis_spec = check_ex_visu_columns(vis_spec, exp_data, exp_conditions) return exp_data, vis_spec -def check_ex_visu_columns(vis_spec: pd.DataFrame) -> pd.DataFrame: +def check_ex_visu_columns(vis_spec: pd.DataFrame, + exp_data: pd.DataFrame, + exp_conditions: pd.DataFrame) -> pd.DataFrame: """ Check the columns in Visu_Spec file, if non-mandotory columns does not exist, create default columns @@ -564,7 
+596,29 @@ def check_ex_visu_columns(vis_spec: pd.DataFrame) -> pd.DataFrame: if PLOT_TYPE_DATA not in vis_spec.columns: vis_spec[PLOT_TYPE_DATA] = MEAN_AND_SD if X_VALUES not in vis_spec.columns: - vis_spec[X_VALUES] = 'time' + # check if time is constant in expdata (if yes, plot dose response) + # otherwise plot time series + uni_time = pd.unique(exp_data[TIME]) + if len(uni_time) > 1: + vis_spec[X_VALUES] = 'time' + elif len(uni_time) == 1: + if np.isin(exp_conditions.columns.values, 'conditionName').any(): + conds = exp_conditions.columns.drop('conditionName') + else: + conds = exp_conditions.columns + # default: first dose-response condition (first from condition + # table) is plotted + # TODO: expand to automatic plotting of all conditions + vis_spec[X_VALUES] = conds[0] + vis_spec[X_LABEL] = conds[0] + warnings.warn( + '\n First dose-response condition is plotted. \n Check which ' + 'condition you want to plot \n and possibly enter it into the ' + 'column *xValues* \n in the visualization table.') + else: + raise NotImplementedError( + 'Strange Error. There is no time defined in the measurement ' + 'table?') if X_OFFSET not in vis_spec.columns: vis_spec[X_OFFSET] = 0 if X_LABEL not in vis_spec.columns: From b050558bfa74e4d43714f8badfa6bea572da7a96 Mon Sep 17 00:00:00 2001 From: LeonardSchmiester Date: Fri, 1 May 2020 10:33:27 +0200 Subject: [PATCH 02/14] Use core.is_empty instead of np.isnan. 
Closes #433 (#434) --- petab/parameter_mapping.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/petab/parameter_mapping.py b/petab/parameter_mapping.py index 6950a56e..91d6b87c 100644 --- a/petab/parameter_mapping.py +++ b/petab/parameter_mapping.py @@ -520,12 +520,12 @@ def merge_preeq_and_sim_pars_condition( par_preeq = condition_map_preeq[par_id] par_sim = condition_map_sim[par_id] if par_preeq != par_sim \ - and not (np.isnan(par_sim) and np.isnan(par_preeq)): + and not (core.is_empty(par_sim) and core.is_empty(par_preeq)): # both identical or both nan is okay - if np.isnan(par_sim): + if core.is_empty(par_sim): # unmapped for simulation condition_map_sim[par_id] = par_preeq - elif np.isnan(par_preeq): + elif core.is_empty(par_preeq): # unmapped for preeq is okay pass else: @@ -540,10 +540,10 @@ def merge_preeq_and_sim_pars_condition( if scale_preeq != scale_sim: # both identical is okay - if np.isnan(par_sim): + if core.is_empty(par_sim): # unmapped for simulation condition_scale_map_sim[par_id] = scale_preeq - elif np.isnan(par_preeq): + elif core.is_empty(par_preeq): # unmapped for preeq is okay pass else: From a8d5927beefaebe33e401b085c6076838d5cfc0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com> Date: Mon, 11 May 2020 10:01:28 +0200 Subject: [PATCH 03/14] Update tests (#435) * use codecov action * tidy up * try multiple python versions * restrict libcombine version * fix version inc --- .github/workflows/ci_tests.yml | 18 +++++++++++++----- setup.py | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 0a5a804d..b253eb7c 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -1,4 +1,4 @@ -name: CI tests +name: CI # trigger on: [push] @@ -8,30 +8,38 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6] + python-version: [3.8] 
steps: - name: Check out repository uses: actions/checkout@v2 + - name: Prepare python ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - - name: Cache pip + + - name: Cache uses: actions/cache@v1 with: path: ~/.cache/pip key: ${{ runner.os }}-${{ hashFiles('**/.ci_pip_reqs.txt') }}-${{ hashFiles('**/setup.py') }} restore-keys: | ${{ runner.os }}- + - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r .ci_pip_reqs.txt pip install -e .[reports,combine] + - name: Run tests run: | - pytest --cov + pytest --cov --cov-report=xml tests python -m flake8 --exclude=build,doc,example,tmp --extend-ignore=F403,F405 + - name: Coverage - run: codecov -t ${{ secrets.CODECOV_TOKEN }} + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./coverage.xml diff --git a/setup.py b/setup.py index 6138785a..04f02764 100644 --- a/setup.py +++ b/setup.py @@ -65,5 +65,5 @@ def absolute_links(txt): python_requires='>=3.6', entry_points=ENTRY_POINTS, extras_require={'reports': ['Jinja2'], - 'combine': ['python-libcombine']}, + 'combine': ['python-libcombine==0.2.3_1']}, ) From fb4534867e60d5b82803bd5c9963744cda2b0eb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com> Date: Mon, 11 May 2020 15:33:13 +0200 Subject: [PATCH 04/14] update libcombine version (#437) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 04f02764..123dbf2d 100644 --- a/setup.py +++ b/setup.py @@ -65,5 +65,5 @@ def absolute_links(txt): python_requires='>=3.6', entry_points=ENTRY_POINTS, extras_require={'reports': ['Jinja2'], - 'combine': ['python-libcombine==0.2.3_1']}, + 'combine': ['python-libcombine>=0.2.6']}, ) From 49cfc71255f6694332579623647c1c0605b82b93 Mon Sep 17 00:00:00 2001 From: ErikaDudkin <49193148+erikadudki@users.noreply.github.com> Date: Tue, 12 May 2020 07:14:37 
-0700 Subject: [PATCH 05/14] add sorting of indices of dataframes for the correct sorting of x-values (#430) * add sorting of indices of dataframes for the correct sorting of x-values * added sorting also for the time case, then conditions is a numpy array and has to be handled differently * Update petab/visualize/plotting_config.py --- petab/visualize/plotting_config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/petab/visualize/plotting_config.py b/petab/visualize/plotting_config.py index fc5e0558..25042b0b 100644 --- a/petab/visualize/plotting_config.py +++ b/petab/visualize/plotting_config.py @@ -97,6 +97,18 @@ def plot_lowlevel(plot_spec: pd.Series, # construct errorbar-plots: noise specified above else: + # sort index for the case that indices of conditions and + # measurements differ if indep_var='time', conditions is a numpy + # array, for indep_var=observable its a Series + if isinstance(conditions, np.ndarray): + conditions.sort() + elif isinstance(conditions, pd.core.series.Series): + conditions.sort_index(inplace=True) + else: + raise ValueError('Strange: conditions object is neither numpy' + ' nor series...') + ms.sort_index(inplace=True) + # sorts according to ascending order of conditions scond, smean, snoise = \ zip(*sorted(zip(conditions, ms['mean'], ms[noise_col]))) p = ax.errorbar( From c34e7a9df599db2a21546351dd2e16361ac59f98 Mon Sep 17 00:00:00 2001 From: Jakob Vanhoefer <38346459+jvanhoefer@users.noreply.github.com> Date: Sat, 30 May 2020 15:49:18 +0200 Subject: [PATCH 06/14] fix broken parameter table Fujita (#440) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com> --- doc/example/example_Fujita/Fujita_parameters_scaling.tsv | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/example/example_Fujita/Fujita_parameters_scaling.tsv b/doc/example/example_Fujita/Fujita_parameters_scaling.tsv index 
180651cd..2254fbf8 100644 --- a/doc/example/example_Fujita/Fujita_parameters_scaling.tsv +++ b/doc/example/example_Fujita/Fujita_parameters_scaling.tsv @@ -18,4 +18,3 @@ reaction_9_k1 reaction_{9,k1} log10 1E-08 100000000 0.028510798479438 1 scaling_pAkt_tot scaling_{pAkt}_tot log10 1E-08 100000000 41.377103160384 1 scaling_pEGFR_tot scaling_{pEGFR}_tot log10 1E-08 100000000 5.64785460492811E-08 1 scaling_pS6_tot scaling_{pS6}_tot log10 1E-08 100000000 78521.9513232784 1 ->>>>>>> develop From dc253fee4314ba4d208420d4b4ea7b596498cb91 Mon Sep 17 00:00:00 2001 From: lcontento Date: Tue, 9 Jun 2020 17:59:27 +0200 Subject: [PATCH 07/14] Make float parsing from CSV round-trip (#444) * Make float parsing from CSV round-trip Change the float parsing behaviour of `pandas.read_csv` to be the same as in Python (e.g., in Python `float(0.999) == 0.999`). * Fix flake8 warnings failing CI --- petab/__init__.py | 30 +++++++++++++++--------------- petab/conditions.py | 3 ++- petab/core.py | 8 +++++--- petab/measurements.py | 3 ++- petab/observables.py | 3 ++- petab/parameters.py | 6 ++++-- petab/sbml.py | 2 +- 7 files changed, 31 insertions(+), 24 deletions(-) diff --git a/petab/__init__.py b/petab/__init__.py index 2f9d1c99..4246e68e 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -9,18 +9,18 @@ ENV_NUM_THREADS = "PETAB_NUM_THREADS" -from .calculate import * # noqa: F403, F401 -from .composite_problem import * # noqa: F403, F401 -from .conditions import * # noqa: F403, F401 -from .core import * # noqa: F403, F401 -from .lint import * # noqa: F403, F401 -from .measurements import * # noqa: F403, F401 -from .observables import * # noqa: F403, F401 -from .parameter_mapping import * # noqa: F403, F401 -from .parameters import * # noqa: F403, F401 -from .problem import * # noqa: F403, F401 -from .sampling import * # noqa: F403, F401 -from .sbml import * # noqa: F403, F401 -from .yaml import * # noqa: F403, F401 -from .version import __version__ # noqa: F401 -from 
.format_version import __format_version__ # noqa: F401 +from .calculate import * # noqa: F403, F401, E402 +from .composite_problem import * # noqa: F403, F401, E402 +from .conditions import * # noqa: F403, F401, E402 +from .core import * # noqa: F403, F401, E402 +from .lint import * # noqa: F403, F401, E402 +from .measurements import * # noqa: F403, F401, E402 +from .observables import * # noqa: F403, F401, E402 +from .parameter_mapping import * # noqa: F403, F401, E402 +from .parameters import * # noqa: F403, F401, E402 +from .problem import * # noqa: F403, F401, E402 +from .sampling import * # noqa: F403, F401, E402 +from .sbml import * # noqa: F403, F401, E402 +from .yaml import * # noqa: F403, F401, E402 +from .version import __version__ # noqa: F401, E402 +from .format_version import __format_version__ # noqa: F401, E402 diff --git a/petab/conditions.py b/petab/conditions.py index 43a15a76..ffaca423 100644 --- a/petab/conditions.py +++ b/petab/conditions.py @@ -23,7 +23,8 @@ def get_condition_df( return condition_file if isinstance(condition_file, str): - condition_file = pd.read_csv(condition_file, sep='\t') + condition_file = pd.read_csv(condition_file, sep='\t', + float_precision='round_trip') lint.assert_no_leading_trailing_whitespace( condition_file.columns.values, "condition") diff --git a/petab/core.py b/petab/core.py index 695fe281..83e7f818 100644 --- a/petab/core.py +++ b/petab/core.py @@ -23,7 +23,8 @@ def get_simulation_df(simulation_file: str) -> pd.DataFrame: Returns: Simulation DataFrame """ - return pd.read_csv(simulation_file, sep="\t", index_col=None) + return pd.read_csv(simulation_file, sep="\t", index_col=None, + float_precision='round_trip') def write_simulation_df(df: pd.DataFrame, filename: str) -> None: @@ -47,7 +48,8 @@ def get_visualization_df(visualization_file: str) -> pd.DataFrame: Visualization DataFrame """ try: - vis_spec = pd.read_csv(visualization_file, sep="\t", index_col=None) + vis_spec = pd.read_csv(visualization_file, 
sep="\t", index_col=None, + float_precision='round_trip') except pd.errors.EmptyDataError: warn("Visualization table is empty. Defaults will be used. " "Refer to the documentation for details.") @@ -370,7 +372,7 @@ def _add_file_metadata(location: str, description: str = ""): ) _add_file_metadata( location=parameter_subset_file, - description=f"PEtab parameter file" + description="PEtab parameter file" ) for problem in yaml_config[PROBLEMS]: diff --git a/petab/measurements.py b/petab/measurements.py index 154c0bbb..8bc0f9a7 100644 --- a/petab/measurements.py +++ b/petab/measurements.py @@ -30,7 +30,8 @@ def get_measurement_df( return measurement_file if isinstance(measurement_file, str): - measurement_file = pd.read_csv(measurement_file, sep='\t') + measurement_file = pd.read_csv(measurement_file, sep='\t', + float_precision='round_trip') lint.assert_no_leading_trailing_whitespace( measurement_file.columns.values, MEASUREMENT) diff --git a/petab/observables.py b/petab/observables.py index 81ab7d47..19975c93 100644 --- a/petab/observables.py +++ b/petab/observables.py @@ -28,7 +28,8 @@ def get_observable_df( return observable_file if isinstance(observable_file, str): - observable_file = pd.read_csv(observable_file, sep='\t') + observable_file = pd.read_csv(observable_file, sep='\t', + float_precision='round_trip') lint.assert_no_leading_trailing_whitespace( observable_file.columns.values, "observable") diff --git a/petab/parameters.py b/petab/parameters.py index f7aaad51..d1213d18 100644 --- a/petab/parameters.py +++ b/petab/parameters.py @@ -33,10 +33,12 @@ def get_parameter_df( parameter_df = parameter_file if isinstance(parameter_file, str): - parameter_df = pd.read_csv(parameter_file, sep='\t') + parameter_df = pd.read_csv(parameter_file, sep='\t', + float_precision='round_trip') if isinstance(parameter_file, list): - parameter_df = pd.concat([pd.read_csv(subset_file, sep='\t') + parameter_df = pd.concat([pd.read_csv(subset_file, sep='\t', + 
float_precision='round_trip') for subset_file in parameter_file]) # Remove identical parameter definitions parameter_df.drop_duplicates(inplace=True, ignore_index=True) diff --git a/petab/sbml.py b/petab/sbml.py index be21ae93..6ded75c0 100644 --- a/petab/sbml.py +++ b/petab/sbml.py @@ -368,7 +368,7 @@ def get_sigmas(sbml_model: libsbml.Model, remove: bool = False) -> dict: remove=remove ) # set correct observable name - sigmas = {re.sub(f'^sigma_', 'observable_', key): value['formula'] + sigmas = {re.sub('^sigma_', 'observable_', key): value['formula'] for key, value in sigmas.items()} return sigmas From 843edd3a2a76f55bc8771de9d9886d9857b6e806 Mon Sep 17 00:00:00 2001 From: lcontento Date: Wed, 17 Jun 2020 17:39:27 +0200 Subject: [PATCH 08/14] Allow model time in observable formulas (#445) --- doc/documentation_data_format.md | 33 ++++++++++++++++---------------- petab/observables.py | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/documentation_data_format.md b/doc/documentation_data_format.md index 576a54e8..9300f868 100644 --- a/doc/documentation_data_format.md +++ b/doc/documentation_data_format.md @@ -79,7 +79,7 @@ This is specified as a tab-separated value file in the following way: |... | ... | ... | ... |...| ...| Row- and column-ordering are arbitrary, although specifying `conditionId` -first may improve human readability. +first may improve human readability. Additional columns are *not* allowed. @@ -102,7 +102,7 @@ Additional columns are *not* allowed. Values for these condition parameters may be provided either as numeric values, or as IDs defined in the SBML model, the parameter table or both. - - `${parameterId}` + - `${parameterId}` The values will override any parameter values specified in the model. @@ -114,7 +114,7 @@ Additional columns are *not* allowed. condition. 
If `NaN` is provided for a condition, the result of the preequilibration (or initial concentration/amount from the SBML model, if no preequilibration is defined) is used. - + - `${compartmentId}` If a compartment ID is provided, it is interpreted as the initial @@ -141,7 +141,7 @@ order: |... | [parameterId|NUMERIC[;parameterId|NUMERIC][...]] | [parameterId|NUMERIC[;parameterId|NUMERIC][...]] |...|...|...| -Additional (non-standard) columns may be added. If the additional plotting +Additional (non-standard) columns may be added. If the additional plotting functionality of PEtab should be used, such columns could be | ... | [datasetId] | [replicateId] | @@ -149,9 +149,9 @@ functionality of PEtab should be used, such columns could be |... | [datasetId] | [replicateId] | |...|...|...| -where `datasetId` is a necessary column to use particular plotting -functionality, and `replicateId` is optional, which can be used to group -replicates and plot error bars. +where `datasetId` is a necessary column to use particular plotting +functionality, and `replicateId` is optional, which can be used to group +replicates and plot error bars. ### Detailed field description @@ -244,7 +244,7 @@ The observable table has the following columns: | observableId | [observableName] | observableFormula | [observableTransformation] | noiseFormula | [noiseDistribution] | | --- | --- | --- | --- | --- | --- | | STRING | [STRING] | STRING | [lin(default)|log|log10] | STRING|NUMBER | [laplace|normal] | -| e.g. | | | | | | +| e.g. | | | | | | | relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1 * (protein1 + phospho_protein1 ) | lin | noiseParameter1_relativeTotalProtein1 | normal | | ... | ... | ... | ... | ... | @@ -264,8 +264,9 @@ The observable table has the following columns: * `observableFormula` [STRING] Observation function as plain text formula expression. - May contain any symbol defined in the SBML model or parameter table. 
In the - simplest case just an SBML species ID or an `AssignmentRule` target. + May contain any symbol defined in the SBML model (including model time `time`) + or parameter table. In the simplest case just an SBML species ID + or an `AssignmentRule` target. May introduce new parameters of the form `observableParameter${n}_${observableId}`, which are overridden by `observableParameters` in the measurement table @@ -306,7 +307,7 @@ The observable table has the following columns: - `noiseDistribution` [STRING: 'normal' or 'laplace', OPTIONAL] Assumed noise distribution for the given measurement. Only normally or - Laplace distributed noise is currently allowed (log-normal and + Laplace distributed noise is currently allowed (log-normal and log-laplace are obtained by setting `observableTransformation` to `log`). Defaults to `normal`. If `normal`, the specified `noiseParameters` will be interpreted as standard deviation (*not* variance). @@ -407,9 +408,9 @@ Additional columns may be added. Prior parameters used for sampling of initial points for optimization, separated by a semicolon. Defaults to `lowerBound;upperBound`. - So far, only numeric values will be supported, no parameter names. + So far, only numeric values will be supported, no parameter names. Parameters for the different prior types are: - + - uniform: lower bound; upper bound - normal: mean; standard deviation (**not** variance) - laplace: location; scale @@ -433,8 +434,8 @@ Additional columns may be added. ## Visualization table A tab-separated value file containing the specification of the visualization -routines which come with the PEtab repository. Plots are in general -collections of different datasets as specified using their `datasetId` (if +routines which come with the PEtab repository. Plots are in general +collections of different datasets as specified using their `datasetId` (if provided) inside the measurement table. 
Expected to have the following columns in any (but preferably this) @@ -489,7 +490,7 @@ order: - `xValues` [STRING, OPTIONAL] - The independent variable, which will be plotted on the x-axis. Can be + The independent variable, which will be plotted on the x-axis. Can be `time` (default, for time resolved data), or it can be `parameterOrStateId` for dose-response plots. The corresponding numeric values will be shown on the x-axis. diff --git a/petab/observables.py b/petab/observables.py index 19975c93..7187397c 100644 --- a/petab/observables.py +++ b/petab/observables.py @@ -81,7 +81,7 @@ def get_output_parameters(observable_df: pd.DataFrame, key=lambda symbol: symbol.name) for free_sym in free_syms: sym = str(free_sym) - if sbml_model.getElementBySId(sym) is None: + if sbml_model.getElementBySId(sym) is None and sym != 'time': output_parameters[sym] = None return list(output_parameters.keys()) From 9cbf54eecaa4243f3f18acc1798dc74856b8f6a1 Mon Sep 17 00:00:00 2001 From: lcontento Date: Wed, 1 Jul 2020 10:36:52 +0200 Subject: [PATCH 09/14] Lint: detect duplicated observable IDs (#446) * Lint: detect duplicated observable IDs * Lint: duplicated IDs: better naming and messages --- petab/lint.py | 37 +++++++++++++++++++++++++++++++++---- tests/test_lint.py | 18 ++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/petab/lint.py b/petab/lint.py index 39effc44..e51f68cc 100644 --- a/petab/lint.py +++ b/petab/lint.py @@ -5,6 +5,7 @@ import numbers import re from typing import Optional, Iterable +from collections import Counter import libsbml import numpy as np @@ -133,6 +134,9 @@ def check_measurement_df(df: pd.DataFrame, if OBSERVABLE_TRANSFORMATION in observable_df: # Check for positivity of measurements in case of # log-transformation + assert_unique_observable_ids(observable_df) + # If the above is not checked, in the following loop + # trafo may become a pandas Series for measurement, obs_id in zip(df[MEASUREMENT], df[OBSERVABLE_ID]): trafo 
= observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION] if measurement <= 0.0 and trafo in [LOG, LOG10]: @@ -202,7 +206,7 @@ def check_parameter_df( assert_parameter_scale_is_valid(df) assert_parameter_bounds_are_numeric(df) assert_parameter_estimate_is_boolean(df) - assert_parameter_id_is_unique(df) + assert_unique_parameter_ids(df) check_parameter_bounds(df) assert_parameter_prior_type_is_valid(df) @@ -238,6 +242,7 @@ def check_observable_df(observable_df: pd.DataFrame) -> None: observable_df[column_name].values, column_name) assert_noise_distributions_valid(observable_df) + assert_unique_observable_ids(observable_df) # Check that formulas are parsable for row in observable_df.itertuples(): @@ -359,7 +364,7 @@ def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None: raise AssertionError(f"Empty {PARAMETER_ID} found.") -def assert_parameter_id_is_unique(parameter_df: pd.DataFrame) -> None: +def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None: """ Check if the parameterId column of the parameter table is unique. @@ -369,9 +374,11 @@ def assert_parameter_id_is_unique(parameter_df: pd.DataFrame) -> None: Raises: AssertionError: in case of problems """ - if len(parameter_df.index) != len(set(parameter_df.index)): + non_unique_ids = get_non_unique(parameter_df.index) + if len(non_unique_ids) > 0: raise AssertionError( - f"{PARAMETER_ID} column in parameter table is not unique.") + f"Non-unique values found in the {PARAMETER_ID} column" + " of the parameter table: " + str(non_unique_ids)) def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None: @@ -621,6 +628,28 @@ def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None: f"table: {distr}.") +def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None: + """ + Check if the observableId column of the observable table is unique. 
+ + Arguments: + observable_df: PEtab observable DataFrame + + Raises: + AssertionError: in case of problems + """ + non_unique_ids = get_non_unique(observable_df.index) + if len(non_unique_ids) > 0: + raise AssertionError( + f"Non-unique values found in the {OBSERVABLE_ID} column" + " of the observable table: " + str(non_unique_ids)) + + +def get_non_unique(values): + counter = Counter(values) + return [value for (value, count) in counter.items() if count > 1] + + def lint_problem(problem: 'petab.Problem') -> bool: """Run PEtab validation on problem diff --git a/tests/test_lint.py b/tests/test_lint.py index 6bb03712..0e2031b2 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -417,3 +417,21 @@ def test_check_parameter_df(): del parameter_df[NOMINAL_VALUE] with pytest.raises(AssertionError): lint.check_parameter_df(df=parameter_df) + + +def test_check_observable_df(): + """Check that we correctly detect errors in observable table""" + + observable_df = pd.DataFrame(data={ + OBSERVABLE_ID: ['obs1', 'obs2'], + OBSERVABLE_FORMULA: ['x1', 'x2'], + NOISE_FORMULA: ['sigma1', 'sigma2'] + }).set_index(OBSERVABLE_ID) + + lint.check_observable_df(observable_df) + + # Check that duplicated observables ids are detected + bad_observable_df = observable_df.copy() + bad_observable_df.index = ['obs1', 'obs1'] + with pytest.raises(AssertionError): + lint.check_observable_df(bad_observable_df) From 4b031e82127dc687895402318a78c08f3ec0209b Mon Sep 17 00:00:00 2001 From: Polina Lakrisenko Date: Thu, 2 Jul 2020 15:34:32 +0200 Subject: [PATCH 10/14] default value for the column x_label in vis_spec (#431) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Yannik Schälte <31767307+yannikschaelte@users.noreply.github.com> Co-authored-by: Daniel Weindl --- petab/visualize/helper_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/petab/visualize/helper_functions.py b/petab/visualize/helper_functions.py index 
77d8e728..be57c4cf 100644 --- a/petab/visualize/helper_functions.py +++ b/petab/visualize/helper_functions.py @@ -623,6 +623,7 @@ def check_ex_visu_columns(vis_spec: pd.DataFrame, vis_spec[X_OFFSET] = 0 if X_LABEL not in vis_spec.columns: vis_spec[X_LABEL] = 'time' + vis_spec.loc[vis_spec[X_VALUES] != 'time', X_LABEL] = 'condition' if X_SCALE not in vis_spec.columns: vis_spec[X_SCALE] = LIN if Y_VALUES not in vis_spec.columns: From a5dd618fad688e0e49fbe53b31a272477718d97d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sun, 19 Jul 2020 21:42:13 +0200 Subject: [PATCH 11/14] Fix calculate_llh and calculate_llh don't try to compare missing columns. Closes #450. --- petab/calculate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/petab/calculate.py b/petab/calculate.py index 6e74b186..e9193bf6 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -82,6 +82,7 @@ def calculate_residuals_for_table( compared_cols = set(MEASUREMENT_DF_COLS) compared_cols -= {MEASUREMENT} compared_cols &= set(measurement_df.columns) + compared_cols &= set(simulation_df.columns) # compute noise formulas for observables noise_formulas = get_symbolic_noise_formulas(observable_df) @@ -290,6 +291,7 @@ def calculate_llh_for_table( compared_cols = set(MEASUREMENT_DF_COLS) compared_cols -= {MEASUREMENT} compared_cols &= set(measurement_df.columns) + compared_cols &= set(simulation_df.columns) # compute noise formulas for observables noise_formulas = get_symbolic_noise_formulas(observable_df) From 12bcd8a87e0f1cb392d9b9228432abfaed1197a1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 21 Jul 2020 10:58:43 +0200 Subject: [PATCH 12/14] Fix documentation for prior distribution (Closes #448) (#449) --- doc/documentation_data_format.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/doc/documentation_data_format.md b/doc/documentation_data_format.md index 9300f868..88edfc4f 100644 --- a/doc/documentation_data_format.md +++ 
b/doc/documentation_data_format.md @@ -334,10 +334,17 @@ it *may* include: One row per parameter with arbitrary order of rows and columns: -| parameterId | [parameterName] | parameterScale | lowerBound |upperBound | nominalValue | estimate | [priorType] | [priorParameters] | -|---|---|---|---|---|---|---|---|---| -|STRING|[STRING]|log10|lin|log|NUMERIC|NUMERIC|NUMERIC|0|1|*see below*|*see below* -|...|...|...|...|...|...|...|...|...| +| parameterId | [parameterName] | parameterScale | lowerBound |upperBound | nominalValue | estimate | ... | +|---|---|---|---|---|---|---|---| +|STRING|[STRING]|log10|lin|log|NUMERIC|NUMERIC|NUMERIC|0|1| ... | +|...|...|...|...|...|...|...|...| + +*(wrapped for readability)* + +| ... | \[initializationPriorType\] | \[initializationPriorParameters\] | \[objectivePriorType\] | \[objectivePriorParameters\] | +|---|---|---|---|---| +| ... | *see below* | *see below* | *see below* | *see below* | +|...|...|...|...|...| Additional columns may be added. From e5786616ede0d7e1296ecdfb18750276643f537b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com> Date: Thu, 23 Jul 2020 13:12:54 +0200 Subject: [PATCH 13/14] Write format specification in rst + add noise model description (#452) * cont * cont * cont * add noise formulas * fix formating * fixup --- doc/documentation_data_format.md | 577 ------------------------- doc/documentation_data_format.rst | 689 ++++++++++++++++++++++++++++++ doc/index.rst | 2 +- 3 files changed, 690 insertions(+), 578 deletions(-) delete mode 100644 doc/documentation_data_format.md create mode 100644 doc/documentation_data_format.rst diff --git a/doc/documentation_data_format.md b/doc/documentation_data_format.md deleted file mode 100644 index 88edfc4f..00000000 --- a/doc/documentation_data_format.md +++ /dev/null @@ -1,577 +0,0 @@ -# PEtab data format specification - - -## Format version: 1 - -This document explains the PEtab data format. 
- - -## Purpose - -Providing a standardized way for specifying parameter estimation problems in -systems biology, especially for the case of Ordinary Differential Equation -(ODE) models. - - -## Overview - -The PEtab data format specifies a parameter estimation problem using a number -of text-based files ([Systems Biology Markup Language (SBML)](http://sbml.org) -and -[Tab-Separated Values (TSV)](https://www.iana.org/assignments/media-types/text/tab-separated-values)), -i.e. - -- An SBML model [SBML] - -- A measurement file to fit the model to [TSV] - -- A condition file specifying model inputs and condition-specific parameters - [TSV] - -- An observable file specifying the observation model [TSV] - -- A parameter file specifying optimization parameters and related information - [TSV] - -- (optional) A simulation file, which has the same format as the measurement - file, but contains model simulations [TSV] - -- (optional) A visualization file, which contains specifications how the data - and/or simulations should be plotted by the visualization routines [TSV] - -![Files constituting a PEtab problem](gfx/petab_files.png) - -The following sections will describe the minimum requirements of those -components in the core standard, which should provide all information for -defining the parameter estimation problem. - -Extensions of this format (e.g. additional columns in the measurement table) -are possible and intended. However, while those columns may provide extra -information for example for plotting, downstream analysis, or for more -efficient parameter estimation, they should not affect the optimization -problem as such. - -**General remarks** -- All model entities, column names and row names are case-sensitive -- All identifiers must consist only of upper and lower case letters, digits and - underscores, and must not start with a digit. -- Fields in "[]" are optional and may be left empty. 
- - -## SBML model definition - -The model must be specified as valid SBML. There are no further restrictions. - -## Condition table - -The condition table specifies parameters, or initial values of species and -compartments for specific simulation conditions (generally corresponding to -different experimental conditions). - -This is specified as a tab-separated value file in the following way: - -| conditionId | [conditionName] | parameterOrSpeciesOrCompartmentId1 | ... | parameterOrSpeciesOrCompartmentId${n} | -|---|---|---|---|---| -| STRING | [STRING] | NUMERIC|STRING | ... | NUMERIC|STRING | -| e.g. | | | | | -| conditionId1 | [conditionName1] | 0.42 | ...| parameterId| -| conditionId2 | ... | ... | ...| ...| -|... | ... | ... | ... |...| ...| - -Row- and column-ordering are arbitrary, although specifying `conditionId` -first may improve human readability. - -Additional columns are *not* allowed. - -### Detailed field description - -- `conditionId` [STRING, NOT NULL] - - Unique identifier for the simulation/experimental condition, to be referenced - by the measurement table described below. - -- `conditionName` [STRING, OPTIONAL] - - Condition names are arbitrary strings to describe the given condition. - They may be used for reporting or visualization. - -- `${parameterOrSpeciesOrCompartmentId1}` - - Further columns may be global parameter IDs, IDs of species or compartments - as defined in the SBML model. Only one column is allowed per ID. - Values for these condition parameters may be provided either as numeric - values, or as IDs defined in the SBML model, the parameter table or both. - - - `${parameterId}` - - The values will override any parameter values specified in the model. - - - `${speciesId}` - - If a species ID is provided, it is interpreted as the initial - concentration/amount of that species and will override the initial - concentration/amount given in the SBML model or given by a preequilibration - condition. 
If `NaN` is provided for a condition, the result of the - preequilibration (or initial concentration/amount from the SBML model, if - no preequilibration is defined) is used. - - - `${compartmentId}` - - If a compartment ID is provided, it is interpreted as the initial - compartment size. - - -## Measurement table - -A tab-separated values files containing all measurements to be used for -model training or validation. - -Expected to have the following named columns in any (but preferably this) -order: - -| observableId | [preequilibrationConditionId] | simulationConditionId | measurement | time | -|---|---|---|---|---| -| observableId | [conditionId] | conditionId | NUMERIC | NUMERIC|inf | -|...|...|...|...|...| - -*(wrapped for readability)* - -| ... | [observableParameters] | [noiseParameters] -|---|---|---| -|... | [parameterId|NUMERIC[;parameterId|NUMERIC][...]] | [parameterId|NUMERIC[;parameterId|NUMERIC][...]] -|...|...|...| - -Additional (non-standard) columns may be added. If the additional plotting -functionality of PEtab should be used, such columns could be - -| ... | [datasetId] | [replicateId] | -|---|---|---| -|... | [datasetId] | [replicateId] | -|...|...|...| - -where `datasetId` is a necessary column to use particular plotting -functionality, and `replicateId` is optional, which can be used to group -replicates and plot error bars. - - -### Detailed field description - -- `observableId` [STRING, NOT NULL, REFERENCES(observables.observableID)] - - Observable ID as defined in the observables table described below. - -- `preequilibrationConditionId` [STRING OR NULL, -REFERENCES(conditionsTable.conditionID), OPTIONAL] - - The `conditionId` to be used for preequilibration. E.g. for drug - treatments, the model would be preequilibrated with the no-drug condition. - Empty for no preequilibration. 
- -- `simulationConditionId` [STRING, NOT NULL, -REFERENCES(conditionsTable.conditionID)] - - `conditionId` as provided in the condition table, specifying the -condition-specific parameters used for simulation. - -- `measurement` [NUMERIC, NOT NULL] - - The measured value in the same units/scale as the model output. - -- `time` [NUMERIC OR STRING, NOT NULL] - - Time point of the measurement in the time unit specified in the SBML model, -numeric value or `inf` (lower-case) for steady-state measurements. - -- `observableParameters` [NUMERIC, STRING OR NULL, OPTIONAL] - - This field allows overriding or introducing condition-specific versions of - output parameters defined in the observation model. The model can define - observables (see below) containing place-holder parameters which can be - replaced by condition-specific dynamic or constant parameters. Placeholder - parameters must be named `observableParameter${n}_${observableId}` - with `n` ranging from 1 (not 0) to the number of placeholders for the given - observable, without gaps. - If the observable specified under `observableId` contains no placeholders, - this field must be empty. If it contains `n > 0` placeholders, this field - must hold `n` semicolon-separated numeric values or parameter names. No - trailing semicolon must be added. - - Different lines for the same `observableId` may specify different - parameters. This may be used to account for condition-specific or - batch-specific parameters. This will translate into an extended optimization - parameter vector. - - All placeholders defined in the observation model must be overwritten here. - If there are no placeholders used, this column may be omitted. - -- `noiseParameters` [NUMERIC, STRING OR NULL, OPTIONAL] - - The measurement standard deviation or `NaN` if the corresponding sigma is a - model parameter. - - Numeric values or parameter names are allowed. Same rules apply as for - `observableParameters` in the previous point. 
- -- `datasetId` [STRING, OPTIONAL] - - The datasetId is used to group certain measurements to datasets. This is - typically the case for data points which belong to the same observable, - the same simulation and preequilibration condition, the same noise model, - the same observable transformation and the same observable parameters. - This grouping makes it possible to use the plotting routines which are - provided in the PEtab repository. - -- `replicateId` [STRING, OPTIONAL] - - The replicateId can be used to discern replicates with the same - `datasetId`, which is helpful for plotting e.g. error bars. - - -## Observables table - -Parameter estimation requires linking experimental observations to the model -of interest. Therefore, one needs to define observables (model outputs) and -respective noise models, which represent the measurement process. -Since parameter estimation is beyond the scope of SBML, there exists no -standard way to specify observables (model outputs) and respective noise -models. Therefore, in PEtab observables are specified in a separate table -as described in the following. This allows for a clear separation of the -observation model and the underlying dynamic model, which allows, in most -cases, to reuse any existing SBML model without modifications. - -The observable table has the following columns: - -| observableId | [observableName] | observableFormula | [observableTransformation] | noiseFormula | [noiseDistribution] | -| --- | --- | --- | --- | --- | --- | -| STRING | [STRING] | STRING | [lin(default)|log|log10] | STRING|NUMBER | [laplace|normal] | -| e.g. | | | | | | -| relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1 * (protein1 + phospho_protein1 ) | lin | noiseParameter1_relativeTotalProtein1 | normal | -| ... | ... | ... | ... | ... | - - -### Detailed field description: - -* `observableId` [STRING] - - Any identifier which would be a valid identifier in SBML. 
This is referenced - by the `observableId` column in the measurement table. Must be different from - any existing model entity or parameter introduced elsewhere. - -* [`observableName`] [STRING, OPTIONAL] - - Name of the observable. Only used for output, not for identification. - -* `observableFormula` [STRING] - - Observation function as plain text formula expression. - May contain any symbol defined in the SBML model (including model time `time`) - or parameter table. In the simplest case just an SBML species ID - or an `AssignmentRule` target. - - May introduce new parameters of the form `observableParameter${n}_${observableId}`, - which are overridden by `observableParameters` in the measurement table - (see description there). - -- `observableTransformation` [STRING, OPTIONAL] - - Transformation of the observable and measurement for computing the objective - function. Must be one of `lin`, `log` or `log10`. Defaults to `lin`. - The measurements and model outputs are both assumed to be provided in linear - space. - -* `noiseFormula` [NUMERIC|STRING] - - Measurement noise can be specified as a numerical value which will - default to a Gaussian noise model if not specified differently in - `noiseDistribution` with standard deviation as provided here. In this case, - the same standard deviation is assumed for all measurements for the given - observable. - - Alternatively, some formula expression can be provided to specify - more complex noise models. A noise model which accounts for relative and - absolute contributions could, e.g., be defined as - ``` - noiseParameter1_observable_pErk + noiseParameter2_observable_pErk*pErk - ``` - with `noiseParameter1_observable_pErk` denoting the absolute and - `noiseParameter2_observable_pErk` the relative contribution for the - observable `observable_pErk` corresponding to species `pErk`. 
- IDs of noise parameters - that need to have different values for different measurements have the - structure: `noiseParameter${indexOfNoiseParameter}_${observableId}` - to facilitate automatic recognition. The specific values or parameters are - assigned in the `noiseParameters` field of the *measurement table* - (see above). Any parameters named `noiseParameter${1..n}_${observableId}` - *must* be overwritten in the measurement table. - -- `noiseDistribution` [STRING: 'normal' or 'laplace', OPTIONAL] - - Assumed noise distribution for the given measurement. Only normally or - Laplace distributed noise is currently allowed (log-normal and - log-laplace are obtained by setting `observableTransformation` to `log`). - Defaults to `normal`. If `normal`, the specified `noiseParameters` will be - interpreted as standard deviation (*not* variance). - - -## Parameter table - -A tab-separated value text file containing information on model parameters. - -This table *must* include the following parameters: -- Named parameter overrides introduced in the *conditions table*, - unless defined in the SBML model -- Named parameter overrides introduced in the *measurement table* - -and *must not* include: -- Placeholder parameters (see `observableParameters` and `noiseParameters` - above) -- Parameters included as column names in the *condition table* -- Parameters that are AssignmentRule targets in the SBML model - -it *may* include: -- Any SBML model parameter that was not excluded above -- Named parameter overrides introduced in the *conditions table* - -One row per parameter with arbitrary order of rows and columns: - -| parameterId | [parameterName] | parameterScale | lowerBound |upperBound | nominalValue | estimate | ... | -|---|---|---|---|---|---|---|---| -|STRING|[STRING]|log10|lin|log|NUMERIC|NUMERIC|NUMERIC|0|1| ... | -|...|...|...|...|...|...|...|...| - -*(wrapped for readability)* - -| ... 
| \[initializationPriorType\] | \[initializationPriorParameters\] | \[objectivePriorType\] | \[objectivePriorParameters\] | -|---|---|---|---|---| -| ... | *see below* | *see below* | *see below* | *see below* | -|...|...|...|...|...| - -Additional columns may be added. - - -### Detailed field description: - -- `parameterId` [STRING, NOT NULL] - - The `parameterId` of the parameter described in this row. This has to match - the ID of a parameter specified in the SBML model, a parameter introduced - as override in the condition table, or a parameter occurring in the - `observableParameters` or `noiseParameters` column of the measurement table - (see above). - -- `parameterName` [STRING, OPTIONAL] - - Parameter name to be used e.g. for plotting etc. Can be chosen freely. May - or may not coincide with the SBML parameter name. - -- `parameterScale` [lin|log|log10] - - Scale of the parameter to be used during parameter estimation. - -- `lowerBound` [NUMERIC] - - Lower bound of the parameter used for optimization. - Optional, if `estimate==0`. - Must be provided in linear space, independent of `parameterScale`. - -- `upperBound` [NUMERIC] - - Upper bound of the parameter used for optimization. - Optional, if `estimate==0`. - Must be provided in linear space, independent of `parameterScale`. - -- `nominalValue` [NUMERIC] - - Some parameter value to be used if - the parameter is not subject to estimation (see `estimate` below). - Must be provided in linear space, independent of `parameterScale`. - Optional, unless `estimate==0`. - -- `estimate` [BOOL 0|1] - - 1 or 0, depending on, if the parameter is estimated (1) or set to a fixed - value(0) (see `nominalValue`). - -- `initializationPriorType` [STRING, OPTIONAL] - - Prior types used for sampling of initial points for optimization. Sampled - points are clipped to lie inside the parameter boundaries specified by - `lowerBound` and `upperBound`. Defaults to `parameterScaleUniform`. 
- - Possible prior types are: - - - *uniform*: flat prior on linear parameters - - *normal*: Gaussian prior on linear parameters - - *laplace*: Laplace prior on linear parameters - - *logNormal*: exponentiated Gaussian prior on linear parameters - - *logLaplace*: exponentiated Laplace prior on linear parameters - - *parameterScaleUniform* (default): Flat prior on original parameter - scale (equivalent to "no prior") - - *parameterScaleNormal*: Gaussian prior on original parameter scale - - *parameterScaleLaplace*: Laplace prior on original parameter scale - -- `initializationPriorParameters` [STRING, OPTIONAL] - - Prior parameters used for sampling of initial points for optimization, - separated by a semicolon. Defaults to `lowerBound;upperBound`. - - So far, only numeric values will be supported, no parameter names. - Parameters for the different prior types are: - - - uniform: lower bound; upper bound - - normal: mean; standard deviation (**not** variance) - - laplace: location; scale - - logNormal: parameters of corresp. normal distribution (see: normal) - - logLaplace: parameters of corresp. Laplace distribution (see: laplace) - - parameterScaleUniform: lower bound; upper bound - - parameterScaleNormal: mean; standard deviation (**not** variance) - - parameterScaleLaplace: location; scale - -- `objectivePriorType` [STRING, OPTIONAL] - - Prior types used for the objective function during optimization or sampling. - For possible values, see `initializationPriorType`. - -- `objectivePriorParameters` [STRING, OPTIONAL] - - Prior parameters used for the objective function during optimization. - For more detailed documentation, see `initializationPriorParameters`. - - -## Visualization table - -A tab-separated value file containing the specification of the visualization -routines which come with the PEtab repository. Plots are in general -collections of different datasets as specified using their `datasetId` (if -provided) inside the measurement table. 
- -Expected to have the following columns in any (but preferably this) -order: - -| plotId | [plotName] | [plotTypeSimulation] | [plotTypeData] | -|---|---|---|---| -| STRING | [STRING] | [LinePlot(default)|BarPlot|ScatterPlot] | [MeanAndSD(default)|MeanAndSEM|replicate;provided] | -|...|...|...|...| - -*(wrapped for readability)* - -| ... | [datasetId] | [xValues] | [xOffset] | [xLabel] | [xScale] | -|---|---|---|---|---|---| -|... | [datasetId] | [time(default)|parameterOrStateId] | [NUMERIC] | [STRING] | [lin|log|log10|order] | -|...|...|...|...|...| - - -*(wrapped for readability)* - -| ... | [yValues] | [yOffset] | [yLabel] | [yScale] | [legendEntry] | -|---|---|---|---|---|---| -|... | [observableId] | [NUMERIC] | [STRING] | [lin|log|log10] | [STRING] | -|...|...|...|...|...|...| - - -### Detailed field description: - -- `plotId` [STRING, NOT NULL] - - An ID which corresponds to a specific plot. All datasets with the same - plotId will be plotted into the same axes object. - -- `plotName` [STRING, OPTIONAL] - - A name for the specific plot. - -- `plotTypeSimulation` [STRING, OPTIONAL] - - The type of the corresponding plot, can be `LinePlot`, `BarPlot` and `ScatterPlot`. Default is `LinePlot`. - -- `plotTypeData` [STRING, OPTIONAL] - - The type how replicates should be handled, can be `MeanAndSD`, - `MeanAndSEM`, `replicate` (for plotting all replicates separately), or - `provided` (if numeric values for the noise level are provided in the - measurement table). Default is `MeanAndSD`. - -- `datasetId` [STRING, NOT NULL, REFERENCES(measurementTable.datasetId), OPTIONAL] - - The datasets which should be grouped into one plot. - -- `xValues` [STRING, OPTIONAL] - - The independent variable, which will be plotted on the x-axis. Can be - `time` (default, for time resolved data), or it can be `parameterOrStateId` - for dose-response plots. The corresponding numeric values will be shown on - the x-axis. 
- -- `xOffset` [NUMERIC, OPTIONAL] - - Possible data-offsets for the independent variable (default is `0`). - -- `xLabel` [STRING, OPTIONAL] - - Label for the x-axis. Defaults to the entry in `xValues`. - -- `xScale` [STRING, OPTIONAL] - - Scale of the independent variable, can be `lin`, `log`, `log10` or `order`. - The `order` value should be used if values of the independent variable are - ordinal. This value can only be used in combination with `LinePlot` value for - the `plotTypeSimulation` column. In this case, points on x axis will be - placed equidistantly from each other. Default is `lin`. - -- `yValues` [observableId, REFERENCES(measurementTable.observableId), OPTIONAL] - - The observable which should be plotted on the y-axis. - -- `yOffset` [NUMERIC, OPTIONAL] - - Possible data-offsets for the observable (default is `0`). - -- `yLabel` [STRING, OPTIONAL] - - Label for the y-axis. Defaults to the entry in `yValues`. - -- `yScale` [STRING, OPTIONAL] - - Scale of the observable, can be `lin`, `log`, or `log10`. Default is `lin`. - -- `legendEntry` [STRING, OPTIONAL] - - The name that should be displayed for the corresponding dataset in the - legend and which defaults to the value in `datasetId`. - - -### Extensions - -Additional columns, such as `Color`, etc. may be specified. - -### Examples - -Examples of the visualization table can be found in the -[Benchmark model collection](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/). -For example, for -[Chen_MSB2009](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/tree/master/Benchmark-Models/Chen_MSB2009) -model. - -## YAML file for grouping files - -To link the SBML model, measurement table, condition table, etc. in an -unambiguous way, we use a [YAML](https://yaml.org/) file. - -This file also allows specifying a PEtab version (as the format is not unlikely -to change in the future). 
- -Furthermore, this can be used to describe parameter estimation problems -comprising multiple models (more details below). - -The format is described in the schema -[../petab/petab_schema.yaml](_static/petab_schema.yaml), which allows for -easy validation. - - -### Parameter estimation problems combining multiple models - -Parameter estimation problems can comprise multiple models. For now, PEtab -allows to specify multiple SBML models with corresponding condition and -measurement tables, and one joint parameter table. This means that the parameter -namespace is global. Therefore, parameters with the same ID in different models -will be considered identical. diff --git a/doc/documentation_data_format.rst b/doc/documentation_data_format.rst new file mode 100644 index 00000000..237806dc --- /dev/null +++ b/doc/documentation_data_format.rst @@ -0,0 +1,689 @@ +PEtab data format specification +=============================== + + +Format version: 1 +----------------- + +This document explains the PEtab data format. + + +Purpose +------- + +Providing a standardized way for specifying parameter estimation problems in +systems biology, especially for the case of Ordinary Differential Equation +(ODE) models. + + +Overview +--------- + +The PEtab data format specifies a parameter estimation problem using a number +of text-based files (`Systems Biology Markup Language (SBML) <http://sbml.org>`_ +and +`Tab-Separated Values (TSV) <https://www.iana.org/assignments/media-types/text/tab-separated-values>`_), +i.e.
+ +- An SBML model [SBML] + +- A measurement file to fit the model to [TSV] + +- A condition file specifying model inputs and condition-specific parameters + [TSV] + +- An observable file specifying the observation model [TSV] + +- A parameter file specifying optimization parameters and related information + [TSV] + +- (optional) A simulation file, which has the same format as the measurement + file, but contains model simulations [TSV] + +- (optional) A visualization file, which contains specifications how the data + and/or simulations should be plotted by the visualization routines [TSV] + +.. image:: gfx/petab_files.png + :alt: Files constituting a PEtab problem + +The following sections will describe the minimum requirements of those +components in the core standard, which should provide all information for +defining the parameter estimation problem. + +Extensions of this format (e.g. additional columns in the measurement table) +are possible and intended. However, while those columns may provide extra +information for example for plotting, downstream analysis, or for more +efficient parameter estimation, they should not affect the optimization +problem as such. + +**General remarks** + +- All model entities, column names and row names are case-sensitive +- All identifiers must consist only of upper and lower case letters, digits and + underscores, and must not start with a digit. +- Fields in "[]" are optional and may be left empty. + + +SBML model definition +--------------------- + +The model must be specified as valid SBML. There are no further restrictions. + + +Condition table +--------------- + +The condition table specifies parameters, or initial values of species and +compartments for specific simulation conditions (generally corresponding to +different experimental conditions). 
+ +This is specified as a tab-separated value file in the following way: + ++--------------+------------------+------------------------------------+-----+---------------------------------------+ +| conditionId | [conditionName] | parameterOrSpeciesOrCompartmentId1 | ... | parameterOrSpeciesOrCompartmentId${n} | ++==============+==================+====================================+=====+=======================================+ +| STRING | [STRING] | NUMERIC\|STRING | ... | NUMERIC\|STRING | ++--------------+------------------+------------------------------------+-----+---------------------------------------+ +| e.g. | | | | | ++--------------+------------------+------------------------------------+-----+---------------------------------------+ +| conditionId1 | [conditionName1] | 0.42 | ... | parameterId | ++--------------+------------------+------------------------------------+-----+---------------------------------------+ +| conditionId2 | ... | ... | ... | ... | ++--------------+------------------+------------------------------------+-----+---------------------------------------+ +|... | ... | ... | ... |... | ++--------------+------------------+------------------------------------+-----+---------------------------------------+ + +Row- and column-ordering are arbitrary, although specifying ``conditionId`` +first may improve human readability. + +Additional columns are *not* allowed. + + +Detailed field description +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- ``conditionId`` [STRING, NOT NULL] + + Unique identifier for the simulation/experimental condition, to be referenced + by the measurement table described below. + +- ``conditionName`` [STRING, OPTIONAL] + + Condition names are arbitrary strings to describe the given condition. + They may be used for reporting or visualization. + +- ``${parameterOrSpeciesOrCompartmentId1}`` + + Further columns may be global parameter IDs, IDs of species or compartments + as defined in the SBML model. Only one column is allowed per ID. 
+ Values for these condition parameters may be provided either as numeric + values, or as IDs defined in the SBML model, the parameter table or both. + + - ``${parameterId}`` + + The values will override any parameter values specified in the model. + + - ``${speciesId}`` + + If a species ID is provided, it is interpreted as the initial + concentration/amount of that species and will override the initial + concentration/amount given in the SBML model or given by a preequilibration + condition. If ``NaN`` is provided for a condition, the result of the + preequilibration (or initial concentration/amount from the SBML model, if + no preequilibration is defined) is used. + + - ``${compartmentId}`` + + If a compartment ID is provided, it is interpreted as the initial + compartment size. + + +Measurement table +----------------- + +A tab-separated values file containing all measurements to be used for +model training or validation. + +Expected to have the following named columns in any (but preferably this) +order: + ++--------------+-------------------------------+-----------------------+-------------+--------------+ +| observableId | [preequilibrationConditionId] | simulationConditionId | measurement | time | ++==============+===============================+=======================+=============+==============+ +| observableId | [conditionId] | conditionId | NUMERIC | NUMERIC\|inf | ++--------------+-------------------------------+-----------------------+-------------+--------------+ +| ... | ... | ... | ... | ... | ++--------------+-------------------------------+-----------------------+-------------+--------------+ + +*(wrapped for readability)* + ++-----+----------------------------------------------------+----------------------------------------------------+ +| ... | [observableParameters] | [noiseParameters] | ++=====+====================================================+====================================================+ +| ...
| [parameterId\|NUMERIC[;parameterId\|NUMERIC][...]] | [parameterId\|NUMERIC[;parameterId\|NUMERIC][...]] | ++-----+----------------------------------------------------+----------------------------------------------------+ +| ... | ... | ... | ++-----+----------------------------------------------------+----------------------------------------------------+ + +Additional (non-standard) columns may be added. If the additional plotting +functionality of PEtab should be used, such columns could be + ++-----+-------------+---------------+ +| ... | [datasetId] | [replicateId] | ++=====+=============+===============+ +| ... | [datasetId] | [replicateId] | ++-----+-------------+---------------+ +| ... | ... | ... | ++-----+-------------+---------------+ + +where ``datasetId`` is a necessary column to use particular plotting +functionality, and ``replicateId`` is optional, which can be used to group +replicates and plot error bars. + + +Detailed field description +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- ``observableId`` [STRING, NOT NULL, REFERENCES(observables.observableID)] + + Observable ID as defined in the observables table described below. + +- ``preequilibrationConditionId`` [STRING OR NULL, REFERENCES(conditionsTable.conditionID), OPTIONAL] + + The ``conditionId`` to be used for preequilibration. E.g. for drug + treatments, the model would be preequilibrated with the no-drug condition. + Empty for no preequilibration. + +- ``simulationConditionId`` [STRING, NOT NULL, REFERENCES(conditionsTable.conditionID)] + + ``conditionId`` as provided in the condition table, specifying the condition-specific parameters used for simulation. + +- ``measurement`` [NUMERIC, NOT NULL] + + The measured value in the same units/scale as the model output. + +- ``time`` [NUMERIC OR STRING, NOT NULL] + + Time point of the measurement in the time unit specified in the SBML model, numeric value or ``inf`` (lower-case) for steady-state measurements. 
+ +- ``observableParameters`` [NUMERIC, STRING OR NULL, OPTIONAL] + + This field allows overriding or introducing condition-specific versions of + output parameters defined in the observation model. The model can define + observables (see below) containing placeholder parameters which can be + replaced by condition-specific dynamic or constant parameters. Placeholder + parameters must be named ``observableParameter${n}_${observableId}`` + with ``n`` ranging from 1 (not 0) to the number of placeholders for the given + observable, without gaps. + If the observable specified under ``observableId`` contains no placeholders, + this field must be empty. If it contains ``n > 0`` placeholders, this field + must hold ``n`` semicolon-separated numeric values or parameter names. A + trailing semicolon must not be added. + + Different lines for the same ``observableId`` may specify different + parameters. This may be used to account for condition-specific or + batch-specific parameters. This will translate into an extended optimization + parameter vector. + + All placeholders defined in the observation model must be overwritten here. + If there are no placeholders used, this column may be omitted. + +- ``noiseParameters`` [NUMERIC, STRING OR NULL, OPTIONAL] + + The measurement standard deviation or ``NaN`` if the corresponding sigma is a + model parameter. + + Numeric values or parameter names are allowed. Same rules apply as for + ``observableParameters`` in the previous point. + +- ``datasetId`` [STRING, OPTIONAL] + + The datasetId is used to group certain measurements into datasets. This is + typically the case for data points which belong to the same observable, + the same simulation and preequilibration condition, the same noise model, + the same observable transformation and the same observable parameters. + This grouping makes it possible to use the plotting routines which are + provided in the PEtab repository.
+ +- ``replicateId`` [STRING, OPTIONAL] + + The replicateId can be used to discern replicates with the same + ``datasetId``, which is helpful for plotting e.g. error bars. + + +Observables table +----------------- + +Parameter estimation requires linking experimental observations to the model +of interest. Therefore, one needs to define observables (model outputs) and +respective noise models, which represent the measurement process. +Since parameter estimation is beyond the scope of SBML, there exists no +standard way to specify observables (model outputs) and respective noise +models. Therefore, in PEtab observables are specified in a separate table +as described in the following. This allows for a clear separation of the +observation model and the underlying dynamic model, which allows, in most +cases, to reuse any existing SBML model without modifications. + +The observable table has the following columns: + ++-----------------------+--------------------------------+-----------------------------------------------------------------------------+ +| observableId | [observableName] | observableFormula | ++=======================+================================+=============================================================================+ +| STRING | [STRING] | STRING | ++-----------------------+--------------------------------+-----------------------------------------------------------------------------+ +| e.g. | | | ++-----------------------+--------------------------------+-----------------------------------------------------------------------------+ +| relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1 * (protein1 + phospho_protein1 ) | ++-----------------------+--------------------------------+-----------------------------------------------------------------------------+ +| ... | ... | ... 
| ++-----------------------+--------------------------------+-----------------------------------------------------------------------------+ + +*(wrapped for readability)* + ++-----+----------------------------+---------------------------------------+-----------------------+ +| ... | [observableTransformation] | noiseFormula | [noiseDistribution] | ++=====+============================+=======================================+=======================+ +| ... | [lin(default)\|log\|log10] | STRING\|NUMBER | [laplace\|normal] | ++-----+----------------------------+---------------------------------------+-----------------------+ +| ... | e.g. | | | ++-----+----------------------------+---------------------------------------+-----------------------+ +| ... | lin | noiseParameter1_relativeTotalProtein1 | normal | ++-----+----------------------------+---------------------------------------+-----------------------+ +| ... | ... | ... | ... | ++-----+----------------------------+---------------------------------------+-----------------------+ + + +Detailed field description +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ``observableId`` [STRING] + + Any identifier which would be a valid identifier in SBML. This is referenced + by the ``observableId`` column in the measurement table. Must be different from + any existing model entity or parameter introduced elsewhere. + +* [``observableName``] [STRING, OPTIONAL] + + Name of the observable. Only used for output, not for identification. + +* ``observableFormula`` [STRING] + + Observation function as plain text formula expression. + May contain any symbol defined in the SBML model (including model time ``time``) + or parameter table. In the simplest case just an SBML species ID + or an ``AssignmentRule`` target. + + May introduce new parameters of the form ``observableParameter${n}_${observableId}``, + which are overridden by ``observableParameters`` in the measurement table + (see description there). 
+ +- ``observableTransformation`` [STRING, OPTIONAL] + + Transformation of the observable and measurement for computing the objective + function. Must be one of ``lin``, ``log`` or ``log10``. Defaults to ``lin``. + The measurements and model outputs are both assumed to be provided in linear + space. + +* ``noiseFormula`` [NUMERIC|STRING] + + Measurement noise can be specified as a numerical value which will + default to a Gaussian noise model if not specified differently in + ``noiseDistribution`` with standard deviation as provided here. In this case, + the same standard deviation is assumed for all measurements for the given + observable. + + Alternatively, some formula expression can be provided to specify + more complex noise models. A noise model which accounts for relative and + absolute contributions could, e.g., be defined as:: + + noiseParameter1_observable_pErk + noiseParameter2_observable_pErk*pErk + + with ``noiseParameter1_observable_pErk`` denoting the absolute and + ``noiseParameter2_observable_pErk`` the relative contribution for the + observable ``observable_pErk`` corresponding to species ``pErk``. + IDs of noise parameters + that need to have different values for different measurements have the + structure: ``noiseParameter${indexOfNoiseParameter}_${observableId}`` + to facilitate automatic recognition. The specific values or parameters are + assigned in the ``noiseParameters`` field of the *measurement table* + (see above). Any parameters named ``noiseParameter${1..n}_${observableId}`` + *must* be overwritten in the measurement table. + +- ``noiseDistribution`` [STRING: 'normal' or 'laplace', OPTIONAL] + + Assumed noise distribution for the given measurement. Only normally or + Laplace distributed noise is currently allowed (log-normal and + log-Laplace are obtained by setting ``observableTransformation`` to ``log``, similarly for ``log10``). + Defaults to ``normal``. 
If ``normal``, the specified ``noiseParameters`` will be + interpreted as standard deviation (*not* variance). If ``laplace`` is specified, the specified ``noiseParameters`` will be interpreted as the scale, or diversity, parameter. + + +Noise distributions +~~~~~~~~~~~~~~~~~~~ + +For ``noiseDistribution``, ``normal`` and ``laplace`` are supported. For ``observableTransformation``, ``lin``, ``log`` and ``log10`` are supported. Denote by :math:`y` the simulation, :math:`m` the measurement, and :math:`\sigma` the standard deviation of a normal, or the scale parameter of a Laplace model, as given via the ``noiseFormula`` field. Then we have the following effective noise distributions. + +- Normal distribution: + + .. math:: + \pi(m|y,\sigma) = \frac{1}{\sqrt{2\pi}\sigma}\exp\left(-\frac{(m-y)^2}{2\sigma^2}\right) + +- Log-normal distribution (i.e. log(m) is normally distributed): + + .. math:: + \pi(m|y,\sigma) = \frac{1}{\sqrt{2\pi}\sigma m}\exp\left(-\frac{(\log m - \log y)^2}{2\sigma^2}\right) + +- Log10-normal distribution (i.e. log10(m) is normally distributed): + + .. math:: + \pi(m|y,\sigma) = \frac{1}{\sqrt{2\pi}\sigma m \log(10)}\exp\left(-\frac{(\log_{10} m - \log_{10} y)^2}{2\sigma^2}\right) + +- Laplace distribution: + + .. math:: + \pi(m|y,\sigma) = \frac{1}{2\sigma}\exp\left(-\frac{|m-y|}{\sigma}\right) + +- Log-Laplace distribution (i.e. log(m) is Laplace distributed): + + .. math:: + \pi(m|y,\sigma) = \frac{1}{2\sigma m}\exp\left(-\frac{|\log m - \log y|}{\sigma}\right) + +- Log10-Laplace distribution (i.e. log10(m) is Laplace distributed): + + .. math:: + \pi(m|y,\sigma) = \frac{1}{2\sigma m \log(10)}\exp\left(-\frac{|\log_{10} m - \log_{10} y|}{\sigma}\right) + + +The distributions above are for a single data point. For a collection :math:`D=\{m_i\}_i` of data points and corresponding simulations :math:`Y=\{y_i\}_i` and noise parameters :math:`\Sigma=\{\sigma_i\}_i`, the current specification assumes independence, i.e. the full distribution is + +..
math:: + \pi(D|Y,\Sigma) = \prod_i\pi(m_i|y_i,\sigma_i) + + +Parameter table +--------------- + +A tab-separated value text file containing information on model parameters. + +This table *must* include the following parameters: + +- Named parameter overrides introduced in the *conditions table*, + unless defined in the SBML model +- Named parameter overrides introduced in the *measurement table* + +and *must not* include: + +- Placeholder parameters (see ``observableParameters`` and ``noiseParameters`` + above) +- Parameters included as column names in the *condition table* +- Parameters that are AssignmentRule targets in the SBML model + +it *may* include: + +- Any SBML model parameter that was not excluded above +- Named parameter overrides introduced in the *conditions table* + +One row per parameter with arbitrary order of rows and columns: + ++-------------+-----------------+-------------------------+-------------+------------+--------------+----------+-----+ +| parameterId | [parameterName] | parameterScale | lowerBound | upperBound | nominalValue | estimate | ... | ++=============+=================+=========================+=============+============+==============+==========+=====+ +|STRING | [STRING] | log10\|lin\|log | NUMERIC | NUMERIC | NUMERIC | 0\|1 | ... | ++-------------+-----------------+-------------------------+-------------+------------+--------------+----------+-----+ +| ... | ... | ... | ... | ... | ... | ... | ... | ++-------------+-----------------+-------------------------+-------------+------------+--------------+----------+-----+ + +*(wrapped for readability)* + ++-----+---------------------------+---------------------------------+----------------------+----------------------------+ +| ... | [initializationPriorType] | [initializationPriorParameters] | [objectivePriorType] | [objectivePriorParameters] | ++=====+===========================+=================================+======================+============================+ +| ... 
| *see below* | *see below* | *see below* | *see below* | ++-----+---------------------------+---------------------------------+----------------------+----------------------------+ +| ... | ... | ... | ... | ... | ++-----+---------------------------+---------------------------------+----------------------+----------------------------+ + +Additional columns may be added. + + +Detailed field description +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- ``parameterId`` [STRING, NOT NULL] + + The ``parameterId`` of the parameter described in this row. This has to match + the ID of a parameter specified in the SBML model, a parameter introduced + as override in the condition table, or a parameter occurring in the + ``observableParameters`` or ``noiseParameters`` column of the measurement table + (see above). + +- ``parameterName`` [STRING, OPTIONAL] + + Parameter name to be used e.g. for plotting etc. Can be chosen freely. May + or may not coincide with the SBML parameter name. + +- ``parameterScale`` [lin|log|log10] + + Scale of the parameter to be used during parameter estimation. + +- ``lowerBound`` [NUMERIC] + + Lower bound of the parameter used for optimization. + Optional, if ``estimate==0``. + Must be provided in linear space, independent of ``parameterScale``. + +- ``upperBound`` [NUMERIC] + + Upper bound of the parameter used for optimization. + Optional, if ``estimate==0``. + Must be provided in linear space, independent of ``parameterScale``. + +- ``nominalValue`` [NUMERIC] + + Some parameter value to be used if + the parameter is not subject to estimation (see ``estimate`` below). + Must be provided in linear space, independent of ``parameterScale``. + Optional, unless ``estimate==0``. + +- ``estimate`` [BOOL 0|1] + + 1 or 0, depending on whether the parameter is estimated (1) or set to a fixed + value (0) (see ``nominalValue``). + +- ``initializationPriorType`` [STRING, OPTIONAL] + + Prior types used for sampling of initial points for optimization.
Sampled + points are clipped to lie inside the parameter boundaries specified by + ``lowerBound`` and ``upperBound``. Defaults to ``parameterScaleUniform``. + + Possible prior types are: + + - *uniform*: flat prior on linear parameters + - *normal*: Gaussian prior on linear parameters + - *laplace*: Laplace prior on linear parameters + - *logNormal*: exponentiated Gaussian prior on linear parameters + - *logLaplace*: exponentiated Laplace prior on linear parameters + - *parameterScaleUniform* (default): Flat prior on original parameter + scale (equivalent to "no prior") + - *parameterScaleNormal*: Gaussian prior on original parameter scale + - *parameterScaleLaplace*: Laplace prior on original parameter scale + +- ``initializationPriorParameters`` [STRING, OPTIONAL] + + Prior parameters used for sampling of initial points for optimization, + separated by a semicolon. Defaults to ``lowerBound;upperBound``. + + So far, only numeric values will be supported, no parameter names. + Parameters for the different prior types are: + + - uniform: lower bound; upper bound + - normal: mean; standard deviation (**not** variance) + - laplace: location; scale + - logNormal: parameters of corresp. normal distribution (see: normal) + - logLaplace: parameters of corresp. Laplace distribution (see: laplace) + - parameterScaleUniform: lower bound; upper bound + - parameterScaleNormal: mean; standard deviation (**not** variance) + - parameterScaleLaplace: location; scale + +- ``objectivePriorType`` [STRING, OPTIONAL] + + Prior types used for the objective function during optimization or sampling. + For possible values, see ``initializationPriorType``. + +- ``objectivePriorParameters`` [STRING, OPTIONAL] + + Prior parameters used for the objective function during optimization. + For more detailed documentation, see ``initializationPriorParameters``. 
+ + +Visualization table +------------------- + +A tab-separated value file containing the specification of the visualization +routines which come with the PEtab repository. Plots are in general +collections of different datasets as specified using their ``datasetId`` (if +provided) inside the measurement table. + +Expected to have the following columns in any (but preferably this) +order: + ++--------+------------+-------------------------------------------+------------------------------------------------------+ +| plotId | [plotName] | [plotTypeSimulation] | [plotTypeData] | ++========+============+===========================================+======================================================+ +| STRING | [STRING] | [LinePlot(default)\|BarPlot\|ScatterPlot] | [MeanAndSD(default)\|MeanAndSEM\|replicate;provided] | ++--------+------------+-------------------------------------------+------------------------------------------------------+ +| ... | ... | ... | ... | ++--------+------------+-------------------------------------------+------------------------------------------------------+ + +*(wrapped for readability)* + ++-----+-------------+-------------------------------------+-----------+----------+--------------------------+ +| ... | [datasetId] | [xValues] | [xOffset] | [xLabel] | [xScale] | ++=====+=============+=====================================+===========+==========+==========================+ +| ... | [datasetId] | [time(default)\|parameterOrStateId] | [NUMERIC] | [STRING] | [lin\|log\|log10\|order] | ++-----+-------------+-------------------------------------+-----------+----------+--------------------------+ +| ... | ... | ... | ... | ... | ... | ++-----+-------------+-------------------------------------+-----------+----------+--------------------------+ + +*(wrapped for readability)* + ++-----+----------------+-----------+----------+-------------------+---------------+ +| ... 
| [yValues] | [yOffset] | [yLabel] | [yScale] | [legendEntry] | ++=====+================+===========+==========+===================+===============+ +| ... | [observableId] | [NUMERIC] | [STRING] | [lin\|log\|log10] | [STRING] | ++-----+----------------+-----------+----------+-------------------+---------------+ +| ... | ... | ... | ... | ... | ... | ++-----+----------------+-----------+----------+-------------------+---------------+ + + +Detailed field description +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- ``plotId`` [STRING, NOT NULL] + + An ID which corresponds to a specific plot. All datasets with the same + plotId will be plotted into the same axes object. + +- ``plotName`` [STRING, OPTIONAL] + + A name for the specific plot. + +- ``plotTypeSimulation`` [STRING, OPTIONAL] + + The type of the corresponding plot, can be ``LinePlot``, ``BarPlot`` or ``ScatterPlot``. Default is ``LinePlot``. + +- ``plotTypeData`` [STRING, OPTIONAL] + + How replicates should be handled; can be ``MeanAndSD``, + ``MeanAndSEM``, ``replicate`` (for plotting all replicates separately), or + ``provided`` (if numeric values for the noise level are provided in the + measurement table). Default is ``MeanAndSD``. + +- ``datasetId`` [STRING, NOT NULL, REFERENCES(measurementTable.datasetId), OPTIONAL] + + The datasets which should be grouped into one plot. + +- ``xValues`` [STRING, OPTIONAL] + + The independent variable, which will be plotted on the x-axis. Can be + ``time`` (default, for time-resolved data), or it can be ``parameterOrStateId`` + for dose-response plots. The corresponding numeric values will be shown on + the x-axis. + +- ``xOffset`` [NUMERIC, OPTIONAL] + + Possible data-offsets for the independent variable (default is ``0``). + +- ``xLabel`` [STRING, OPTIONAL] + + Label for the x-axis. Defaults to the entry in ``xValues``. + +- ``xScale`` [STRING, OPTIONAL] + + Scale of the independent variable, can be ``lin``, ``log``, ``log10`` or ``order``.
+ The ``order`` value should be used if values of the independent variable are + ordinal. This value can only be used in combination with the ``LinePlot`` value for + the ``plotTypeSimulation`` column. In this case, points on the x-axis will be + placed equidistantly from each other. Default is ``lin``. + +- ``yValues`` [observableId, REFERENCES(measurementTable.observableId), OPTIONAL] + + The observable which should be plotted on the y-axis. + +- ``yOffset`` [NUMERIC, OPTIONAL] + + Possible data-offsets for the observable (default is ``0``). + +- ``yLabel`` [STRING, OPTIONAL] + + Label for the y-axis. Defaults to the entry in ``yValues``. + +- ``yScale`` [STRING, OPTIONAL] + + Scale of the observable, can be ``lin``, ``log``, or ``log10``. Default is ``lin``. + +- ``legendEntry`` [STRING, OPTIONAL] + + The name that should be displayed for the corresponding dataset in the + legend and which defaults to the value in ``datasetId``. + + +Extensions +~~~~~~~~~~ + +Additional columns, such as ``Color``, etc. may be specified. + + +Examples +~~~~~~~~ + +Examples of the visualization table can be found in the +`Benchmark model collection <https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab>`_, for example in the `Chen_MSB2009 <https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab/tree/master/Benchmark-Models/Chen_MSB2009>`_ +model. + + +YAML file for grouping files +---------------------------- + +To link the SBML model, measurement table, condition table, etc. in an +unambiguous way, we use a `YAML <https://yaml.org>`_ file. + +This file also allows specifying a PEtab version (as the format is not unlikely +to change in the future). + +Furthermore, this can be used to describe parameter estimation problems +comprising multiple models (more details below). + +The format is described in the schema +`../petab/petab_schema.yaml <_static/petab_schema.yaml>`_, which allows for +easy validation. + + +Parameter estimation problems combining multiple models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Parameter estimation problems can comprise multiple models.
For now, PEtab +allows to specify multiple SBML models with corresponding condition and +measurement tables, and one joint parameter table. This means that the parameter +namespace is global. Therefore, parameters with the same ID in different models +will be considered identical. diff --git a/doc/index.rst b/doc/index.rst index cfd82352..c49ad3b2 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -5,7 +5,7 @@ :maxdepth: 3 :caption: Data format - Data format + Data format .. toctree:: :maxdepth: 3 From 68167b60065ba32b4f24d7d0f05515bdf77dbf4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com> Date: Thu, 23 Jul 2020 17:05:21 +0200 Subject: [PATCH 14/14] update changelog, version, fix readme (#453) --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ README.md | 2 +- petab/version.py | 2 +- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 502b241d..9c363068 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,32 @@ ## 0.1 series + +### 0.1.8 + +Library: + +* Use ``core.is_empty`` to check for empty values (#434) +* Move tests to python 3.8 (#435) +* Update to libcombine 0.2.6 (#437) +* Make float parsing from CSV round-trip (#444) +* Lint: Allow model time in observable formulas (#445) +* Lint: Detect duplicated observable ids (#446) +* Fix likelihood calculation with missing values (#451) + +Documentation: + +* Move format documentation to restructuredtext format (#452) +* Document all noise distributions and observable scales (#452) +* Fix documentation for prior distribution (#449) + +Visualization: + +* Make XValue column non-mandatory (#429) +* Apply correct condition sorting (#430) +* Apply correct default x label (#431) + + ### 0.1.7 Documentation: diff --git a/README.md b/README.md index 522f2518..3696f1a7 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ Legend: ## Using PEtab If you would like to use PEtab yourself, please have a look at 
-[doc/documentation_data_format.md](doc/documentation_data_format.md) or at +[doc/documentation_data_format.rst](doc/documentation_data_format.rst) or at the example models provided in the [benchmark collection](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab). diff --git a/petab/version.py b/petab/version.py index 5e78dc76..3a17f754 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,2 @@ """PEtab library version""" -__version__ = '0.1.7' +__version__ = '0.1.8'