From 85bd1b2ee2b588e69a443ad618dc608b68ca8ab9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com>
Date: Tue, 11 Feb 2020 17:45:46 +0100
Subject: [PATCH 1/3] Prior tests (#285)

* add test for valid prior parameters

* add raising test

* fix codacy

* random edit to make codacy happy

* address reviewer comments
---
Review note (ignored by `git am`): in
assert_parameter_prior_parameters_are_valid the parse-failure message now
interpolates `pars_str`. It previously interpolated `pars`, which is the
assignment target of the failing `try` and therefore unbound (first row) or
stale (later rows) inside the `except ValueError` handler. The "invalide"
docstring typo in the same function is fixed as well. Both changes are
same-line substitutions, so hunk sizes and the diffstat are unchanged.

 petab/core.py       | 12 +++++++++
 petab/lint.py       | 60 ++++++++++++++++++++++++++++++++++++++++-----
 tests/test_lint.py  | 26 +++++++++++++++++---
 tests/test_petab.py |  5 ++--
 4 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/petab/core.py b/petab/core.py
index 8c1cbf9a..2f3b7285 100644
--- a/petab/core.py
+++ b/petab/core.py
@@ -251,6 +251,18 @@ def to_float_if_float(x: Any) -> Any:
         return x
 
 
+def is_empty(val) -> bool:
+    """Check if the value `val`, e.g. a table entry, is empty.
+
+    Arguments:
+        val: The value to check.
+
+    Returns:
+        empty: Whether the field is to be considered empty.
+    """
+    return val == '' or pd.isnull(val)
+
+
 def create_combine_archive(
         yaml_file: str, filename: str,
         family_name: Optional[str] = None,
diff --git a/petab/lint.py b/petab/lint.py
index 14522871..2de054c1 100644
--- a/petab/lint.py
+++ b/petab/lint.py
@@ -404,15 +404,63 @@ def assert_parameter_prior_type_is_valid(
     Raises:
         AssertionError in case of invalid prior
     """
-    for prefix in [INITIALIZATION, OBJECTIVE]:
-        col_name = f"{prefix}PriorType"
-        if col_name not in parameter_df.columns:
+    for col in [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]:
+        if col not in parameter_df.columns:
             continue
         for _, row in parameter_df.iterrows():
-            if row[col_name] not in PRIOR_TYPES:
+            if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]):
                 raise AssertionError(
-                    f"{col_name} must be one of {PRIOR_TYPES} but is "
-                    f"{row[col_name]}.")
+                    f"{col} must be one of {PRIOR_TYPES} but is "
+                    f"'{row[col]}'.")
+
+
+def assert_parameter_prior_parameters_are_valid(
+        parameter_df: pd.DataFrame) -> None:
+    """Check that the prior parameters are valid.
+
+    Arguments:
+        parameter_df: PEtab parameter table
+
+    Raises:
+        AssertionError in case of invalid prior parameters
+    """
+    prior_type_cols = [INITIALIZATION_PRIOR_TYPE,
+                       OBJECTIVE_PRIOR_TYPE]
+    prior_par_cols = [INITIALIZATION_PRIOR_PARAMETERS,
+                      OBJECTIVE_PRIOR_PARAMETERS]
+
+    # perform test for both priors
+    for type_col, par_col in zip(prior_type_cols, prior_par_cols):
+        # iterate over rows
+        for _, row in parameter_df.iterrows():
+            # get type
+            if type_col not in row or core.is_empty(row[type_col]):
+                type_ = PARAMETER_SCALE_UNIFORM
+            else:
+                type_ = row[type_col]
+            # get parameters
+            pars_str = row.get(par_col, '')
+            with_default_parameters = [PARAMETER_SCALE_UNIFORM]
+            # check if parameters are empty
+            if core.is_empty(pars_str):
+                if type_ not in with_default_parameters:
+                    raise AssertionError(
+                        f"An empty {par_col} is only permitted with "
+                        f"{type_col} in {with_default_parameters}.")
+                # empty parameters fine
+                continue
+            # parse parameters
+            try:
+                pars = tuple([float(val) for val in pars_str.split(';')])
+            except ValueError:
+                raise AssertionError(
+                    f"Could not parse prior parameters '{pars_str}'.")
+            # all distributions take 2 parameters
+            if len(pars) != 2:
+                raise AssertionError(
+                    f"The prior parameters '{pars}' do not contain the "
+                    "expected number of entries (currently 'par1;par2' "
+                    "for all prior types).")
 
 
 def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None:
diff --git a/tests/test_lint.py b/tests/test_lint.py
index 559080da..3209ded6 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -257,14 +257,34 @@ def test_check_parameter_bounds():
 
 
 def test_assert_parameter_prior_type_is_valid():
+    """Check lint.assert_parameter_prior_type_is_valid."""
     lint.assert_parameter_prior_type_is_valid(pd.DataFrame(
-        {INITIALIZATION_PRIOR_TYPE: [UNIFORM, LAPLACE],
-         OBJECTIVE_PRIOR_TYPE: [NORMAL, LOG_NORMAL]}))
+        {INITIALIZATION_PRIOR_TYPE: [UNIFORM, LAPLACE, ''],
+         OBJECTIVE_PRIOR_TYPE: [NORMAL, LOG_NORMAL, '']}))
     lint.assert_parameter_prior_type_is_valid(pd.DataFrame())
 
     with pytest.raises(AssertionError):
         lint.assert_parameter_prior_type_is_valid(pd.DataFrame(
-            {INITIALIZATION_PRIOR_TYPE: ['normal', '']}))
+            {INITIALIZATION_PRIOR_TYPE: ['normel']}))
+
+
+def test_assert_parameter_prior_parameters_are_valid():
+    """Check lint.assert_parameter_prior_parameters_are_valid."""
+    parameter_df = pd.DataFrame({
+        INITIALIZATION_PRIOR_TYPE: [UNIFORM, '', ''],
+        INITIALIZATION_PRIOR_PARAMETERS: ['0;1', '10;20', ''],
+        OBJECTIVE_PRIOR_PARAMETERS: ['0;20', '10;20', '']
+    })
+
+    lint.assert_parameter_prior_parameters_are_valid(parameter_df)
+
+    with pytest.raises(AssertionError):
+        lint.assert_parameter_prior_parameters_are_valid(pd.DataFrame(
+            {INITIALIZATION_PRIOR_TYPE: [NORMAL]}))
+
+    with pytest.raises(AssertionError):
+        lint.assert_parameter_prior_parameters_are_valid(pd.DataFrame(
+            {OBJECTIVE_PRIOR_PARAMETERS: ['0;1;2']}))
 
 
 def test_petablint_succeeds():
diff --git a/tests/test_petab.py b/tests/test_petab.py
index 1ea25a3d..08af5ce8 100644
--- a/tests/test_petab.py
+++ b/tests/test_petab.py
@@ -165,9 +165,10 @@ def test_get_observable_id():
 
 
 def test_startpoint_sampling(fujita_model_scaling):
-    startpoints = fujita_model_scaling.sample_parameter_startpoints(100)
+    n_starts = 10
+    startpoints = fujita_model_scaling.sample_parameter_startpoints(n_starts)
     assert (np.isfinite(startpoints)).all
-    assert startpoints.shape == (100, 19)
+    assert startpoints.shape == (n_starts, 19)
     for sp in startpoints:
         assert np.log10(31.62) <= sp[0] <= np.log10(316.23)
         assert -3 <= sp[1] <= 3

From f302cab4308ab8ec518438d5681e456afb129d15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com>
Date: Tue, 11 Feb 2020 17:56:12 +0100
Subject: [PATCH 2/3] Fix prior scales (#286)

* add test for valid prior parameters

* add raising test

* cont

* fix codacy

* fix errors in get_priors_from_df: wrong default scaling and empty values

* random edit to make codacy happy

* fix merge error

* make codacy happy
---
 petab/parameters.py | 11 ++++++++---
 tests/test_petab.py | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/petab/parameters.py b/petab/parameters.py
index 45e02fdc..597e10c2 100644
--- a/petab/parameters.py
+++ b/petab/parameters.py
@@ -285,11 +285,16 @@ def get_priors_from_df(parameter_df: pd.DataFrame,
     prior_list = []
     for _, row in par_to_estimate.iterrows():
         # retrieve info about type
-        prior_type = str(row.get(f'{mode}PriorType', PARAMETER_SCALE_UNIFORM))
+        prior_type = str(row.get(f'{mode}PriorType', ''))
+        if core.is_empty(prior_type):
+            prior_type = PARAMETER_SCALE_UNIFORM
 
         # retrieve info about parameters of priors, make it a tuple of floats
-        pars_str = str(row.get(f'{mode}PriorParameters',
-                               f'{row[LOWER_BOUND]};{row[UPPER_BOUND]}'))
+        pars_str = str(row.get(f'{mode}PriorParameters', ''))
+        if core.is_empty(pars_str):
+            lb, ub = map_scale([row[LOWER_BOUND], row[UPPER_BOUND]],
+                               [row[PARAMETER_SCALE]] * 2)
+            pars_str = f'{lb};{ub}'
         prior_pars = tuple([float(entry) for entry in pars_str.split(';')])
 
         # add parameter scale and bounds, as this may be needed
diff --git a/tests/test_petab.py b/tests/test_petab.py
index 08af5ce8..d12632e2 100644
--- a/tests/test_petab.py
+++ b/tests/test_petab.py
@@ -164,6 +164,44 @@ def test_get_observable_id():
     assert petab.get_observable_id('sigma_obs1') == 'obs1'
 
 
+def test_get_priors_from_df():
+    """Check petab.get_priors_from_df."""
+    parameter_df = pd.DataFrame({
+        PARAMETER_SCALE: [LOG10, LOG10, LOG10, LOG10, LOG10],
+        LOWER_BOUND: [1e-8, 1e-9, 1e-10, 1e-11, 1e-5],
+        UPPER_BOUND: [1e8, 1e9, 1e10, 1e11, 1e5],
+        ESTIMATE: [1, 1, 1, 1, 0],
+        INITIALIZATION_PRIOR_TYPE: ['', '',
+                                    UNIFORM, NORMAL, ''],
+        INITIALIZATION_PRIOR_PARAMETERS: ['', '-5;5', '1e-5;1e5', '0;1', '']
+    })
+
+    prior_list = petab.get_priors_from_df(parameter_df, mode=INITIALIZATION)
+
+    # only give values for estimated parameters
+    assert len(prior_list) == 4
+
+    # correct types
+    types = [entry[0] for entry in prior_list]
+    assert types == [PARAMETER_SCALE_UNIFORM, PARAMETER_SCALE_UNIFORM,
+                     UNIFORM, NORMAL]
+
+    # correct scales
+    scales = [entry[2] for entry in prior_list]
+    assert scales == [LOG10] * 4
+
+    # correct bounds
+    bounds = [entry[3] for entry in prior_list]
+    assert bounds == list(zip(parameter_df[LOWER_BOUND],
+                              parameter_df[UPPER_BOUND]))[:4]
+
+    # give correct value for empty
+    prior_pars = [entry[1] for entry in prior_list]
+    assert prior_pars[0] == (-8, 8)
+    assert prior_pars[1] == (-5, 5)
+    assert prior_pars[2] == (1e-5, 1e5)
+
+
 def test_startpoint_sampling(fujita_model_scaling):
     n_starts = 10
     startpoints = fujita_model_scaling.sample_parameter_startpoints(n_starts)

From 1ba6f5c442215ed9cf6627376037e49cfe2e2719 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20Sch=C3=A4lte?= <31767307+yannikschaelte@users.noreply.github.com>
Date: Tue, 11 Feb 2020 19:39:42 +0100
Subject: [PATCH 3/3] Feature defaultvalues (#288)

* init parameter df normalization function

* cont

* add normalize_parameter_df function

* make other dfs optional in check_parameter_df

* fixup normalize_parameter_df

* add test for normalize_parameter_df; rename test_parameter.py -> test_parameters.py

* address reviewer comments and make codacy happy
---
 petab/lint.py                                 |  6 +--
 petab/parameters.py                           | 32 ++++++++++++
 .../{test_parameter.py => test_parameters.py} | 51 +++++++++++++++++++
 3 files changed, 86 insertions(+), 3 deletions(-)
 rename tests/{test_parameter.py => test_parameters.py} (52%)

diff --git a/petab/lint.py b/petab/lint.py
index 2de054c1..9e7032e8 100644
--- a/petab/lint.py
+++ b/petab/lint.py
@@ -134,9 +134,9 @@ def check_measurement_df(df: pd.DataFrame,
 
 def check_parameter_df(
         df: pd.DataFrame,
-        sbml_model: Optional[libsbml.Model],
-        measurement_df: Optional[pd.DataFrame],
-        condition_df: Optional[pd.DataFrame]) -> None:
+        sbml_model: Optional[libsbml.Model] = None,
+        measurement_df: Optional[pd.DataFrame] = None,
+        condition_df: Optional[pd.DataFrame] = None) -> None:
     """Run sanity checks on PEtab parameter table
 
     Arguments:
diff --git a/petab/parameters.py b/petab/parameters.py
index 597e10c2..c7629110 100644
--- a/petab/parameters.py
+++ b/petab/parameters.py
@@ -334,3 +334,35 @@ def map_scale(parameters: Iterable[numbers.Number],
               scale_strs: Iterable[str]) -> Iterable[numbers.Number]:
     """As scale(), but for Iterables"""
     return map(lambda x: scale(x[0], x[1]), zip(parameters, scale_strs))
+
+
+def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame:
+    """Add missing columns and fill in default values."""
+    df = parameter_df.copy(deep=True)
+
+    if PARAMETER_NAME not in df:
+        df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID]
+
+    prior_type_cols = [INITIALIZATION_PRIOR_TYPE,
+                       OBJECTIVE_PRIOR_TYPE]
+    prior_par_cols = [INITIALIZATION_PRIOR_PARAMETERS,
+                      OBJECTIVE_PRIOR_PARAMETERS]
+    # iterate over initialization and objective priors
+    for prior_type_col, prior_par_col in zip(prior_type_cols, prior_par_cols):
+        # fill in default values for prior type
+        if prior_type_col not in df:
+            df[prior_type_col] = PARAMETER_SCALE_UNIFORM
+        else:
+            for irow, row in df.iterrows():
+                if core.is_empty(row[prior_type_col]):
+                    df.loc[irow, prior_type_col] = PARAMETER_SCALE_UNIFORM
+        if prior_par_col not in df:
+            df[prior_par_col] = None
+        for irow, row in df.iterrows():
+            if core.is_empty(row[prior_par_col]) \
+                    and row[prior_type_col] == PARAMETER_SCALE_UNIFORM:
+                lb, ub = map_scale([row[LOWER_BOUND], row[UPPER_BOUND]],
+                                   [row[PARAMETER_SCALE]] * 2)
+                df.loc[irow, prior_par_col] = f'{lb};{ub}'
+
+    return df
diff --git a/tests/test_parameter.py b/tests/test_parameters.py
similarity index 52%
rename from tests/test_parameter.py
rename to tests/test_parameters.py
index abb2725c..7c55410e 100644
--- a/tests/test_parameter.py
+++ b/tests/test_parameters.py
@@ -77,3 +77,54 @@ def test_write_parameter_df():
         petab.write_parameter_df(parameter_df, file_name)
         re_df = petab.get_parameter_df(file_name)
         assert (parameter_df == re_df).all().all()
+
+
+def test_normalize_parameter_df():
+    """Check parameters.normalize_parameter_df."""
+    parameter_df = pd.DataFrame({
+        PARAMETER_ID: ['par0', 'par1', 'par2'],
+        PARAMETER_SCALE: [LOG10, LOG10, LIN],
+        NOMINAL_VALUE: [1e-2, 1e-3, 1e-4],
+        ESTIMATE: [1, 1, 0],
+        LOWER_BOUND: [1e-5, 1e-6, 1e-7],
+        UPPER_BOUND: [1e5, 1e6, 1e7]
+    }).set_index(PARAMETER_ID)
+
+    actual = petab.normalize_parameter_df(parameter_df)
+
+    expected = parameter_df.copy(deep=True)
+    expected[PARAMETER_NAME] = parameter_df.reset_index()[PARAMETER_ID]
+    expected[INITIALIZATION_PRIOR_TYPE] = [PARAMETER_SCALE_UNIFORM] * 3
+    expected[INITIALIZATION_PRIOR_PARAMETERS] = ["-5;5", "-6;6", "1e-7;1e7"]
+    expected[OBJECTIVE_PRIOR_TYPE] = [PARAMETER_SCALE_UNIFORM] * 3
+    expected[OBJECTIVE_PRIOR_PARAMETERS] = ["-5;5", "-6;6", "1e-7;1e7"]
+
+    # check ids
+    assert list(actual.index.values) == list(expected.index.values)
+
+    # check if basic columns match
+    for col in PARAMETER_DF_COLS[1:]:
+        if col in [INITIALIZATION_PRIOR_PARAMETERS,
+                   OBJECTIVE_PRIOR_PARAMETERS]:
+            continue
+        assert ((actual[col] == expected[col]) |
+                (actual[col].isnull() == expected[col].isnull())).all()
+
+    # check if prior parameters match
+    for col in [INITIALIZATION_PRIOR_PARAMETERS, OBJECTIVE_PRIOR_PARAMETERS]:
+        for (_, actual_row), (_, expected_row) in \
+                zip(actual.iterrows(), expected.iterrows()):
+            actual_pars = tuple([float(val) for val in
+                                 actual_row[col].split(';')])
+            expected_pars = tuple([float(val) for val in
+                                   expected_row[col].split(';')])
+
+            assert actual_pars == expected_pars
+
+    # check is a projection
+    actual2 = petab.normalize_parameter_df(actual)
+    assert ((actual == actual2) | (actual.isnull() == actual2.isnull())) \
+        .all().all()
+
+    # check is valid petab
+    petab.check_parameter_df(actual)