Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions petab/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,8 @@ def get_parameter_df(
return None

parameter_df = pd.concat(dfs)
# Remove identical parameter definitions
parameter_df.drop_duplicates(inplace=True, ignore_index=False)
# Check for contradicting parameter definitions
parameter_duplicates = set(parameter_df.index.values[
parameter_df.index.duplicated()])
if parameter_duplicates:
raise ValueError(
f'The values of {PARAMETER_ID} must be unique or'
' identical between all parameter subset files. The'
' following duplicates were found:\n'
f'{parameter_duplicates}'
)
_check_for_contradicting_parameter_definitions(parameter_df)

return parameter_df

Expand All @@ -81,10 +71,24 @@ def get_parameter_df(
except KeyError as e:
raise KeyError(
f"Parameter table missing mandatory field {PARAMETER_ID}.") from e
_check_for_contradicting_parameter_definitions(parameter_df)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the drop_duplicates call is removed, the duplicated parameter definitions may be consistent (e.g. your test raises an error for redundancy too), so this check is now just about duplicated parameter IDs.

The logic for this should now be equivalent to petab.lint.assert_unique_parameter_ids. Would be good if that could be reused here, but fine as is, since the circular dependency would need to be resolved.


return parameter_df


def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame):
"""
Raises a ValueError for non-unique parameter IDs
"""
parameter_duplicates = set(parameter_df.index.values[
parameter_df.index.duplicated()])
if parameter_duplicates:
raise ValueError(
f'The values of `{PARAMETER_ID}` must be unique. The '
f'following duplicates were found:\n{parameter_duplicates}'
)


def write_parameter_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
"""Write PEtab parameter table

Expand Down
53 changes: 45 additions & 8 deletions tests/test_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ def test_get_parameter_df():
PARAMETER_ID: ['id3'],
PARAMETER_NAME: ['name3']
})
parameter_dfs['subset2_overlap'] = pd.DataFrame(data={
parameter_dfs['subset2_redundance'] = pd.DataFrame(data={
PARAMETER_ID: ['id2', 'id3'],
PARAMETER_NAME: ['name2', 'name3']
})
parameter_dfs['subset2_error'] = pd.DataFrame(data={
parameter_dfs['subset2_contradiction'] = pd.DataFrame(data={
PARAMETER_ID: ['id2', 'id3'],
PARAMETER_NAME: ['different_name2', 'name3']
})
Expand All @@ -98,15 +98,52 @@ def test_get_parameter_df():
assert(petab.get_parameter_df(parameter_files['complete']).equals(
petab.get_parameter_df([parameter_files['subset1'],
parameter_files['subset2_strict']])))
# Check that identical parameter definitions are correctly combined
assert(petab.get_parameter_df(parameter_files['complete']).equals(
petab.get_parameter_df([parameter_files['subset1'],
parameter_files['subset2_overlap']])))
# Ensure an error is raised if there exist parameterId duplicates
# with identical parameter definitions
with pytest.raises(ValueError):
petab.get_parameter_df(
[parameter_files["subset1"],
parameter_files["subset2_redundance"]]
)
# with non-identical parameter definitions
with pytest.raises(ValueError):
petab.get_parameter_df([parameter_files['subset1'],
parameter_files['subset2_error']])
petab.get_parameter_df(
[parameter_files["subset1"],
parameter_files["subset2_contradiction"],
]
)

# Ensure that parameters that differ only by parameterId
# are recognized as distinct
with tempfile.TemporaryDirectory() as directory:
parameter_dfs, parameter_files = ({}, {})
parameter_dfs["complete"] = pd.DataFrame(
data={
PARAMETER_ID: ["id1", "id2", "id3", "id4"],
NOMINAL_VALUE: [1, 1, 1, 1],
}
)
parameter_dfs["subset1"] = pd.DataFrame(
data={PARAMETER_ID: ["id1", "id2"], NOMINAL_VALUE: [1, 1]}
)
parameter_dfs["subset2"] = pd.DataFrame(
data={PARAMETER_ID: ["id3", "id4"], NOMINAL_VALUE: [1, 1]}
)
for name, df in parameter_dfs.items():
with tempfile.NamedTemporaryFile(
mode="w", delete=False, dir=directory
) as fh:
parameter_files[name] = fh.name
parameter_dfs[name].to_csv(fh, sep="\t", index=False)
# from one parameter file
df_template = parameter_dfs["complete"].set_index(PARAMETER_ID)
df_test = petab.get_parameter_df(parameter_files["complete"])
assert (df_template == df_test).all().all()
# several parameter files
assert petab.get_parameter_df(parameter_files["complete"]).equals(
petab.get_parameter_df([parameter_files["subset1"],
parameter_files["subset2"]])
)


def test_write_parameter_df():
Expand Down