From 665c81d00b765d3f12b8716dcd9b332e37bce954 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 14 Nov 2024 12:23:26 +0100 Subject: [PATCH] remove more tests and readd f --- .github/workflows/python_installation.yml | 46 ------ conda-recipe/run_test.sh | 9 -- .../src/validphys/tests/test_filter_rules.py | 139 ++++++++++++++++++ .../src/validphys/tests/test_fitdata.py | 67 +++++++++ .../src/validphys/tests/test_fitveto.py | 60 ++++++++ 5 files changed, 266 insertions(+), 55 deletions(-) delete mode 100644 .github/workflows/python_installation.yml create mode 100644 validphys2/src/validphys/tests/test_filter_rules.py create mode 100644 validphys2/src/validphys/tests/test_fitdata.py create mode 100644 validphys2/src/validphys/tests/test_fitveto.py diff --git a/.github/workflows/python_installation.yml b/.github/workflows/python_installation.yml deleted file mode 100644 index 8597c5a218..0000000000 --- a/.github/workflows/python_installation.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Test the python installation - -on: [push] - -env: - # https://keras.io/getting_started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development - PYTHONHASHSEED: "0" - -jobs: - build: - strategy: - matrix: - os: [ubuntu-latest, macos-14] - python-version: ["3.12"] - include: - - os: ubuntu-latest - CONDA_OS: linux-64 - fail-fast: false - runs-on: ${{ matrix.os }} - env: - NETRC_FILE: ${{ secrets.NETRC_FILE }} - NNPDF_SSH_KEY: ${{ secrets.NNPDF_SSH_KEY }} - steps: - - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Setup conda and install LHAPDF and pandoc - shell: bash -l {0} - run: | - echo "$NETRC_FILE" | base64 --decode > ~/.netrc - conda install -n base conda-libmamba-solver - conda config --set solver libmamba - conda config --append channels conda-forge - conda config --set show_channel_urls true - conda install lhapdf pandoc mongodb - - name: Install nnpdf with testing and qed 
extras - shell: bash -l {0} - run: | - conda activate test - pip install .[qed,tests] - - name: Test n3fit and validphys - shell: bash -l {0} - run: | - conda activate test - pytest --pyargs --mpl validphys n3fit diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 5be3fb78d7..770f137653 100644 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -5,12 +5,3 @@ set -e #Python tests for the installed validphys package pytest --pyargs --mpl validphys - -platformstr=`uname` - -# skip n3fit tests on mac -if [[ "$platformstr" != "Darwin" ]]; then - pytest --pyargs n3fit -else - echo "Skipping n3fit tests on Mac" -fi diff --git a/validphys2/src/validphys/tests/test_filter_rules.py b/validphys2/src/validphys/tests/test_filter_rules.py new file mode 100644 index 0000000000..bbd09655ec --- /dev/null +++ b/validphys2/src/validphys/tests/test_filter_rules.py @@ -0,0 +1,139 @@ +import numpy as np +import pytest + +from validphys.api import API +from validphys.filters import ( + BadPerturbativeOrder, + PerturbativeOrder, + Rule, + RuleProcessingError, + default_filter_settings_input, +) +from validphys.loader import FallbackLoader as Loader +from validphys.tests.conftest import PDF +from validphys.tests.conftest import THEORYID_NEW as THEORYID + +bad_rules = [ + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2"}, + {'rule': 'x < 0.1'}, + {'dataset': 'NOT_EXISTING', 'rule': 'x < 0.1'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': 'bogus'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'local_variables': 'bogus'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'bogus syntax'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'unknown_variable > 10'}, + { + 'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", + 'local_variables': {'z': 'bogus syntax'}, + 'rule': 'z > 10', + }, + { + 'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", + 'local_variables': {'z': 
'unknown_variable + 1'}, + 'rule': 'z > 10', + }, + { + 'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", + 'local_variables': {'z': 'v+1', 'v': '10'}, + 'rule': 'z > 10', + }, +] + +# Note: Don't change the order here. In this way it tests all cases. +good_rules = [ + {'process_type': 'DIS_ALL', 'PTO': 'N3LO', 'rule': 'x < 1e-2'}, + {'process_type': 'DIS_ALL', 'IC': 'False', 'rule': 'x < 1e-2'}, + {'process_type': 'JET', 'rule': 'pT < 3.16'}, +] + + +def mkrule(inp): + l = Loader() + th = l.check_theoryID(THEORYID) + desc = th.get_description() + defaults = default_filter_settings_input() + return Rule(initial_data=inp, defaults=defaults, theory_parameters=desc) + + +def test_rule_caching(): + rule_list_1, *rule_list_2 = good_rules + rule_list_1 = [rule_list_1] + + cut_list = [] + for rule_list in (rule_list_1, rule_list_2): + cut_list.append( + API.cuts( + dataset_input={"dataset": "NMC_NC_NOTFIXED_DW_EM-F2"}, + use_cuts="internal", + theoryid=THEORYID, + filter_rules=rule_list, + ) + ) + assert not cut_list[0] == cut_list[1] + + +def test_PTO(): + assert 2 in PerturbativeOrder("NNLO") + assert 2 in PerturbativeOrder("N2LO") + assert 2 not in PerturbativeOrder("NNLO!") + assert 2 in PerturbativeOrder("NNLO+") + assert 2 not in PerturbativeOrder("NNLO-") + with pytest.raises(BadPerturbativeOrder): + PerturbativeOrder("NBogus+") + + +def test_bad_rules(): + for rule_inp in bad_rules: + with pytest.raises(RuleProcessingError): + mkrule(rule_inp) + + +def test_default_rules(): + l = Loader() + dsnames = ['NMC_NC_NOTFIXED_EM-F2', 'LHCB_Z0_8TEV_MUON_Y'] + variants = ["legacy_dw", None] + for dsname, v in zip(dsnames, variants): + ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID, variant=v) + assert ds.cuts.load() is not None + + +def test_good_rules(): + l = Loader() + rules = [mkrule(inp) for inp in good_rules] + dsnames = ['ATLAS_1JET_8TEV_R06_PTY', 'NMC_NC_NOTFIXED_EM-F2'] + variants = ["legacy","legacy_dw"] + for dsname, v in zip(dsnames, variants): + ds = 
l.check_dataset( + dsname, cuts='internal', rules=tuple(rules), theoryid=THEORYID, variant=v + ) + assert ds.cuts.load() is not None + + +def test_added_rules(): + inp = { + "theoryid": THEORYID, + "pdf": PDF, + "use_cuts": "internal", + "dataset_inputs": [{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "variant": "legacy"}], + "filter_rules": [], + "dataspecs": [ + {"speclabel": "Original", "added_filter_rules": None}, + { + "speclabel": "fewer data", + "added_filter_rules": [ + {"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reason": "pt cut"} + ], + }, + { + "speclabel": "empty data", + "added_filter_rules": [ + {"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "y < 0", "reason": "empty data"} + ], + }, + ], + } + tb = API.dataspecs_chi2_table(**inp) + assert tb["empty data"]["ndata"].iloc[0] == 0 + assert np.isnan(tb["empty data"].iloc[1, 1]) + assert tb["empty data"]["ndata"].iloc[0] == 0 + assert np.all(tb[1:]["fewer data"] != tb[1:]["Original"]) diff --git a/validphys2/src/validphys/tests/test_fitdata.py b/validphys2/src/validphys/tests/test_fitdata.py new file mode 100644 index 0000000000..bfd70b46a4 --- /dev/null +++ b/validphys2/src/validphys/tests/test_fitdata.py @@ -0,0 +1,67 @@ +from validphys.api import API +from validphys.fitdata import print_different_cuts, print_systype_overlap +from validphys.tests.conftest import FIT_3REPLICAS, FIT_3REPLICAS_DCUTS + + +def test_print_different_cuts(): + """Checks the print_different_cuts function + using two fits with a different choice of q2min and w2min in the runcard + One of the datasets (SLACP) gets 0 points in the most restrictive case + The different cuts are: + q2min: 3.49 - 13.49 + w2min: 12.5 - 22.5 + """ + fit_1 = API.fit(fit=FIT_3REPLICAS) + fit_2 = API.fit(fit=FIT_3REPLICAS_DCUTS) + fits = [fit_1, fit_2] + testi = API.test_for_same_cuts(fits=[FIT_3REPLICAS, FIT_3REPLICAS_DCUTS], use_cuts="fromfit") + res = print_different_cuts(fits, testi) + assert "121 out of 260" in res + assert "59 out of 
260" in res + assert "33 out of 211" in res + assert "0 out of 211" in res + + +def test_print_systype_overlap(): + """Test that print_systype_overlap does the expected thing + for some simple examples. We can't use the API directly + here because we want to create fictional groups where + overlaps do exist. + + Note that the first input of :py:func:`print_systype_overlap` is + ``groups_commondata`` which is a list of lists, the outer list usually + contains an inner list for each ``metadata_group``. Each inner list contains + a ``CommonDataSpec`` for each dataset which is part of that group. In this + test we create fake groups and ensure the output of the function is correct. + + The second input is ``group_dataset_inputs_by_metadata`` which is a list + containing a dictionary for each ``metadata_group``. The function gets + ``group_name`` from each dictionary and uses it to label each group, but the + actual value is unimportant for these tests. + + """ + cd_1 = API.commondata(dataset_input={"dataset": "ATLAS_Z0J_8TEV_PT-Y", "variant": "legacy"}) + cd_2 = API.commondata(dataset_input={"dataset": "ATLAS_WJ_8TEV_WM-PT", "variant": "legacy"}) + cd_3 = API.commondata( + dataset_input={"dataset": "NMC_NC_NOTFIXED_P_EM-SIGMARED", "variant": "legacy"} + ) + + # group names don't affect results, set arbitrarily. + group_1 = {"group_name": "group_1"} + group_2 = {"group_name": "group_2"} + + # each group contains same dataset, so systypes will overlap + match = print_systype_overlap([[cd_1], [cd_1]], [group_1, group_2]) + assert isinstance(match, tuple) + # single group passed so systype won't overlap + match2 = print_systype_overlap([[cd_1]], [group_1]) + assert isinstance(match2, str) + # cd in each group are different but share a systematic so overlap. 
+ match3 = print_systype_overlap([[cd_1], [cd_2]], [group_1, group_2]) + assert isinstance(match3, tuple) + # test non-overlapping groups + match4 = print_systype_overlap([[cd_1, cd_2], [cd_3]], [group_1, group_2]) + assert isinstance(match4, str) + # no groups, no overlap + match5 = print_systype_overlap([], []) + assert isinstance(match5, str) diff --git a/validphys2/src/validphys/tests/test_fitveto.py b/validphys2/src/validphys/tests/test_fitveto.py new file mode 100644 index 0000000000..54b2452731 --- /dev/null +++ b/validphys2/src/validphys/tests/test_fitveto.py @@ -0,0 +1,60 @@ +import numpy as np +import pytest +import itertools +from hypothesis import given +from hypothesis.strategies import floats, integers, tuples, lists, booleans +from hypothesis.extra.numpy import arrays, array_shapes + +from validphys.fitveto import distribution_veto, determine_vetoes +from validphys.fitveto import NSIGMA_DISCARD_ARCLENGTH, NSIGMA_DISCARD_CHI2, INTEG_THRESHOLD +from validphys.fitdata import FitInfo + +shape1d = array_shapes(max_dims=1, min_side=1, max_side=1000) +nicefloats = floats(allow_nan=False, allow_infinity=False) +integ_floats = floats(allow_nan=False, max_value=0.4) + +fitinfos = tuples( + integers(min_value=1), + nicefloats, + nicefloats, + nicefloats, + booleans(), + arrays(float, shape=7, elements=nicefloats), + arrays(float, shape=5, elements=integ_floats), +).map(FitInfo._make) + + +thresholds = floats(min_value=1, max_value=10) +distributions = arrays(float, shape=shape1d, elements=nicefloats) + + +# Ignore over- and underflow warnings. 
+@pytest.mark.filterwarnings("ignore") +@given(distributions, thresholds) +def test_distribution_veto(arr, threshold): + veto = distribution_veto(arr, np.ones_like(arr, dtype=bool), threshold) + masked = arr[veto] + assert np.all(masked - np.mean(arr) <= threshold * np.std(arr)) + + +# The case where the list is empty is handled in postfit +@pytest.mark.filterwarnings('ignore') +@given(lists(fitinfos, min_size=1)) +def test_determine_vetoes(fitinfos): + vetoes = determine_vetoes( + fitinfos, NSIGMA_DISCARD_CHI2, NSIGMA_DISCARD_ARCLENGTH, INTEG_THRESHOLD + ) + assert np.all(vetoes['Positivity'] == np.array([info.is_positive for info in fitinfos])) + tot = vetoes['Total'] + assert all(np.all(tot & val == tot) for val in vetoes.values()) + single_replica_veto = determine_vetoes( + [fitinfos[0]], NSIGMA_DISCARD_CHI2, NSIGMA_DISCARD_ARCLENGTH, INTEG_THRESHOLD + ) + assert single_replica_veto['Total'][0] == single_replica_veto['Positivity'][0] + # distribution_vetoes applied a second time should veto nothing + if sum(tot) > 0: + passing_fitinfos = list(itertools.compress(fitinfos, tot)) + second_vetoes = determine_vetoes( + passing_fitinfos, NSIGMA_DISCARD_CHI2, NSIGMA_DISCARD_ARCLENGTH, INTEG_THRESHOLD + ) + assert sum(vetoes["Total"]) == sum(second_vetoes["Total"])