-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fa2fc47
commit 665c81d
Showing
5 changed files
with
266 additions
and
55 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from validphys.api import API | ||
from validphys.filters import ( | ||
BadPerturbativeOrder, | ||
PerturbativeOrder, | ||
Rule, | ||
RuleProcessingError, | ||
default_filter_settings_input, | ||
) | ||
from validphys.loader import FallbackLoader as Loader | ||
from validphys.tests.conftest import PDF | ||
from validphys.tests.conftest import THEORYID_NEW as THEORYID | ||
|
||
# Malformed rule inputs: constructing a Rule from any of these must raise
# RuleProcessingError (missing keys, unknown dataset, bad PTO, bad syntax,
# unknown or ill-defined local variables).
bad_rules = [
    {"dataset": "NMC_NC_NOTFIXED_DW_EM-F2"},
    {"rule": "x < 0.1"},
    {"dataset": "NOT_EXISTING", "rule": "x < 0.1"},
    {"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "rule": "x < 0.1", "PTO": "bogus"},
    {"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "rule": "x < 0.1", "PTO": {"bog": "us"}},
    {"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "rule": "x < 0.1", "local_variables": "bogus"},
    {"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "rule": "bogus syntax"},
    {"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "rule": "unknown_variable > 10"},
    {
        "dataset": "NMC_NC_NOTFIXED_DW_EM-F2",
        "local_variables": {"z": "bogus syntax"},
        "rule": "z > 10",
    },
    {
        "dataset": "NMC_NC_NOTFIXED_DW_EM-F2",
        "local_variables": {"z": "unknown_variable + 1"},
        "rule": "z > 10",
    },
    {
        "dataset": "NMC_NC_NOTFIXED_DW_EM-F2",
        "local_variables": {"z": "v+1", "v": "10"},
        "rule": "z > 10",
    },
]
|
||
# Note: Don't change the order here. In this way it tests all cases.
good_rules = [
    {"process_type": "DIS_ALL", "PTO": "N3LO", "rule": "x < 1e-2"},
    {"process_type": "DIS_ALL", "IC": "False", "rule": "x < 1e-2"},
    {"process_type": "JET", "rule": "pT < 3.16"},
]
|
||
|
||
def mkrule(inp):
    """Build a ``Rule`` from the raw input dict *inp*, using the default
    filter settings and the description of the test theory (``THEORYID``)."""
    theory = Loader().check_theoryID(THEORYID)
    return Rule(
        initial_data=inp,
        defaults=default_filter_settings_input(),
        theory_parameters=theory.get_description(),
    )
|
||
|
||
def test_rule_caching():
    """Cuts computed from different ``filter_rules`` must not collide in
    the cache: one rule and the remaining rules must give different cuts."""
    one_rule = good_rules[:1]
    remaining_rules = good_rules[1:]

    results = [
        API.cuts(
            dataset_input={"dataset": "NMC_NC_NOTFIXED_DW_EM-F2"},
            use_cuts="internal",
            theoryid=THEORYID,
            filter_rules=rules,
        )
        for rules in (one_rule, remaining_rules)
    ]
    assert not results[0] == results[1]
|
||
|
||
def test_PTO():
    """Check parsing and containment of PerturbativeOrder specifiers,
    including the exact (!), inclusive (+) and exclusive (-) modifiers."""
    # spec -> whether NNLO (order 2) is contained
    cases = {
        "NNLO": True,
        "N2LO": True,
        "NNLO!": False,
        "NNLO+": True,
        "NNLO-": False,
    }
    for spec, contains_two in cases.items():
        assert (2 in PerturbativeOrder(spec)) is contains_two
    # An unparsable specifier must raise.
    with pytest.raises(BadPerturbativeOrder):
        PerturbativeOrder("NBogus+")
|
||
|
||
def test_bad_rules():
    """Every malformed entry in ``bad_rules`` must fail rule construction
    with a ``RuleProcessingError``."""
    for bad_input in bad_rules:
        with pytest.raises(RuleProcessingError):
            mkrule(bad_input)
|
||
|
||
def test_default_rules():
    """The default (internal) filter rules must produce loadable cuts for
    representative DIS and collider datasets."""
    loader = Loader()
    cases = [
        ("NMC_NC_NOTFIXED_EM-F2", "legacy_dw"),
        ("LHCB_Z0_8TEV_MUON_Y", None),
    ]
    for name, variant in cases:
        dataset = loader.check_dataset(
            name, cuts="internal", theoryid=THEORYID, variant=variant
        )
        assert dataset.cuts.load() is not None
|
||
|
||
def test_good_rules():
    """All entries of ``good_rules`` must build into Rules that produce
    loadable cuts for both a jet and a DIS dataset."""
    loader = Loader()
    built_rules = tuple(mkrule(inp) for inp in good_rules)
    cases = [
        ("ATLAS_1JET_8TEV_R06_PTY", "legacy"),
        ("NMC_NC_NOTFIXED_EM-F2", "legacy_dw"),
    ]
    for name, variant in cases:
        dataset = loader.check_dataset(
            name, cuts="internal", rules=built_rules, theoryid=THEORYID, variant=variant
        )
        assert dataset.cuts.load() is not None
|
||
|
||
def test_added_rules():
    """Check that ``added_filter_rules`` in a dataspec changes the cuts.

    Three dataspecs share one dataset: the original (no added rules), one
    with a pT cut that removes part of the data, and one with an impossible
    cut (``y < 0``) that removes all points.

    Note: the original version asserted ``tb["empty data"]["ndata"].iloc[0]
    == 0`` twice (a verbatim duplicate); the redundant assertion has been
    removed.
    """
    inp = {
        "theoryid": THEORYID,
        "pdf": PDF,
        "use_cuts": "internal",
        "dataset_inputs": [{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "variant": "legacy"}],
        "filter_rules": [],
        "dataspecs": [
            {"speclabel": "Original", "added_filter_rules": None},
            {
                "speclabel": "fewer data",
                "added_filter_rules": [
                    {"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reason": "pt cut"}
                ],
            },
            {
                "speclabel": "empty data",
                "added_filter_rules": [
                    {"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "y < 0", "reason": "empty data"}
                ],
            },
        ],
    }
    tb = API.dataspecs_chi2_table(**inp)
    # The impossible cut leaves zero data points...
    assert tb["empty data"]["ndata"].iloc[0] == 0
    # ...and consequently no chi2 value for that dataset row.
    assert np.isnan(tb["empty data"].iloc[1, 1])
    # The pT cut must change every per-dataset row relative to the original.
    assert np.all(tb[1:]["fewer data"] != tb[1:]["Original"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
from validphys.api import API | ||
from validphys.fitdata import print_different_cuts, print_systype_overlap | ||
from validphys.tests.conftest import FIT_3REPLICAS, FIT_3REPLICAS_DCUTS | ||
|
||
|
||
def test_print_different_cuts():
    """Check ``print_different_cuts`` using two fits with different choices
    of q2min and w2min in the runcard.

    One of the datasets (SLACP) gets 0 points in the most restrictive case.
    The different cuts are:
        q2min: 3.49 - 13.49
        w2min: 12.5 - 22.5
    """
    fit_names = [FIT_3REPLICAS, FIT_3REPLICAS_DCUTS]
    fits = [API.fit(fit=name) for name in fit_names]
    same_cuts_info = API.test_for_same_cuts(fits=fit_names, use_cuts="fromfit")
    res = print_different_cuts(fits, same_cuts_info)
    expected_fragments = (
        "121 out of 260",
        "59 out of 260",
        "33 out of 211",
        "0 out of 211",
    )
    for fragment in expected_fragments:
        assert fragment in res
|
||
|
||
def test_print_systype_overlap():
    """Check ``print_systype_overlap`` on hand-built groups.

    We can't use the API directly here because we want to create fictional
    groups where overlaps do exist. The first argument of
    :py:func:`print_systype_overlap` is ``groups_commondata``, a list of
    lists: one inner list per ``metadata_group``, each holding a
    ``CommonDataSpec`` per dataset in that group. The second argument is
    ``group_dataset_inputs_by_metadata``, a list with one dict per group
    from which only ``group_name`` is read (the label itself is irrelevant
    to these checks). An overlap is reported as a tuple; no overlap as a str.
    """

    def _cd(name):
        # Load a single CommonDataSpec for the legacy variant of *name*.
        return API.commondata(dataset_input={"dataset": name, "variant": "legacy"})

    cd_atlas_z = _cd("ATLAS_Z0J_8TEV_PT-Y")
    cd_atlas_w = _cd("ATLAS_WJ_8TEV_WM-PT")
    cd_nmc = _cd("NMC_NC_NOTFIXED_P_EM-SIGMARED")

    # Group names don't affect results, set arbitrarily.
    group_1 = {"group_name": "group_1"}
    group_2 = {"group_name": "group_2"}

    # Each group contains the same dataset, so systypes overlap -> tuple.
    assert isinstance(print_systype_overlap([[cd_atlas_z], [cd_atlas_z]], [group_1, group_2]), tuple)
    # A single group can't overlap with anything -> str.
    assert isinstance(print_systype_overlap([[cd_atlas_z]], [group_1]), str)
    # Different datasets that share a systematic still overlap -> tuple.
    assert isinstance(print_systype_overlap([[cd_atlas_z], [cd_atlas_w]], [group_1, group_2]), tuple)
    # Groups with disjoint systematics -> str.
    assert isinstance(print_systype_overlap([[cd_atlas_z, cd_atlas_w], [cd_nmc]], [group_1, group_2]), str)
    # No groups at all -> no overlap -> str.
    assert isinstance(print_systype_overlap([], []), str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import numpy as np | ||
import pytest | ||
import itertools | ||
from hypothesis import given | ||
from hypothesis.strategies import floats, integers, tuples, lists, booleans | ||
from hypothesis.extra.numpy import arrays, array_shapes | ||
|
||
from validphys.fitveto import distribution_veto, determine_vetoes | ||
from validphys.fitveto import NSIGMA_DISCARD_ARCLENGTH, NSIGMA_DISCARD_CHI2, INTEG_THRESHOLD | ||
from validphys.fitdata import FitInfo | ||
|
||
# Hypothesis strategies shared by the tests below.
shape1d = array_shapes(max_dims=1, min_side=1, max_side=1000)
nicefloats = floats(allow_nan=False, allow_infinity=False)
# NOTE(review): max_value=0.4 presumably keeps values below INTEG_THRESHOLD
# so integrability can pass — confirm against fitveto defaults.
integ_floats = floats(allow_nan=False, max_value=0.4)

# Field strategies for a random FitInfo, in FitInfo field order:
# one positive integer, three finite floats, a boolean, a length-7 float
# array and a length-5 float array (the latter bounded above by 0.4).
_fitinfo_fields = (
    integers(min_value=1),
    nicefloats,
    nicefloats,
    nicefloats,
    booleans(),
    arrays(float, shape=7, elements=nicefloats),
    arrays(float, shape=5, elements=integ_floats),
)
fitinfos = tuples(*_fitinfo_fields).map(FitInfo._make)

thresholds = floats(min_value=1, max_value=10)
distributions = arrays(float, shape=shape1d, elements=nicefloats)
|
||
|
||
# Ignore over- and underflow warnings.
@pytest.mark.filterwarnings("ignore")
@given(distributions, thresholds)
def test_distribution_veto(arr, threshold):
    """Entries that survive the veto lie within ``threshold`` standard
    deviations above the mean of the full distribution."""
    keep_all = np.ones_like(arr, dtype=bool)
    survivors = arr[distribution_veto(arr, keep_all, threshold)]
    assert np.all(survivors - np.mean(arr) <= threshold * np.std(arr))
|
||
|
||
# The case where the list is empty is handled in postfit
@pytest.mark.filterwarnings('ignore')
@given(lists(fitinfos, min_size=1))
def test_determine_vetoes(fitinfos):
    """Consistency checks for ``determine_vetoes``: positivity column,
    total veto as conjunction, single-replica behaviour and idempotence."""
    veto_args = (NSIGMA_DISCARD_CHI2, NSIGMA_DISCARD_ARCLENGTH, INTEG_THRESHOLD)
    vetoes = determine_vetoes(fitinfos, *veto_args)

    # The Positivity column mirrors each replica's is_positive flag.
    expected_positive = np.array([info.is_positive for info in fitinfos])
    assert np.all(vetoes['Positivity'] == expected_positive)

    # 'Total' must be the AND of every individual veto column.
    total = vetoes['Total']
    assert all(np.all(total & column == total) for column in vetoes.values())

    # With a single replica, the total veto reduces to the positivity veto.
    single = determine_vetoes([fitinfos[0]], *veto_args)
    assert single['Total'][0] == single['Positivity'][0]

    # distribution_vetoes applied a second time should veto nothing.
    if sum(total) > 0:
        survivors = list(itertools.compress(fitinfos, total))
        second = determine_vetoes(survivors, *veto_args)
        assert sum(vetoes["Total"]) == sum(second["Total"])