Skip to content

feat: more flexible helper for batch reduction #155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions docs/user-guide/amor/amor-reduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -467,13 +467,15 @@
"metadata": {},
"outputs": [],
"source": [
"from ess.reflectometry.tools import orso_datasets_from_measurements\n",
"from ess.reflectometry.tools import from_measurements\n",
"\n",
"datasets = orso_datasets_from_measurements(\n",
"datasets = from_measurements(\n",
" workflow,\n",
" runs.values(),\n",
" target=orso.OrsoIofQDataset,\n",
" # Optionally scale the curves to overlap using `scale_reflectivity_curves_to_overlap`\n",
" scale_to_overlap=True\n",
" # with the same \"critical edge interval\" that was used before\n",
" scale_to_overlap=(sc.scalar(0.01, unit='1/angstrom'), sc.scalar(0.014, unit='1/angstrom'))\n",
")"
]
},
Expand Down
2 changes: 1 addition & 1 deletion src/ess/amor/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
NeXusComponent,
NeXusDetectorName,
ProtonCurrent,
RawChopper,
RawSampleRotation,
RunType,
SampleRotation,
Expand All @@ -29,7 +30,6 @@
ChopperFrequency,
ChopperPhase,
ChopperSeparation,
RawChopper,
)


Expand Down
1 change: 1 addition & 0 deletions src/ess/amor/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def evaluate_reference_at_sample_coords(
ref = ref.transform_coords(
(
"Q",
"theta",
"wavelength_resolution",
"sample_size_resolution",
"angular_resolution",
Expand Down
4 changes: 0 additions & 4 deletions src/ess/amor/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,4 @@ class ChopperSeparation(sciline.Scope[RunType, sc.Variable], sc.Variable):
"""Distance between the two choppers."""


class RawChopper(sciline.Scope[RunType, sc.DataGroup], sc.DataGroup):
"""Chopper data loaded from nexus file."""


GravityToggle = NewType("GravityToggle", bool)
114 changes: 81 additions & 33 deletions src/ess/reflectometry/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@
import sciline
import scipp as sc
import scipy.optimize as opt
from orsopy.fileio.orso import OrsoDataset

from ess.reflectometry import orso
from ess.reflectometry.types import ReflectivityOverQ
from ess.reflectometry.types import (
Filename,
ReducibleData,
ReflectivityOverQ,
SampleRun,
)
from ess.reflectometry.workflow import with_filenames

_STD_TO_FWHM = sc.scalar(2.0) * sc.sqrt(sc.scalar(2.0) * sc.log(sc.scalar(2.0)))

Expand Down Expand Up @@ -279,18 +284,18 @@ def combine_curves(
)


def orso_datasets_from_measurements(
def from_measurements(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we think of a better name? I don't have great suggestions, I thought of something like batch_reduction, but it's not super descriptive either...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure either. I would like to avoid reduction in the name because I feel the term is already overloaded in our context, and strictly speaking you don't have to request a "reduced" data set here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I asked copilot, and the suggestion was compute_reflectivity_datasets.
I guess it's because of the last thing in the docstring that says

    Returns
    ---------
    list of the computed ORSO datasets, containing one reflectivity curve each

which I guess should be changed if you can request other data than reflectivity curves?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that's true, I'll change it.
By the way, I asked Nico earlier today and he's thinking about a better name, so we are probably going to have a suggestion from him soon.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We discussed some options and we came up with compute_pipeline_targets or get_workflow_targets. They both seem reasonably descriptive to me but maybe the shorter one is better

workflow: sciline.Pipeline,
runs: Sequence[Mapping[type, Any]],
runs: Sequence[Mapping[type, Any]] | Mapping[Any, Mapping[type, Any]],
target: type | Sequence[type] = orso.OrsoIofQDataset,
*,
scale_to_overlap: bool = True,
) -> list[OrsoDataset]:
'''Produces a list of ORSO datasets containing one
reflectivity curve for each of the provided runs.
scale_to_overlap: bool | tuple[sc.Variable, sc.Variable] = False,
) -> list | Mapping:
'''Produces a list of datasets containing for each of the provided runs.
Each entry of :code:`runs` is a mapping of parameters and
values needed to produce the dataset.

Optionally, the reflectivity curves can be scaled to overlap in
Optionally, the results can be scaled so that the reflectivity curves overlap in
the regions where they have the same Q-value.

Parameters
Expand All @@ -299,38 +304,81 @@ def orso_datasets_from_measurements(
The sciline workflow used to compute `ReflectivityOverQ` for each of the runs.

runs:
The sciline parameters to be used for each run
The sciline parameters to be used for each run.

target:
The domain type(s) to compute for each run.

scale_to_overlap:
If True the curves will be scaled to overlap.
Note that the curve of the first run is unscaled and
the rest are scaled to match it.
If ``True`` the curves will be scaled to overlap.
If a tuple then this argument will be passed as the ``critical_edge_interval``
argument to the ``scale_reflectivity_curves_to_overlap`` function.

Returns
---------
list of the computed ORSO datasets, containing one reflectivity curve each
'''
reflectivity_curves = []
for parameters in runs:
names = runs.keys() if hasattr(runs, 'keys') else None
runs = runs.values() if hasattr(runs, 'values') else runs

def init_workflow(workflow, parameters):
wf = workflow.copy()
for name, value in parameters.items():
wf[name] = value
reflectivity_curves.append(wf.compute(ReflectivityOverQ))

scale_factors = (
scale_reflectivity_curves_to_overlap([r.hist() for r in reflectivity_curves])[1]
if scale_to_overlap
else (1,) * len(runs)
)
for tp, value in parameters.items():
if tp is Filename[SampleRun]:
continue
wf[tp] = value

if Filename[SampleRun] in parameters:
if isinstance(parameters[Filename[SampleRun]], list | tuple):
wf = with_filenames(
wf,
SampleRun,
parameters[Filename[SampleRun]],
)
else:
wf[Filename[SampleRun]] = parameters[Filename[SampleRun]]
return wf

if scale_to_overlap:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed during the standup, I don't really get why we need the helper function, as opposed to just having different workflows.

  • You would have a workflow that computes a reflectivity curve.
  • If you want to compute this for multiple runs, you just map over the run numbers or filenames, and then use compute_mapped
  • If you want to scale_to_overlap, you have a workflow that basically does the mapping and adds a provider that either just scales the results or scales and merges. You then compute a different result (e.g. CombinedScaledReflectivityOverQ)

You can then view the graph of what is being done for everything (because with the helper function you can't see it all, only pieces, and they are not easy to get to either if you just installed the package?)

In any case, we should avoid having a long discussion here on github, this was just to have a starting point for an in-person discussion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible we can implement the same thing using sciline map reduce. Let us see the helper in this PR as a reference implementation and try to re-implement as one big workflow.

results = []
for parameters in runs:
wf = init_workflow(workflow, parameters)
results.append(wf.compute((ReflectivityOverQ, ReducibleData[SampleRun])))

scale_factors = scale_reflectivity_curves_to_overlap(
[r[ReflectivityOverQ].hist() for r in results],
critical_edge_interval=scale_to_overlap
if isinstance(scale_to_overlap, list | tuple)
else None,
)[1]

datasets = []
for parameters, curve, scale_factor in zip(
runs, reflectivity_curves, scale_factors, strict=True
):
wf = workflow.copy()
for name, value in parameters.items():
wf[name] = value
wf[ReflectivityOverQ] = scale_factor * curve
dataset = wf.compute(orso.OrsoIofQDataset)
for i, parameters in enumerate(runs):
wf = init_workflow(workflow, parameters)
if scale_to_overlap:
# Optimization in case we can avoid re-doing some work:
# Check if any of the targets need ReducibleData if
# ReflectivityOverQ already exists.
# If they don't, we can avoid recomputing ReducibleData.
targets = target if hasattr(target, '__len__') else (target,)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can there ever be a danger that someone passed a string as target and the __len__ check fails? I guess if they passed a string, they will have other problems earlier? (target not found in graph)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess, but like you said, passing a string is illegal anyway and will lead to error at some point.

if any(
_workflow_needs_quantity_A_even_if_quantity_B_is_set(
wf[t], ReducibleData[SampleRun], ReflectivityOverQ
)
for t in targets
):
wf[ReducibleData[SampleRun]] = (
scale_factors[i] * results[i][ReducibleData[SampleRun]]
)
else:
wf[ReflectivityOverQ] = scale_factors[i] * results[i][ReflectivityOverQ]

dataset = wf.compute(target)
datasets.append(dataset)
return datasets
return datasets if names is None else dict(zip(names, datasets, strict=True))


def _workflow_needs_quantity_A_even_if_quantity_B_is_set(workflow, A, B):
wf = workflow.copy()
wf[B] = 'Not important'
return A in wf.underlying_graph
4 changes: 4 additions & 0 deletions src/ess/reflectometry/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
CoordTransformationGraph = NewType("CoordTransformationGraph", dict)


class RawChopper(sciline.Scope[RunType, sc.DataGroup], sc.DataGroup):
"""Chopper data loaded from nexus file."""


class ReducibleData(sciline.Scope[RunType, sc.DataArray], sc.DataArray):
"""Event data with common coordinates added"""

Expand Down
40 changes: 30 additions & 10 deletions src/ess/reflectometry/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
import sciline
import scipp as sc

from ess.amor.types import RawChopper
from ess.reflectometry.orso import (
OrsoExperiment,
OrsoOwner,
OrsoSample,
OrsoSampleFilenames,
)
from ess.reflectometry.types import (
DetectorRotation,
Filename,
RawChopper,
ReducibleData,
RunType,
SampleRotation,
Expand Down Expand Up @@ -62,15 +63,34 @@ def with_filenames(

mapped = wf.map(df)

wf[ReducibleData[runtype]] = mapped[ReducibleData[runtype]].reduce(
index=axis_name, func=_concatenate_event_lists
)
wf[RawChopper[runtype]] = mapped[RawChopper[runtype]].reduce(
index=axis_name, func=_any_value
)
wf[SampleRotation[runtype]] = mapped[SampleRotation[runtype]].reduce(
index=axis_name, func=_any_value
)
try:
wf[ReducibleData[runtype]] = mapped[ReducibleData[runtype]].reduce(
index=axis_name, func=_concatenate_event_lists
)
except ValueError:
# ReducibleData[runtype] is independent of Filename[runtype]
pass
try:
wf[RawChopper[runtype]] = mapped[RawChopper[runtype]].reduce(
index=axis_name, func=_any_value
)
except ValueError:
# RawChopper[runtype] is independent of Filename[runtype]
pass
try:
wf[SampleRotation[runtype]] = mapped[SampleRotation[runtype]].reduce(
index=axis_name, func=_any_value
)
except ValueError:
# SampleRotation[runtype] is independent of Filename[runtype]
pass
try:
wf[DetectorRotation[runtype]] = mapped[DetectorRotation[runtype]].reduce(
index=axis_name, func=_any_value
)
except ValueError:
# DetectorRotation[runtype] is independent of Filename[runtype]
pass

if runtype is SampleRun:
wf[OrsoSample] = mapped[OrsoSample].reduce(index=axis_name, func=_any_value)
Expand Down
68 changes: 68 additions & 0 deletions tests/amor/tools_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 Scipp contributors (https://github.com/scipp)
import pytest
import sciline
import scipp as sc
from scipp.testing import assert_allclose

from amor.pipeline_test import amor_pipeline # noqa: F401
from ess.amor import data
from ess.amor.types import ChopperPhase
from ess.reflectometry.tools import from_measurements
from ess.reflectometry.types import (
DetectorRotation,
Filename,
QBins,
ReducedReference,
ReferenceRun,
ReflectivityOverQ,
SampleRotation,
SampleRun,
)

# The files used in the AMOR reduction workflow have some scippnexus warnings
pytestmark = pytest.mark.filterwarnings(
"ignore:.*Invalid transformation, .*missing attribute 'vector':UserWarning",
)


@pytest.fixture
def pipeline_with_1632_reference(amor_pipeline): # noqa: F811
amor_pipeline[ChopperPhase[ReferenceRun]] = sc.scalar(7.5, unit='deg')
amor_pipeline[ChopperPhase[SampleRun]] = sc.scalar(7.5, unit='deg')
amor_pipeline[Filename[ReferenceRun]] = data.amor_run('1632')
amor_pipeline[ReducedReference] = amor_pipeline.compute(ReducedReference)
return amor_pipeline


@pytestmark
def test_from_measurements_tool_concatenates_event_lists(
pipeline_with_1632_reference: sciline.Pipeline,
):
pl = pipeline_with_1632_reference

run = {
Filename[SampleRun]: list(map(data.amor_run, (1636, 1639, 1641))),
QBins: sc.geomspace(
dim='Q', start=0.062, stop=0.18, num=391, unit='1/angstrom'
),
DetectorRotation[SampleRun]: sc.scalar(0.140167, unit='rad'),
SampleRotation[SampleRun]: sc.scalar(0.0680678, unit='rad'),
}
results = from_measurements(
pl,
[run],
target=ReflectivityOverQ,
scale_to_overlap=False,
)

results2 = []
for fname in run[Filename[SampleRun]]:
pl.copy()
pl[Filename[SampleRun]] = fname
pl[QBins] = run[QBins]
pl[DetectorRotation[SampleRun]] = run[DetectorRotation[SampleRun]]
pl[SampleRotation[SampleRun]] = run[SampleRotation[SampleRun]]
results2.append(pl.compute(ReflectivityOverQ).hist().data)

assert_allclose(sum(results2), results[0].hist().data)
Loading
Loading