Skip to content

feat: initial beer mcstas workflow #184

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions docs/user-guide/beer/beer_mcstas.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib ipympl"
]
},
{
"cell_type": "markdown",
"id": "1",
"metadata": {},
"source": [
"# BEER McStas data reduction"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
"import scipp as sc\n",
"\n",
"from ess.beer import BeerMcStasWorkflow\n",
"from ess.beer.data import mcstas_silicon_medium_resolution, mcstas_duplex_medium_resolution, duplex_peaks_array, silicon_peaks_array\n",
"from ess.reduce.nexus.types import Filename, SampleRun\n",
"from ess.reduce.time_of_flight.types import DetectorTofData"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"wf = BeerMcStasWorkflow()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4",
"metadata": {},
"outputs": [],
"source": [
"wf[Filename[SampleRun]] = mcstas_duplex_medium_resolution()\n",
"da = wf.compute(DetectorTofData[SampleRun])\n",
"da.bins.coords['d'] = (sc.constants.h / sc.constants.m_n * da.bins.coords['tof'] / sc.sin(da.bins.coords['two_theta'] / 2) / da.bins.coords['L'] / 2).to(unit='angstrom')\n",
"p = da.bins.concat().hist(d=2000).plot()\n",
"p.ax.vlines(duplex_peaks_array().values, 0, 20000, linestyle='--', color='black', lw=0.5, label='true $d_{hkl}$')\n",
"p"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"wf[Filename[SampleRun]] = mcstas_silicon_medium_resolution()\n",
"da = wf.compute(DetectorTofData[SampleRun])\n",
"da.bins.coords['d'] = (sc.constants.h / sc.constants.m_n * da.bins.coords['tof'] / sc.sin(da.bins.coords['two_theta'] / 2) / da.bins.coords['L'] / 2).to(unit='angstrom')\n",
"p = da.bins.concat().hist(d=2000).plot()\n",
"p.ax.vlines(silicon_peaks_array().values, 0, 20000, linestyle='--', color='black', lw=0.5, label='true $d_{hkl}$')\n",
"p"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"da.bins.concat().hist(two_theta=1000, t=1000).plot(norm='log')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
10 changes: 10 additions & 0 deletions docs/user-guide/beer/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# BEER

## Reduction Workflows

```{toctree}
---
maxdepth: 1
---
beer_mcstas
```
1 change: 1 addition & 0 deletions docs/user-guide/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ maxdepth: 1

installation
dream/index
beer/index
sns-instruments/index
common/index
```
25 changes: 25 additions & 0 deletions src/ess/beer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)

"""
Components for BEER.

Public entry points are the :class:`BeerMcStasWorkflow` workflow, its
``default_parameters``, and the :func:`load_beer_mcstas` loader.
"""

import importlib.metadata

from .io import load_beer_mcstas
from .workflow import BeerMcStasWorkflow, default_parameters

try:
    __version__ = importlib.metadata.version("essdiffraction")
except importlib.metadata.PackageNotFoundError:
    # Package metadata is unavailable, e.g. when running from a source
    # checkout that was never installed; fall back to a placeholder version.
    __version__ = "0.0.0"

# Keep the package namespace clean; importlib is only needed at import time.
del importlib

__all__ = [
    'BeerMcStasWorkflow',
    '__version__',
    'default_parameters',
    'load_beer_mcstas',
]
58 changes: 58 additions & 0 deletions src/ess/beer/clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
import scipp as sc
from scipp import constants
from scipy.signal import find_peaks

from .types import DetectorData, RunType, StreakClusteredData


def cluster_events_by_streak(da: DetectorData[RunType]) -> StreakClusteredData[RunType]:
    """Cluster detector events into "streaks" of common coarse d-spacing.

    A coarse d-spacing estimate is computed per event from Bragg's law using
    the ``approximate_t0`` coordinate as a provisional time-of-flight origin.
    The histogram of that estimate shows one peak per streak; events are then
    binned between the valleys that separate those peaks.

    Parameters
    ----------
    da:
        Detector event data with coordinates ``t``, ``approximate_t0``,
        ``two_theta`` and ``L``.

    Returns
    -------
    :
        Events binned along a new ``streak`` dimension; bins whose coarse-d
        range does not contain exactly one detected peak carry a ``no_peak``
        mask.
    """
    # Shallow copy so adding the temporary coordinate does not mutate the input.
    da = da.copy(deep=False)
    # Bragg's law: d = (h / m_n) * tof / (2 * L * sin(two_theta / 2)),
    # with tof approximated by t - approximate_t0.
    da.coords['coarse_d'] = (
        constants.h
        / constants.m_n
        * (da.coords['t'] - da.coords['approximate_t0'])
        / sc.sin(da.coords['two_theta'] / 2)
        / da.coords['L']
        / 2
    ).to(unit='angstrom')

    h = da.hist(coarse_d=1000)
    # Peaks need at least 40 counts and a separation of 3 histogram bins.
    # NOTE(review): thresholds appear tuned for this (medium-resolution)
    # dataset — confirm for other chopper settings.
    i_peaks, _ = find_peaks(h.data.values, height=40, distance=3)
    # Valleys are peaks of the inverted histogram; the height criterion keeps
    # only valleys dipping below half of the histogram maximum.
    i_valleys, _ = find_peaks(
        h.data.values.max() - h.data.values, distance=3, height=h.data.values.max() / 2
    )

    valleys = sc.array(
        dims=['coarse_d'],
        values=h.coords['coarse_d'].values[i_valleys],
        unit=h.coords['coarse_d'].unit,
    )
    peaks = sc.array(
        dims=['coarse_d'],
        values=h.coords['coarse_d'].values[i_peaks],
        unit=h.coords['coarse_d'].unit,
    )

    # For each interval between consecutive valleys: does it contain a peak?
    has_peak = peaks.bin(coarse_d=valleys).bins.size().data.to(dtype='bool')
    # Keep a valley only if at least one of its two adjacent intervals
    # contains a peak; the outer edges are padded with False.
    has_peak_left = sc.concat(
        (has_peak, sc.array(dims=['coarse_d'], values=[False], unit=None)), 'coarse_d'
    )
    has_peak_right = sc.concat(
        (
            sc.array(dims=['coarse_d'], values=[False], unit=None),
            has_peak,
        ),
        'coarse_d',
    )
    filtered_valleys = valleys[has_peak_left | has_peak_right]
    # Re-count peaks per filtered interval, bin the events accordingly and
    # mask any streak that does not contain exactly one peak.
    has_peak = peaks.bin(coarse_d=filtered_valleys).bins.size().data
    b = da.bin(coarse_d=filtered_valleys).assign_masks(
        no_peak=has_peak != sc.scalar(1, unit=None)
    )
    # The coarse-d estimate has served its purpose; drop it and rename the
    # binning dimension to its semantic name.
    b = b.drop_coords(('coarse_d',))
    b = b.bins.drop_coords(('coarse_d',))
    b = b.rename_dims(coarse_d='streak')
    return b


# Providers contributed by this module to the workflow assembly.
providers = (cluster_events_by_streak,)
80 changes: 80 additions & 0 deletions src/ess/beer/conversions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
import numpy as np
import scipp as sc

from .types import DetectorTofData, RunType, StreakClusteredData


def compute_tof_in_each_cluster(
    da: StreakClusteredData[RunType],
) -> DetectorTofData[RunType]:
    '''Fits a line through each cluster, the intercept of the line is t0.
    The line is fitted using linear regression with an outlier removal procedure.

    The algorithm is:

    1. Use least squares to fit line through clusters.
    2. Mask points that are "outliers" based on the criteria that they are too far
       from the line in the ``t`` variable.
       This means they don't seem to have the same time of flight origin as the rest
       of the points in the cluster, and probably should belong to another cluster or
       are part of the background.
    3. Go back to 1) and iterate until convergence. A few iterations should be enough.

    Returns
    -------
    :
        Input data with a per-streak ``t0`` coordinate and a per-event
        ``tof`` (= ``t - t0``) coordinate; outlier events are masked.
    '''
    # Regress t against L * sin(theta): events that share an emission time
    # fall on a line t = t0 + s * L * sin(theta), so the per-streak intercept
    # is the time-of-flight origin t0.
    sin_theta_L = sc.sin(da.bins.coords['two_theta'] / 2) * da.bins.coords['L']
    t = da.bins.coords['t']
    # Fixed number of refinement rounds (see docstring step 3).
    for _ in range(5):
        s, t0 = _linear_regression_by_bin(sin_theta_L, t, da)

        # Lines of the neighbouring streaks. np.roll wraps around, so the
        # first and last streak use each other as "neighbours" — presumably
        # harmless because edge streaks are well separated; verify.
        s_left = sc.array(dims=s.dims, values=np.roll(s.values, 1), unit=s.unit)
        s_right = sc.array(dims=s.dims, values=np.roll(s.values, -1), unit=s.unit)
        t0_left = sc.array(dims=t0.dims, values=np.roll(t0.values, 1), unit=t0.unit)
        t0_right = sc.array(dims=t0.dims, values=np.roll(t0.values, -1), unit=t0.unit)

        # Distance from point to line through cluster
        distance_to_self = sc.abs(sc.values(t0) + sc.values(s) * sin_theta_L - t)
        # Distance from this cluster line to next before cluster line
        distance_self_to_left = sc.abs(
            sc.values(t0_left)
            + sc.values(s_left) * sin_theta_L
            - (sc.values(t0) + sc.values(s) * sin_theta_L)
        )
        # Distance from this cluster line to next after cluster line
        distance_self_to_right = sc.abs(
            sc.values(t0_right)
            + sc.values(s_right) * sin_theta_L
            - (sc.values(t0) + sc.values(s) * sin_theta_L)
        )

        # Masks are overwritten each round; the next regression pass then
        # excludes the currently masked events.
        da = da.bins.assign_masks(
            # TODO: Find suitable masking parameters for other chopper settings
            too_far_from_center=(distance_to_self > sc.scalar(3e-4, unit='s')).data,
            too_close_to_other=(
                (distance_self_to_left < sc.scalar(8e-4, unit='s'))
                | (distance_self_to_right < sc.scalar(8e-4, unit='s'))
            ).data,
        )

    # Final intercept per streak, and event-wise time of flight t - t0.
    da = da.assign_coords(t0=sc.values(t0).data)
    da = da.bins.assign_coords(tof=(t - sc.values(t0)).data)
    return da


def _linear_regression_by_bin(
    x: sc.Variable, y: sc.Variable, w: sc.DataArray
) -> tuple[sc.DataArray, sc.DataArray]:
    """Weighted least-squares line fit, performed independently in every bin.

    Fits ``y = slope * x + intercept`` per bin of ``w``, using the values of
    ``w`` as weights, and returns ``(slope, intercept)``.
    """
    # Strip variances so they do not propagate into the fit results.
    weights = sc.values(w)
    total_weight = weights.bins.sum()

    mean_x = (weights * x).bins.sum() / total_weight
    mean_y = (weights * y).bins.sum() / total_weight

    # Weighted covariance of x and y, and weighted variance of x.
    covariance = (weights * (x - mean_x) * (y - mean_y)).bins.sum() / total_weight
    variance = (weights * (x - mean_x) ** 2).bins.sum() / total_weight

    slope = covariance / variance
    intercept = mean_y - slope * mean_x
    return slope, intercept


# Providers contributed by this module to the workflow assembly.
providers = (compute_tof_in_each_cluster,)
71 changes: 71 additions & 0 deletions src/ess/beer/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
"""Data for tests and documentation with BEER."""

import scipp as sc

# Version of the remote data registry (part of the download URL below).
_version = "1"

__all__ = ["mcstas_duplex_medium_resolution", "mcstas_silicon_medium_resolution"]


def _make_pooch():
    """Create the pooch file fetcher for the BEER example data registry."""
    import pooch

    # Checksums of the files served at the base URL for this registry version.
    registry = {
        "duplex.h5": "md5:ebb3f9694ffdd0949f342bd0deaaf627",
        "silicon.h5": "md5:3425ae2b2fe7a938c6f0a4afeb9e0819",
        "silicon-dhkl.tab": "md5:90bedceb23245b045ce1ed0170c1313b",
        "duplex-dhkl.tab": "md5:b4c6c2fcd66466ad291f306b2d6b346e",
    }
    return pooch.create(
        path=pooch.os_cache("ess/beer"),
        env="ESS_DATA_DIR",
        base_url="https://public.esss.dk/groups/scipp/ess/beer/{version}/",
        version=_version,
        registry=registry,
    )


# Module-level fetcher, created once at import time and shared by all helpers.
_pooch = _make_pooch()


def mcstas_duplex_medium_resolution() -> str:
    """
    Simulated intensity from duplex sample with
    medium resolution chopper configuration.

    Returns the local path of the cached download.
    """
    filename = 'duplex.h5'
    return _pooch.fetch(filename)


def mcstas_silicon_medium_resolution() -> str:
    """
    Simulated intensity from silicon sample with
    medium resolution chopper configuration.

    Returns the local path of the cached download.
    """
    filename = 'silicon.h5'
    return _pooch.fetch(filename)


def duplex_peaks() -> str:
    """Return the local path of the duplex reference d-spacing table."""
    table = 'duplex-dhkl.tab'
    return _pooch.fetch(table)


def silicon_peaks() -> str:
    """Return the local path of the silicon reference d-spacing table."""
    table = 'silicon-dhkl.tab'
    return _pooch.fetch(table)


def _read_peaks_table(path: str) -> sc.Variable:
    # Parse a whitespace-separated table of d-spacing values (one number per
    # token) into a sorted 1-D variable. Using str.split() instead of
    # splitting on '\n' also tolerates trailing spaces and blank lines.
    with open(path) as f:
        values = sorted(float(x) for x in f.read().split())
    return sc.array(dims=['d'], values=values, unit='angstrom')


def duplex_peaks_array() -> sc.Variable:
    """Reference :math:`d_{hkl}` peak positions of the duplex sample,
    sorted ascending, in angstrom."""
    return _read_peaks_table(duplex_peaks())


def silicon_peaks_array() -> sc.Variable:
    """Reference :math:`d_{hkl}` peak positions of the silicon sample,
    sorted ascending, in angstrom."""
    return _read_peaks_table(silicon_peaks())
Loading
Loading