diff --git a/doc/conf.py b/doc/conf.py index 27bb9bb55..8f32f6e5a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -54,6 +54,7 @@ autoclass_content = "both" autosummary_generate = True autodoc_member_order = "groupwise" +autodoc_type_aliases = {"ArrayLike": "ArrayLike"} # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/doc/notebooks/cost_functions.ipynb b/doc/notebooks/cost_functions.ipynb index af4b3363d..a82de5102 100644 --- a/doc/notebooks/cost_functions.ipynb +++ b/doc/notebooks/cost_functions.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "lucky-canvas", "metadata": {}, "outputs": [], @@ -34,7 +34,9 @@ "from numba_stats import truncnorm, truncexpon, norm, expon \n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", - "from scipy.stats import multivariate_normal as mvnorm" + "from scipy.stats import multivariate_normal as mvnorm\n", + "# accurate numerical derivatives\n", + "from jacobi import jacobi" ] }, { @@ -47,10 +49,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "destroyed-fusion", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "xr = (0, 2) # xrange\n", "\n", @@ -79,10 +92,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "b62cbb46", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "n2, _, ye = np.histogram2d(xdata, ydata, bins=(20, 5), range=(xr, (0, np.max(ydata))))\n", "\n", @@ -117,15 +141,990 @@ "* $\\sigma > 0$,\n", "* $\\tau > 0$.\n", "\n", - "In addition, it can be beneficial to use $-1 < \\mu < 1$ (optional), but it is not required. We use `truncnorm` and `truncexpon`, which are normalised inside the data range (0, 2)." + "In addition, it can be beneficial to use $0 < \\mu < 2$, but it is not required. We use `truncnorm` and `truncexpon`, which are normalised inside the data range (0, 2)." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "uniform-drama", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Migrad
FCN = 768.1 Nfcn = 111
EDM = 3.76e-06 (Goal: 0.0002)
Valid Minimum Below EDM threshold (goal x 10)
No parameters at limit Below call limit
Hesse ok Covariance accurate
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name Value Hesse Error Minos Error- Minos Error+ Limit- Limit+ Fixed
0 z 0.537 0.015 0 1
1 mu 0.996 0.004 0 2
2 sigma 0.1006 0.0035 0
3 tau 1.05 0.08 0
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
z mu sigma tau
z 0.000231 -0.003e-3 (-0.052) 0.019e-3 (0.353) -0.24e-3 (-0.206)
mu -0.003e-3 (-0.052) 1.5e-05 -0.001e-3 (-0.069) -0.015e-3 (-0.053)
sigma 0.019e-3 (0.353) -0.001e-3 (-0.069) 1.25e-05 -0.043e-3 (-0.163)
tau -0.24e-3 (-0.206) -0.015e-3 (-0.053) -0.043e-3 (-0.163) 0.00568
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2023-08-23T14:43:37.279667\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.7.2, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "┌─────────────────────────────────────────────────────────────────────────┐\n", + "│ Migrad │\n", + "├──────────────────────────────────┬──────────────────────────────────────┤\n", + "│ FCN = 768.1 │ Nfcn = 111 │\n", + "│ EDM = 3.76e-06 (Goal: 0.0002) │ │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Valid Minimum │ Below EDM threshold (goal x 10) │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ No parameters at limit │ Below call limit │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Hesse ok │ Covariance accurate │\n", + "└──────────────────────────────────┴──────────────────────────────────────┘\n", + "┌───┬───────┬───────────┬───────────┬────────────┬────────────┬─────────┬─────────┬───────┐\n", + "│ │ Name │ Value │ Hesse Err │ Minos Err- │ Minos Err+ │ Limit- │ Limit+ │ Fixed │\n", + "├───┼───────┼───────────┼───────────┼────────────┼────────────┼─────────┼─────────┼───────┤\n", + "│ 0 │ z │ 0.537 │ 0.015 │ │ │ 0 │ 1 │ │\n", + "│ 1 │ mu │ 0.996 │ 0.004 │ │ │ 0 │ 2 │ │\n", + "│ 2 │ sigma │ 0.1006 │ 0.0035 │ │ │ 0 │ │ │\n", + "│ 3 │ tau │ 1.05 │ 0.08 │ │ │ 0 │ │ │\n", + "└───┴───────┴───────────┴───────────┴────────────┴────────────┴─────────┴─────────┴───────┘\n", + "┌───────┬─────────────────────────────────────────┐\n", + "│ │ z mu sigma tau │\n", + "├───────┼─────────────────────────────────────────┤\n", + "│ z │ 0.000231 -0.003e-3 0.019e-3 -0.24e-3 │\n", + "│ mu │ -0.003e-3 1.5e-05 -0.001e-3 -0.015e-3 │\n", + "│ sigma │ 0.019e-3 -0.001e-3 1.25e-05 -0.043e-3 │\n", + "│ tau │ -0.24e-3 -0.015e-3 -0.043e-3 0.00568 │\n", + "└───────┴─────────────────────────────────────────┘" + ] + }, + "execution_count": 
15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def pdf(x, z, mu, sigma, tau):\n", " return (z * truncnorm.pdf(x, *xr, mu, sigma) + \n", @@ -133,8 +1132,1011 @@ "\n", "c = cost.UnbinnedNLL(xmix, pdf)\n", "\n", - "m = Minuit(c, z=0.4, mu=0.1, sigma=0.2, tau=2)\n", + "m = Minuit(c, z=0.4, mu=1, sigma=0.2, tau=1)\n", + "m.limits[\"z\"] = (0, 1)\n", + "m.limits[\"mu\"] = (0, 2)\n", + "m.limits[\"sigma\", \"tau\"] = (0, None)\n", + "m.migrad()" + ] + }, + { + "cell_type": "markdown", + "id": "fa50a81d", + "metadata": {}, + "source": [ + "If the gradient of the model is available, it can be passed to the cost function to enable the computation of its gradient, which Minuit then uses to potentially improve the minimization. We use a numerically computed gradient here obtained from the `jacobi` library. This is for demonstration purpose only and generally not recommended, since `jacobi` computes the gradient much more accurately than what is required for Minuit." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8081f7f5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Migrad
FCN = 768.1 Nfcn = 87, Ngrad = 5
EDM = 3.31e-05 (Goal: 0.0002) time = 0.1 sec
Valid Minimum Below EDM threshold (goal x 10)
No parameters at limit Below call limit
Hesse ok Covariance accurate
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name Value Hesse Error Minos Error- Minos Error+ Limit- Limit+ Fixed
0 z 0.537 0.015 0 1
1 mu 0.996 0.004 0 2
2 sigma 0.1006 0.0035 0
3 tau 1.05 0.08 0
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
z mu sigma tau
z 0.000231 -0.003e-3 (-0.052) 0.019e-3 (0.353) -0.24e-3 (-0.206)
mu -0.003e-3 (-0.052) 1.5e-05 -0.001e-3 (-0.069) -0.015e-3 (-0.053)
sigma 0.019e-3 (0.353) -0.001e-3 (-0.069) 1.25e-05 -0.043e-3 (-0.163)
tau -0.24e-3 (-0.206) -0.015e-3 (-0.053) -0.043e-3 (-0.163) 0.00569
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2023-08-23T14:43:38.309566\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.7.2, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "┌─────────────────────────────────────────────────────────────────────────┐\n", + "│ Migrad │\n", + "├──────────────────────────────────┬──────────────────────────────────────┤\n", + "│ FCN = 768.1 │ Nfcn = 87, Ngrad = 5 │\n", + "│ EDM = 3.31e-05 (Goal: 0.0002) │ time = 0.1 sec │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Valid Minimum │ Below EDM threshold (goal x 10) │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ No parameters at limit │ Below call limit │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Hesse ok │ Covariance accurate │\n", + "└──────────────────────────────────┴──────────────────────────────────────┘\n", + "┌───┬───────┬───────────┬───────────┬────────────┬────────────┬─────────┬─────────┬───────┐\n", + "│ │ Name │ Value │ Hesse Err │ Minos Err- │ Minos Err+ │ Limit- │ Limit+ │ Fixed │\n", + "├───┼───────┼───────────┼───────────┼────────────┼────────────┼─────────┼─────────┼───────┤\n", + "│ 0 │ z │ 0.537 │ 0.015 │ │ │ 0 │ 1 │ │\n", + "│ 1 │ mu │ 0.996 │ 0.004 │ │ │ 0 │ 2 │ │\n", + "│ 2 │ sigma │ 0.1006 │ 0.0035 │ │ │ 0 │ │ │\n", + "│ 3 │ tau │ 1.05 │ 0.08 │ │ │ 0 │ │ │\n", + "└───┴───────┴───────────┴───────────┴────────────┴────────────┴─────────┴─────────┴───────┘\n", + "┌───────┬─────────────────────────────────────────┐\n", + "│ │ z mu sigma tau │\n", + "├───────┼─────────────────────────────────────────┤\n", + "│ z │ 0.000231 -0.003e-3 0.019e-3 -0.24e-3 │\n", + "│ mu │ -0.003e-3 1.5e-05 -0.001e-3 -0.015e-3 │\n", + "│ sigma │ 0.019e-3 -0.001e-3 1.25e-05 -0.043e-3 │\n", + "│ tau │ -0.24e-3 -0.015e-3 -0.043e-3 0.00569 │\n", + "└───────┴─────────────────────────────────────────┘" + ] 
+ }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def grad(x, *par):\n", + " return jacobi(lambda par: pdf(x, *par), par)[0].T\n", + "\n", + "c = cost.UnbinnedNLL(xmix, pdf, grad=grad)\n", + "\n", + "m = Minuit(c, z=0.4, mu=1, sigma=0.2, tau=1, grad=c.gradient)\n", "m.limits[\"z\"] = (0, 1)\n", + "m.limits[\"mu\"] = (0, 2)\n", "m.limits[\"sigma\", \"tau\"] = (0, None)\n", "m.migrad()" ] @@ -149,10 +2151,143 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "9da33a94", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Migrad
FCN = 147.6 Nfcn = 134
EDM = 2.1e-06 (Goal: 0.0002)
Valid Minimum Below EDM threshold (goal x 10)
No parameters at limit Below call limit
Hesse ok Covariance accurate
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name Value Hesse Error Minos Error- Minos Error+ Limit- Limit+ Fixed
0 mu 0.9946 0.0031
1 sigma 0.0986 0.0022 0
2 tau 0.972 0.031 0
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mu sigma tau
mu 9.73e-06 0e-6 -0e-6
sigma 0e-6 4.86e-06 -0e-6
tau -0e-6 -0e-6 0.000944
" + ], + "text/plain": [ + "┌─────────────────────────────────────────────────────────────────────────┐\n", + "│ Migrad │\n", + "├──────────────────────────────────┬──────────────────────────────────────┤\n", + "│ FCN = 147.6 │ Nfcn = 134 │\n", + "│ EDM = 2.1e-06 (Goal: 0.0002) │ │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Valid Minimum │ Below EDM threshold (goal x 10) │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ No parameters at limit │ Below call limit │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Hesse ok │ Covariance accurate │\n", + "└──────────────────────────────────┴──────────────────────────────────────┘\n", + "┌───┬───────┬───────────┬───────────┬────────────┬────────────┬─────────┬─────────┬───────┐\n", + "│ │ Name │ Value │ Hesse Err │ Minos Err- │ Minos Err+ │ Limit- │ Limit+ │ Fixed │\n", + "├───┼───────┼───────────┼───────────┼────────────┼────────────┼─────────┼─────────┼───────┤\n", + "│ 0 │ mu │ 0.9946 │ 0.0031 │ │ │ │ │ │\n", + "│ 1 │ sigma │ 0.0986 │ 0.0022 │ │ │ 0 │ │ │\n", + "│ 2 │ tau │ 0.972 │ 0.031 │ │ │ 0 │ │ │\n", + "└───┴───────┴───────────┴───────────┴────────────┴────────────┴─────────┴─────────┴───────┘\n", + "┌───────┬────────────────────────────┐\n", + "│ │ mu sigma tau │\n", + "├───────┼────────────────────────────┤\n", + "│ mu │ 9.73e-06 0e-6 -0e-6 │\n", + "│ sigma │ 0e-6 4.86e-06 -0e-6 │\n", + "│ tau │ -0e-6 -0e-6 0.000944 │\n", + "└───────┴────────────────────────────┘" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def logpdf(xy, mu, sigma, tau):\n", " x, y = xy\n", @@ -164,6 +2299,191 @@ "m.migrad()" ] }, + { + "cell_type": "markdown", + "id": "c272cd2b", + "metadata": {}, + "source": [ + "And we can also use a gradient as before." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "220d17c6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Migrad
FCN = 147.6 Nfcn = 81, Ngrad = 9
EDM = 3.73e-05 (Goal: 0.0002) time = 0.2 sec
Valid Minimum Below EDM threshold (goal x 10)
No parameters at limit Below call limit
Hesse ok Covariance accurate
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name Value Hesse Error Minos Error- Minos Error+ Limit- Limit+ Fixed
0 mu 0.9946 0.0031
1 sigma 0.0986 0.0022 0
2 tau 0.972 0.031 0
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mu sigma tau
mu 9.73e-06 0e-6 -0e-6
sigma 0e-6 4.87e-06 -0e-6
tau -0e-6 -0e-6 0.000944
" + ], + "text/plain": [ + "┌─────────────────────────────────────────────────────────────────────────┐\n", + "│ Migrad │\n", + "├──────────────────────────────────┬──────────────────────────────────────┤\n", + "│ FCN = 147.6 │ Nfcn = 81, Ngrad = 9 │\n", + "│ EDM = 3.73e-05 (Goal: 0.0002) │ time = 0.2 sec │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Valid Minimum │ Below EDM threshold (goal x 10) │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ No parameters at limit │ Below call limit │\n", + "├──────────────────────────────────┼──────────────────────────────────────┤\n", + "│ Hesse ok │ Covariance accurate │\n", + "└──────────────────────────────────┴──────────────────────────────────────┘\n", + "┌───┬───────┬───────────┬───────────┬────────────┬────────────┬─────────┬─────────┬───────┐\n", + "│ │ Name │ Value │ Hesse Err │ Minos Err- │ Minos Err+ │ Limit- │ Limit+ │ Fixed │\n", + "├───┼───────┼───────────┼───────────┼────────────┼────────────┼─────────┼─────────┼───────┤\n", + "│ 0 │ mu │ 0.9946 │ 0.0031 │ │ │ │ │ │\n", + "│ 1 │ sigma │ 0.0986 │ 0.0022 │ │ │ 0 │ │ │\n", + "│ 2 │ tau │ 0.972 │ 0.031 │ │ │ 0 │ │ │\n", + "└───┴───────┴───────────┴───────────┴────────────┴────────────┴─────────┴─────────┴───────┘\n", + "┌───────┬────────────────────────────┐\n", + "│ │ mu sigma tau │\n", + "├───────┼────────────────────────────┤\n", + "│ mu │ 9.73e-06 0e-6 -0e-6 │\n", + "│ sigma │ 0e-6 4.87e-06 -0e-6 │\n", + "│ tau │ -0e-6 -0e-6 0.000944 │\n", + "└───────┴────────────────────────────┘" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def grad(xy, *par):\n", + " return jacobi(lambda p: logpdf(xy, *p), par)[0].T\n", + "\n", + "c = cost.UnbinnedNLL((xdata, ydata), logpdf, log=True, grad=grad)\n", + "m = Minuit(c, mu=1, sigma=2, tau=2, grad=c.gradient)\n", + "m.limits[\"sigma\", \"tau\"] = (0, None)\n", + 
"m.migrad()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "4f9704d9", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Iteration of zero-sized operands is not enabled", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m c\u001b[39m.\u001b[39;49mcovariance()\n", + "File \u001b[0;32m~/Extern/iminuit/src/iminuit/cost.py:870\u001b[0m, in \u001b[0;36mMaskedCost.covariance\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 852\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcovariance\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39margs: \u001b[39mfloat\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m NDArray:\n\u001b[1;32m 853\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 854\u001b[0m \u001b[39m Estimate covariance of the parameters with the sandwich estimator.\u001b[39;00m\n\u001b[1;32m 855\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[39m The array has shape (K, K) for K arguments.\u001b[39;00m\n\u001b[1;32m 869\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 870\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sandwich(args)\n", + "File \u001b[0;32m~/Extern/iminuit/src/iminuit/cost.py:984\u001b[0m, in \u001b[0;36mUnbinnedCost._sandwich\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m 983\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_sandwich\u001b[39m(\u001b[39mself\u001b[39m, args: Sequence[\u001b[39mfloat\u001b[39m]) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m NDArray:\n\u001b[0;32m--> 984\u001b[0m \u001b[39mreturn\u001b[39;00m np\u001b[39m.\u001b[39mlinalg\u001b[39m.\u001b[39minv(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fisher_information(args))\n", + "File 
\u001b[0;32m~/Extern/iminuit/src/iminuit/cost.py:980\u001b[0m, in \u001b[0;36mUnbinnedCost._fisher_information\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m 979\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_fisher_information\u001b[39m(\u001b[39mself\u001b[39m, args: Sequence[\u001b[39mfloat\u001b[39m]) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m NDArray:\n\u001b[0;32m--> 980\u001b[0m g \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pointwise_grad(args)\n\u001b[1;32m 981\u001b[0m \u001b[39mreturn\u001b[39;00m np\u001b[39m.\u001b[39meinsum(\u001b[39m\"\u001b[39m\u001b[39mji,ki->jk\u001b[39m\u001b[39m\"\u001b[39m, g, g)\n", + "File \u001b[0;32m~/Extern/iminuit/src/iminuit/cost.py:1076\u001b[0m, in \u001b[0;36mUnbinnedNLL._pointwise_grad\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39ma gradient is required to use this functionality\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 1075\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_masked\n\u001b[0;32m-> 1076\u001b[0m g \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_grad(data, \u001b[39m*\u001b[39;49margs)\n\u001b[1;32m 1077\u001b[0m g \u001b[39m=\u001b[39m _normalize_model_output(g)\n\u001b[1;32m 1078\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_log:\n", + "Cell \u001b[0;32mIn[18], line 2\u001b[0m, in \u001b[0;36mgrad\u001b[0;34m(xy, *par)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mgrad\u001b[39m(xy, \u001b[39m*\u001b[39mpar):\n\u001b[0;32m----> 2\u001b[0m \u001b[39mreturn\u001b[39;00m jacobi(\u001b[39mlambda\u001b[39;49;00m p: logpdf(xy, \u001b[39m*\u001b[39;49mp), par)[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mT\n", + "File \u001b[0;32m~/Extern/iminuit/py39/lib/python3.9/site-packages/jacobi/_jacobi.py:112\u001b[0m, in \u001b[0;36mjacobi\u001b[0;34m(fn, x, 
diagonal, method, mask, rtol, maxiter, maxgrad, step, diagnostic, *args)\u001b[0m\n\u001b[1;32m 110\u001b[0m jac \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 111\u001b[0m err \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m--> 112\u001b[0m it \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49mnditer(x, flags\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39mc_index\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mmulti_index\u001b[39;49m\u001b[39m\"\u001b[39;49m])\n\u001b[1;32m 113\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mnot\u001b[39;00m it\u001b[39m.\u001b[39mfinished:\n\u001b[1;32m 114\u001b[0m k \u001b[39m=\u001b[39m it\u001b[39m.\u001b[39mindex\n", + "\u001b[0;31mValueError\u001b[0m: Iteration of zero-sized operands is not enabled" + ] + } + ], + "source": [ + "c.covariance()" + ] + }, { "cell_type": "markdown", "id": "introductory-watershed", @@ -808,7 +3128,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.9.16" }, "vscode": { "interpreter": { diff --git a/doc/plots/interactive.py b/doc/plots/interactive.py index e7a62ce11..b0c2b4451 100644 --- a/doc/plots/interactive.py +++ b/doc/plots/interactive.py @@ -3,7 +3,7 @@ from matplotlib import pyplot as plt -# custom visualization, x and y are taken from outer scope +# custom visualization; x, y, model are taken from outer scope def viz(args): plt.plot(x, y, "ok") xm = np.linspace(x[0], x[-1], 100) @@ -18,4 +18,5 @@ def model(x, a, b): y = np.array([1.03, 1.58, 2.03, 2.37, 3.09]) c = cost.LeastSquares(x, y, 0.1, model) m = Minuit(c, 0.5, 0.5) -m.interactive(viz) # calling m.interactive() uses LeastSquares.visualize +m.interactive(viz) +# m.interactive() also works and calls LeastSquares.visualize diff --git a/pyproject.toml b/pyproject.toml index 19c684e22..baf08f66e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,10 +95,13 @@ filterwarnings = [ [tool.ruff] 
select = [ - "E", "F", # flake8 - "D", # pydocstyle + "E", + "F", # flake8 + "D", # pydocstyle +] +extend-ignore = [ + "D212", # multi-line-summary-first-line ] -extend-ignore = ["D203", "D212"] src = ["src"] unfixable = [ "F841", # Removes unused variables @@ -112,8 +115,6 @@ convention = "numpy" ".ci/*.py" = ["D"] "bench/*.py" = ["D"] "doc/*.py" = ["D"] -"setup.py" = ["D"] -"cmake_ext.py" = ["D"] [tool.mypy] ignore_missing_imports = true diff --git a/src/iminuit/_optional_dependencies.py b/src/iminuit/_optional_dependencies.py index 7c097fa99..719c952a9 100644 --- a/src/iminuit/_optional_dependencies.py +++ b/src/iminuit/_optional_dependencies.py @@ -1,13 +1,17 @@ import contextlib import warnings from iminuit.warnings import OptionalDependencyWarning +from typing import Dict, Optional @contextlib.contextmanager -def optional_module_for(functionality, *, replace=None, stacklevel=3): +def optional_module_for( + functionality: str, *, replace: Optional[Dict[str, str]] = None, stacklevel: int = 3 +): try: yield except ModuleNotFoundError as e: + assert e.name is not None package = e.name.split(".")[0] if replace: package = replace.get(package, package) diff --git a/src/iminuit/cost.py b/src/iminuit/cost.py index 0f9bba498..d7f0a9ee4 100644 --- a/src/iminuit/cost.py +++ b/src/iminuit/cost.py @@ -18,8 +18,9 @@ - Data are binned: :class:`ExtendedBinnedNLL`, also supports histogram of weighted samples -- Fit a template to binned data with bin-wise uncertainties on the template: - :class:`Template`, which also supports weighted data and weighted templates +- Fit a template to binned data with bin-wise uncertainties on the template + + - :class:`Template`, also supports weighted data and weighted template histograms - Fit of a function f(x) to (x, y, yerror) pairs with normal-distributed fluctuations. x is one- or multi-dimensional, y is one-dimensional. 
@@ -28,8 +29,9 @@ - y values contain outliers: :class:`LeastSquares` with loss function set to "soft_l1" -- Include constraints from external fits or apply regularisation: - :class:`NormalConstraint` +- Include constraints from external fits or apply regularisation + + - :class:`NormalConstraint` Combining cost functions ------------------------ @@ -52,6 +54,18 @@ can have negative amplitudes. To achieve this, simply reset the limits with :attr:`iminuit.Minuit.limits` after creating the Minuit instance. +User-defined gradients +---------------------- +If the user provides a model gradient, the cost functions defined here except +:class:`Template` will then also make their gradient available, which is then +automatically used by :class:`iminuit.Minuit` (see the constructor for details) to +potentially improve the fit (improve convergence or robustness). + +Note that it is perfectly normal to use Minuit without a user-defined gradient, and +Minuit does not always benefit from a user-defined gradient. If the gradient is +expensive to compute, the time to converge may increase. If you have trouble with the +fitting process, it is unlikely that the issues are resolved by a user-defined gradient. + Notes ----- The cost functions defined here have been optimized with knowledge about implementation @@ -62,16 +76,19 @@ the histogram, the sum of weights and the sum of squared weights is needed then, see class documentation for details. """ +from __future__ import annotations + from .util import ( describe, merge_signatures, PerformanceWarning, _smart_sampling, _detect_log_spacing, + is_positive_definite, ) -from .typing import Model, LossFunction +from .typing import Model, ModelGradient, LossFunction import numpy as np -from numpy.typing import ArrayLike, NDArray +from numpy.typing import NDArray, ArrayLike from collections.abc import Sequence as ABCSequence import abc from typing import ( @@ -85,14 +102,16 @@ class documentation for details. 
Iterable, Optional, overload, + TypeVar, + Callable, + cast, ) import warnings -from ._deprecated import deprecated_parameter +from ._deprecated import deprecated_parameter, deprecated __all__ = [ "CHISQUARE", "NEGATIVE_LOG_LIKELIHOOD", - "BohmZechTransform", "chi2", "multinominal_chi2", "poisson_chi2", @@ -110,6 +129,8 @@ class documentation for details. "LeastSquares", ] +T = TypeVar("T", float, NDArray) + CHISQUARE = 1.0 NEGATIVE_LOG_LIKELIHOOD = 0.5 @@ -130,20 +151,13 @@ def _z_squared(y, ye, ym): return z * z -def _soft_l1_loss(z_sqr): - return np.sum(2 * (np.sqrt(1 + z_sqr) - 1)) - - -def _soft_l1_cost(y, ye, ym): - return _soft_l1_loss(_z_squared(y, ye, ym)) - - def _replace_none(x, replacement): if x is None: return replacement return x +@deprecated("The class is deprecated and will be removed without replacement") class BohmZechTransform: """ Apply Bohm-Zech transform. @@ -153,8 +167,10 @@ class BohmZechTransform: :meta private: """ - _scale: np.ndarray - _obs: np.ndarray + __slots__ = "_obs", "_scale" + + _obs: NDArray + _scale: NDArray def __init__(self, val: ArrayLike, var: ArrayLike): """ @@ -232,6 +248,21 @@ def chi2(y: ArrayLike, ye: ArrayLike, ym: ArrayLike) -> float: return np.sum(_z_squared(y, ye, ym)) +def _chi2_grad(y: NDArray, ye: NDArray, ym: NDArray, ymg: NDArray) -> NDArray: + return -2 * np.sum((y - ym) * ymg * ye**-2, axis=1) + + +def _soft_l1_cost(y: NDArray, ye: NDArray, ym: NDArray) -> float: + z_sqr = _z_squared(y, ye, ym) + return 2 * np.sum(np.sqrt(1 + z_sqr) - 1) + + +def _soft_l1_cost_grad(y: NDArray, ye: NDArray, ym: NDArray, ymg: NDArray) -> NDArray: + inv_ye = 1 / ye + z = (y - ym) * inv_ye + return -2 * np.sum(z * ymg * inv_ye * (1 + z**2) ** -0.5, axis=1) + + def multinominal_chi2(n: ArrayLike, mu: ArrayLike) -> float: """ Compute asymptotically chi2-distributed cost for binomially-distributed data. 
@@ -260,6 +291,10 @@ def multinominal_chi2(n: ArrayLike, mu: ArrayLike) -> float: return 2 * np.sum(n * (_safe_log(n) - _safe_log(mu))) +def _multinominal_chi2_grad(n: NDArray, mu: NDArray, gmu: NDArray) -> NDArray: + return -2 * np.sum(n * gmu / mu, axis=tuple(range(1, gmu.ndim))) + + def poisson_chi2(n: ArrayLike, mu: ArrayLike) -> float: """ Compute asymptotically chi2-distributed cost for Poisson-distributed data. @@ -287,6 +322,10 @@ def poisson_chi2(n: ArrayLike, mu: ArrayLike) -> float: return 2 * np.sum(mu - n + n * (_safe_log(n) - _safe_log(mu))) +def _poisson_chi2_grad(n: NDArray, mu: NDArray, gmu: NDArray) -> NDArray: + return 2 * np.sum(gmu * (1.0 - n / mu), axis=tuple(range(1, gmu.ndim))) + + def template_chi2_jsc(n: ArrayLike, mu: ArrayLike, mu_var: ArrayLike) -> float: """ Compute asymptotically chi2-distributed cost for a template fit. @@ -470,23 +509,6 @@ def chi2(y: ArrayLike, ye: ArrayLike, ym: ArrayLike) -> float: # noqa chi2.__doc__ = _chi2_np.__doc__ - _soft_l1_loss_np = _soft_l1_loss - _soft_l1_loss_nb = jit( - nogil=True, - cache=True, - error_model="numpy", - )(_soft_l1_loss_np) - - def _soft_l1_loss(z_sqr): - if z_sqr.dtype in (np.float32, np.float64): - return _soft_l1_loss_nb(z_sqr) - # fallback to numpy for float128 - return _soft_l1_loss_np(z_sqr) - - @nb_overload(_soft_l1_loss, inline="always") - def _ol_soft_l1_loss(z_sqr): - return _soft_l1_loss_np # pragma: no cover - _soft_l1_cost_np = _soft_l1_cost _soft_l1_cost_nb = jit( nogil=True, @@ -494,7 +516,7 @@ def _ol_soft_l1_loss(z_sqr): error_model="numpy", )(_soft_l1_cost_np) - def _soft_l1_cost(y, ye, ym): + def _soft_l1_cost(y: NDArray, ye: NDArray, ym: NDArray) -> float: if ym.dtype in (np.float32, np.float64): return _soft_l1_cost_nb(y, ye, ym) # fallback to numpy for float128 @@ -538,6 +560,11 @@ def ndata(self): """ return self._ndata() + @property + def npar(self): + """Return total number of model parameters.""" + return len(self._parameters) + @abc.abstractmethod def 
_ndata(self): NotImplemented # pragma: no cover @@ -597,13 +624,44 @@ def __call__(self, *args: float) -> float: ------- float """ - r = self._call(args) + r = self._value(args) if self.verbose >= 1: print(args, "->", r) return r + def grad(self, *args: float) -> NDArray: + """ + Compute gradient of the cost function. + + This requires that a model gradient is provided. + + Parameters + ---------- + *args : float + Parameter values. + + Returns + ------- + ndarray of float + The length of the array is equal to the length of args. + """ + return self._grad(args) + + @property + def has_grad(self) -> bool: + """Return True if cost function can compute a gradient.""" + return self._has_grad() + + @abc.abstractmethod + def _value(self, args: Sequence[float]) -> float: + ... # pragma: no cover + + @abc.abstractmethod + def _grad(self, args: Sequence[float]) -> NDArray: + ... # pragma: no cover + @abc.abstractmethod - def _call(self, args: Sequence[float]) -> float: + def _has_grad(self) -> bool: ... 
# pragma: no cover @@ -625,9 +683,16 @@ def __init__(self, value: float): def _ndata(self): return 0 - def _call(self, args: Sequence[float]) -> float: + def _value(self, args: Sequence[float]) -> float: return self.value + def _grad(self, args: Sequence[float]) -> NDArray: + return np.zeros(0) + + @staticmethod + def _has_grad(): + return True + class CostSum(Cost, ABCSequence): """ @@ -683,12 +748,22 @@ def _split(self, args: Sequence[float]): component_args = tuple(args[i] for i in cmap) yield component, component_args - def _call(self, args: Sequence[float]) -> float: + def _value(self, args: Sequence[float]) -> float: r = 0.0 - for comp, cargs in self._split(args): - r += comp._call(cargs) / comp.errordef + for component, component_args in self._split(args): + r += component._value(component_args) / component.errordef return r + def _grad(self, args: Sequence[float]) -> NDArray: + r = np.zeros(self.npar) + for component, indices in zip(self._items, self._maps): + component_args = tuple(args[i] for i in indices) + r[indices] += component._grad(component_args) / component.errordef + return r + + def _has_grad(self) -> bool: + return all(component.has_grad for component in self._items) + def _ndata(self): return sum(c.ndata for c in self._items) @@ -795,6 +870,56 @@ def _update_cache(self): self._masked = self._data[_replace_none(self._mask, ...)] +class MaskedCostWithPulls(MaskedCost): + """ + Base class for cost functions with pulls. + + :meta private: + """ + + def pulls(self, args: Sequence[float]) -> NDArray: + """ + Return studentized residuals (aka pulls). + + Parameters + ---------- + args : sequence of float + Parameter values. + + Returns + ------- + array + Array of pull values. If the cost function is masked, the array contains NaN + values where the mask value is False. + + Notes + ----- + Pulls allow one to estimate how well a model fits the data. A pull is a value + computed for each data point. 
It is given by (observed - predicted) / + standard-deviation. If the model is correct, the expectation value of each pull + is zero and its variance is one in the asymptotic limit of infinite samples. + Under these conditions, the chi-square statistic computed from the sum of + pulls squared has a known probability distribution if the model is correct. It + therefore serves as a goodness-of-fit statistic. + + Beware: the sum of pulls squared in general is not identical to the value + returned by the cost function, even if the cost function returns a chi-square + distributed test-statistic. The cost function is computed in a slightly + different way that makes the return value approach the asymptotic chi-square + distribution faster than a test statistic based on sum of pulls squared. In + summary, only use pulls for plots. Compute the chi-square test statistic + directly from the cost function. + """ + return self._pulls(args) + + def _ndata(self): + return np.prod(self._masked.shape[: self._ndim]) + + @abc.abstractmethod + def _pulls(self, args: Sequence[float]) -> NDArray: + ... # pragma: no cover + + class UnbinnedCost(MaskedCost): """ Base class for unbinned cost functions. 
@@ -802,12 +927,20 @@ class UnbinnedCost(MaskedCost): :meta private: """ - __slots__ = "_model", "_log" + __slots__ = "_model", "_model_grad", "_log" - def __init__(self, data, model: Model, verbose: int, log: bool): + def __init__( + self, + data, + model: Model, + verbose: int, + log: bool, + grad: Optional[ModelGradient], + ): """For internal use.""" self._model = model self._log = log + self._model_grad = grad super().__init__(_model_parameters(model), _norm(data), verbose) @abc.abstractproperty @@ -824,6 +957,11 @@ def _ndata(self): # unbinned likelihoods have infinite degrees of freedom return np.inf + def _npoints(self): + # cannot use len(self._masked) because multi-dimensional data has format + # (K, N) with K dimensions and N points + return self._masked.shape[-1] + @deprecated_parameter(bins="nbins") def visualize( self, @@ -846,7 +984,6 @@ def visualize( it is interpreted as the point locations. bins : int, optional number of bins. Default is 50 bins. - """ from matplotlib import pyplot as plt @@ -877,6 +1014,48 @@ def visualize( plt.errorbar(cx, n, n**0.5, fmt="ok") plt.fill_between(xm, 0, ym * dx, fc="C0") + def fisher_information(self, *args: float) -> NDArray: + """ + Estimate Fisher information for model and sample. + + The estimated Fisher information is only meaningful if the arguments provided + are estimates of the true values. + + Parameters + ---------- + *args: float + Estimates of model parameters. + """ + g = self._pointwise_score(args) + return np.einsum("ji,ki->jk", g, g) + + def covariance(self, *args: float) -> NDArray: + """ + Estimate covariance of the parameters with the sandwich estimator. + + This requires that the model gradient is provided, and that the arguments are + the maximum-likelihood estimates. The sandwich estimator is only asymptotically + correct. + + Parameters + ---------- + *args : float + Maximum-likelihood estimates of the parameter values. 
+ + Returns + ------- + ndarray of float + The array has shape (K, K) for K arguments. + """ + return np.linalg.inv(self.fisher_information(*args)) + + @abc.abstractmethod + def _pointwise_score(self, args: Sequence[float]) -> NDArray: + ... # pragma: no cover + + def _has_grad(self) -> bool: + return self._model_grad is not None + class UnbinnedNLL(UnbinnedCost): """ @@ -907,8 +1086,10 @@ def __init__( self, data: ArrayLike, pdf: Model, + *, verbose: int = 0, log: bool = False, + grad: Optional[ModelGradient] = None, ): """ Initialize UnbinnedNLL with data and model. @@ -923,26 +1104,56 @@ def __init__( Probability density function of the form f(data, par0, [par1, ...]), where data is the data sample and par0, ... are model parameters. If the data are multivariate, data passed to f has shape (D, N), where D is the number of - dimensions and N the number of data points. + dimensions and N the number of data points. Must return an array with the + shape (N,). verbose : int, optional Verbosity level. 0: is no output (default). 1: print current args and negative log-likelihood value. log : bool, optional Distributions of the exponential family (normal, exponential, poisson, ...) allow one to compute the logarithm of the pdf directly, which is more - accurate and efficient than effectively doing ``log(exp(logpdf))``. Set this - to True, if the model returns the logarithm of the pdf instead of the pdf. + accurate and efficient than numerically computing ``log(pdf)``. Set this + to True, if the model returns the logpdf instead of the pdf. Default is False. + grad : callable or None, optional + Optionally pass the gradient of the pdf. Has the same calling signature like + the pdf, but must return an array with the shape (K, N), where N is the + number of data points and K is the number of parameters. If `log` is True, + the function must return the gradient of the logpdf instead of the pdf. 
The + gradient can be used by Minuit to improve or speed up convergence and to + compute the sandwich estimator for the variance of the parameter estimates. + Default is None. """ - super().__init__(data, pdf, verbose, log) + super().__init__(data, pdf, verbose, log, grad) - def _call(self, args: Sequence[float]) -> float: - data = self._masked - x = self._model(data, *args) - x = _normalize_model_output(x) + def _value(self, args: Sequence[float]) -> float: + f = self._eval_model(args) + if self._log: + return -2.0 * np.sum(f) + return 2.0 * _unbinned_nll(f) + + def _grad(self, args: Sequence[float]) -> NDArray: + g = self._pointwise_score(args) + return -2.0 * np.sum(g, axis=1) + + def _pointwise_score(self, args: Sequence[float]) -> NDArray: + g = self._eval_model_grad(args) if self._log: - return -2.0 * np.sum(x) - return 2.0 * _unbinned_nll(x) + return g + f = self._eval_model(args) + return g / f + + def _eval_model(self, args: Sequence[float]) -> float: + data = self._masked + return _normalize_output(self._model(data, *args), "model", self._npoints()) + + def _eval_model_grad(self, args: Sequence[float]) -> NDArray: + if self._model_grad is None: + raise ValueError("no gradient available") # pragma: no cover + data = self._masked + return _normalize_output( + self._model_grad(data, *args), "model gradient", self.npar, self._npoints() + ) class ExtendedUnbinnedNLL(UnbinnedCost): @@ -983,8 +1194,10 @@ def __init__( self, data: ArrayLike, scaled_pdf: Model, + *, verbose: int = 0, log: bool = False, + grad: Optional[ModelGradient] = None, ): """ Initialize cost function with data and model. @@ -1013,32 +1226,70 @@ def __init__( accurate and efficient than effectively doing ``log(exp(logpdf))``. Set this to True, if the model returns the logarithm of the density as the second argument instead of the density. Default is False. + grad : callable or None, optional + Optionally pass the gradient of the density function. 
Has the same calling + signature like the density function, but must return two arrays. The first + array has shape (K,) where K are the number of parameters, while the second + has shape (K, N), where N is the number of data points. The first array is + the gradient of the integrated density. The second array is the gradient of + the density itself. If `log` is True, the second array must be the gradient + of the log-density instead. The gradient can be used by Minuit to improve or + speed up convergence and to compute the sandwich estimator for the variance + of the parameter estimates. Default is None. """ - super().__init__(data, scaled_pdf, verbose, log) + super().__init__(data, scaled_pdf, verbose, log, grad) + + def _value(self, args: Sequence[float]) -> float: + fint, f = self._eval_model(args) + if self._log: + return 2 * (fint - np.sum(f)) + return 2 * (fint + _unbinned_nll(f)) + + def _grad(self, args: Sequence[float]) -> NDArray: + g = self._pointwise_score(args) + return -2 * np.sum(g, axis=1) - def _call(self, args: Sequence[float]) -> float: + def _pointwise_score(self, args: Sequence[float]) -> NDArray: + gint, g = self._eval_model_grad(args) + m = self._npoints() + if self._log: + return g - (gint / m)[:, np.newaxis] + _, f = self._eval_model(args) + return g / f - (gint / m)[:, np.newaxis] + + def _eval_model(self, args: Sequence[float]) -> Tuple[float, float]: + data = self._masked + fint, f = self._model(data, *args) + f = _normalize_output(f, "model", self._npoints(), msg="in second position") + return fint, f + + def _eval_model_grad(self, args: Sequence[float]) -> Tuple[NDArray, NDArray]: + if self._model_grad is None: + raise ValueError("no gradient available") # pragma: no cover data = self._masked - ns, x = self._model(data, *args) - x = _normalize_model_output( - x, "Model should return numpy array in second position" + gint, g = self._model_grad(data, *args) + gint = _normalize_output( + gint, "model gradient", self.npar, msg="in first 
position" ) - if self._log: - return 2 * (ns - np.sum(x)) - return 2 * (ns + _unbinned_nll(x)) + g = _normalize_output( + g, "model gradient", self.npar, self._npoints(), msg="in second position" + ) + return gint, g -class BinnedCost(MaskedCost): +class BinnedCost(MaskedCostWithPulls): """ Base class for binned cost functions. :meta private: """ - __slots__ = "_xe", "_ndim", "_bztrafo" + __slots__ = "_xe", "_ndim", "_bohm_zech_scale", "_bohm_zech_n" _xe: Union[NDArray, Tuple[NDArray, ...]] _ndim: int - _bztrafo: Optional[BohmZechTransform] + _bohm_zech_scale: Optional[NDArray] + _bohm_zech_n: Optional[NDArray] n = MaskedCost.data @@ -1053,7 +1304,6 @@ def __init__( n: ArrayLike, xe: Union[ArrayLike, Sequence[ArrayLike]], verbose: int, - *updater, ): """For internal use.""" if not isinstance(xe, Iterable): @@ -1062,12 +1312,13 @@ def __init__( shape = _shape_from_xe(xe) self._ndim = len(shape) if self._ndim == 1: - self._xe = _norm(xe) # type:ignore + self._xe = _norm(cast(ArrayLike, xe)) else: self._xe = tuple(_norm(xei) for xei in xe) n = _norm(n) - is_weighted = n.ndim > self._ndim + + is_weighted = n.ndim > self._ndim and n.shape[-1] == 2 if n.ndim != (self._ndim + int(is_weighted)): raise ValueError("n must either have same dimension as xe or one extra") @@ -1080,13 +1331,9 @@ def __init__( "xe must be longer by one element along each dimension" ) - if is_weighted: - if n.shape[-1] != 2: - raise ValueError("n must have shape (..., 2)") - self._bztrafo = BohmZechTransform(n[..., 0], n[..., 1]) - else: - self._bztrafo = None - + self._bohm_zech_scale = None + self._bohm_zech_n = None + self._set_bohm_zech(n, is_weighted) super().__init__(parameters, n, verbose) def prediction( @@ -1103,45 +1350,11 @@ def prediction( Returns ------- NDArray - Model prediction for each bin. + Model prediction for each bin. The expectation is always returned for all + bins, even if some bins are temporarily masked. 
""" return self._pred(args) - def pulls(self, args: Sequence[float]) -> NDArray: - """ - Return studentized residuals (aka pulls). - - Parameters - ---------- - args : sequence of float - Parameter values. - - Returns - ------- - array - Array of pull values. If the cost function is masked, the array contains NaN - values where the mask value is False. - - Notes - ----- - Pulls allow one to estimate how well a model fits the data. A pull is a value - computed for each data bin. It is given by (observed - predicted) / - standard-deviation. If the model is correct, the expectation value of each pull - is zero and its variance is one in the asymptotic limit of infinite samples. - Under these conditions, the chi-square statistic is computed from the sum of - pulls squared has a known probability distribution if the model is correct. It - therefore serves as a goodness-of-fit statistic. - - Beware: the sum of pulls squared in general is not identical to the value - returned by the cost function, even if the cost function returns a chi-square - distributed test-statistic. The cost function is computed in a slightly - differently way that makes the return value approach the asymptotic chi-square - distribution faster than a test statistic based on sum of pulls squared. In - summary, only use pulls for plots. Compute the chi-square test statistic - directly from the cost function. - """ - return self._pulls(args) - def visualize(self, args: Sequence[float]) -> None: """ Visualize data and model agreement (requires matplotlib). @@ -1161,9 +1374,9 @@ def visualize(self, args: Sequence[float]) -> None: comparison to a model, the visualization shows all data bins as a single sequence. 
""" - return self._vis(args) + return self._visualize(args) - def _vis(self, args: Sequence[float]) -> None: + def _visualize(self, args: Sequence[float]) -> None: from matplotlib import pyplot as plt n, ne = self._n_err() @@ -1188,31 +1401,20 @@ def _vis(self, args: Sequence[float]) -> None: def _pred(self, args: Sequence[float]) -> Union[NDArray, Tuple[NDArray, NDArray]]: ... # pragma: no cover - def _ndata(self): - return np.prod(self._masked.shape[: self._ndim]) - - def _update_cache(self): - super()._update_cache() - if self._bztrafo: - ma = _replace_none(self._mask, ...) - self._bztrafo = BohmZechTransform(self._data[ma, 0], self._data[ma, 1]) - def _n_err(self) -> Tuple[NDArray, NDArray]: d = self.data - if self._bztrafo: - n = d[..., 0].copy() - err = d[..., 1] ** 0.5 - else: + if self._bohm_zech_scale is None: n = d.copy() err = d**0.5 - if self.mask is not None: - ma = ~self.mask - n[ma] = np.nan - err[ma] = np.nan + else: + n = d[..., 0].copy() + err = d[..., 1] ** 0.5 # mask values where error is zero ma = err == 0 - err[ma] = np.nan + if self.mask is not None: + ma = ~self.mask n[ma] = np.nan + err[ma] = np.nan return n, err def _pulls(self, args: Sequence[float]) -> NDArray: @@ -1220,6 +1422,49 @@ def _pulls(self, args: Sequence[float]) -> NDArray: n, ne = self._n_err() return (n - mu) / ne + def _set_bohm_zech(self, n: NDArray, is_weighted: bool): + if not is_weighted: + return + val = n[..., 0] + var = n[..., 1] + self._bohm_zech_scale = np.ones_like(val) + np.divide(val, var, out=self._bohm_zech_scale, where=var > 0) + self._bohm_zech_n = val * self._bohm_zech_scale + + def _update_cache(self): + super()._update_cache() + self._set_bohm_zech(self._masked, self._bohm_zech_scale is not None) + + @overload + def _transformed(self, val: NDArray) -> Tuple[NDArray, NDArray]: + ... # pragma: no cover + + @overload + def _transformed( + self, val: NDArray, var: NDArray + ) -> Tuple[NDArray, NDArray, NDArray]: + ... 
# pragma: no cover + + def _transformed(self, val, var=None): + s = self._bohm_zech_scale + ma = self.mask + if ma is not None: + val = val[ma] + if var is not None: + var = var[ma] + if s is None: + if var is None: + return self._masked, val + return self._masked, val, var + if var is None: + return self._bohm_zech_n, val * s + return self._bohm_zech_n, val * s, var * s**2 + + def _counts(self): + if self._bohm_zech_scale is None: + return self._masked + return self._masked[..., 0] + class BinnedCostWithModel(BinnedCost): """ @@ -1228,14 +1473,15 @@ class BinnedCostWithModel(BinnedCost): :meta private: """ - __slots__ = "_xe_shape", "_model", "_model_xe" + __slots__ = "_xe_shape", "_model", "_model_xe", "_model_len", "_model_grad" _model_xe: np.ndarray _xe_shape: Union[Tuple[int], Tuple[int, ...]] - def __init__(self, n, xe, model, verbose): + def __init__(self, n, xe, model, verbose, grad): """For internal use.""" self._model = model + self._model_grad = grad super().__init__(_model_parameters(model), n, xe, verbose) @@ -1247,16 +1493,11 @@ def __init__(self, n, xe, model, verbose): self._model_xe = np.row_stack( [x.flatten() for x in np.meshgrid(*self.xe, indexing="ij")] ) + self._model_len = np.prod(self._xe_shape) def _pred(self, args: Sequence[float]) -> NDArray: d = self._model(self._model_xe, *args) - d = _normalize_model_output(d) - expected_shape = (np.prod(self._xe_shape),) - if d.shape != expected_shape: - raise ValueError( - f"Expected model to return an array of shape {expected_shape}, " - f"but it returns an array of shape {d.shape}" - ) + d = _normalize_output(d, "model", self._model_len) if self._ndim > 1: d = d.reshape(self._xe_shape) for i in range(self._ndim): @@ -1266,6 +1507,18 @@ def _pred(self, args: Sequence[float]) -> NDArray: d[d < 0] = 0 return d + def _pred_grad(self, args: Sequence[float]) -> NDArray: + d = self._model_grad(self._model_xe, *args) + d = _normalize_output(d, "model gradient", self.npar, self._model_len) + if self._ndim > 
1: + d = d.reshape((self.npar, *self._xe_shape)) + for i in range(1, self._ndim + 1): + d = np.diff(d, axis=i) + return d + + def _has_grad(self) -> bool: + return self._model_grad is not None + class Template(BinnedCost): """ @@ -1317,7 +1570,7 @@ class Template(BinnedCost): .. [6] Langenbruch, Eur.Phys.J.C 82 (2022) 5, 393 """ - __slots__ = "_model_data", "_model_xe", "_xe_shape", "_impl" + __slots__ = "_model_data", "_model_xe", "_xe_shape", "_impl", "_model_len" _model_data: List[ Union[ @@ -1333,6 +1586,7 @@ def __init__( n: ArrayLike, xe: Union[ArrayLike, Sequence[ArrayLike]], model_or_template: Collection[Union[Model, ArrayLike]], + *, name: Optional[Sequence[str]] = None, verbose: int = 0, method: str = "da", @@ -1451,6 +1705,7 @@ def __init__( self._model_xe = np.row_stack( [x.flatten() for x in np.meshgrid(*self.xe, indexing="ij")] ) + self._model_len = np.prod(self._xe_shape) def _pred(self, args: Sequence[float]) -> Tuple[NDArray, NDArray]: mu: NDArray = 0 # type:ignore @@ -1464,7 +1719,7 @@ def _pred(self, args: Sequence[float]) -> Tuple[NDArray, NDArray]: i += 1 elif isinstance(t1, Model) and isinstance(t2, int): d = t1(self._model_xe, *args[i : i + t2]) - d = _normalize_model_output(d) + d = _normalize_output(d, "model", self._model_len) if self._ndim > 1: d = d.reshape(self._xe_shape) for j in range(self._ndim): @@ -1479,22 +1734,18 @@ def _pred(self, args: Sequence[float]) -> Tuple[NDArray, NDArray]: assert False # pragma: no cover return mu, mu_var - def _call(self, args: Sequence[float]) -> float: + def _value(self, args: Sequence[float]) -> float: mu, mu_var = self._pred(args) - - ma = self.mask - if ma is not None: - mu = mu[ma] - mu_var = mu_var[ma] - - if self._bztrafo: - n, mu, mu_var = self._bztrafo(mu, mu_var) - else: - n = self._masked - + n, mu, mu_var = self._transformed(mu, mu_var) ma = mu > 0 return self._impl(n[ma], mu[ma], mu_var[ma]) + def _grad(self, args: Sequence[float]) -> NDArray: + raise NotImplementedError # pragma: no cover 
+ + def _has_grad(self) -> bool: + return False + def _errordef(self) -> float: return NEGATIVE_LOG_LIKELIHOOD if self._impl is template_nll_asy else CHISQUARE @@ -1523,7 +1774,7 @@ def prediction(self, args: Sequence[float]) -> Tuple[NDArray, NDArray]: mu, mu_var = self._pred(args) return mu, np.sqrt(mu_var) - def _vis(self, args: Sequence[float]) -> None: + def _visualize(self, args: Sequence[float]) -> None: from matplotlib import pyplot as plt n, ne = self._n_err() @@ -1578,7 +1829,9 @@ def __init__( n: ArrayLike, xe: Union[ArrayLike, Sequence[ArrayLike]], cdf: Model, + *, verbose: int = 0, + grad: Optional[ModelGradient] = None, ): """ Initialize cost function with data and model. @@ -1604,26 +1857,44 @@ def __init__( Verbosity level. 0: is no output (default). 1: print current args and negative log-likelihood value. """ - super().__init__(n, xe, cdf, verbose) + super().__init__(n, xe, cdf, verbose, grad) def _pred(self, args: Sequence[float]) -> NDArray: + # must return array of full length, mask not applied yet p = super()._pred(args) + # normalise probability of remaining bins ma = self.mask if ma is not None: - p /= np.sum(p[ma]) # normalise probability of remaining bins - scale = np.sum(self._masked[..., 0] if self._bztrafo else self._masked) - return p * scale + p /= np.sum(p[ma]) + # scale probabilities with total number of entries of unmasked bins in histogram + return p * np.sum(self._counts()) - def _call(self, args: Sequence[float]) -> float: + def _value(self, args: Sequence[float]) -> float: mu = self._pred(args) + n, mu = self._transformed(mu) + return multinominal_chi2(n, mu) + + def _grad(self, args: Sequence[float]) -> NDArray: + # pg and p must be arrays of full length, mask not applied yet + pg = super()._pred_grad(args) + p = super()._pred(args) + ma = self.mask + # normalise probability of remaining bins + if ma is not None: + scale = np.sum(p[ma]) + pg = pg / scale - p * np.sum(pg[:, ma]) / scale**2 + p /= scale + # scale probabilities 
with total number of entries of unmasked bins in histogram + scale = np.sum(self._counts()) + mu = p * scale + gmu = pg * scale ma = self.mask if ma is not None: mu = mu[ma] - if self._bztrafo: - n, mu = self._bztrafo(mu) - else: - n = self._masked - return multinominal_chi2(n, mu) + gmu = gmu[:, ma] + # don't need to scale mu and gmu, because scale factor cancels + n = self._masked if self._bohm_zech_scale is None else self._bohm_zech_n + return _multinominal_chi2_grad(n, mu, gmu) class ExtendedBinnedNLL(BinnedCostWithModel): @@ -1656,7 +1927,9 @@ def __init__( n: ArrayLike, xe: Union[ArrayLike, Sequence[ArrayLike]], scaled_cdf: Model, + *, verbose: int = 0, + grad: Optional[ModelGradient] = None, ): """ Initialize cost function with data and model. @@ -1680,22 +1953,29 @@ def __init__( Verbosity level. 0: is no output (default). 1: print current args and negative log-likelihood value. """ - super().__init__(n, xe, scaled_cdf, verbose) + super().__init__(n, xe, scaled_cdf, verbose, grad) - def _call(self, args: Sequence[float]) -> float: + def _value(self, args: Sequence[float]) -> float: mu = self._pred(args) + n, mu = self._transformed(mu) + return poisson_chi2(n, mu) + + def _grad(self, args: Sequence[float]) -> NDArray: + mu = self._pred(args) + gmu = self._pred_grad(args) ma = self.mask if ma is not None: mu = mu[ma] - if self._bztrafo: - n, mu = self._bztrafo(mu) - else: - n = self._masked - # assert isinstance(n, np.ndarray) - return poisson_chi2(n, mu) + gmu = gmu[:, ma] + n = self._counts() + s = self._bohm_zech_scale + if s is None: + return _poisson_chi2_grad(n, mu, gmu) + # use original n and mu because Bohm-Zech scale factor cancels + return _poisson_chi2_grad(n, mu, s * gmu) -class LeastSquares(MaskedCost): +class LeastSquares(MaskedCostWithPulls): """ Least-squares cost function (aka chisquare function). @@ -1704,9 +1984,14 @@ class LeastSquares(MaskedCost): :meth:`__init__` for details on how to use a multivariate model. 
""" - __slots__ = "_loss", "_cost", "_model", "_ndim" + __slots__ = "_loss", "_cost", "_cost_grad", "_model", "_model_grad", "_ndim" _loss: Union[str, LossFunction] + _cost: Callable[[ArrayLike, ArrayLike, ArrayLike], float] + _cost_grad: Optional[Callable[[NDArray, NDArray, NDArray, NDArray], NDArray]] + _model: Model + _model_grad: Optional[ModelGradient] + _ndim: int @property def x(self): @@ -1759,14 +2044,17 @@ def loss(self, loss: Union[str, LossFunction]): if isinstance(loss, str): if loss == "linear": self._cost = chi2 + self._cost_grad = _chi2_grad elif loss == "soft_l1": - self._cost = _soft_l1_cost + self._cost = _soft_l1_cost # type: ignore + self._cost_grad = _soft_l1_cost_grad else: raise ValueError(f"unknown loss {loss!r}") elif isinstance(loss, LossFunction): self._cost = lambda y, ye, ym: np.sum( loss(_z_squared(y, ye, ym)) # type:ignore ) + self._cost_grad = None else: raise ValueError("loss must be str or LossFunction") @@ -1778,6 +2066,7 @@ def __init__( model: Model, loss: Union[str, LossFunction] = "linear", verbose: int = 0, + grad: Optional[ModelGradient] = None, ): """ Initialize cost function with data and model. 
@@ -1826,19 +2115,13 @@ def __init__( self._ndim = x.ndim self._model = model + self._model_grad = grad self.loss = loss x = np.atleast_2d(x) data = np.column_stack(np.broadcast_arrays(*x, y, yerror)) super().__init__(_model_parameters(self._model), data, verbose) - def _call(self, args: Sequence[float]) -> float: - x = self._masked.T[0] if self._ndim == 1 else self._masked.T[: self._ndim] - y, yerror = self._masked.T[self._ndim :] - ym = self._model(x, *args) - ym = _normalize_model_output(ym) - return self._cost(y, yerror, ym) - def _ndata(self): return len(self._masked) @@ -1894,7 +2177,7 @@ def prediction(self, args: Sequence[float]) -> NDArray: """ return self.model(self.x, *args) - def pulls(self, args: Sequence[float]) -> NDArray: # noqa: D102 + def _pulls(self, args: Sequence[float]) -> NDArray: y = self.y.copy() ye = self.yerror.copy() ym = self.prediction(args) @@ -1905,8 +2188,33 @@ def pulls(self, args: Sequence[float]) -> NDArray: # noqa: D102 ye[ma] = np.nan return (y - ym) / ye + def _pred(self, args: Sequence[float]) -> NDArray: + x = self._masked.T[0] if self._ndim == 1 else self._masked.T[: self._ndim] + ym = self._model(x, *args) + return _normalize_output(ym, "model", self._ndata()) -LeastSquares.pulls.__doc__ = BinnedCost.pulls.__doc__ + def _pred_grad(self, args: Sequence[float]) -> NDArray: + if self._model_grad is None: + raise ValueError("no gradient available") # pragma: no cover + x = self._masked.T[0] if self._ndim == 1 else self._masked.T[: self._ndim] + ymg = self._model_grad(x, *args) + return _normalize_output(ymg, "model gradient", self.npar, self._ndata()) + + def _value(self, args: Sequence[float]) -> float: + y, ye = self._masked.T[self._ndim :] + ym = self._pred(args) + return self._cost(y, ye, ym) + + def _grad(self, args: Sequence[float]) -> NDArray: + if self._cost_grad is None: + raise ValueError("no cost gradient available") # pragma: no cover + y, ye = self._masked.T[self._ndim :] + ym = self._pred(args) + ymg = 
self._pred_grad(args) + return self._cost_grad(y, ye, ym, ymg) + + def _has_grad(self) -> bool: + return self._model_grad is not None and self._cost_grad is not None class NormalConstraint(Cost): @@ -1934,7 +2242,7 @@ class NormalConstraint(Cost): result. """ - __slots__ = "_value", "_cov", "_covinv" + __slots__ = "_expected", "_cov", "_covinv" def __init__( self, @@ -1955,12 +2263,25 @@ def __init__( Expected error(s). If 1D, must have same length as `args`. If 2D, must be the covariance matrix of the parameters. """ - self._value = _norm(value) + tp_args = (args,) if isinstance(args, str) else tuple(args) + nargs = len(tp_args) + self._expected = _norm(value) + if self._expected.ndim > 1: + raise ValueError("value must be a scalar or one-dimensional") + # args can be a vector of values, in this case we have nargs == 1 + if nargs > 1 and len(self._expected) != nargs: + raise ValueError("size of value does not match size of args") self._cov = _norm(error) + if len(self._cov) != len(self._expected): + raise ValueError("size of error does not match size of value") if self._cov.ndim < 2: self._cov **= 2 + elif self._cov.ndim == 2: + if not is_positive_definite(self._cov): + raise ValueError("covariance matrix is not positive definite") + else: + raise ValueError("covariance matrix cannot have more than two dimensions") self._covinv = _covinv(self._cov) - tp_args = (args,) if isinstance(args, str) else tuple(args) super().__init__({k: None for k in tp_args}, False) @property @@ -1974,26 +2295,38 @@ def covariance(self): @covariance.setter def covariance(self, value): + value = np.asarray(value) + if value.ndim == 2 and not is_positive_definite(value): + raise ValueError("covariance matrix is not positive definite") self._cov[:] = value self._covinv = _covinv(self._cov) @property def value(self): """Get expected parameter values.""" - return self._value + return self._expected @value.setter def value(self, value): - self._value[:] = value + self._expected[:] = value - 
def _call(self, args: Sequence[float]) -> float: - delta = self._value - args + def _value(self, args: Sequence[float]) -> float: + delta = args - self._expected if self._covinv.ndim < 2: return np.sum(delta**2 * self._covinv) return np.einsum("i,ij,j", delta, self._covinv, delta) + def _grad(self, args: Sequence[float]) -> NDArray: + delta = args - self._expected + if self._covinv.ndim < 2: + return 2 * delta * self._covinv + return 2 * self._covinv @ delta + + def _has_grad(self) -> bool: + return True + def _ndata(self): - return len(self._value) + return len(self._expected) def visualize(self, args: ArrayLike): """ @@ -2046,15 +2379,21 @@ def _covinv(array): return np.linalg.inv(array) if array.ndim == 2 else 1.0 / array -def _normalize_model_output(x, msg="Model should return numpy array"): +def _normalize_output(x, kind, *shape, msg=None): if not isinstance(x, np.ndarray): - warnings.warn( - f"{msg}, but returns {type(x)}", - PerformanceWarning, - ) + if msg is None: + msg = f"{kind} should return numpy array, but returns {type(x)}" + else: + msg = f"{kind} should return numpy array {msg}, but returns {type(x)}" + warnings.warn(msg, PerformanceWarning) x = np.array(x) if x.dtype.kind != "f": return x.astype(float) + if x.ndim < len(shape): + return x.reshape(*shape) + elif x.shape != shape: + msg = f"output of {kind} has shape {x.shape!r}, but {shape!r} is required" + raise ValueError(msg) return x diff --git a/src/iminuit/minuit.py b/src/iminuit/minuit.py index 7f38d0f6e..3a50c1655 100644 --- a/src/iminuit/minuit.py +++ b/src/iminuit/minuit.py @@ -1,4 +1,5 @@ """Minuit class.""" +from __future__ import annotations import warnings from iminuit import util as mutil @@ -18,7 +19,6 @@ import numpy as np from typing import ( Union, - Sequence, Optional, Callable, Tuple, @@ -30,12 +30,9 @@ Set, Sized, ) -from iminuit.typing import UserBound +from iminuit.typing import UserBound, Cost, CostGradient from iminuit._optional_dependencies import optional_module_for - -# 
Better use numpy.typing.ArrayLike in the future, but this -# requires dropping Python-3.6 support -_ArrayLike = Sequence +from numpy.typing import ArrayLike MnPrint.global_level = 0 @@ -488,9 +485,9 @@ def ngrad(self) -> int: def __init__( self, - fcn: Callable, - *args: Union[float, _ArrayLike[float]], - grad: Callable = None, + fcn: Cost, + *args: Union[float, ArrayLike], + grad: Union[CostGradient, bool, None] = None, name: Collection[str] = None, **kwds: float, ): @@ -508,11 +505,18 @@ def __init__( *args : Starting values for the minimization as positional arguments. See notes for details on how to set starting values. - grad : - Function that calculates the gradient and returns an iterable object with - one entry for each parameter, which is the derivative for that parameter. If - None (default), Minuit will calculate the gradient numerically. - name : + grad : callable, bool, or None, optional + If grad is a callable, it must be a function that calculates the gradient + and returns an iterable object with one entry for each parameter, which is + the derivative of `fcn` for that parameter. If None (default), Minuit will + call the function :func:`iminuit.util.gradient` on `fcn`. If this function + returns a callable, it will be used, otherwise Minuit will internally + compute the gradient numerically. Please see the documentation of + :func:`iminuit.util.gradient` how gradients are detected. Passing a boolean + override this detection. If grad=True is used, a ValueError is raised if no + useable gradient is found. If grad=False, Minuit will internally compute the + gradient numerically. + name : sequence of str, optional If it is set, it overrides iminuit's function signature detection. **kwds : Starting values for the minimization as keyword arguments. 
@@ -605,7 +609,7 @@ def fcn_np(x): See Also -------- - migrad, hesse, minos, scan, simplex + migrad, hesse, minos, scan, simplex, iminuit.util.gradient """ array_call = False if len(args) == 1 and isinstance(args[0], Iterable): @@ -637,9 +641,24 @@ def fcn_np(x): # set self.tol to default value self.tol = None # type:ignore self._strategy = MnStrategy(1) + + if grad is None: + grad = mutil.gradient(fcn) + elif grad is True: + grad = mutil.gradient(fcn) + if grad is None: + raise ValueError( + "gradient enforced, but iminuit.util.gradient returned None" + ) + elif grad is False: + grad = None + + if grad is not None and not isinstance(grad, CostGradient): + raise ValueError("provided gradient is not a CostGradient") + self._fcn = FCN( fcn, - getattr(fcn, "grad", grad), + grad, array_call, getattr(fcn, "errordef", 1.0), ) @@ -1327,6 +1346,10 @@ def visualize(self, plot: Callable = None, **kwargs): kwargs : Other keyword arguments are forwarded to the plot function. + + See Also + -------- + Minuit.interactive """ return self._visualize(plot)(self.values, **kwargs) @@ -1528,7 +1551,7 @@ def mnprofile( *, size: int = 30, bound: Union[float, UserBound] = 2, - grid: _ArrayLike[float] = None, + grid: ArrayLike = None, subtract_min: bool = False, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: r""" @@ -1634,7 +1657,7 @@ def profile( *, size: int = 100, bound: Union[float, UserBound] = 2, - grid: _ArrayLike[float] = None, + grid: ArrayLike = None, subtract_min: bool = False, ) -> Tuple[np.ndarray, np.ndarray]: r""" @@ -1772,7 +1795,7 @@ def contour( *, size: int = 50, bound: Union[float, Iterable[Tuple[float, float]]] = 2, - grid: Tuple[_ArrayLike, _ArrayLike] = None, + grid: Tuple[ArrayLike, ArrayLike] = None, subtract_min: bool = False, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: r""" @@ -2012,7 +2035,7 @@ def draw_mncontour( x: Union[int, str], y: Union[int, str], *, - cl: Union[float, _ArrayLike[float]] = None, + cl: Union[float, ArrayLike] = None, size: int = 100, 
interpolated: int = 0, experimental: bool = False, @@ -2078,7 +2101,7 @@ def draw_mncontour( def draw_mnmatrix( self, *, - cl: Union[float, _ArrayLike[float]] = None, + cl: Union[float, ArrayLike] = None, size: int = 100, experimental: bool = False, figsize=None, @@ -2240,6 +2263,10 @@ def interactive( -------- .. plot:: plots/interactive.py :include-source: + + See Also + -------- + Minuit.visualize """ with warnings.catch_warnings(): # ipywidgets produces deprecation warnings through use of internal APIs :( diff --git a/src/iminuit/typing.py b/src/iminuit/typing.py index a330d2c72..5f8033c10 100644 --- a/src/iminuit/typing.py +++ b/src/iminuit/typing.py @@ -29,6 +29,33 @@ def __call__(self, x: np.ndarray, *args: float) -> np.ndarray: ... # pragma: no cover +@runtime_checkable +class ModelGradient(Protocol): + """Type for user-defined model gradient.""" + + def __call__(self, x: np.ndarray, *args: float) -> np.ndarray: + """Evaluate model gradient at locations x and return results as an array.""" + ... # pragma: no cover + + +@runtime_checkable +class Cost(Protocol): + """Type for user-defined cost function.""" + + def __call__(self, *args: float) -> float: + """Evaluate cost and return results as a float.""" + ... # pragma: no cover + + +@runtime_checkable +class CostGradient(Protocol): + """Type for user-defined gradient of a cost function.""" + + def __call__(self, *args: float) -> np.ndarray: + """Evaluate gradient and return results as an array.""" + ... 
# pragma: no cover + + @runtime_checkable class LossFunction(Protocol): """Type for user-defined loss function for LeastSquares clas.""" diff --git a/src/iminuit/util.py b/src/iminuit/util.py index 4751ae150..8f62ac03a 100644 --- a/src/iminuit/util.py +++ b/src/iminuit/util.py @@ -8,10 +8,10 @@ from collections import OrderedDict from argparse import Namespace from iminuit import _repr_html, _repr_text, _deprecated -from iminuit.typing import Key, UserBound +from iminuit.typing import Key, UserBound, Cost, CostGradient from iminuit.warnings import IMinuitWarning, HesseFailedWarning, PerformanceWarning import numpy as np -from numpy.typing import NDArray +from numpy.typing import NDArray, ArrayLike from typing import ( overload, Any, @@ -52,6 +52,8 @@ "make_with_signature", "merge_signatures", "describe", + "gradient", + "is_positive_definite", ) @@ -1066,7 +1068,7 @@ def __call__(self, *args: object) -> object: def merge_signatures( callables: Iterable[Callable], annotations: bool = False -) -> Tuple[Any, List[Tuple[int, ...]]]: +) -> Tuple[Any, List[List[int]]]: """ Merge signatures of callables with positional arguments. @@ -1078,7 +1080,7 @@ def g(x, p): ... parameters, mapping = merge_signatures(f, g) # parameters is ('x', 'y', 'z', 'p') - # mapping is ((0, 1, 2), (0, 3)) + # mapping is ([0, 1, 2], [0, 3]) Parameters ---------- @@ -1108,7 +1110,7 @@ def g(x, p): ... amap.append(len(args)) args.append(k) anns.append(ann) - mapping.append(tuple(amap)) + mapping.append(amap) if annotations: return {k: a for (k, a) in zip(args, anns)}, mapping @@ -1605,3 +1607,60 @@ def _detect_log_spacing(x: NDArray) -> bool: lin_rel_std = np.std(d_lin) / np.mean(d_lin) log_rel_std = np.std(d_log) / np.mean(d_log) return log_rel_std < lin_rel_std + + +def gradient(fcn: Cost) -> Optional[CostGradient]: + """ + Return a callable which computes the gradient of fcn or None. + + Parameters + ---------- + fcn: Cost + Cost function which may provide a callable gradient. 
How the gradient + is detected is specified below in the Notes. + + Notes + ----- + This function checks whether the following attributes exist: `fcn.grad` and + `fcn.has_grad`. If `fcn.grad` exists and is a CostGradient, it is returned unless + `fcn.has_grad` exists and is False. If no useable gradient is detected, None is + returned. + + Returns + ------- + callable or None + The gradient function or None + """ + grad = getattr(fcn, "grad", None) + has_grad = getattr(fcn, "has_grad", True) + if grad and isinstance(grad, CostGradient) and has_grad: + return grad + return None + + +def is_positive_definite(m: ArrayLike) -> bool: + """ + Return True if argument is a positive definite matrix. + + This test is somewhat expensive, because we attempt a cholesky decomposition. + + Parameters + ---------- + m : array-like + Matrix to be tested. + + Returns + ------- + bool + True if matrix is positive definite and False otherwise. + """ + m = np.atleast_2d(m) + if np.all(m.T == m): + # maybe check this first https://en.wikipedia.org/wiki/Diagonally_dominant_matrix + # and only try cholesky if that fails + try: + np.linalg.cholesky(m) + except np.linalg.LinAlgError: + return False + return True + return False diff --git a/tests/test_cost.py b/tests/test_cost.py index f1c918ec5..05a1e4073 100644 --- a/tests/test_cost.py +++ b/tests/test_cost.py @@ -14,7 +14,6 @@ NormalConstraint, Template, multinominal_chi2, - _soft_l1_loss, PerformanceWarning, ) from iminuit.util import describe @@ -55,6 +54,27 @@ def expon_cdf(x, a): return 1 - np.exp(-x / a) +def numerical_cost_gradient(fcn): + jacobi = pytest.importorskip("jacobi").jacobi + return lambda *args: jacobi(lambda p: fcn(*p), args)[0] + + +def numerical_model_gradient(fcn): + jacobi = pytest.importorskip("jacobi").jacobi + return lambda x, *args: jacobi(lambda p: fcn(x, *p), args)[0].T + + +def numerical_extended_model_gradient(fcn): + jacobi = pytest.importorskip("jacobi").jacobi + + def fn(x, *args): + fint = jacobi(lambda 
p: fcn(x, *p)[0], args)[0] + f = jacobi(lambda p: fcn(x, *p)[1], args)[0].T + return fint, f + + return fn + + @pytest.fixture def unbinned(): rng = np.random.default_rng(1) @@ -143,48 +163,106 @@ def model( } -def test_Constant(): +def test_Constant_1(): c = Constant(2.5) assert c.value == 2.5 assert c.ndata == 0 + assert c.has_grad + assert_equal(c.grad(), []) + + +def test_Constant_2(): + c = NormalConstraint(("a", "b"), (1, 2), (3, 4)) + Constant(10.2) + assert_allclose(c(1, 2), 10.2) + assert_allclose(c(4, 2), 10.2 + 1) + + +def test_Constant_3(): + c = NormalConstraint(("a", "b"), (1, 2), (3, 4)) + Constant(10.2) + ref = numerical_cost_gradient(c) + assert c.has_grad + assert_allclose(c.grad(1, 2), ref(1, 2)) + assert_allclose(c.grad(2, 3), ref(2, 3)) + assert_allclose(c.grad(-1, -2), ref(-1, -2)) @pytest.mark.parametrize("verbose", (0, 1)) @pytest.mark.parametrize("model", (logpdf, pdf)) -def test_UnbinnedNLL(unbinned, verbose, model): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_UnbinnedNLL(unbinned, verbose, model, use_grad): mle, x = unbinned - cost = UnbinnedNLL(x, model, verbose=verbose, log=model is logpdf) + cost = UnbinnedNLL( + x, + model, + verbose=verbose, + log=model is logpdf, + grad=numerical_model_gradient(model), + ) assert cost.ndata == np.inf - m = Minuit(cost, mu=0, sigma=1) + if use_grad: + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(1, 2), ref(1, 2), rtol=1e-3) + assert_allclose(cost.grad(-1, 3), ref(-1, 3), rtol=1e-3) + + m = Minuit(cost, mu=0, sigma=1, grad=use_grad) m.limits["sigma"] = (0, None) m.migrad() + assert m.valid assert_allclose(m.values, mle[1:], atol=1e-3) assert m.errors["mu"] == pytest.approx(1000**-0.5, rel=0.05) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 + assert_equal(m.fmin.reduced_chi2, np.nan) + if use_grad and verbose == 0: + fi = cost.fisher_information(*m.values) + cov = cost.covariance(*m.values) + assert_allclose(cov, np.linalg.inv(fi)) + 
assert_allclose(np.diag(cov) ** 0.5, m.errors, rtol=5e-2) + rho1 = m.covariance.correlation()[0, 1] + rho2 = cov[0, 1] / (cov[0, 0] * cov[1, 1]) ** 0.5 + assert_allclose(rho1, rho2, atol=0.01) + -def test_UnbinnedNLL_2D(): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_UnbinnedNLL_2D(use_grad): def model(x_y, mux, muy, sx, sy, rho): return mvnorm(mux, muy, sx, sy, rho).pdf(x_y.T) truth = 0.1, 0.2, 0.3, 0.4, 0.5 - x, y = mvnorm(*truth).rvs(size=1000, random_state=1).T + x, y = mvnorm(*truth).rvs(size=100, random_state=1).T - cost = UnbinnedNLL((x, y), model) - m = Minuit(cost, *truth) + cost = UnbinnedNLL((x, y), model, grad=numerical_model_gradient(model)) + m = Minuit(cost, *truth, grad=use_grad) m.limits["sx", "sy"] = (0, None) m.limits["rho"] = (-1, 1) m.migrad() assert m.valid - assert_allclose(m.values, truth, atol=0.02) + if use_grad: + assert m.ngrad > 0 + + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(*m.values), ref(*m.values), rtol=0.01) + else: + assert m.ngrad == 0 + + assert_allclose(m.values, truth, atol=0.2) -def test_UnbinnedNLL_mask(): - c = UnbinnedNLL([1, np.nan, 2], lambda x, a: x + a) +@pytest.mark.parametrize("use_grad", (False, True)) +def test_UnbinnedNLL_mask(use_grad): + c = UnbinnedNLL( + [1, np.nan, 2], + lambda x, a: x + a**2, + grad=lambda x, a: np.ones_like(x) * 2 * a, + ) assert c.mask is None assert np.isnan(c(0)) @@ -192,6 +270,11 @@ def test_UnbinnedNLL_mask(): assert_equal(c.mask, (True, False, True)) assert not np.isnan(c(0)) + assert_allclose(c.grad(0), [0]) + + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(1), ref(1), atol=1e-3) + @pytest.mark.parametrize("log", (False, True)) def test_UnbinnedNLL_properties(log): @@ -281,7 +364,8 @@ def model( @pytest.mark.parametrize("verbose", (0, 1)) @pytest.mark.parametrize("model", (logpdf, pdf)) -def test_ExtendedUnbinnedNLL(unbinned, verbose, model): +@pytest.mark.parametrize("use_grad", (False, True)) +def 
test_ExtendedUnbinnedNLL(unbinned, verbose, model, use_grad): mle, x = unbinned log = model is logpdf @@ -291,39 +375,67 @@ def density(x, n, mu, sigma): return n, np.log(n) + logpdf(x, mu, sigma) return n, n * pdf(x, mu, sigma) - cost = ExtendedUnbinnedNLL(x, density, verbose=verbose, log=log) + cost = ExtendedUnbinnedNLL( + x, + density, + verbose=verbose, + log=log, + grad=numerical_extended_model_gradient(density), + ) assert cost.ndata == np.inf - m = Minuit(cost, n=len(x), mu=0, sigma=1) + m = Minuit(cost, n=len(x), mu=0, sigma=1, grad=use_grad) m.limits["n"] = (0, None) m.limits["sigma"] = (0, None) m.migrad() + assert m.valid assert_allclose(m.values, mle, atol=1e-3) assert m.errors["mu"] == pytest.approx(1000**-0.5, rel=0.05) assert_equal(m.fmin.reduced_chi2, np.nan) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 -def test_ExtendedUnbinnedNLL_2D(): + +@pytest.mark.parametrize("use_grad", (False, True)) +def test_ExtendedUnbinnedNLL_2D(use_grad): def model(x_y, n, mux, muy, sx, sy, rho): - return n * 1000, n * 1000 * mvnorm(mux, muy, sx, sy, rho).pdf(x_y.T) + return n, n * mvnorm(mux, muy, sx, sy, rho).pdf(x_y.T) - truth = 1.0, 0.1, 0.2, 0.3, 0.4, 0.5 - x, y = mvnorm(*truth[1:]).rvs(size=int(truth[0] * 1000)).T + truth = 100.0, 0.1, 0.2, 0.3, 0.4, 0.5 + x, y = mvnorm(*truth[1:]).rvs(size=int(truth[0]), random_state=1).T - cost = ExtendedUnbinnedNLL((x, y), model) + cost = ExtendedUnbinnedNLL( + (x, y), model, grad=numerical_extended_model_gradient(model) + ) - m = Minuit(cost, *truth) + m = Minuit(cost, *truth, grad=use_grad) m.limits["n", "sx", "sy"] = (0, None) m.limits["rho"] = (-1, 1) m.migrad() assert m.valid - assert_allclose(m.values, truth, atol=0.1) + assert_allclose(m.values, truth, atol=0.5) + + if use_grad: + assert m.ngrad > 0 + + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(*m.values), ref(*m.values), rtol=0.01) + else: + assert m.ngrad == 0 def test_ExtendedUnbinnedNLL_mask(): - c = 
ExtendedUnbinnedNLL([1, np.nan, 2], lambda x, a: (1, x + a)) + def model(x, a): + return 1, x + a + + c = ExtendedUnbinnedNLL( + [1, np.nan, 2], model, grad=numerical_extended_model_gradient(model) + ) assert c.ndata == np.inf assert np.isnan(c(0)) @@ -331,6 +443,10 @@ def test_ExtendedUnbinnedNLL_mask(): assert not np.isnan(c(0)) assert c.ndata == np.inf + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(0), ref(0)) + assert_allclose(c.grad(1.5), ref(1.5)) + @pytest.mark.parametrize("log", (False, True)) def test_ExtendedUnbinnedNLL_properties(log): @@ -400,15 +516,22 @@ def test_ExtendedUnbinnedNLL_pickle(): @pytest.mark.parametrize("verbose", (0, 1)) -def test_BinnedNLL(binned, verbose): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_BinnedNLL(binned, verbose, use_grad): mle, nx, xe = binned - cost = BinnedNLL(nx, xe, cdf, verbose=verbose) + cost = BinnedNLL(nx, xe, cdf, verbose=verbose, grad=numerical_model_gradient(cdf)) assert cost.ndata == len(nx) - m = Minuit(cost, mu=0, sigma=1) + if use_grad: + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(*mle[1:]), ref(*mle[1:])) + assert_allclose(cost.grad(-1, 2), ref(-1, 2)) + + m = Minuit(cost, mu=0, sigma=1, grad=use_grad) m.limits["sigma"] = (0, None) m.migrad() + assert m.valid # binning loses information compared to unbinned case assert_allclose(m.values, mle[1:], rtol=0.15) assert m.errors["mu"] == pytest.approx(1000**-0.5, rel=0.05) @@ -416,6 +539,11 @@ def test_BinnedNLL(binned, verbose): assert_allclose(m.fmin.reduced_chi2, 1, atol=0.15) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 + def test_BinnedNLL_pulls(binned): mle, nx, xe = binned @@ -428,7 +556,8 @@ def test_BinnedNLL_pulls(binned): assert np.nanvar(pulls) == pytest.approx(1, abs=0.2) -def test_BinnedNLL_weighted(): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_BinnedNLL_weighted(use_grad): xe = np.array([0, 0.2, 0.4, 0.8, 1.5, 10]) p = np.diff(expon_cdf(xe, 1)) n = p * 
1000 @@ -442,31 +571,24 @@ def test_BinnedNLL_weighted(): # variance * 4 = (sigma * 2)^2, constructed so that # fitted parameter has twice the uncertainty w = np.transpose((n, 4 * n)) - c = BinnedNLL(w, xe, expon_cdf) + c = BinnedNLL(w, xe, expon_cdf, grad=numerical_model_gradient(expon_cdf)) assert_equal(c.data, w) - m2 = Minuit(c, 1) + + if use_grad: + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(*m1.values), ref(*m1.values)) + assert_allclose(c.grad(12), ref(12)) + + m2 = Minuit(c, 1, grad=use_grad) m2.migrad() + assert m2.valid assert m2.values[0] == pytest.approx(1, rel=1e-2) assert m2.errors[0] == pytest.approx(2 * m1.errors[0], rel=1e-2) - -def test_ExtendedBinnedNLL_weighted_pulls(): - rng = np.random.default_rng(1) - xe = np.linspace(0, 5, 500) - p = np.diff(expon_cdf(xe, 2)) - m = p * 100000 - n = rng.poisson(m * 4) / 4 - nvar = m * 4 / 4**2 - w = np.transpose((n, nvar)) - c = ExtendedBinnedNLL(w, xe, lambda x, n, s: n * expon_cdf(x, s)) - m = Minuit(c, np.sum(n), 2) - m.limits["n"] = (0.1, None) - m.limits["s"] = (0.1, None) - m.migrad() - assert m.valid - pulls = c.pulls(m.values) - assert np.nanmean(pulls) == pytest.approx(0, abs=0.1) - assert np.nanvar(pulls) == pytest.approx(1, abs=0.2) + if use_grad: + assert m2.ngrad > 0 + else: + assert m2.ngrad == 0 def test_BinnedNLL_bad_input_1(): @@ -485,7 +607,7 @@ def test_BinnedNLL_bad_input_3(): def test_BinnedNLL_bad_input_4(): - with pytest.raises(ValueError, match="n must have shape"): + with pytest.raises(ValueError, match="n must either have same dimension as xe"): BinnedNLL([[1, 2, 3]], [1, 2], lambda x, a: 0) @@ -507,7 +629,8 @@ def test_BinnedNLL_bad_input_6(): BinnedNLL(1, 2, lambda x, a: 0) -def test_BinnedNLL_2D(): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_BinnedNLL_2D(use_grad): truth = (0.1, 0.2, 0.3, 0.4, 0.5) x, y = mvnorm(*truth).rvs(size=1000, random_state=1).T @@ -516,15 +639,25 @@ def test_BinnedNLL_2D(): def model(xy, mux, muy, sx, sy, rho): return 
mvnorm(mux, muy, sx, sy, rho).cdf(xy.T) - cost = BinnedNLL(w, (xe, ye), model) + cost = BinnedNLL(w, (xe, ye), model, grad=numerical_model_gradient(model)) assert cost.ndata == np.prod(w.shape) - m = Minuit(cost, *truth) + + if use_grad: + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(*truth), ref(*truth)) + + m = Minuit(cost, *truth, grad=use_grad) m.limits["sx", "sy"] = (0, None) m.limits["rho"] = (-1, 1) m.migrad() assert m.valid assert_allclose(m.values, truth, atol=0.05) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 + assert cost.ndata == np.prod(w.shape) w2 = w.copy() w2[1, 1] += 1 @@ -552,14 +685,22 @@ def model(xy, mux, muy, sx, sy, rho): def test_BinnedNLL_mask(): - c = BinnedNLL([5, 1000, 1], [0, 1, 2, 3], expon_cdf) + c = BinnedNLL( + [5, 1000, 1], [0, 1, 2, 3], expon_cdf, grad=numerical_model_gradient(expon_cdf) + ) assert c.ndata == 3 + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(2), ref(2)) + c_unmasked = c(1) c.mask = np.arange(3) != 1 assert c(1) < c_unmasked assert c.ndata == 2 + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(2), ref(2)) + def test_BinnedNLL_properties(): def cdf(x, a, b): @@ -611,16 +752,24 @@ def test_BinnedNLL_pickle(): @pytest.mark.parametrize("verbose", (0, 1)) -def test_ExtendedBinnedNLL(binned, verbose): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_ExtendedBinnedNLL(binned, verbose, use_grad): mle, nx, xe = binned - cost = ExtendedBinnedNLL(nx, xe, scaled_cdf, verbose=verbose) + cost = ExtendedBinnedNLL( + nx, xe, scaled_cdf, verbose=verbose, grad=numerical_model_gradient(scaled_cdf) + ) assert cost.ndata == len(nx) - m = Minuit(cost, n=mle[0], mu=0, sigma=1) + if use_grad: + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(*mle), ref(*mle)) + + m = Minuit(cost, n=mle[0], mu=0, sigma=1, grad=use_grad) m.limits["n"] = (0, None) m.limits["sigma"] = (0, None) m.migrad() + assert m.valid # binning loses information compared to 
unbinned case assert_allclose(m.values, mle, rtol=0.15) assert m.errors["mu"] == pytest.approx(1000**-0.5, rel=0.05) @@ -628,8 +777,14 @@ def test_ExtendedBinnedNLL(binned, verbose): assert_allclose(m.fmin.reduced_chi2, 1, 0.1) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 -def test_ExtendedBinnedNLL_weighted(): + +@pytest.mark.parametrize("use_grad", (False, True)) +def test_ExtendedBinnedNLL_weighted(use_grad): xe = np.array([0, 1, 10]) n = np.diff(expon_cdf(xe, 1)) m1 = Minuit(ExtendedBinnedNLL(n, xe, expon_cdf), 1) @@ -637,36 +792,58 @@ def test_ExtendedBinnedNLL_weighted(): assert_allclose(m1.values, (1,), rtol=1e-2) w = np.transpose((n, 4 * n)) - m2 = Minuit(ExtendedBinnedNLL(w, xe, expon_cdf), 1) + c = ExtendedBinnedNLL(w, xe, expon_cdf, grad=numerical_model_gradient(expon_cdf)) + if use_grad: + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(1), ref(1), atol=1e-14) + assert_allclose(c.grad(12), ref(12)) + m2 = Minuit(c, 1.1, grad=use_grad) m2.migrad() + assert m2.valid assert_allclose(m2.values, (1,), rtol=1e-2) assert m2.errors[0] == pytest.approx(2 * m1.errors[0], rel=1e-2) + if use_grad: + assert m2.ngrad > 0 + else: + assert m2.ngrad == 0 + def test_ExtendedBinnedNLL_bad_input(): with pytest.raises(ValueError): ExtendedBinnedNLL([1], [1], lambda x, a: 0) -def test_ExtendedBinnedNLL_2D(): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_ExtendedBinnedNLL_2D(use_grad): truth = (1.0, 0.1, 0.2, 0.3, 0.4, 0.5) - x, y = mvnorm(*truth[1:]).rvs(size=int(truth[0] * 1000), random_state=1).T + x, y = mvnorm(*truth[1:]).rvs(size=int(truth[0] * 100), random_state=1).T w, xe, ye = np.histogram2d(x, y, bins=(10, 20)) def model(xy, n, mux, muy, sx, sy, rho): - return n * 1000 * mvnorm(mux, muy, sx, sy, rho).cdf(np.transpose(xy)) + return n * 100 * mvnorm(mux, muy, sx, sy, rho).cdf(np.transpose(xy)) - cost = ExtendedBinnedNLL(w, (xe, ye), model) + cost = ExtendedBinnedNLL(w, (xe, ye), model, 
grad=numerical_model_gradient(model)) assert cost.ndata == np.prod(w.shape) - m = Minuit(cost, *truth) + + if use_grad: + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(*truth), ref(*truth)) + + m = Minuit(cost, *truth, grad=use_grad) m.limits["n", "sx", "sy"] = (0, None) m.limits["rho"] = (-1, 1) m.migrad() assert m.valid assert_allclose(m.values, truth, atol=0.1) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 + def test_ExtendedBinnedNLL_3D(): norm = pytest.importorskip("scipy.stats").norm @@ -698,7 +875,9 @@ def model(xyz, n, mux, muy, sx, sy, rho, muz, sz): def test_ExtendedBinnedNLL_mask(): - c = ExtendedBinnedNLL([1, 1000, 2], [0, 1, 2, 3], expon_cdf) + c = ExtendedBinnedNLL( + [1, 1000, 2], [0, 1, 2, 3], expon_cdf, grad=numerical_model_gradient(expon_cdf) + ) assert c.ndata == 3 c_unmasked = c(2) @@ -706,6 +885,9 @@ def test_ExtendedBinnedNLL_mask(): assert c(2) < c_unmasked assert c.ndata == 2 + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(2), ref(2)) + def test_ExtendedBinnedNLL_properties(): def cdf(x, a): @@ -748,9 +930,32 @@ def test_ExtendedBinnedNLL_pickle(): assert_equal(c.data, c2.data) +def test_ExtendedBinnedNLL_weighted_pulls(): + rng = np.random.default_rng(1) + xe = np.linspace(0, 5, 500) + p = np.diff(expon_cdf(xe, 2)) + m = p * 100000 + n = rng.poisson(m * 4) / 4 + nvar = m * 4 / 4**2 + w = np.transpose((n, nvar)) + c = ExtendedBinnedNLL(w, xe, lambda x, n, s: n * expon_cdf(x, s)) + m = Minuit(c, np.sum(n), 2) + m.limits["n"] = (0.1, None) + m.limits["s"] = (0.1, None) + m.migrad() + assert m.valid + pulls = c.pulls(m.values) + assert np.nanmean(pulls) == pytest.approx(0, abs=0.1) + assert np.nanvar(pulls) == pytest.approx(1, abs=0.2) + + @pytest.mark.parametrize("loss", ["linear", "soft_l1", np.arctan]) @pytest.mark.parametrize("verbose", (0, 1)) -def test_LeastSquares(loss, verbose): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_LeastSquares(loss, verbose, use_grad): + if 
use_grad and loss is np.arctan: + pytest.skip() + rng = np.random.default_rng(1) x = np.linspace(0, 1, 1000) @@ -760,10 +965,22 @@ def test_LeastSquares(loss, verbose): def model(x, a, b): return a + b * x - cost = LeastSquares(x, y, ye, model, loss=loss, verbose=verbose) + cost = LeastSquares( + x, + y, + ye, + model, + loss=loss, + verbose=verbose, + grad=numerical_model_gradient(model), + ) assert cost.ndata == len(x) - m = Minuit(cost, a=0, b=0) + if use_grad: + ref = numerical_cost_gradient(cost) + assert_allclose(cost.grad(1, 2), ref(1, 2)) + + m = Minuit(cost, a=0, b=0, grad=use_grad) m.migrad() assert_allclose(m.values, (1, 2), rtol=0.05) assert cost.loss == loss @@ -776,6 +993,11 @@ def model(x, a, b): assert_allclose(m.fmin.reduced_chi2, 1, atol=5e-2) + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 + def test_LeastSquares_2D(): x = np.array([1.0, 2.0, 3.0]) @@ -787,7 +1009,12 @@ def model(xy, a, b): x, y = xy return a * x + b * y - c = LeastSquares((x, y), z, ze, model) + c = LeastSquares((x, y), z, ze, model, grad=numerical_model_gradient(model)) + assert c.ndata == 3 + + ref = numerical_cost_gradient(c) + assert_allclose(c.grad(1, 2), ref(1, 2)) + assert_equal(c.x, (x, y)) assert_equal(c.y, z) assert_equal(c.yerror, ze) @@ -815,19 +1042,28 @@ def test_LeastSquares_bad_input(): LeastSquares([1], [1], [1], lambda x, a: 0, loss=[1, 2, 3]) -def test_LeastSquares_mask(): - c = LeastSquares([1, 2, 3], [3, np.nan, 4], [1, 1, 1], lambda x, a: x + a) +@pytest.mark.parametrize("use_grad", (False, True)) +def test_LeastSquares_mask(use_grad): + c = LeastSquares( + [1, 2, 3], + [3, np.nan, 4], + [1, 1, 1], + lambda x, a: x + a, + grad=lambda x, a: np.ones_like(x), + ) assert c.ndata == 3 assert np.isnan(c(0)) + assert np.all(np.isnan(c.grad(1))) - m = Minuit(c, 1) + m = Minuit(c, 1, grad=use_grad) assert m.ndof == 2 m.migrad() assert not m.valid c.mask = np.arange(3) != 1 - assert not np.isnan(c(0)) assert c.ndata == 2 + assert not np.isnan(c(0)) 
+ assert not np.any(np.isnan(c.grad(0))) assert m.ndof == 1 m.migrad() @@ -910,25 +1146,47 @@ def test_LeastSquares_pulls(): assert_equal(c.pulls((0, 1)), [10, np.nan]) -def test_CostSum_1(): +@pytest.mark.parametrize("use_grad", (False, True)) +def test_CostSum_1(use_grad): def model1(x, a): return a + x + def grad1(x, a): + return np.ones((1, len(x))) + def model2(x, b, a): return a + b * x + def grad2(x, b, a): + g = np.empty((2, len(x))) + g[0] = x + g[1] = 1 + return g + def model3(x, c): return c - lsq1 = LeastSquares(1, 2, 3, model1) + def grad3(x, c): + return np.ones((1, len(x))) + + lsq1 = LeastSquares(1, 2, 3, model1, grad=grad1) assert describe(lsq1) == ["a"] - lsq2 = LeastSquares(1, 3, 4, model2) + if use_grad: + assert_allclose(lsq1.grad(2), numerical_cost_gradient(lsq1)(2)) + + lsq2 = LeastSquares(1, 3, 4, model2, grad=grad2) assert describe(lsq2) == ["b", "a"] - lsq3 = LeastSquares(1, 1, 1, model3) + if use_grad: + assert_allclose(lsq2.grad(2, 3), numerical_cost_gradient(lsq2)(2, 3)) + + lsq3 = LeastSquares(1, 1, 1, model3, grad=grad3) assert describe(lsq3) == ["c"] + if use_grad: + assert_allclose(lsq3.grad(4), numerical_cost_gradient(lsq3)(4)) + lsq12 = lsq1 + lsq2 assert lsq12._items == [lsq1, lsq2] assert isinstance(lsq12, CostSum) @@ -939,33 +1197,74 @@ def model3(x, c): assert lsq12(1, 2) == lsq1(1) + lsq2(2, 1) - m = Minuit(lsq12, a=0, b=0) - m.migrad() - assert m.parameters == ("a", "b") - assert_allclose(m.values, (1, 2)) - assert_allclose(m.errors, (3, 5)) - assert_allclose(m.covariance, ((9, -9), (-9, 25)), atol=1e-10) + if use_grad: + a = 2 + b = 3 + ref = np.zeros(2) + ref[0] += lsq1.grad(a) + ref[[1, 0]] += lsq2.grad(b, a) + assert_allclose(lsq12.grad(a, b), ref) lsq121 = lsq12 + lsq1 assert lsq121._items == [lsq1, lsq2, lsq1] assert describe(lsq121) == ["a", "b"] assert lsq121.ndata == 3 + if use_grad: + a = 2 + b = 3 + ref = np.zeros(2) + ref[0] += lsq1.grad(a) + ref[[1, 0]] += lsq2.grad(b, a) + ref[0] += lsq1.grad(a) + 
assert_allclose(lsq121.grad(a, b), ref) + lsq312 = lsq3 + lsq12 assert lsq312._items == [lsq3, lsq1, lsq2] assert describe(lsq312) == ["c", "a", "b"] assert lsq312.ndata == 3 + if use_grad: + a = 2 + b = 3 + c = 4 + ref = np.zeros(3) + ref[0] += lsq3.grad(c) + ref[1] += lsq1.grad(a) + ref[[2, 1]] += lsq2.grad(b, a) + assert_allclose(lsq312.grad(c, a, b), ref) + lsq31212 = lsq312 + lsq12 assert lsq31212._items == [lsq3, lsq1, lsq2, lsq1, lsq2] assert describe(lsq31212) == ["c", "a", "b"] assert lsq31212.ndata == 5 + if use_grad: + a = 2 + b = 3 + c = 4 + ref = np.zeros(3) + ref[:] += lsq312.grad(c, a, b) + ref[1:] += lsq12.grad(a, b) + assert_allclose(lsq31212.grad(c, a, b), ref) + lsq31212 += lsq1 assert lsq31212._items == [lsq3, lsq1, lsq2, lsq1, lsq2, lsq1] assert describe(lsq31212) == ["c", "a", "b"] assert lsq31212.ndata == 6 + m = Minuit(lsq12, a=0, b=0, grad=use_grad) + m.migrad() + assert m.parameters == ("a", "b") + assert_allclose(m.values, (1, 2)) + assert_allclose(m.errors, (3, 5)) + assert_allclose(m.covariance, ((9, -9), (-9, 25)), atol=1e-10) + + if use_grad: + assert m.ngrad > 0 + else: + assert m.ngrad == 0 + def test_CostSum_2(): ref = NormalConstraint("a", 1, 2), NormalConstraint(("b", "a"), (1, 1), (2, 2)) @@ -1023,28 +1322,115 @@ def test_CostSum_visualize(): def test_NormalConstraint_1(): - def model(x, a): - return a * np.ones_like(x) - - lsq1 = LeastSquares(0, 1, 1, model) - lsq2 = lsq1 + NormalConstraint("a", 1, 0.1) - assert describe(lsq1) == ["a"] - assert describe(lsq2) == ["a"] - assert lsq1.ndata == 1 - assert lsq2.ndata == 2 - - m = Minuit(lsq1, 0) + c1 = NormalConstraint("a", 1, 1.5) + c2 = NormalConstraint(("a", "b"), (1, 2), (3, 4)) + c3 = NormalConstraint(("a", "b"), (1, 2), ((1, 0.5), (0.5, 4))) + + assert describe(c1) == ["a"] + assert describe(c2) == ["a", "b"] + assert describe(c3) == ["a", "b"] + assert c1.ndata == 1 + assert c2.ndata == 2 + assert c3.ndata == 2 + assert c1.has_grad + assert c2.has_grad + assert c3.has_grad + 
assert c1.value == 1 + assert_equal(c1.covariance, [1.5**2]) + assert_equal(c2.value, (1, 2)) + assert_equal(c2.covariance, (9, 16)) + assert_equal(c3.value, (1, 2)) + assert_equal(c3.covariance, ((1, 0.5), (0.5, 4))) + + assert_allclose(c1(1), 0) + assert_allclose(c1(1 + 1.5), 1) + assert_allclose(c1(1 - 1.5), 1) + + assert_allclose(c2(1, 2), 0) + assert_allclose(c2(1 + 3, 2), 1) + assert_allclose(c2(1 - 3, 2), 1) + assert_allclose(c2(1, 2 + 4), 1) + assert_allclose(c2(1, 2 - 4), 1) + + def ref(x, y): + d = np.subtract((x, y), (1, 2)) + c = ((1, 0.5), (0.5, 4)) + cinv = np.linalg.inv(c) + return d.T @ cinv @ d + + assert_allclose(c3(1, 2), 0) + assert_allclose(c3(2, 2), ref(2, 2)) + assert_allclose(c3(3, 4), ref(3, 4)) + assert_allclose(c3(-1, -2), ref(-1, -2)) + + ref = numerical_cost_gradient(c1) + assert_allclose(c1.grad(1), [0]) + assert_allclose(c1.grad(2), ref(2)) + assert_allclose(c1.grad(-2), ref(-2)) + + ref = numerical_cost_gradient(c2) + assert_allclose(c2.grad(1, 2), [0, 0]) + assert_allclose(c2.grad(2, 3), ref(2, 3)) + assert_allclose(c2.grad(-2, -4), ref(-2, -4)) + + ref = numerical_cost_gradient(c3) + assert_allclose(c3.grad(1, 2), [0, 0]) + assert_allclose(c3.grad(2, 3), ref(2, 3)) + assert_allclose(c3.grad(-2, -4), ref(-2, -4)) + + c1.value = 2 + c1.covariance = 4 + assert_equal(c1.value, 2) + assert_equal(c1.covariance, 4) + assert_allclose(c1(2), 0) + assert_allclose(c1(4), 1) + assert_allclose(c1(0), 1) + + c2.value = (2, 3) + c2.covariance = (1, 4) + assert_equal(c2.value, (2, 3)) + assert_equal(c2.covariance, (1, 4)) + assert_allclose(c2(2, 3), 0) + assert_allclose(c2(1, 3), 1) + assert_allclose(c2(2, 5), 1) + + c3.value = (2, 3) + c3.covariance = [(4, 1), (1, 5)] + assert_equal(c3.value, (2, 3)) + assert_equal(c3.covariance, [(4, 1), (1, 5)]) + + def ref(x, y): + d = np.subtract((x, y), (2, 3)) + c = ((4, 1), (1, 5)) + cinv = np.linalg.inv(c) + return d.T @ cinv @ d + + assert_allclose(c3(2, 3), 0) + assert_allclose(c3(1, 3), ref(1, 3)) + 
assert_allclose(c3(2, 5), ref(2, 5)) + + +@pytest.mark.parametrize("use_grad", (False, True)) +def test_NormalConstraint_2(use_grad): + c1 = NormalConstraint("a", 1, 1) + c2 = c1 + NormalConstraint("a", 1, 0.1) + + m = Minuit(c1, 0, grad=use_grad) m.migrad() + if use_grad: + assert m.ngrad > 0 assert_allclose(m.values, (1,), atol=1e-2) assert_allclose(m.errors, (1,), rtol=1e-2) - m = Minuit(lsq2, 0) + m = Minuit(c2, 0, grad=use_grad) m.migrad() + if use_grad: + assert m.ngrad > 0 assert_allclose(m.values, (1,), atol=1e-2) assert_allclose(m.errors, (0.1,), rtol=1e-2) -def test_NormalConstraint_2(): +def test_NormalConstraint_3(): lsq1 = NormalConstraint(("a", "b"), (1, 2), (2, 2)) lsq2 = lsq1 + NormalConstraint("b", 2, 0.1) + NormalConstraint("a", 1, 0.01) sa = 0.1 @@ -1075,16 +1461,6 @@ def test_NormalConstraint_2(): assert_allclose(m.covariance, cov, rtol=1e-2) -def test_NormalConstraint_properties(): - nc = NormalConstraint(("a", "b"), (1, 2), (3, 4)) - assert_equal(nc.value, (1, 2)) - assert_equal(nc.covariance, (9, 16)) - nc.value = (2, 3) - nc.covariance = (1, 2) - assert_equal(nc.value, (2, 3)) - assert_equal(nc.covariance, (1, 2)) - - def test_NormalConstraint_visualize(): pytest.importorskip("matplotlib") @@ -1104,21 +1480,43 @@ def test_NormalConstraint_pickle(): assert_equal(c.covariance, c2.covariance) +def test_NormalConstraint_bad_input_1(): + with pytest.raises(ValueError, match="scalar or one-dimensional"): + NormalConstraint("par", [[[1, 2]]], np.eye(2)) + + +def test_NormalConstraint_bad_input_2(): + with pytest.raises(ValueError, match="not match size of args"): + NormalConstraint(["a", "b", "c"], [1, 2], np.eye(2)) + + +def test_NormalConstraint_bad_input_3(): + with pytest.raises(ValueError, match="size of error does not match size of value"): + NormalConstraint(["a", "b"], [1, 2], np.eye(3)) + + +def test_NormalConstraint_bad_input_4(): + with pytest.raises(ValueError, match="positive definite"): + NormalConstraint(["a", "b"], [1, 2], [[1, 1], 
[1, 1]]) + + +def test_NormalConstraint_bad_input_5(): + n = NormalConstraint(["a", "b"], [1, 2], [[1, 0], [0, 1]]) + + with pytest.raises(ValueError, match="positive definite"): + n.covariance = [[1, 1], [1, 1]] + + +def test_NormalConstraint_bad_input_6(): + with pytest.raises(ValueError, match="cannot have more than two dimensions"): + NormalConstraint(["a", "b"], [1, 2], np.ones((2, 2, 2))) + + dtypes_to_test = [np.float32] if hasattr(np, "float128"): # not available on all platforms dtypes_to_test.append(np.float128) -@pytest.mark.parametrize("dtype", dtypes_to_test) -def test_soft_l1_loss(dtype): - v = np.array([0], dtype=dtype) - assert _soft_l1_loss(v) == v - v[:] = 0.1 - assert _soft_l1_loss(v) == pytest.approx(0.1, abs=0.01) - v[:] = 1e10 - assert _soft_l1_loss(v) == pytest.approx(2e5, rel=0.01) - - def test_multinominal_chi2(): zero = np.array(0) one = np.array(1) @@ -1469,10 +1867,7 @@ def cdf(xe, a): c = cost(n, edges, cdf) with pytest.raises( ValueError, - match=( - r"Expected model to return an array of shape \(3,\), " - r"but it returns an array of shape \(2,\)" - ), + match=(r"output of model has shape \(2,\), but \(3,\) is required"), ): c(1) @@ -1489,9 +1884,25 @@ def cdf(xye, a): c = cost(n, edges, cdf) with pytest.raises( ValueError, - match=( - r"Expected model to return an array of shape \(12,\), " - r"but it returns an array of shape \(11,\)" - ), + match=(r"output of model has shape \(11,\), but \(12,\) is required"), ): c(1) + + +def test_BohmZechTransform(): + from iminuit.cost import BohmZechTransform + + with pytest.warns(np.VisibleDeprecationWarning): + val = np.array([1.0, 2.0]) + var = np.array([3.0, 4.0]) + tr = BohmZechTransform(val, var) + s = val / var + mu = np.array([2.0, 2.0]) + ns, mus = tr(mu) + assert_allclose(ns, val * s) + assert_allclose(mus, mu * s) + var2 = mu**2 + ns, mus, vars = tr(mu, var2) + assert_allclose(ns, val * s) + assert_allclose(mus, mu * s) + assert_allclose(vars, var2 * s**2) diff --git 
a/tests/test_minuit.py b/tests/test_minuit.py index 7f1e01d95..fd7e7909a 100644 --- a/tests/test_minuit.py +++ b/tests/test_minuit.py @@ -1693,3 +1693,19 @@ def cost(a, b: Annotated[float, 0.1:1]): assert m2.limits["y"] == (0.1, 1.0) m.migrad() assert_allclose(m.values, (0, 0.1), atol=1e-2) + + +def test_enforced_grad(): + def cost(a, b): + return a**2 + b**2 + + with pytest.raises(ValueError): + Minuit(cost, 0, 0, grad=True) + + +def test_bad_grad(): + def cost(a, b): + return a**2 + b**2 + + with pytest.raises(ValueError, match="provided gradient is not a CostGradient"): + Minuit(cost, 0, 0, grad="foo") diff --git a/tests/test_util.py b/tests/test_util.py index 21d486dc0..793f34721 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -514,8 +514,8 @@ def g(x, a, b): args, (pf, pg) = util.merge_signatures([f, g]) assert args == ["x", "y", "z", "a", "b"] - assert pf == (0, 1, 2) - assert pg == (0, 3, 4) + assert pf == [0, 1, 2] + assert pg == [0, 3, 4] @pytest.mark.skipif(not scipy_available, reason="needs scipy") @@ -768,3 +768,9 @@ def test_optional_module_for_3(): ): with optional_module_for("foo", replace={"foobarbaz": "foo"}): import foobarbaz # noqa + + +def test_positive_definite(): + assert util.is_positive_definite([[1, 0], [0, 1]]) + assert not util.is_positive_definite([[1, 1], [1, 1]]) + assert not util.is_positive_definite([[1, 0], [1, 1]]) diff --git a/tests/test_without_numba.py b/tests/test_without_numba.py index 60b8e2e76..674f02833 100644 --- a/tests/test_without_numba.py +++ b/tests/test_without_numba.py @@ -19,7 +19,6 @@ def npa(*args, **kwargs): ("multinominal_chi2", (npa(1, 0), npa(1.2, 0))), ("chi2", (npa(1.2, 0), npa(1.2, 1.0), npa(1.2, 0.1))), ("poisson_chi2", (npa(1, 0), npa(1.2, 0.1))), - ("_soft_l1_loss", (npa(1.2, 0),)), ("_soft_l1_cost", (npa(1.2, 0), npa(1.2, 0.1), npa(1.0, 1.0))), ), )