fix: frequentist toy sampling

scikit-hep · Jan 24, 2025 · febd07f · febd07f
1 parent 5ccf879
commit febd07f
Show file tree

Hide file tree

Showing 6 changed files with 57 additions and 17 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,13 @@ Changelog
 =========
 
 main
+*************
+
+* fix dumping of fitresult in test, require ASDF version < 1.6.0 in writing to file
+* fix sampling of model in FrequentistCalculator with simultaneous fits
+
+
+Version 0.9.0
 **************
 
 * Add support for Python 3.13

diff --git a/src/hepstats/hypotests/toyutils.py b/src/hepstats/hypotests/toyutils.py
@@ -356,7 +356,9 @@ def to_yaml(self, filename: str):
 
         tree["toys"] = self.toyresults_to_dict()
         af = asdf.AsdfFile(tree)
-        af.write_to(filename)
+        af.write_to(
+            filename, version="1.5.0"
+        )  # TODO: change the dict layout not to have floats as keys. Fails with new ASDF version
         af.close()
 
     def toysresults_from_yaml(self, filename: str) -> list[ToyResult]:

diff --git a/src/hepstats/utils/fit/diverse.py b/src/hepstats/utils/fit/diverse.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections.abc import Mapping
 from contextlib import ExitStack, contextmanager, suppress
 
 import numpy as np
@@ -67,7 +68,14 @@ def pll(minimizer, loss, pois, init=None) -> float:
 
 
 @contextmanager
-def set_values(params, values):
+def set_values(params, values=None):
+    if values is None:
+        if isinstance(params, Mapping):
+            values = tuple(params.values())
+            params = tuple(params.keys())
+        else:
+            msg = "values must be provided if params is not a Mapping (dict-like)"
+            raise ValueError(msg)
     old_values = [p.value() for p in params]
     for p, v in zip(params, values):
         p.set_value(v)

diff --git a/src/hepstats/utils/fit/sampling.py b/src/hepstats/utils/fit/sampling.py
@@ -5,7 +5,7 @@
 from __future__ import annotations
 
 from .api_check import is_valid_pdf
-from .diverse import get_value
+from .diverse import get_value, set_values
 
 
 def base_sampler(models, nevents):
@@ -56,11 +56,11 @@ def base_sample(samplers, ntoys, parameter=None, value=None, constraints=None):
             except AttributeError:
                 continue
 
+    params = {} if parameter is None or value is None else {parameter: value}
     for i in range(ntoys):
-        params = None if parameter is None or value is None else {parameter: value}
-
-        for s in samplers:
-            s.resample(params=params)
+        with set_values(params):
+            for s in samplers:
+                s.resample()  # do not pass parameters as arguments as it will fail in simultaneous fits
 
         if constraints is not None:
             yield {param: value[i] for param, value in sampled_constraints.items()}

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -56,28 +56,30 @@ def _setup_teardown():
     zfit.run.set_autograd_mode()
 
 
-def create_loss_func(npeak, nbins=None):
+def create_loss_func(npeak, nbins=None, nbkg=None, nameadd="", obs=None):
     import zfit
 
     bounds = (0.1, 3.0)
-    obs = zfit.Space("x", limits=bounds)
+    obs = "x" if obs is None else obs
+    obs = zfit.Space(obs, limits=bounds)
 
     # Data and signal
     np.random.seed(0)
     tau = -2.0
     beta = -1 / tau
-    bkg = np.random.exponential(beta, 300)
+    nbkg = 300 if nbkg is None else nbkg
+    bkg = np.random.exponential(beta, nbkg)
     peak = np.random.normal(1.2, 0.1, npeak)
     data = np.concatenate((bkg, peak))
     data = data[(data > bounds[0]) & (data < bounds[1])]
     N = len(data)
     data = zfit.data.Data.from_numpy(obs=obs, array=data)
 
-    mean = zfit.Parameter("mean", 1.2, 0.5, 2.0)
-    sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2)
-    lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
-    Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N)
-    Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1)
+    mean = zfit.Parameter("mean" + nameadd, 1.2, 0.5, 2.0)
+    sigma = zfit.Parameter("sigma" + nameadd, 0.1, 0.02, 0.2)
+    lambda_ = zfit.Parameter("lambda" + nameadd, -2.0, -4.0, -1.0)
+    Nsig = zfit.Parameter("Nsig" + nameadd, 20.0, -20.0, N * 3)
+    Nbkg = zfit.Parameter("Nbkg" + nameadd, N, 0.0, N * 3)
 
     signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)
     background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
@@ -95,6 +97,19 @@ def create_loss_func(npeak, nbins=None):
     return loss, (Nsig, Nbkg, mean, sigma)
 
 
+def create_sim_loss_func(npeak, nbins=None):
+    loss1, params1 = create_loss_func(npeak, nbins=nbins, nameadd="_1", obs="x1")
+    loss2, params2 = create_loss_func(npeak * 10, nbins=nbins, nameadd="_2", obs="x2", nbkg=500)
+    loss = loss1 + loss2
+
+    return loss, params1
+
+
 @pytest.fixture
 def create_loss():
     return create_loss_func
+
+
+@pytest.fixture
+def create_sim_loss():
+    return create_sim_loss_func
diff --git a/tests/hypotests/test_discovery.py b/tests/hypotests/test_discovery.py
@@ -2,6 +2,9 @@
 
 import numpy as np
 import pytest
+
+from tests.conftest import create_loss_func, create_sim_loss_func
+
 zfit = pytest.importorskip("zfit")
 from zfit.loss import UnbinnedNLL
 from zfit.minimize import Minuit
@@ -71,8 +74,13 @@ def test_with_asymptotic_calculator(create_loss, nbins, Calculator):
 @pytest.mark.parametrize(
     "nbins", [None, 95, 153], ids=lambda x: "unbinned" if x is None else f"nbin={x}"
 )
-def test_with_frequentist_calculator(create_loss, nbins):
-    loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=25, nbins=nbins)
+@pytest.mark.parametrize("losscreator", [create_loss_func,
+                                         # create_sim_loss_func
+                                         ], ids=["simple",
+                                                 # "sim"
+                                                 ])
+def test_with_frequentist_calculator(losscreator, nbins):
+    loss, (Nsig, Nbkg, mean, sigma) = losscreator(npeak=25, nbins=nbins)
     mean.floating = False
     sigma.floating = False
     calculator = FrequentistCalculator.from_yaml(