Merge pull request #20 from scikit-hep/mm_savetoys

add ToyResult and ToyManager classes
scikit-hep · Feb 15, 2020 · e759346 · e759346
2 parents d63fe0f + d61ae7c
commit e759346
Show file tree

Hide file tree

Showing 33 changed files with 34,270 additions and 909 deletions.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -18,6 +18,8 @@ jobs:
         python.version: '3.6'
       Python37:
         python.version: '3.7'
+      Python38:
+        python.version: '3.8'
 
   steps:
     - bash: echo "##vso[task.prependpath]$CONDA/bin"

diff --git a/environment.yml b/environment.yml
@@ -11,3 +11,4 @@ dependencies:
   - tensorflow=>1.14
   - tensorflow-probability
   - zfit
+  - asdf
diff --git a/hepstats/hypotests/calculators/asymptotic_calculator.py b/hepstats/hypotests/calculators/asymptotic_calculator.py
@@ -131,7 +131,7 @@ def asimov_dataset(self, poi) -> (np.array, np.array):
             for i, ad in enumerate([generate_asimov_hist(m, values, self._asimov_bins) for m in model]):
                 weights, bin_edges = ad
                 bin_centers = bin_edges[0: -1] + np.diff(bin_edges)/2
-                asimov_data.append(array2dataset(type(data[i]), data[i].obs, bin_centers, weights))
+                asimov_data.append(array2dataset(type(data[i]), data[i].space, bin_centers, weights))
 
             self._asimov_dataset[poi] = asimov_data
 

diff --git a/hepstats/hypotests/calculators/basecalculator.py b/hepstats/hypotests/calculators/basecalculator.py
@@ -2,10 +2,11 @@
 from typing import Dict, Union, Tuple, List
 import numpy as np
 
-from ..parameters import POI, POIarray
-from ..fitutils.api_check import is_valid_loss, is_valid_fitresult, is_valid_minimizer
-from ..fitutils.api_check import is_valid_data, is_valid_pdf
+from ..hypotests_object import HypotestsObject
+from ..parameters import POI, POIarray, asarray
 from ..fitutils.utils import pll
+from ..toyutils import ToysManager
+from ..fitutils.sampling import base_sampler, base_sample
 
 """
 Module defining the base class for the calculators for statistical tests based on the likelyhood ratio.
@@ -21,7 +22,7 @@
 """
 
 
-class BaseCalculator(object):
+class BaseCalculator(HypotestsObject):
 
     def __init__(self, input, minimizer):
         """Base class for calculator.
@@ -45,149 +46,15 @@ def __init__(self, input, minimizer):
                 >>> calc = BaseCalculator(input=loss, minimizer=MinuitMinimizer())
         """
 
-        if is_valid_fitresult(input):
-            self._loss = input.loss
-            self._bestfit = input
-        elif is_valid_loss(input):
-            self._loss = input
-            self._bestfit = None
-        else:
-            raise ValueError("{} is not a valid loss funtion or fit result!".format(input))
-
-        if not is_valid_minimizer(minimizer):
-            raise ValueError("{} is not a valid minimizer !".format(minimizer))
+        super(BaseCalculator, self).__init__(input, minimizer)
 
-        self._minimizer = minimizer
-        self.minimizer.verbosity = 0
-        # cache of the observed nll values
         self._obs_nll = {}
 
         self._parameters = {}
         for m in self.model:
             for d in m.get_dependents():
                 self._parameters[d.name] = d
 
-    @property
-    def loss(self):
-        """
-        Returns the loss / likelihood function used in the calculator.
-        """
-        return self._loss
-
-    @property
-    def minimizer(self):
-        """
-        Returns the minimzer used in the calculator.
-        """
-        return self._minimizer
-
-    @property
-    def bestfit(self):
-        """
-        Returns the best fit values of the model parameters.
-        """
-        if getattr(self, "_bestfit", None):
-            return self._bestfit
-        else:
-            print("Get fit best values!")
-            self.minimizer.verbosity = 5
-            mininum = self.minimizer.minimize(loss=self.loss)
-            self.minimizer.verbosity = 0
-            self._bestfit = mininum
-            return self._bestfit
-
-    @bestfit.setter
-    def bestfit(self, value):
-        """
-        Set the best fit values  of the model parameters.
-
-            Args:
-                value: fit result
-        """
-        if not is_valid_fitresult(value):
-            raise ValueError()
-        self._bestfit = value
-
-    @property
-    def model(self):
-        """
-        Returns the model used in the calculator.
-        """
-        return self.loss.model
-
-    @property
-    def data(self):
-        """
-        Returns the data used in the calculator.
-        """
-        return self.loss.data
-
-    @property
-    def constraints(self):
-        """
-        Returns the constraints on the loss / likehood function used in the calculator.
-        """
-        return self.loss.constraints
-
-    def get_parameter(self, name):
-        """
-        Returns the parameter in loss function with given input name.
-
-        Args:
-            name (str): name of the parameter to return
-        """
-        return self._parameters[name]
-
-    def set_dependents_to_bestfit(self):
-        """
-        Set the values of the parameters in the models to the best fit values
-        """
-        for m in self.model:
-            for d in m.get_dependents():
-                d.set_value(self.bestfit.params[d]["value"])
-
-    def lossbuilder(self, model, data, weights=None):
-        """ Method to build a new loss function.
-
-            Args:
-                model (List): The model or models to evaluate the data on
-                data (List): Data to use
-                weights (optional, List): the data weights
-
-            Example with `zfit`:
-                >>> data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
-                >>> mean = zfit.Parameter("mu", 1.2)
-                >>> sigma = zfit.Parameter("sigma", 0.1)
-                >>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
-                >>> loss = calc.lossbuilder(model, data)
-
-            Returns:
-                Loss function
-
-        """
-
-        assert all(is_valid_pdf(m) for m in model)
-        assert all(is_valid_data(d) for d in data)
-
-        msg = "{0} must have the same number of components as {1}"
-        if len(data) != len(self.data):
-            raise ValueError(msg.format("data", "`self.data"))
-        if len(model) != len(self.model):
-            raise ValueError(msg.format("model", "`self.model"))
-        if weights is not None and len(weights) != len(self.data):
-            raise ValueError(msg.format("weights", "`self.data`"))
-
-        fit_range = self.loss.fit_range
-
-        if weights is not None:
-            for d, w in zip(data, weights):
-                d.set_weights(w)
-
-        loss = type(self.loss)(model=model, data=data, fit_range=fit_range)
-        loss.add_constraints(self.constraints)
-
-        return loss
-
     def obs_nll(self, pois) -> np.array:
         """ Compute observed negative log-likelihood values for given parameters of interest.
 
@@ -436,3 +303,176 @@ def q(self, nll1: np.array, nll2: np.array, poi1, poi2,
             q = q
 
         return q
+
+
+class BaseToysCalculator(BaseCalculator):
+
+    def __init__(self, input, minimizer, sampler, sample):
+        """Basis for toys calculator class.
+
+            Args:
+                input : loss or fit result
+                minimizer : minimizer to use to find the minimum of the loss function
+                sampler : function used to create sampler with models, number of events and
+                    floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
+                sample : function used to get samples from the sampler.
+                    Default is `hepstats.fitutils.sampling.base_sample`.
+        """
+        super(BaseToysCalculator, self).__init__(input, minimizer)
+
+
+class ToysCalculator(BaseToysCalculator, ToysManager):
+    """
+    Class for calculators using toys.
+    """
+
+    def __init__(self, input, minimizer, ntoysnull=100, ntoysalt=100, sampler=base_sampler, sample=base_sample):
+        """Toys calculator class.
+
+            Args:
+                input : loss or fit result
+                minimizer : minimizer to use to find the minimum of the loss function
+                ntoysnull (int, default=100): minimum number of toys to generate for the null hypothesis
+                ntoysalt (int, default=100): minimum number of toys to generate for the alternative hypothesis
+                sampler : function used to create sampler with models, number of events and
+                    floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
+                sample : function used to get samples from the sampler.
+                    Default is `hepstats.fitutils.sampling.base_sample`.
+        """
+        super(ToysCalculator, self).__init__(input, minimizer, sampler, sample)
+
+        self._ntoysnull = ntoysnull
+        self._ntoysalt = ntoysalt
+
+    @classmethod
+    def from_yaml(cls, filename, loss, minimizer, ntoysnull=100, ntoysalt=100, sampler=base_sampler,
+                  sample=base_sample):
+        """
+        ToysCalculator constructor with the toys loaded from a yaml file.
+
+        Args:
+            filename (str)
+            input : loss or fit result
+            minimizer : minimizer to use to find the minimum of the loss function
+            ntoysnull (int, default=100): minimum number of toys to generate for the null hypothesis
+            ntoysalt (int, default=100): minimum number of toys to generate for the alternative hypothesis
+            sampler : function used to create sampler with models, number of events and
+                floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
+            sample : function used to get samples from the sampler.
+                Default is `hepstats.fitutils.sampling.base_sample`.
+
+        Returns
+            ToysCalculator
+        """
+
+        calculator = cls(loss, minimizer, ntoysnull, ntoysalt, sampler, sample)
+        toysresults = calculator.toysresults_from_yaml(filename)
+
+        for t in toysresults:
+            calculator.add_toyresult(t)
+
+        return calculator
+
+    @property
+    def ntoysnull(self):
+        """
+        Returns the number of toys generated for the null hypothesis.
+        """
+        return self._ntoysnull
+
+    @property
+    def ntoysalt(self):
+        """
+        Returns the number of toys generated for the alternative hypothesis.
+        """
+        return self._ntoysalt
+
+    def _get_toys(self, poigen, poieval=None, qtilde=False, hypothesis="null"):
+        """
+        Return the generated toys for a given POI.
+
+        Args:
+            poigen (POI): POI used to generate the toys
+            poieval (POIarray): POI values to evaluate the loss function
+            qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
+                the $$q$$ test statistic
+            hypothesis: `null` or `alternative`
+        """
+
+        assert hypothesis in ["null", "alternative"]
+
+        if hypothesis == "null":
+            ntoys = self.ntoysnull
+        else:
+            ntoys = self.ntoysalt
+
+        ret = {}
+
+        for p in poigen:
+            poieval_p = poieval
+
+            if poieval_p is None:
+                poieval_p = POIarray(poigen.parameter, [p.value])
+            else:
+                if p not in poieval_p:
+                    poieval_p = poieval_p.append(p.value)
+
+            if qtilde and 0. not in poieval_p.values:
+                poieval_p = poieval_p.append(0.0)
+
+            poieval_p = asarray(poieval_p)
+
+            ngenerated = self.ntoys(p, poieval_p)
+            if ngenerated < ntoys:
+                ntogen = ntoys - ngenerated
+            else:
+                ntogen = 0
+
+            if ntogen > 0:
+                print(f"Generating {hypothesis} hypothesis toys for {p}.")
+                print(p, poieval_p)
+
+                self.generate_and_fit_toys(ntoys=ntogen, poigen=p, poieval=poieval_p)
+
+            ret[p] = self.get_toyresult(p, poieval_p)
+
+        return ret
+
+    def get_toys_null(self, poigen, poieval, qtilde=False):
+        """
+        Return the generated toys for the null hypothesis.
+
+        Args:
+            poigen (POI): POI used to generate the toys
+            ntoys (int): number of toys to generate
+            poieval (POIarray): POI values to evaluate the loss function
+            qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
+                the $$q$$ test statistic
+
+        Example with `zfit`:
+            >>> mean = zfit.Parameter("mu", 1.2)
+            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
+            >>> poialt = POI(mean, 1.2)
+            >>> for p in poinull:
+            ...     calc.get_toys_alt(p, poieval=poialt)
+        """
+        return self._get_toys(poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="null")
+
+    def get_toys_alt(self, poigen, poieval, qtilde=False):
+        """
+        Return the generated toys for the alternative hypothesis.
+
+        Args:
+            poigen (POI): POI used to generate the toys
+            ntoys (int): number of toys to generate
+            poieval (POIarray): POI values to evaluate the loss function
+            qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
+                the $$q$$ test statistic
+
+        Example with `zfit`:
+            >>> mean = zfit.Parameter("mu", 1.2)
+            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
+            >>> poialt = POI(mean, 1.2)
+            >>> calc.get_toys_alt(poialt, poieval=poinull)
+        """
+        return self._get_toys(poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="alternative")
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,3 +11,4 @@ dependencies: @@
       - tensorflow=>1.14
       - tensorflow-probability
       - zfit
+      - asdf