Skip to content

Commit

Permalink
Merge pull request #20 from scikit-hep/mm_savetoys
Browse files Browse the repository at this point in the history
add ToyResult and ToyManager classes
  • Loading branch information
marinang authored Feb 15, 2020
2 parents d63fe0f + d61ae7c commit e759346
Show file tree
Hide file tree
Showing 33 changed files with 34,270 additions and 909 deletions.
2 changes: 2 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs:
python.version: '3.6'
Python37:
python.version: '3.7'
Python38:
python.version: '3.8'

steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ dependencies:
- tensorflow=>1.14
- tensorflow-probability
- zfit
- asdf
2 changes: 1 addition & 1 deletion hepstats/hypotests/calculators/asymptotic_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def asimov_dataset(self, poi) -> (np.array, np.array):
for i, ad in enumerate([generate_asimov_hist(m, values, self._asimov_bins) for m in model]):
weights, bin_edges = ad
bin_centers = bin_edges[0: -1] + np.diff(bin_edges)/2
asimov_data.append(array2dataset(type(data[i]), data[i].obs, bin_centers, weights))
asimov_data.append(array2dataset(type(data[i]), data[i].space, bin_centers, weights))

self._asimov_dataset[poi] = asimov_data

Expand Down
318 changes: 179 additions & 139 deletions hepstats/hypotests/calculators/basecalculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
from typing import Dict, Union, Tuple, List
import numpy as np

from ..parameters import POI, POIarray
from ..fitutils.api_check import is_valid_loss, is_valid_fitresult, is_valid_minimizer
from ..fitutils.api_check import is_valid_data, is_valid_pdf
from ..hypotests_object import HypotestsObject
from ..parameters import POI, POIarray, asarray
from ..fitutils.utils import pll
from ..toyutils import ToysManager
from ..fitutils.sampling import base_sampler, base_sample

"""
Module defining the base class for the calculators for statistical tests based on the likelyhood ratio.
Expand All @@ -21,7 +22,7 @@
"""


class BaseCalculator(object):
class BaseCalculator(HypotestsObject):

def __init__(self, input, minimizer):
"""Base class for calculator.
Expand All @@ -45,149 +46,15 @@ def __init__(self, input, minimizer):
>>> calc = BaseCalculator(input=loss, minimizer=MinuitMinimizer())
"""

if is_valid_fitresult(input):
self._loss = input.loss
self._bestfit = input
elif is_valid_loss(input):
self._loss = input
self._bestfit = None
else:
raise ValueError("{} is not a valid loss funtion or fit result!".format(input))

if not is_valid_minimizer(minimizer):
raise ValueError("{} is not a valid minimizer !".format(minimizer))
super(BaseCalculator, self).__init__(input, minimizer)

self._minimizer = minimizer
self.minimizer.verbosity = 0
# cache of the observed nll values
self._obs_nll = {}

self._parameters = {}
for m in self.model:
for d in m.get_dependents():
self._parameters[d.name] = d

@property
def loss(self):
"""
Returns the loss / likelihood function used in the calculator.
"""
return self._loss

@property
def minimizer(self):
"""
Returns the minimzer used in the calculator.
"""
return self._minimizer

@property
def bestfit(self):
"""
Returns the best fit values of the model parameters.
"""
if getattr(self, "_bestfit", None):
return self._bestfit
else:
print("Get fit best values!")
self.minimizer.verbosity = 5
mininum = self.minimizer.minimize(loss=self.loss)
self.minimizer.verbosity = 0
self._bestfit = mininum
return self._bestfit

@bestfit.setter
def bestfit(self, value):
"""
Set the best fit values of the model parameters.
Args:
value: fit result
"""
if not is_valid_fitresult(value):
raise ValueError()
self._bestfit = value

@property
def model(self):
"""
Returns the model used in the calculator.
"""
return self.loss.model

@property
def data(self):
"""
Returns the data used in the calculator.
"""
return self.loss.data

@property
def constraints(self):
"""
Returns the constraints on the loss / likehood function used in the calculator.
"""
return self.loss.constraints

def get_parameter(self, name):
"""
Returns the parameter in loss function with given input name.
Args:
name (str): name of the parameter to return
"""
return self._parameters[name]

def set_dependents_to_bestfit(self):
"""
Set the values of the parameters in the models to the best fit values
"""
for m in self.model:
for d in m.get_dependents():
d.set_value(self.bestfit.params[d]["value"])

def lossbuilder(self, model, data, weights=None):
""" Method to build a new loss function.
Args:
model (List): The model or models to evaluate the data on
data (List): Data to use
weights (optional, List): the data weights
Example with `zfit`:
>>> data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
>>> mean = zfit.Parameter("mu", 1.2)
>>> sigma = zfit.Parameter("sigma", 0.1)
>>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
>>> loss = calc.lossbuilder(model, data)
Returns:
Loss function
"""

assert all(is_valid_pdf(m) for m in model)
assert all(is_valid_data(d) for d in data)

msg = "{0} must have the same number of components as {1}"
if len(data) != len(self.data):
raise ValueError(msg.format("data", "`self.data"))
if len(model) != len(self.model):
raise ValueError(msg.format("model", "`self.model"))
if weights is not None and len(weights) != len(self.data):
raise ValueError(msg.format("weights", "`self.data`"))

fit_range = self.loss.fit_range

if weights is not None:
for d, w in zip(data, weights):
d.set_weights(w)

loss = type(self.loss)(model=model, data=data, fit_range=fit_range)
loss.add_constraints(self.constraints)

return loss

def obs_nll(self, pois) -> np.array:
""" Compute observed negative log-likelihood values for given parameters of interest.
Expand Down Expand Up @@ -436,3 +303,176 @@ def q(self, nll1: np.array, nll2: np.array, poi1, poi2,
q = q

return q


class BaseToysCalculator(BaseCalculator):

def __init__(self, input, minimizer, sampler, sample):
"""Basis for toys calculator class.
Args:
input : loss or fit result
minimizer : minimizer to use to find the minimum of the loss function
sampler : function used to create sampler with models, number of events and
floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
sample : function used to get samples from the sampler.
Default is `hepstats.fitutils.sampling.base_sample`.
"""
super(BaseToysCalculator, self).__init__(input, minimizer)


class ToysCalculator(BaseToysCalculator, ToysManager):
"""
Class for calculators using toys.
"""

def __init__(self, input, minimizer, ntoysnull=100, ntoysalt=100, sampler=base_sampler, sample=base_sample):
"""Toys calculator class.
Args:
input : loss or fit result
minimizer : minimizer to use to find the minimum of the loss function
ntoysnull (int, default=100): minimum number of toys to generate for the null hypothesis
ntoysalt (int, default=100): minimum number of toys to generate for the alternative hypothesis
sampler : function used to create sampler with models, number of events and
floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
sample : function used to get samples from the sampler.
Default is `hepstats.fitutils.sampling.base_sample`.
"""
super(ToysCalculator, self).__init__(input, minimizer, sampler, sample)

self._ntoysnull = ntoysnull
self._ntoysalt = ntoysalt

@classmethod
def from_yaml(cls, filename, loss, minimizer, ntoysnull=100, ntoysalt=100, sampler=base_sampler,
sample=base_sample):
"""
ToysCalculator constructor with the toys loaded from a yaml file.
Args:
filename (str)
input : loss or fit result
minimizer : minimizer to use to find the minimum of the loss function
ntoysnull (int, default=100): minimum number of toys to generate for the null hypothesis
ntoysalt (int, default=100): minimum number of toys to generate for the alternative hypothesis
sampler : function used to create sampler with models, number of events and
floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
sample : function used to get samples from the sampler.
Default is `hepstats.fitutils.sampling.base_sample`.
Returns
ToysCalculator
"""

calculator = cls(loss, minimizer, ntoysnull, ntoysalt, sampler, sample)
toysresults = calculator.toysresults_from_yaml(filename)

for t in toysresults:
calculator.add_toyresult(t)

return calculator

@property
def ntoysnull(self):
"""
Returns the number of toys generated for the null hypothesis.
"""
return self._ntoysnull

@property
def ntoysalt(self):
"""
Returns the number of toys generated for the alternative hypothesis.
"""
return self._ntoysalt

def _get_toys(self, poigen, poieval=None, qtilde=False, hypothesis="null"):
"""
Return the generated toys for a given POI.
Args:
poigen (POI): POI used to generate the toys
poieval (POIarray): POI values to evaluate the loss function
qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
the $$q$$ test statistic
hypothesis: `null` or `alternative`
"""

assert hypothesis in ["null", "alternative"]

if hypothesis == "null":
ntoys = self.ntoysnull
else:
ntoys = self.ntoysalt

ret = {}

for p in poigen:
poieval_p = poieval

if poieval_p is None:
poieval_p = POIarray(poigen.parameter, [p.value])
else:
if p not in poieval_p:
poieval_p = poieval_p.append(p.value)

if qtilde and 0. not in poieval_p.values:
poieval_p = poieval_p.append(0.0)

poieval_p = asarray(poieval_p)

ngenerated = self.ntoys(p, poieval_p)
if ngenerated < ntoys:
ntogen = ntoys - ngenerated
else:
ntogen = 0

if ntogen > 0:
print(f"Generating {hypothesis} hypothesis toys for {p}.")
print(p, poieval_p)

self.generate_and_fit_toys(ntoys=ntogen, poigen=p, poieval=poieval_p)

ret[p] = self.get_toyresult(p, poieval_p)

return ret

def get_toys_null(self, poigen, poieval, qtilde=False):
"""
Return the generated toys for the null hypothesis.
Args:
poigen (POI): POI used to generate the toys
ntoys (int): number of toys to generate
poieval (POIarray): POI values to evaluate the loss function
qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
the $$q$$ test statistic
Example with `zfit`:
>>> mean = zfit.Parameter("mu", 1.2)
>>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
>>> poialt = POI(mean, 1.2)
>>> for p in poinull:
... calc.get_toys_alt(p, poieval=poialt)
"""
return self._get_toys(poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="null")

def get_toys_alt(self, poigen, poieval, qtilde=False):
"""
Return the generated toys for the alternative hypothesis.
Args:
poigen (POI): POI used to generate the toys
ntoys (int): number of toys to generate
poieval (POIarray): POI values to evaluate the loss function
qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
the $$q$$ test statistic
Example with `zfit`:
>>> mean = zfit.Parameter("mu", 1.2)
>>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
>>> poialt = POI(mean, 1.2)
>>> calc.get_toys_alt(poialt, poieval=poinull)
"""
return self._get_toys(poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="alternative")
Loading

0 comments on commit e759346

Please sign in to comment.