From d22dbae97ae67bb5258628a27d122520e6a33e82 Mon Sep 17 00:00:00 2001 From: Jonas Eschle Date: Tue, 16 Apr 2024 16:35:19 -0400 Subject: [PATCH 1/6] enh: upgrade to Python312 --- .pre-commit-config.yaml | 4 +-- environment.yml | 5 ++- setup.cfg | 12 ++++--- src/hepstats/hypotests/hypotests_object.py | 21 +++++------ src/hepstats/hypotests/toyutils.py | 15 ++++---- src/hepstats/utils/fit/__init__.py | 10 +++++- src/hepstats/utils/fit/diverse.py | 16 +++++++-- src/hepstats/utils/fit/sampling.py | 33 +++++------------ tests/hypotests/test_calculators.py | 2 +- tests/hypotests/test_discovery.py | 42 ++++------------------ tests/hypotests/test_toysutils.py | 2 +- 11 files changed, 67 insertions(+), 95 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 18e269d1..bfffe832 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,10 +63,10 @@ repos: rev: 1.8.5 hooks: - id: nbqa-isort - additional_dependencies: [ isort==5.6.4 ] + additional_dependencies: [ isort] - id: nbqa-pyupgrade - additional_dependencies: [ pyupgrade==2.7.4 ] + additional_dependencies: [ pyupgrade] args: [ --py38-plus ] diff --git a/environment.yml b/environment.yml index ea3517ba..471d88d3 100644 --- a/environment.yml +++ b/environment.yml @@ -6,11 +6,10 @@ dependencies: - numpy - scipy - iminuit - - tensorflow>=2.4 - tensorflow-probability - #- zfit + #- zfit # todo: conda-forge is 0.18.1, we need 0.20.0 - asdf - matplotlib - pip: - . - - zfit >=0.6.4 + - zfit >=0.20.0 diff --git a/setup.cfg b/setup.cfg index 7d595eb8..5cd7b1b9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,6 @@ classifiers = Programming Language :: Python Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -43,7 +42,7 @@ install_requires = scipy tqdm uhi -python_requires = >=3.8 +python_requires = >=3.9 package_dir = = src @@ -52,8 +51,9 @@ where = src [options.extras_require] dev = - black - zfit + %(docs)s + %(test)s + pre-commit docs = matplotlib pydata-sphinx-theme @@ -65,7 +65,9 @@ test = pytest pytest-cov pytest-runner - zfit + zfit>=0.20.0 +zfit = + zfit>=0.20.0 [tool:pytest] junit_family = xunit2 diff --git a/src/hepstats/hypotests/hypotests_object.py b/src/hepstats/hypotests/hypotests_object.py index 19e6bde7..914e8720 100644 --- a/src/hepstats/hypotests/hypotests_object.py +++ b/src/hepstats/hypotests/hypotests_object.py @@ -3,7 +3,7 @@ import numpy as np from .parameters import POI -from ..utils.fit import get_nevents +from ..utils.fit import get_nevents, set_values_once from ..utils.fit.api_check import is_valid_data, is_valid_pdf from ..utils.fit.api_check import is_valid_loss, is_valid_fitresult, is_valid_minimizer @@ -117,8 +117,7 @@ def set_params_to_bestfit(self): """ Set the values of the parameters in the models to the best fit values """ - for param in self.parameters: - param.set_value(self.bestfit.params[param]["value"]) + set_values_once(self.parameters, self.bestfit) def lossbuilder(self, model, data, weights=None, oldloss=None): """Method to build a new loss function. @@ -156,7 +155,7 @@ def lossbuilder(self, model, data, weights=None, oldloss=None): if weights is not None: for d, w in zip(data, weights): - d.set_weights(w) + d = d.with_weights(w) if hasattr(oldloss, "create_new"): loss = oldloss.create_new( @@ -193,15 +192,11 @@ def __init__(self, input, minimizer, sampler, sample): self._sample = sample self._toys_loss = {} - def sampler(self, floating_params=None): + def sampler(self): """ Create sampler with models. - Args: - floating_params: floating parameters in the sampler - - Example with `zfit`: - >>> sampler = calc.sampler(floating_params=[zfit.Parameter("mean")]) + >>> sampler = calc.sampler() """ self.set_params_to_bestfit() nevents = [] @@ -214,7 +209,7 @@ def sampler(self, floating_params=None): else: nevents.append(nevents_data) - return self._sampler(self.loss.model, nevents, floating_params) + return self._sampler(self.loss.model, nevents) def sample(self, sampler, ntoys, poi: POI, constraints=None): """ @@ -229,7 +224,7 @@ def sample(self, sampler, ntoys, poi: POI, constraints=None): Example with `zfit`: >>> mean = zfit.Parameter("mean") - >>> sampler = calc.sampler(floating_params=[mean]) + >>> sampler = calc.sampler() >>> sample = calc.sample(sampler, 1000, POI(mean, 1.2)) Returns: @@ -257,6 +252,6 @@ def toys_loss(self, parameter_name: str): """ if parameter_name not in self._toys_loss: parameter = self.get_parameter(parameter_name) - sampler = self.sampler(floating_params=[parameter]) + sampler = self.sampler() self._toys_loss[parameter.name] = self.lossbuilder(self.model, sampler) return self._toys_loss[parameter_name] diff --git a/src/hepstats/hypotests/toyutils.py b/src/hepstats/hypotests/toyutils.py index 62f56831..8c684d5a 100644 --- a/src/hepstats/hypotests/toyutils.py +++ b/src/hepstats/hypotests/toyutils.py @@ -7,6 +7,7 @@ import asdf import numpy as np +import zfit.param from tqdm.auto import tqdm from .exceptions import ParameterNotFound, FormatError @@ -219,7 +220,7 @@ def ntoys(self, poigen: POI, poieval: POIarray) -> int: except KeyError: return 0 - def generate_and_fit_toys( + def generate_and_fit_toys( # TODO PROFILE THIS self, ntoys: int, poigen: POI, @@ -265,6 +266,7 @@ def generate_and_fit_toys( ntrials = 0 progressbar = tqdm(total=ntoys) + minimum = None for i in range(ntoys): ntrials += 1 @@ -282,13 +284,13 @@ def generate_and_fit_toys( ) param_dict = next(samples) - with ExitStack() as stack: - for param, value in param_dict.items(): - stack.enter_context(param.set_value(value)) + with zfit.param.set_values(param_dict): for _ in range(2): try: - minimum = minimizer.minimize(loss=toys_loss) + minimum = minimizer.minimize( + loss=toys_loss + ) # TODO: , init=minimum use previous minimum as starting point for parameter uncertainties converged = minimum.converged if converged: break @@ -303,7 +305,8 @@ def generate_and_fit_toys( msg = f"{nfailures} out of {ntrials} fits failed or didn't converge." warnings.warn(msg, FitFailuresWarning) continue - + if minimum is None: + raise RuntimeError("No minimum found.") bestfit[i] = minimum.params[param]["value"] nll_bestfit[i] = pll(minimizer, toys_loss, POI(param, bestfit[i])) diff --git a/src/hepstats/utils/fit/__init__.py b/src/hepstats/utils/fit/__init__.py index e7a31b74..5878e45f 100644 --- a/src/hepstats/utils/fit/__init__.py +++ b/src/hepstats/utils/fit/__init__.py @@ -1,2 +1,10 @@ -from .diverse import get_value, eval_pdf, pll, array2dataset, get_nevents, set_values +from .diverse import ( + get_value, + eval_pdf, + pll, + array2dataset, + get_nevents, + set_values, + set_values_once, +) from .sampling import base_sampler, base_sample diff --git a/src/hepstats/utils/fit/diverse.py b/src/hepstats/utils/fit/diverse.py index 891eeeb7..01648d72 100644 --- a/src/hepstats/utils/fit/diverse.py +++ b/src/hepstats/utils/fit/diverse.py @@ -1,4 +1,4 @@ -from contextlib import ExitStack, contextmanager +from contextlib import ExitStack, contextmanager, suppress import numpy as np @@ -12,6 +12,16 @@ def get_value(value): return np.array(value) +def set_values_once(params, values): + with suppress(ImportError): + import zfit + + return zfit.param.set_values(params, values) # more efficient + + for p, v in zip(params, values): + p.set_value(v) + + def eval_pdf(model, x, params=None, allow_extended=False): """Compute pdf of model at a given point x and for given parameters values""" @@ -34,7 +44,7 @@ def pdf(model, x): return pdf(model, x) -def pll(minimizer, loss, pois) -> float: +def pll(minimizer, loss, pois, init=None) -> float: """Compute minimum profile likelihood for fixed given parameters values.""" with ExitStack() as stack: @@ -44,7 +54,7 @@ def pll(minimizer, loss, pois) -> float: param.floating = False if any(param_loss.floating for param_loss in loss.get_params()): - minimum = minimizer.minimize(loss=loss) + minimum = minimizer.minimize(loss=loss) # TODO: add init? value = minimum.fmin else: value = get_value(loss.value()) diff --git a/src/hepstats/utils/fit/sampling.py b/src/hepstats/utils/fit/sampling.py index 753c56c4..293c3b98 100644 --- a/src/hepstats/utils/fit/sampling.py +++ b/src/hepstats/utils/fit/sampling.py @@ -6,14 +6,13 @@ from .diverse import get_value -def base_sampler(models, nevents, floating_params=None): +def base_sampler(models, nevents): """ Creates samplers from models. Args: models (list(model)): models to sample nevents (list(int)): number of in each sampler - floating_params (list(parameter), optionnal): floating parameter in the samplers Returns: Samplers @@ -22,24 +21,10 @@ def base_sampler(models, nevents, floating_params=None): assert all(is_valid_pdf(m) for m in models) assert len(nevents) == len(models) - if floating_params: - floating_params_names = [f.name for f in floating_params] - samplers = [] - fixed_params = [] - for m in models: - - def to_fix(p): - if floating_params: - return p.name in floating_params_names - else: - return False - fixed = [p for p in m.get_params() if not to_fix(p)] - fixed_params.append(fixed) - - for i, (m, p) in enumerate(zip(models, fixed_params)): - sampler = m.create_sampler(n=nevents[i], fixed_params=p) + for i, m in enumerate(models): + sampler = m.create_sampler(n=nevents[i]) samplers.append(sampler) return samplers @@ -72,13 +57,13 @@ def base_sample(samplers, ntoys, parameter=None, value=None, constraints=None): continue for i in range(ntoys): - if not (parameter is None or value is None): - with parameter.set_value(value): - for s in samplers: - s.resample() + if parameter is None or value is None: + params = None else: - for s in samplers: - s.resample() + params = {parameter: value} + + for s in samplers: + s.resample(params=params) if constraints is not None: yield {param: value[i] for param, value in sampled_constraints.items()} diff --git a/tests/hypotests/test_calculators.py b/tests/hypotests/test_calculators.py index b7106547..da3eb667 100644 --- a/tests/hypotests/test_calculators.py +++ b/tests/hypotests/test_calculators.py @@ -182,7 +182,7 @@ def test_frequentist_calculator_one_poi(constraint): assert calc.ntoysnull == 100 assert calc.ntoysalt == 100 - samplers = calc.sampler(floating_params=[mean]) + samplers = calc.sampler() assert all(is_valid_data(s) for s in samplers) loss = calc.toys_loss(mean.name) assert is_valid_loss(loss) diff --git a/tests/hypotests/test_discovery.py b/tests/hypotests/test_discovery.py index 26d11319..12e75b84 100644 --- a/tests/hypotests/test_discovery.py +++ b/tests/hypotests/test_discovery.py @@ -1,20 +1,15 @@ import os -import pytest + import numpy as np -import tqdm +import pytest import zfit -from zfit.loss import ExtendedUnbinnedNLL, UnbinnedNLL +from zfit.loss import UnbinnedNLL from zfit.minimize import Minuit -from zfit.models.dist_tfp import WrapDistribution -import tensorflow_probability as tfp -from zfit.util import ztyping -from collections import OrderedDict - import hepstats -from hepstats.hypotests.calculators.basecalculator import BaseCalculator -from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator from hepstats.hypotests import Discovery +from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator +from hepstats.hypotests.calculators.basecalculator import BaseCalculator from hepstats.hypotests.parameters import POI notebooks_dir = f"{os.path.dirname(hepstats.__file__)}/../../notebooks/hypotests" @@ -101,31 +96,6 @@ def test_with_frequentist_calculator(create_loss, nbins): assert significance >= 3 -class Poisson(WrapDistribution): - _N_OBS = 1 - - def __init__( - self, - lamb: ztyping.ParamTypeInput, - obs: ztyping.ObsTypeInput, - name: str = "Poisson", - ): - """ - Temporary class - """ - (lamb,) = self._check_input_params(lamb) - params = OrderedDict((("lamb", lamb),)) - dist_params = lambda: dict(rate=lamb.value()) - distribution = tfp.distributions.Poisson - super().__init__( - distribution=distribution, - dist_params=dist_params, - obs=obs, - params=params, - name=name, - ) - - def create_loss_counting(): n = 370 nbkg = 340 @@ -135,7 +105,7 @@ def create_loss_counting(): Nobs = zfit.ComposedParameter("Nobs", lambda a, b: a + b, params=[Nsig, Nbkg]) obs = zfit.Space("N", limits=(0, 800)) - model = Poisson(obs=obs, lamb=Nobs) + model = zfit.pdf.Poisson(obs=obs, lamb=Nobs) data = zfit.data.Data.from_numpy(obs=obs, array=np.array([n])) diff --git a/tests/hypotests/test_toysutils.py b/tests/hypotests/test_toysutils.py index 2a5fe28f..21b82140 100644 --- a/tests/hypotests/test_toysutils.py +++ b/tests/hypotests/test_toysutils.py @@ -127,7 +127,7 @@ def test_toymanager_attributes(): == tmc.get_toyresult(poigen, poieval).ntoys ) - samplers = tm.sampler(floating_params=[poigen.parameter]) + samplers = tm.sampler() assert all(is_valid_data(s) for s in samplers) loss = tm.toys_loss(poigen.name) assert is_valid_loss(loss) From 33195ca4368a18515a9f28fca4cacf69515b6cf8 Mon Sep 17 00:00:00 2001 From: Jonas Eschle Date: Tue, 16 Apr 2024 17:25:49 -0400 Subject: [PATCH 2/6] ci: update python versions --- .github/workflows/main.yml | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 39ddf631..af148c8c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,19 +31,17 @@ jobs: os: - ubuntu-latest python-version: - - "3.8" - "3.9" - "3.10" - "3.11" + - "3.12" include: - os: windows-latest - python-version: "3.8" - - os: windows-latest - python-version: "3.11" - - os: macos-latest - python-version: "3.8" + python-version: "3.9" - os: macos-latest - python-version: "3.11" + python-version: "3.9" + - os: macos-14 + python-version: "3.9" name: Check Python ${{ matrix.python-version }} ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -75,7 +73,7 @@ jobs: run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml -n3 - name: Upload coverage to Codecov - if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' + if: matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest' uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} # technically not needed, but prevents failures: https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954 @@ -109,7 +107,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | pip install -U -q -e .[docs] From 91e440023f4fa4f71f667a6fbebae128e0b8cc35 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 21:42:47 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/hepstats/hypotests/hypotests_object.py | 5 ++--- src/hepstats/hypotests/toyutils.py | 5 ++--- src/hepstats/utils/fit/diverse.py | 3 +++ src/hepstats/utils/fit/sampling.py | 5 +---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/hepstats/hypotests/hypotests_object.py b/src/hepstats/hypotests/hypotests_object.py index bebd0acb..d922954c 100644 --- a/src/hepstats/hypotests/hypotests_object.py +++ b/src/hepstats/hypotests/hypotests_object.py @@ -4,10 +4,9 @@ import numpy as np -from .parameters import POI from ..utils.fit import get_nevents, set_values_once -from ..utils.fit.api_check import is_valid_data, is_valid_pdf -from ..utils.fit.api_check import is_valid_loss, is_valid_fitresult, is_valid_minimizer +from ..utils.fit.api_check import is_valid_data, is_valid_fitresult, is_valid_loss, is_valid_minimizer, is_valid_pdf +from .parameters import POI class HypotestsObject: diff --git a/src/hepstats/hypotests/toyutils.py b/src/hepstats/hypotests/toyutils.py index af341269..a4cad7c3 100644 --- a/src/hepstats/hypotests/toyutils.py +++ b/src/hepstats/hypotests/toyutils.py @@ -2,7 +2,6 @@ import warnings from collections.abc import Callable -from contextlib import ExitStack from pathlib import Path import asdf @@ -283,7 +282,6 @@ def generate_and_fit_toys( # TODO PROFILE THIS param_dict = next(samples) with zfit.param.set_values(param_dict): - for _ in range(2): try: minimum = minimizer.minimize( @@ -304,7 +302,8 @@ def generate_and_fit_toys( # TODO PROFILE THIS warnings.warn(msg, FitFailuresWarning, stacklevel=2) continue if minimum is None: - raise RuntimeError("No minimum found.") + msg = "No minimum found." + raise RuntimeError(msg) bestfit[i] = minimum.params[param]["value"] nll_bestfit[i] = pll(minimizer, toys_loss, POI(param, bestfit[i])) diff --git a/src/hepstats/utils/fit/diverse.py b/src/hepstats/utils/fit/diverse.py index 1f793bb7..775aa9ab 100644 --- a/src/hepstats/utils/fit/diverse.py +++ b/src/hepstats/utils/fit/diverse.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from contextlib import ExitStack, contextmanager, suppress import numpy as np @@ -20,6 +22,7 @@ def set_values_once(params, values): for p, v in zip(params, values): p.set_value(v) + return None def eval_pdf(model, x, params=None, allow_extended=False): diff --git a/src/hepstats/utils/fit/sampling.py b/src/hepstats/utils/fit/sampling.py index ff825400..a1cf86bd 100644 --- a/src/hepstats/utils/fit/sampling.py +++ b/src/hepstats/utils/fit/sampling.py @@ -57,10 +57,7 @@ def base_sample(samplers, ntoys, parameter=None, value=None, constraints=None): continue for i in range(ntoys): - if parameter is None or value is None: - params = None - else: - params = {parameter: value} + params = None if parameter is None or value is None else {parameter: value} for s in samplers: s.resample(params=params) From ce060d32940ae9aa414b2f2ce5329818c5471e0f Mon Sep 17 00:00:00 2001 From: Jonas Eschle Date: Tue, 16 Apr 2024 17:56:34 -0400 Subject: [PATCH 4/6] ci: fix macos version --- .github/workflows/main.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index af148c8c..daa3e465 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,7 +41,7 @@ jobs: - os: macos-latest python-version: "3.9" - os: macos-14 - python-version: "3.9" + python-version: "3.12" # old versions not supported name: Check Python ${{ matrix.python-version }} ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -66,11 +66,12 @@ jobs: ${{ runner.os }}-pip- - name: Install package - run: python -m pip install -e .[test] pytest-xdist # for multiprocessing - + run: | + python -m pip install --upgrade pip + python -m pip install -e .[test] pytest-xdist # for multiprocessing - name: Test package - run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml -n3 + run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml -n auto - name: Upload coverage to Codecov if: matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest' From 18633beda98166a09a67535691a9937bfb5f4e92 Mon Sep 17 00:00:00 2001 From: Jonas Eschle Date: Tue, 16 Apr 2024 18:36:59 -0400 Subject: [PATCH 5/6] chore: fix unused init arg --- src/hepstats/utils/fit/diverse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hepstats/utils/fit/diverse.py b/src/hepstats/utils/fit/diverse.py index 775aa9ab..db2fe281 100644 --- a/src/hepstats/utils/fit/diverse.py +++ b/src/hepstats/utils/fit/diverse.py @@ -46,6 +46,7 @@ def pdf(model, x): def pll(minimizer, loss, pois, init=None) -> float: """Compute minimum profile likelihood for fixed given parameters values.""" + del init # unused currently with ExitStack() as stack: for p in pois: From 1b5696a8657a178d38e03902957411c0cc88ec2d Mon Sep 17 00:00:00 2001 From: Jonas Eschle Date: Tue, 16 Apr 2024 18:38:07 -0400 Subject: [PATCH 6/6] ci: downgrade python for docs --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index daa3e465..b90fc319 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -108,7 +108,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.11" - name: Install dependencies run: | pip install -U -q -e .[docs]