diff --git a/src/cabinetry/cli/__init__.py b/src/cabinetry/cli/__init__.py
index 254ccc5d..7932785e 100644
--- a/src/cabinetry/cli/__init__.py
+++ b/src/cabinetry/cli/__init__.py
@@ -154,8 +154,17 @@ def fit(
     default="figures",
     help='folder to save figures to (default: "figures")',
 )
+@click.option(
+    "--impacts_method",
+    default="covariance",
+    help='method for computing impacts (default: "covariance")',
+)
 def ranking(
-    ws_spec: io.TextIOWrapper, asimov: bool, max_pars: int, figfolder: str
+    ws_spec: io.TextIOWrapper,
+    asimov: bool,
+    max_pars: int,
+    figfolder: str,
+    impacts_method: str,
 ) -> None:
     """Ranks nuisance parameters and visualizes the result.
 
@@ -165,9 +174,13 @@ def ranking(
     ws = json.load(ws_spec)
     model, data = cabinetry_model_utils.model_and_data(ws, asimov=asimov)
     fit_results = cabinetry_fit.fit(model, data)
-    ranking_results = cabinetry_fit.ranking(model, data, fit_results=fit_results)
+    ranking_results = cabinetry_fit.ranking(
+        model, data, fit_results=fit_results, impacts_method=impacts_method
+    )
     cabinetry_visualize.ranking(
-        ranking_results, figure_folder=figfolder, max_pars=max_pars
+        ranking_results,
+        figure_folder=figfolder,
+        max_pars=max_pars,
     )
 
 
diff --git a/src/cabinetry/fit/__init__.py b/src/cabinetry/fit/__init__.py
index 5253d2ea..63deb1ef 100644
--- a/src/cabinetry/fit/__init__.py
+++ b/src/cabinetry/fit/__init__.py
@@ -1,5 +1,6 @@
 """High-level entry point for statistical inference."""
 
+from collections import defaultdict
 import logging
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
@@ -453,6 +454,353 @@ def _goodness_of_fit(
     return p_val
 
 
+def _get_impacts_summary(
+    impacts_by_modifier_type: Dict[str, Dict[str, List[float]]],
+) -> Dict[str, Dict[str, float]]:
+    """
+    Get the impacts summary, which includes the total impact of all nuisance parameters
+    split by systematic and non-systematic sources.
+
+    Args:
+        impacts_by_modifier_type (Dict[str, Dict[str, List[float]]]): impacts of
+            nuisance parameters categorized by modifier type and impact type
+
+    Returns:
+        Dict[str, Dict[str, float]]: impacts summary categorized by systematic and
+        non-systematic sources
+    """
+    # modifiers not listed here (e.g. "lumi") are counted towards the systematics
+    non_syst_modifiers = ["normfactor", "shapefactor", "staterror"]
+    impacts_summary = defaultdict(lambda: defaultdict(float))
+    # Dictionary to store the merged values after removing certain modifiers
+    syst_impacts_map = defaultdict(list)
+    # Iterate through each modifier and its corresponding data
+    for modifier, impacts_map in impacts_by_modifier_type.items():
+        if modifier not in non_syst_modifiers:
+            for impact_type, impact_values in impacts_map.items():
+                syst_impacts_map[impact_type].extend(impact_values)
+
+    for impact_type in ["impact_up", "impact_down"]:
+        impacts_summary["syst"][impact_type] = np.sqrt(
+            sum(np.power(syst_impacts_map[impact_type], 2))
+        )
+        for non_syst_modifier in non_syst_modifiers:
+            impacts_summary[non_syst_modifier][impact_type] = np.sqrt(
+                sum(
+                    np.power(
+                        impacts_by_modifier_type[non_syst_modifier][impact_type], 2
+                    )
+                )
+            )
+
+    return impacts_summary
+
+
+def _get_datastat_impacts_np_shift(
+    impacts_summary: Dict[str, Dict[str, float]],
+    model: pyhf.pdf.Model,
+    data: List[float],
+    poi_index: int,
+    fit_results: FitResults,
+    fit_kwargs: Dict[str, Any],
+) -> Dict[str, Dict[str, float]]:
+    """
+    Calculate the impact of statistical uncertainties on the parameter of interest by
+    fixing all nuisance parameters to their best-fit values and repeating the fit.
+
+    Args:
+        impacts_summary (Dict[str, Dict[str, float]]): the impacts summary
+            categorized by systematic and non-systematic sources
+        model (pyhf.pdf.Model): the model to be used in the fit
+        data (List[float]): data (including auxdata) the model is fit to
+        poi_index (int): index of the parameter of interest
+        fit_results (FitResults): nominal fit results to use in impacts calculation
+        fit_kwargs (Dict[str, Any]): settings to be used in the fits
+
+    Returns:
+        Dict[str, Dict[str, float]]: impacts summary categorized by systematic and
+        non-systematic sources with the statistical uncertainty on data added.
+    """
+    nominal_poi = fit_results.bestfit[poi_index]
+    # Get statistical uncertainty
+    # fix all parameters except the POI to their best-fit values and repeat the fit
+    fix_pars_datastat = [
+        i_par != poi_index for i_par in range(len(model.config.par_names))
+    ]
+    init_pars_datastat = fit_kwargs["init_pars"].copy()
+    init_pars_datastat = [
+        fit_results.bestfit[i_par] if i_par != poi_index else init_pars_datastat[i_par]
+        for i_par in range(len(model.config.par_names))
+    ]
+    fit_results_datastat = _fit_model(
+        model,
+        data,
+        init_pars=init_pars_datastat,
+        fix_pars=fix_pars_datastat,
+        **{k: v for k, v in fit_kwargs.items() if k not in ["init_pars", "fix_pars"]},
+    )
+    datastat_poi_val = fit_results_datastat.bestfit[poi_index]
+    datastat_impact = datastat_poi_val - nominal_poi
+    impacts_summary["datastat"] = datastat_impact
+
+    return impacts_summary
+
+
+def _get_datastat_impacts_quadrature(
+    impacts_summary: Dict[str, Dict[str, float]], total_error: float
+) -> Dict[str, Dict[str, float]]:
+    """
+    Calculate the impact of statistical uncertainties on the parameter of interest by
+    subtracting other sources from the total error in quadrature.
+
+    Args:
+        impacts_summary (Dict[str, Dict[str, float]]): the impacts summary
+            categorized by systematic and non-systematic sources
+        total_error (float): total error on the parameter of interest
+
+    Returns:
+        Dict[str, Dict[str, float]]: impacts summary categorized by systematic and
+        non-systematic sources with the statistical uncertainty on data added.
+    """
+    for impact_type in ["impact_up", "impact_down"]:
+        # Statistical uncertainties including shape and norm factors
+        data_staterror_with_nf_sf = np.sqrt(
+            np.power(total_error, 2)
+            - (
+                np.power(impacts_summary["syst"][impact_type], 2)
+                + np.power(impacts_summary["staterror"][impact_type], 2)
+            )
+        )
+        impacts_summary["datastat_with_NF_SF"][impact_type] = data_staterror_with_nf_sf
+        # Statistical uncertainties including norm factors only
+        data_staterror_with_nf_only = np.sqrt(
+            np.power(total_error, 2)
+            - (
+                np.power(impacts_summary["syst"][impact_type], 2)
+                + np.power(impacts_summary["staterror"][impact_type], 2)
+                + np.power(impacts_summary["shapefactor"][impact_type], 2)
+            )
+        )
+        impacts_summary["datastat_with_NF_only"][
+            impact_type
+        ] = data_staterror_with_nf_only
+        # Statistical uncertainties including shape factors only
+        data_staterror_with_sf_only = np.sqrt(
+            np.power(total_error, 2)
+            - (
+                np.power(impacts_summary["syst"][impact_type], 2)
+                + np.power(impacts_summary["staterror"][impact_type], 2)
+                + np.power(impacts_summary["normfactor"][impact_type], 2)
+            )
+        )
+        impacts_summary["datastat_with_SF_only"][
+            impact_type
+        ] = data_staterror_with_sf_only
+        # Statistical uncertainties without shape and norm factors
+        data_staterror_without_nf_sf = np.sqrt(
+            np.power(total_error, 2)
+            - (
+                np.power(impacts_summary["syst"][impact_type], 2)
+                + np.power(impacts_summary["staterror"][impact_type], 2)
+                + np.power(impacts_summary["normfactor"][impact_type], 2)
+                + np.power(impacts_summary["shapefactor"][impact_type], 2)
+            )
+        )
+        impacts_summary["datastat"][impact_type] = data_staterror_without_nf_sf
+
+    return impacts_summary
+
+
+def _cov_impacts(
+    model: pyhf.pdf.Model,
+    poi_index: int,
+    fit_results: FitResults,
+    prefit_unc: np.ndarray,
+    labels: List[str],
+) -> Tuple[Dict[str, Dict[str, List[float]]], Dict[str, Dict[str, float]]]:
+    """
+    Computes the impact of nuisance parameters on a parameter of interest by using
+    their correlation in the post-fit covariance matrix.
+
+    Args:
+        model (pyhf.pdf.Model): model used in fit
+        poi_index (int): index of the parameter of interest
+        fit_results (FitResults): nominal fit results to use in impacts calculation
+        prefit_unc (np.ndarray): pre-fit uncertainties of fit parameters
+        labels (List[str]): list of parameter names
+
+    Returns:
+        Tuple[Dict[str, Dict[str, List[float]]], Dict[str, Dict[str, float]]]:
+            - A dictionary mapping modifier types to the computed impact values
+              of all parameters belonging to that type, categorized by
+              impact type (e.g., "prefit_impact_up", "postfit_impact_down").
+            - A summary dictionary of total impact contributions categorized into
+              systematic and non-systematic sources.
+    """
+    total_poi_error = fit_results.uncertainty[poi_index]
+    impacts_by_modifier_type = defaultdict(lambda: defaultdict(list))
+    i_global_par = 0
+
+    for parameter in model.config.par_order:
+        par_modifier = [
+            name_and_mod[1]
+            for name_and_mod in model.config.modifiers
+            if name_and_mod[0] == parameter
+        ][0]
+
+        for i_sub_par in np.arange(model.config.param_set(parameter).n_parameters):
+            i_par = i_global_par + i_sub_par
+            label = model.config.par_names[i_par]
+            if i_par == poi_index:
+                i_par += model.config.param_set(parameter).n_parameters
+                continue  # do not calculate impact of POI on itself
+            log.info(f"calculating impact of {label} on {labels[poi_index]}")
+
+            # We need the correlation of this parameter with the POI
+            corr_with_POI = fit_results.corr_mat[i_par][poi_index]
+
+            for impact_type in [
+                "prefit_impact_up",
+                "prefit_impact_down",
+                "postfit_impact_up",
+                "postfit_impact_down",
+            ]:
+                if "prefit" in impact_type:
+                    parameter_impact = 0.0
+                else:
+                    parameter_error = fit_results.uncertainty[i_par]
+                    if "down" in impact_type:
+                        parameter_error *= -1
+                    parameter_impact = parameter_error * corr_with_POI * total_poi_error
+                    if par_modifier == "staterror":
+                        prefit_parameter_error = prefit_unc[i_par]
+                        # divide by the (positive) pre-fit uncertainty; no sign flip
+                        parameter_impact /= prefit_parameter_error
+
+                impacts_by_modifier_type[par_modifier][impact_type].append(
+                    parameter_impact
+                )
+
+                # All impacts put together to be used for ranking
+                impacts_by_modifier_type["all"][impact_type].append(parameter_impact)
+
+        # update combined parameters index (e.g. staterrors)
+        i_global_par += model.config.param_set(parameter).n_parameters
+    impacts_summary = _get_impacts_summary(impacts_by_modifier_type)
+    impacts_summary = _get_datastat_impacts_quadrature(impacts_summary, total_poi_error)
+
+    return impacts_by_modifier_type, impacts_summary
+
+
+def _np_impacts(
+    model: pyhf.pdf.Model,
+    data: List[float],
+    poi_index: int,
+    fit_results: FitResults,
+    prefit_unc: np.ndarray,
+    labels: List[str],
+    fit_kwargs: Dict[str, Any],
+) -> Tuple[Dict[str, Dict[str, List[float]]], Dict[str, Dict[str, float]]]:
+    """
+    Computes the impact of nuisance parameters on the POI by shifting parameter
+    values according to their pre- and post-fit uncertainties, fixing them, and
+    re-evaluating the fit. This process involves performing four fits per NP.
+
+    Args:
+        model (pyhf.pdf.Model): model to be used in fits
+        data (List[float]): data (including auxdata) the model is fit to
+        poi_index (int): index of the parameter of interest
+        fit_results (FitResults): nominal fit results to use in impacts calculation
+        prefit_unc (np.ndarray): pre-fit uncertainties of parameters
+        labels (List[str]): list of parameter names
+        fit_kwargs (Dict[str, Any]): settings to be used in the fits
+
+    Returns:
+        Tuple[Dict[str, Dict[str, List[float]]], Dict[str, Dict[str, float]]]:
+            - A dictionary mapping modifier types to the computed impact values
+              of all parameters belonging to that type, categorized by
+              impact type (e.g., "prefit_impact_up", "postfit_impact_down").
+            - A summary dictionary of total impact contributions categorized into
+              systematic and non-systematic sources.
+    """
+    nominal_poi = fit_results.bestfit[poi_index]
+    impacts_by_modifier_type = defaultdict(lambda: defaultdict(list))
+    i_global_par = 0
+
+    for parameter in model.config.par_order:
+        # Get modifier type for parameter
+        par_modifier = [
+            name_and_mod[1]
+            for name_and_mod in model.config.modifiers
+            if name_and_mod[0] == parameter
+        ][0]
+        for i_sub_par in np.arange(model.config.param_set(parameter).n_parameters):
+            i_par = i_global_par + i_sub_par
+            label = model.config.par_names[i_par]
+            if i_par == poi_index:
+                i_par += model.config.param_set(parameter).n_parameters
+                continue  # do not calculate impact of POI on itself
+            log.info(f"calculating impact of {label} on {labels[poi_index]}")
+
+            # hold current parameter constant
+            fix_pars_ranking = fit_kwargs["fix_pars"].copy()
+            fix_pars_ranking[i_par] = True
+
+            parameter_impacts = []
+            # calculate impacts: pre-fit up, pre-fit down, post-fit up, post-fit down
+            for i_np_val, np_val in enumerate(
+                [
+                    fit_results.bestfit[i_par] + prefit_unc[i_par],
+                    fit_results.bestfit[i_par] - prefit_unc[i_par],
+                    fit_results.bestfit[i_par] + fit_results.uncertainty[i_par],
+                    fit_results.bestfit[i_par] - fit_results.uncertainty[i_par],
+                ]
+            ):
+                # can skip pre-fit calculation for unconstrained parameters (their
+                # pre-fit uncertainty is set to 0), and pre- and post-fit calculation
+                # for fixed parameters (both uncertainties set to 0 as well)
+                if np_val == fit_results.bestfit[i_par]:
+                    log.debug(f"impact of {label} is zero, skipping fit")
+                    parameter_impacts.append(0.0)
+                else:
+                    init_pars_ranking = fit_kwargs["init_pars"].copy()
+                    # value of current nuisance parameter
+                    init_pars_ranking[i_par] = np_val
+                    fit_results_ranking = _fit_model(
+                        model,
+                        data,
+                        init_pars=init_pars_ranking,
+                        fix_pars=fix_pars_ranking,
+                        **{
+                            k: v
+                            for k, v in fit_kwargs.items()
+                            if k not in ["init_pars", "fix_pars"]
+                        },
+                    )
+                    poi_val = fit_results_ranking.bestfit[poi_index]
+                    parameter_impact = poi_val - nominal_poi
+                    log.debug(
+                        f"POI is {poi_val:.6f}, difference to nominal is "
+                        f"{parameter_impact:.6f}"
+                    )
+                    impact_type = "prefit_impact" if i_np_val < 2 else "postfit_impact"
+                    impact_type += "_up" if i_np_val % 2 == 0 else "_down"
+                    impacts_by_modifier_type[par_modifier][impact_type].append(
+                        parameter_impact
+                    )
+                    impacts_by_modifier_type["all"][impact_type].append(
+                        parameter_impact
+                    )
+
+        # update combined parameters index (e.g. staterrors)
+        i_global_par += model.config.param_set(parameter).n_parameters
+    impacts_summary = _get_impacts_summary(impacts_by_modifier_type)
+    impacts_summary = _get_datastat_impacts_np_shift(
+        impacts_summary, model, data, poi_index, fit_results, fit_kwargs
+    )
+
+    return impacts_by_modifier_type, impacts_summary
+
+
 def fit(
     model: pyhf.pdf.Model,
     data: List[float],
@@ -549,6 +897,7 @@ def ranking(
     maxiter: Optional[int] = None,
     tolerance: Optional[float] = None,
     custom_fit: bool = False,
+    impacts_method: str = "covariance",
 ) -> RankingResults:
     """Calculates the impact of nuisance parameters on the parameter of interest (POI).
 
@@ -580,6 +929,12 @@ def ranking(
             None (use ``iminuit`` default of 0.1)
         custom_fit (bool, optional): whether to use the ``pyhf.infer`` API or
             ``iminuit``, defaults to False (using ``pyhf.infer``)
+        impacts_method (str, optional): method for evaluating impacts, defaults to
+            "covariance":
+            covariance = use the post-fit covariance matrix,
+            np_shift = shift each NP by its uncertainty, fix it, then re-run the fit,
+            auxdata_shift = shift the auxiliary data values by their pre-fit
+            uncertainties, then re-run the fit
 
     Raises:
         ValueError: if no POI is found
@@ -587,18 +942,19 @@ def ranking(
     Returns:
         RankingResults: fit results for parameters, and pre- and post-fit impacts
     """
+
+    fit_settings = {
+        "init_pars": init_pars or model.config.suggested_init(),
+        "fix_pars": fix_pars or model.config.suggested_fixed(),
+        "par_bounds": par_bounds,
+        "strategy": strategy,
+        "maxiter": maxiter,
+        "tolerance": tolerance,
+        "custom_fit": custom_fit,
+    }
+
     if fit_results is None:
-        fit_results = _fit_model(
-            model,
-            data,
-            init_pars=init_pars,
-            fix_pars=fix_pars,
-            par_bounds=par_bounds,
-            strategy=strategy,
-            maxiter=maxiter,
-            tolerance=tolerance,
-            custom_fit=custom_fit,
-        )
+        fit_results = _fit_model(model, data, **fit_settings)
 
     labels = model.config.par_names
     prefit_unc = model_utils.prefit_uncertainties(model)
@@ -608,66 +964,29 @@ def ranking(
     if poi_index is None:
         raise ValueError("no POI specified, cannot calculate ranking")
 
-    nominal_poi = fit_results.bestfit[poi_index]
-
-    # need to get values for parameter settings, as they will be partially changed
-    # during the ranking (init/fix changes)
-    # use parameter settings provided in function arguments if they exist, else defaults
-    init_pars = init_pars or model.config.suggested_init()
-    fix_pars = fix_pars or model.config.suggested_fixed()
-
-    all_impacts = []
-    for i_par, label in enumerate(labels):
-        if i_par == poi_index:
-            continue  # do not calculate impact of POI on itself
-        log.info(f"calculating impact of {label} on {labels[poi_index]}")
-
-        # hold current parameter constant
-        fix_pars_ranking = fix_pars.copy()
-        fix_pars_ranking[i_par] = True
-
-        parameter_impacts = []
-        # calculate impacts: pre-fit up, pre-fit down, post-fit up, post-fit down
-        for np_val in [
-            fit_results.bestfit[i_par] + prefit_unc[i_par],
-            fit_results.bestfit[i_par] - prefit_unc[i_par],
-            fit_results.bestfit[i_par] + fit_results.uncertainty[i_par],
-            fit_results.bestfit[i_par] - fit_results.uncertainty[i_par],
-        ]:
-            # can skip pre-fit calculation for unconstrained parameters (their
-            # pre-fit uncertainty is set to 0), and pre- and post-fit calculation
-            # for fixed parameters (both uncertainties set to 0 as well)
-            if np_val == fit_results.bestfit[i_par]:
-                log.debug(f"impact of {label} is zero, skipping fit")
-                parameter_impacts.append(0.0)
-            else:
-                init_pars_ranking = init_pars.copy()
-                init_pars_ranking[i_par] = np_val  # value of current nuisance parameter
-                fit_results_ranking = _fit_model(
-                    model,
-                    data,
-                    init_pars=init_pars_ranking,
-                    fix_pars=fix_pars_ranking,
-                    par_bounds=par_bounds,
-                    strategy=strategy,
-                    maxiter=maxiter,
-                    tolerance=tolerance,
-                    custom_fit=custom_fit,
-                )
-                poi_val = fit_results_ranking.bestfit[poi_index]
-                parameter_impact = poi_val - nominal_poi
-                log.debug(
-                    f"POI is {poi_val:.6f}, difference to nominal is "
-                    f"{parameter_impact:.6f}"
-                )
-                parameter_impacts.append(parameter_impact)
-        all_impacts.append(parameter_impacts)
+    if impacts_method == "np_shift":
+        impacts_by_modifier, impacts_summary = _np_impacts(
+            model, data, poi_index, fit_results, prefit_unc, labels, fit_settings
+        )
+    elif impacts_method == "covariance":
+        impacts_by_modifier, impacts_summary = _cov_impacts(
+            model, poi_index, fit_results, prefit_unc, labels
+        )
+    elif impacts_method == "auxdata_shift":
+        raise NotImplementedError(
+            "Impacts using auxiliary data shifting are not supported yet."
+        )
+    else:
+        raise ValueError(
+            f"The option {impacts_method} is not a valid method to compute impacts."
+            + " Valid options are: (np_shift, covariance, auxdata_shift)"
+        )
 
-    all_impacts_np = np.asarray(all_impacts)
-    prefit_up = all_impacts_np[:, 0]
-    prefit_down = all_impacts_np[:, 1]
-    postfit_up = all_impacts_np[:, 2]
-    postfit_down = all_impacts_np[:, 3]
+    # Impacts of all parameters
+    prefit_up = np.asarray(impacts_by_modifier["all"]["prefit_impact_up"])
+    prefit_down = np.asarray(impacts_by_modifier["all"]["prefit_impact_down"])
+    postfit_up = np.asarray(impacts_by_modifier["all"]["postfit_impact_up"])
+    postfit_down = np.asarray(impacts_by_modifier["all"]["postfit_impact_down"])
 
     # remove parameter of interest from bestfit / uncertainty / labels
     # such that their entries match the entries of the impacts
@@ -676,7 +995,14 @@ def ranking(
     labels = np.delete(fit_results.labels, poi_index).tolist()
 
     ranking_results = RankingResults(
-        bestfit, uncertainty, labels, prefit_up, prefit_down, postfit_up, postfit_down
+        bestfit,
+        uncertainty,
+        labels,
+        prefit_up,
+        prefit_down,
+        postfit_up,
+        postfit_down,
+        impacts_method,
     )
     return ranking_results
 
diff --git a/src/cabinetry/fit/results_containers.py b/src/cabinetry/fit/results_containers.py
index f35f0a86..4a5b8a7c 100644
--- a/src/cabinetry/fit/results_containers.py
+++ b/src/cabinetry/fit/results_containers.py
@@ -43,6 +43,7 @@ class RankingResults(NamedTuple):
         prefit_down (np.ndarray): pre-fit impact in "down" direction
         postfit_up (np.ndarray): post-fit impact in "up" direction
         postfit_down (np.ndarray): post-fit impact in "down" direction
+        impacts_method (str): the method used to compute parameter impacts
     """
 
     bestfit: np.ndarray
@@ -52,6 +53,7 @@ class RankingResults(NamedTuple):
     prefit_down: np.ndarray
     postfit_up: np.ndarray
     postfit_down: np.ndarray
+    impacts_method: str
 
 
 class ScanResults(NamedTuple):
diff --git a/src/cabinetry/visualize/__init__.py b/src/cabinetry/visualize/__init__.py
index a5bc5401..e800fa88 100644
--- a/src/cabinetry/visualize/__init__.py
+++ b/src/cabinetry/visualize/__init__.py
@@ -566,7 +566,11 @@ def ranking(
         matplotlib.figure.Figure: the ranking figure
     """
     # path is None if figure should not be saved
-    figure_path = pathlib.Path(figure_folder) / "ranking.pdf" if save_figure else None
+    figure_path = (
+        pathlib.Path(figure_folder) / f"ranking_{ranking_results.impacts_method}.pdf"
+        if save_figure
+        else None
+    )
 
     # sort parameters by decreasing maximum post-fit impact
     max_postfit_impact = np.maximum(
@@ -603,6 +607,7 @@ def ranking(
         postfit_down,
         figure_path=figure_path,
         close_figure=close_figure,
+        impacts_method=ranking_results.impacts_method,
     )
     return fig
 
diff --git a/src/cabinetry/visualize/plot_result.py b/src/cabinetry/visualize/plot_result.py
index adfd2e7d..a98cffad 100644
--- a/src/cabinetry/visualize/plot_result.py
+++ b/src/cabinetry/visualize/plot_result.py
@@ -128,6 +128,7 @@ def ranking(
     *,
     figure_path: Optional[pathlib.Path] = None,
     close_figure: bool = False,
+    impacts_method: str = "covariance",
 ) -> mpl.figure.Figure:
     """Draws a ranking plot.
 
@@ -146,6 +147,8 @@ def ranking(
         close_figure (bool, optional): whether to close each figure immediately after
             saving it, defaults to False (enable when producing many figures to avoid
             memory issues, prevents rendering in notebooks)
+        impacts_method (str, optional): the method used to compute parameter impacts,
+            one of np_shift, covariance, auxdata_shift; defaults to "covariance"
 
     Returns:
         matplotlib.figure.Figure: the ranking figure
@@ -163,6 +166,22 @@ def ranking(
     else:
         layout = None  # pragma: no cover  # layout set after figure creation instead
 
+    if impacts_method not in ["np_shift", "covariance", "auxdata_shift"]:
+        raise ValueError(
+            f"The impacts method {impacts_method} provided is not supported."
+            + " Valid options are (np_shift, covariance, auxdata_shift)"
+        )
+    if impacts_method == "auxdata_shift":
+        raise NotImplementedError(
+            "Plotting impacts computed by shifting auxiliary data is not supported yet."
+        )
+
+    impacts_color_map = {
+        "np_shift": ["C0", "C5"],
+        "covariance": ["#2CA02C", "#98DF8A"],
+        "auxdata_shift": ["#9467BD", "#C5B0D5"],
+    }
+
     mpl.style.use(MPL_STYLE)
     fig, ax_pulls = plt.subplots(
         figsize=(8, 2.5 + num_pars * 0.45), dpi=100, layout=layout
@@ -197,18 +216,33 @@ def ranking(
 
     y_pos = np.arange(num_pars)[::-1]
 
-    # pre-fit up
-    pre_up = ax_impact.barh(
-        y_pos, impact_prefit_up, fill=False, linewidth=1, edgecolor="C0"
-    )
-    # pre-fit down
-    pre_down = ax_impact.barh(
-        y_pos, impact_prefit_down, fill=False, linewidth=1, edgecolor="C5"
-    )
+    pre_up, pre_down = None, None
+    if impacts_method == "np_shift":
+        # pre-fit up
+        pre_up = ax_impact.barh(
+            y_pos,
+            impact_prefit_up,
+            fill=False,
+            linewidth=1,
+            edgecolor=impacts_color_map[impacts_method][0],
+        )
+        # pre-fit down
+        pre_down = ax_impact.barh(
+            y_pos,
+            impact_prefit_down,
+            fill=False,
+            linewidth=1,
+            edgecolor=impacts_color_map[impacts_method][1],
+        )
+
     # post-fit up
-    post_up = ax_impact.barh(y_pos, impact_postfit_up, color="C0")
+    post_up = ax_impact.barh(
+        y_pos, impact_postfit_up, color=impacts_color_map[impacts_method][0]
+    )
     # post-fit down
-    post_down = ax_impact.barh(y_pos, impact_postfit_down, color="C5")
+    post_down = ax_impact.barh(
+        y_pos, impact_postfit_down, color=impacts_color_map[impacts_method][1]
+    )
 
     # nuisance parameter pulls
     pulls = ax_pulls.errorbar(bestfit, y_pos, xerr=uncertainty, fmt="o", color="k")
@@ -244,20 +278,44 @@ def ranking(
     ax_pulls.tick_params(direction="in", which="both")
     ax_impact.tick_params(direction="in", which="both")
 
-    fig.legend(
-        (pre_up, pre_down, post_up, post_down, pulls),
-        (
-            r"pre-fit impact: $\theta = \hat{\theta} + \Delta \theta$",
-            r"pre-fit impact: $\theta = \hat{\theta} - \Delta \theta$",
-            r"post-fit impact: $\theta = \hat{\theta} + \Delta \hat{\theta}$",
-            r"post-fit impact: $\theta = \hat{\theta} - \Delta \hat{\theta}$",
-            "pulls",
-        ),
-        frameon=False,
-        loc="upper left",
-        ncol=3,
-        fontsize="large",
+    leg_handles = (
+        (pre_up, pre_down, post_up, post_down, pulls)
+        if impacts_method == "np_shift"
+        else (post_up, post_down, pulls)
     )
+    leg_settings = {
+        "frameon": False,
+        "loc": "upper left",
+        "ncol": 3,
+        "fontsize": "large",
+    }
+
+    if impacts_method == "np_shift":
+        fig.legend(
+            leg_handles,
+            (
+                r"pre-fit impact: $\theta = \hat{\theta} + \Delta \theta$",
+                r"pre-fit impact: $\theta = \hat{\theta} - \Delta \theta$",
+                r"post-fit impact: $\theta = \hat{\theta} + \Delta \hat{\theta}$",
+                r"post-fit impact: $\theta = \hat{\theta} - \Delta \hat{\theta}$",
+                "pulls",
+            ),
+            **leg_settings,
+        )
+    elif impacts_method == "covariance":
+        fig.legend(
+            leg_handles,
+            (
+                r"post-fit impact: $\sigma_{\text{POI}}\,$"
+                + r"$\rho(\text{POI},\theta)\,\sigma_{\theta}$",
+                r"post-fit impact: $-\sigma_{\text{POI}}\,$"
+                + r"$\rho(\text{POI},\theta)\,\sigma_{\theta}$",
+                "pulls",
+            ),
+            **leg_settings,
+        )
+    else:
+        pass  # auxdata_shift legend to be implemented
 
     if not MPL_GEQ_36:
fig.tight_layout(rect=[0, 0, 1.0, 1 - leg_space]) # pragma: no cover diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index c7d580ac..297d5243 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -203,6 +203,7 @@ def test_fit(mock_utils, mock_fit, mock_pulls, mock_corrmat, tmp_path): np.asarray([[0.8]]), np.asarray([[1.1]]), np.asarray([[0.9]]), + impacts_method="np_shift", ), autospec=True, ) @@ -240,7 +241,10 @@ def test_ranking(mock_utils, mock_fit, mock_rank, mock_vis, tmp_path): assert mock_utils.call_args_list == [((workspace,), {"asimov": False})] assert mock_fit.call_args_list == [(("model", "data"), {})] assert mock_rank.call_args_list == [ - (("model", "data"), {"fit_results": fit_results}) + ( + ("model", "data"), + {"fit_results": fit_results, "impacts_method": "covariance"}, + ) ] assert mock_vis.call_count == 1 assert np.allclose(mock_vis.call_args[0][0].prefit_up, [[1.2]]) @@ -252,12 +256,24 @@ def test_ranking(mock_utils, mock_fit, mock_rank, mock_vis, tmp_path): # Asimov, maximum amount of parameters, custom folder result = runner.invoke( cli.ranking, - ["--asimov", "--max_pars", 3, "--figfolder", "folder", workspace_path], + [ + "--impacts_method", + "np_shift", + "--asimov", + "--max_pars", + 3, + "--figfolder", + "folder", + workspace_path, + ], ) assert result.exit_code == 0 assert mock_utils.call_args == ((workspace,), {"asimov": True}) assert mock_fit.call_args == (("model", "data"), {}) - assert mock_rank.call_args == (("model", "data"), {"fit_results": fit_results}) + assert mock_rank.call_args == ( + ("model", "data"), + {"fit_results": fit_results, "impacts_method": "np_shift"}, + ) assert mock_vis.call_args[1] == {"figure_folder": "folder", "max_pars": 3} diff --git a/tests/fit/test_fit.py b/tests/fit/test_fit.py index 29058b7b..56ff838d 100644 --- a/tests/fit/test_fit.py +++ b/tests/fit/test_fit.py @@ -522,6 +522,10 @@ def test_fit(mock_fit, mock_print, mock_gof): fit.FitResults( np.asarray([0.8, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 ), + # !datastats: nominal fit results because everything is fixed except POI + fit.FitResults( + np.asarray([1.0, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 + ), # for second ranking call with fixed parameter fit.FitResults( np.asarray([1.2, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 @@ -529,6 +533,10 @@ def test_fit(mock_fit, mock_print, mock_gof): fit.FitResults( np.asarray([0.8, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 ), + # !datastats: nominal fit results because everything is fixed except POI + fit.FitResults( + np.asarray([1.0, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 + ), # for third ranking call without reference results fit.FitResults( np.asarray([1.0, 0.9]), np.asarray([0.3, 0.3]), ["a", "b"], np.empty(0), 0.0 @@ -539,6 +547,18 @@ def test_fit(mock_fit, mock_print, mock_gof): fit.FitResults( np.asarray([0.7, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 ), + # !datastats: nominal fit results because everything is fixed except POI + fit.FitResults( + np.asarray([1.0, 0.9]), np.asarray([0.1, 0.1]), ["a", "b"], np.empty(0), 0.0 + ), + # covariance-based method + fit.FitResults( + np.asarray([0.7, 0.9]), + np.asarray([0.1, 0.1]), + ["a", "b"], + np.asarray([[1.0, 0.1], [0.1, 1.0]]), + 0.0, + ), ], ) def test_ranking(mock_fit, example_spec): @@ -546,14 +566,18 @@ def test_ranking(mock_fit, example_spec): bestfit = np.asarray([1.0, 0.9]) uncertainty = np.asarray([0.1, 0.02]) labels = ["Signal 
strength", "staterror"] - fit_results = fit.FitResults(bestfit, uncertainty, labels, np.empty(0), 0.0) + corr_mat = np.asarray([[1.0, 0.1], [0.1, 1.0]]) + fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, 0.0) model, data = model_utils.model_and_data(example_spec) - ranking_results = fit.ranking(model, data, fit_results=fit_results) + # NP shifting method + ranking_results = fit.ranking( + model, data, fit_results=fit_results, impacts_method="np_shift" + ) # correct call to fit expected_fix = [False, True] expected_inits = [[1.0, 0.95019305], [1.0, 0.84980695], [1.0, 0.92], [1.0, 0.88]] - assert mock_fit.call_count == 4 + assert mock_fit.call_count == 5 # four fits + datastat fit for i in range(4): assert mock_fit.call_args_list[i][0] == (model, data) assert np.allclose( @@ -577,20 +601,41 @@ def test_ranking(mock_fit, example_spec): assert np.allclose(ranking_results.postfit_up, [0.2]) assert np.allclose(ranking_results.postfit_down, [-0.2]) + # Covariance-based method + ranking_results = fit.ranking( + model, data, fit_results=fit_results, impacts_method="covariance" + ) + # no further calls to mock fit + assert mock_fit.call_count == 5 + # POI removed from fit results + assert np.allclose(ranking_results.bestfit, [0.9]) + assert np.allclose(ranking_results.uncertainty, [0.02]) + assert ranking_results.labels == ["staterror"] + + # received correct results - new values + assert np.allclose(ranking_results.prefit_up, [0.0]) + assert np.allclose(ranking_results.prefit_down, [0.0]) + assert np.allclose( + ranking_results.postfit_up, [0.003984615385] + ) # 0.02*0.1*0.1/0.05 + assert np.allclose(ranking_results.postfit_down, [-0.003984615385]) + # fixed parameter in ranking, custom fit, POI via kwarg example_spec["measurements"][0]["config"]["parameters"][0]["fixed"] = True example_spec["measurements"][0]["config"]["poi"] = "" model, data = model_utils.model_and_data(example_spec) + # NP shifting method for impacts ranking_results = fit.ranking( model, data, fit_results=fit_results, poi_name="Signal strength", custom_fit=True, + impacts_method="np_shift", ) - # expect two calls in this ranking (and had 4 before, so 6 total): pre-fit + # expect three calls in this ranking (and had 5 before, so 8 total): pre-fit # uncertainty is 0 since parameter is fixed, mock post-fit uncertainty is not 0 - assert mock_fit.call_count == 6 + assert mock_fit.call_count == 8 assert mock_fit.call_args[1]["custom_fit"] is True assert np.allclose(ranking_results.prefit_up, [0.0]) assert np.allclose(ranking_results.prefit_down, [0.0]) @@ -598,6 +643,8 @@ def test_ranking(mock_fit, example_spec): assert np.allclose(ranking_results.postfit_down, [-0.2]) # no reference results, init/fixed pars, par bounds, strategy/maxiter/tolerance + # NP shifting method + # !is this test complete? 
parameter is still considered fixed with prefit_unc = 0.0 ranking_results = fit.ranking( model, data, @@ -609,10 +656,11 @@ def test_ranking(mock_fit, example_spec): maxiter=100, tolerance=0.01, custom_fit=True, + impacts_method="np_shift", ) - assert mock_fit.call_count == 9 + assert mock_fit.call_count == 12 # reference fit - assert mock_fit.call_args_list[-3] == ( + assert mock_fit.call_args_list[-4] == ( (model, data), { "init_pars": [1.5, 1.0], @@ -625,22 +673,23 @@ def test_ranking(mock_fit, example_spec): }, ) # fits for impact (comparing each option separately since init_pars needs allclose) + assert mock_fit.call_args_list[-3][0] == (model, data) + assert np.allclose(mock_fit.call_args_list[-3][1]["init_pars"], [1.5, 1.2]) + assert mock_fit.call_args_list[-3][1]["fix_pars"] == [False, True] + assert mock_fit.call_args_list[-3][1]["par_bounds"] == [(0, 5), (0.1, 10)] + assert mock_fit.call_args_list[-3][1]["strategy"] == 2 + assert mock_fit.call_args_list[-3][1]["maxiter"] == 100 + assert mock_fit.call_args_list[-3][1]["tolerance"] == 0.01 + assert mock_fit.call_args_list[-3][1]["custom_fit"] is True assert mock_fit.call_args_list[-2][0] == (model, data) - assert np.allclose(mock_fit.call_args_list[-2][1]["init_pars"], [1.5, 1.2]) + assert np.allclose(mock_fit.call_args_list[-2][1]["init_pars"], [1.5, 0.6]) assert mock_fit.call_args_list[-2][1]["fix_pars"] == [False, True] assert mock_fit.call_args_list[-2][1]["par_bounds"] == [(0, 5), (0.1, 10)] assert mock_fit.call_args_list[-2][1]["strategy"] == 2 assert mock_fit.call_args_list[-2][1]["maxiter"] == 100 assert mock_fit.call_args_list[-2][1]["tolerance"] == 0.01 assert mock_fit.call_args_list[-2][1]["custom_fit"] is True - assert mock_fit.call_args_list[-1][0] == (model, data) - assert np.allclose(mock_fit.call_args_list[-1][1]["init_pars"], [1.5, 0.6]) - assert mock_fit.call_args_list[-1][1]["fix_pars"] == [False, True] - assert mock_fit.call_args_list[-1][1]["par_bounds"] == [(0, 5), (0.1, 10)] - assert mock_fit.call_args_list[-1][1]["strategy"] == 2 - assert mock_fit.call_args_list[-1][1]["maxiter"] == 100 - assert mock_fit.call_args_list[-1][1]["tolerance"] == 0.01 - assert mock_fit.call_args_list[-1][1]["custom_fit"] is True + # [-1] is datastat # ranking results assert np.allclose(ranking_results.prefit_up, [0.0]) assert np.allclose(ranking_results.prefit_down, [0.0]) @@ -651,6 +700,60 @@ def test_ranking(mock_fit, example_spec): with pytest.raises(ValueError, match="no POI specified, cannot calculate ranking"): fit.ranking(model, data, fit_results=fit_results) + # Covariance-based method + # approach requires non-zero pre-fit uncertainty on staterror modifiers + example_spec["measurements"][0]["config"]["parameters"][0]["fixed"] = False + example_spec["measurements"][0]["config"]["poi"] = "Signal strength" + model, data = model_utils.model_and_data(example_spec) + ranking_results = fit.ranking( + model, + data, + poi_name="Signal strength", + init_pars=[1.5, 1.0], + fix_pars=[False, False], + par_bounds=[(0, 5), (0.1, 10)], + strategy=2, + maxiter=100, + tolerance=0.01, + custom_fit=True, + impacts_method="covariance", + ) + # reference fit + assert mock_fit.call_count == 13 + assert mock_fit.call_args_list[-1] == ( + (model, data), + { + "init_pars": [1.5, 1.0], + "fix_pars": [False, False], + "par_bounds": [(0, 5), (0.1, 10)], + "strategy": 2, + "maxiter": 100, + "tolerance": 0.01, + "custom_fit": True, + }, + ) + # ranking results + assert np.allclose(ranking_results.prefit_up, [0.0]) + assert 
np.allclose(ranking_results.prefit_down, [0.0]) + assert np.allclose(ranking_results.postfit_up, [0.01992307692]) # 0.1*0.1*0.1/0.05 + assert np.allclose(ranking_results.postfit_down, [-0.01992307692]) + + # Aux data shifting method not supported yet + with pytest.raises( + NotImplementedError, + match="Impacts using auxiliary data shifting are not supported yet.", + ): + fit.ranking( + model, data, fit_results=fit_results, impacts_method="auxdata_shift" + ) + # catch non-existent method + with pytest.raises( + ValueError, + match="The option wrong_method is not a valid method to compute impacts." + + " Valid options are: \\(np_shift, covariance, auxdata_shift\\)", + ): + fit.ranking(model, data, fit_results=fit_results, impacts_method="wrong_method") + @mock.patch( "cabinetry.fit._fit_model", diff --git a/tests/fit/test_fit_results_containers.py b/tests/fit/test_fit_results_containers.py index 0739663b..627e6a69 100644 --- a/tests/fit/test_fit_results_containers.py +++ b/tests/fit/test_fit_results_containers.py @@ -30,7 +30,14 @@ def test_RankingResults(): postfit_up = np.asarray([0.2]) postfit_down = np.asarray([-0.2]) ranking_results = fit.RankingResults( - bestfit, uncertainty, labels, prefit_up, prefit_down, postfit_up, postfit_down + bestfit, + uncertainty, + labels, + prefit_up, + prefit_down, + postfit_up, + postfit_down, + impacts_method="np_shift", ) assert np.allclose(ranking_results.bestfit, bestfit) assert np.allclose(ranking_results.uncertainty, uncertainty) @@ -39,6 +46,7 @@ def test_RankingResults(): assert np.allclose(ranking_results.prefit_down, prefit_down) assert np.allclose(ranking_results.postfit_up, postfit_up) assert np.allclose(ranking_results.postfit_down, postfit_down) + assert ranking_results.impacts_method, "np_shift" def test_ScanResults(): diff --git a/tests/test_integration.py b/tests/test_integration.py index 73ab7697..129da8d1 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -200,8 +200,9 @@ def test_integration(tmp_path, ntuple_creator, caplog): _ = cabinetry.visualize.data_mc(prediction_postfit, data, close_figure=True) # nuisance parameter ranking + # with NP shifting ranking_results = cabinetry.fit.ranking( - model, data, fit_results=fit_results, custom_fit=True + model, data, fit_results=fit_results, custom_fit=True, impacts_method="np_shift" ) assert np.allclose( ranking_results.prefit_up, @@ -254,6 +255,44 @@ def test_integration(tmp_path, ntuple_creator, caplog): atol=1e-4, ) + # with covariance-based method + ranking_results = cabinetry.fit.ranking( + model, + data, + fit_results=fit_results, + custom_fit=True, + impacts_method="covariance", + ) + assert np.allclose(ranking_results.prefit_up, [0.0] * 7) + + assert np.allclose(ranking_results.prefit_down, [0.0] * 7) + assert np.allclose( + ranking_results.postfit_up, + [ + -0.48565867, + -0.5250071, + -0.15659875, + -0.10167499, + 0.13321283, + -0.10301342, + -0.05424587, + ], + atol=1e-4, + ) + assert np.allclose( + ranking_results.postfit_down, + [ + 0.48565867, + 0.5250071, + 0.15659875, + 0.10167499, + -0.13321283, + 0.10301342, + 0.05424587, + ], + atol=1e-4, + ) + # parameter scan scan_results = cabinetry.fit.scan( model, diff --git a/tests/visualize/reference/ranking_covariance.png b/tests/visualize/reference/ranking_covariance.png new file mode 100644 index 00000000..171c226b Binary files /dev/null and b/tests/visualize/reference/ranking_covariance.png differ diff --git a/tests/visualize/reference/ranking.png b/tests/visualize/reference/ranking_np_shift.png 
similarity index 100% rename from tests/visualize/reference/ranking.png rename to tests/visualize/reference/ranking_np_shift.png diff --git a/tests/visualize/test_visualize.py b/tests/visualize/test_visualize.py index c2405751..dd403154 100644 --- a/tests/visualize/test_visualize.py +++ b/tests/visualize/test_visualize.py @@ -510,10 +510,11 @@ def test_ranking(mock_draw): impact_prefit_down, impact_postfit_up, impact_postfit_down, + impacts_method="np_shift", ) folder_path = "tmp" - figure_path = pathlib.Path(folder_path) / "ranking.pdf" + figure_path = pathlib.Path(folder_path) / "ranking_np_shift.pdf" bestfit_expected = np.asarray([0.1, 1.2]) uncertainty_expected = np.asarray([0.8, 0.2]) labels_expected = ["modeling", "staterror_a"] @@ -530,7 +531,11 @@ def test_ranking(mock_draw): assert np.allclose(mock_draw.call_args[0][4], impact_prefit_down[::-1]) assert np.allclose(mock_draw.call_args[0][5], impact_postfit_up[::-1]) assert np.allclose(mock_draw.call_args[0][6], impact_postfit_down[::-1]) - assert mock_draw.call_args[1] == {"figure_path": figure_path, "close_figure": True} + assert mock_draw.call_args[1] == { + "figure_path": figure_path, + "close_figure": True, + "impacts_method": "np_shift", + } # maximum parameter amount specified, do not close figure, do not save figure _ = visualize.ranking( @@ -548,7 +553,44 @@ def test_ranking(mock_draw): assert np.allclose(mock_draw.call_args[0][4], impact_prefit_down[1]) assert np.allclose(mock_draw.call_args[0][5], impact_postfit_up[1]) assert np.allclose(mock_draw.call_args[0][6], impact_postfit_down[1]) - assert mock_draw.call_args[1] == {"figure_path": None, "close_figure": False} + assert mock_draw.call_args[1] == { + "figure_path": None, + "close_figure": False, + "impacts_method": "np_shift", + } + + # maximum parameter amount specified, do not close figure, do not save figure + # covariance impacts + ranking_results = fit.RankingResults( + bestfit, + uncertainty, + labels, + impact_prefit_up, + impact_prefit_down, + impact_postfit_up, + impact_postfit_down, + impacts_method="covariance", + ) + _ = visualize.ranking( + ranking_results, + figure_folder=folder_path, + max_pars=1, + close_figure=False, + save_figure=False, + ) + assert mock_draw.call_count == 3 + assert np.allclose(mock_draw.call_args[0][0], bestfit_expected[0]) + assert np.allclose(mock_draw.call_args[0][1], uncertainty_expected[0]) + assert mock_draw.call_args[0][2] == labels_expected[0] + assert np.allclose(mock_draw.call_args[0][3], impact_prefit_up[1]) + assert np.allclose(mock_draw.call_args[0][4], impact_prefit_down[1]) + assert np.allclose(mock_draw.call_args[0][5], impact_postfit_up[1]) + assert np.allclose(mock_draw.call_args[0][6], impact_postfit_down[1]) + assert mock_draw.call_args[1] == { + "figure_path": None, + "close_figure": False, + "impacts_method": "covariance", + } @mock.patch( diff --git a/tests/visualize/test_visualize_plot_result.py b/tests/visualize/test_visualize_plot_result.py index d510fa55..c60f4e6f 100644 --- a/tests/visualize/test_visualize_plot_result.py +++ b/tests/visualize/test_visualize_plot_result.py @@ -70,6 +70,10 @@ def test_pulls(tmp_path): plt.close("all") +@pytest.mark.xfail( + sys.version_info <= (3, 9), + reason="legend positioning in Python 3.8 with older matplotlib, see cabinetry#476", +) def test_ranking(tmp_path): fname = tmp_path / "fig.png" bestfit = np.asarray([0.3, -0.1]) @@ -89,17 +93,49 @@ def test_ranking(tmp_path): impact_postfit_up, impact_postfit_down, figure_path=fname, + impacts_method="np_shift", ) # large 
tolerance needed here, possibly related to lack of set_tight_layout usage assert ( - compare_images("tests/visualize/reference/ranking.png", str(fname), 50) is None + compare_images("tests/visualize/reference/ranking_np_shift.png", str(fname), 50) + is None ) # compare figure returned by function fname = tmp_path / "fig_from_return.png" fig.savefig(fname) assert ( - compare_images("tests/visualize/reference/ranking.png", str(fname), 50) is None + compare_images("tests/visualize/reference/ranking_np_shift.png", str(fname), 50) + is None + ) + + cov_ranking_comparisons = [] + fname = tmp_path / "fig_cov.png" + fig = plot_result.ranking( + bestfit, + uncertainty, + labels, + impact_prefit_up, + impact_prefit_down, + impact_postfit_up, + impact_postfit_down, + figure_path=fname, + impacts_method="covariance", + ) + # large tolerance needed here, possibly related to lack of set_tight_layout usage + cov_ranking_comparisons.append( + compare_images( + "tests/visualize/reference/ranking_covariance.png", str(fname), 50 + ) + ) + + # compare figure returned by function + fname = tmp_path / "fig_cov_from_return.png" + fig.savefig(fname) + cov_ranking_comparisons.append( + compare_images( + "tests/visualize/reference/ranking_covariance.png", str(fname), 50 + ) ) # do not save figure, but close it @@ -116,6 +152,43 @@ def test_ranking(tmp_path): ) assert mock_close_safe.call_args_list == [((fig, None, True), {})] + with pytest.raises( + ValueError, + match="The impacts method wrong_method provided is not supported." + + " Valid options are \\(np_shift, covariance, auxdata_shift\\)", + ): + plot_result.ranking( + bestfit, + uncertainty, + labels, + impact_prefit_up, + impact_prefit_down, + impact_postfit_up, + impact_postfit_down, + close_figure=True, + impacts_method="wrong_method", + ) + + with pytest.raises( + NotImplementedError, + match="Plotting impacts computed by shifting auxiliary data is not " + + "supported yet.", + ): + plot_result.ranking( + bestfit, + uncertainty, + labels, + impact_prefit_up, + impact_prefit_down, + impact_postfit_up, + impact_postfit_down, + close_figure=True, + impacts_method="auxdata_shift", + ) + + for cov_ranking_comparison in cov_ranking_comparisons: + assert cov_ranking_comparison is None + plt.close("all")
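
For reference, a minimal usage sketch of the new impacts_method option (not part of the patch above; the workspace file name ws.json is a placeholder, assuming a pyhf workspace on disk):

import json

import cabinetry

ws = json.load(open("ws.json"))  # placeholder workspace path
model, data = cabinetry.model_utils.model_and_data(ws)
fit_results = cabinetry.fit.fit(model, data)

# covariance-based impacts (new default): read off the post-fit covariance matrix,
# no additional fits required
ranking_cov = cabinetry.fit.ranking(
    model, data, fit_results=fit_results, impacts_method="covariance"
)
cabinetry.visualize.ranking(ranking_cov)  # saved as figures/ranking_covariance.pdf

# NP-shift impacts: four additional fits per nuisance parameter
ranking_np = cabinetry.fit.ranking(
    model, data, fit_results=fit_results, impacts_method="np_shift"
)
cabinetry.visualize.ranking(ranking_np)  # saved as figures/ranking_np_shift.pdf

# equivalent CLI call for the NP-shift variant:
#   cabinetry ranking --impacts_method np_shift ws.json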