Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: more flexible filter specification to allow overriding and appending to filters #310

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
4 changes: 3 additions & 1 deletion config_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ General:
Regions:
- Name: "Signal_region"
Variable: "jet_pt"
Filter: "lep_charge > 0"
Filters:
- Name: "lepton_charge"
Filter: "lep_charge > 0"
Binning: [200, 300, 400, 500, 600]

Samples:
Expand Down
8 changes: 6 additions & 2 deletions docs/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,16 @@ Details about the setting blocks:
Common options:
---------------

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/template_setting
.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/filters_setting

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/samples_setting
.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/filter_setting

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/samplepath_setting

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/regions_setting

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/template_setting

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/samples_setting

.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/smoothing_setting
14 changes: 10 additions & 4 deletions src/cabinetry/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import pathlib
import pkgutil
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, TypeVar, Union

import jsonschema
import yaml
Expand All @@ -15,6 +15,10 @@
log = logging.getLogger(__name__)


# used in _setting_to_list below
T = TypeVar("T", str, Dict[str, Any])


def load(file_path_string: Union[str, pathlib.Path]) -> Dict[str, Any]:
"""Loads, validates, and returns a config file from the provided path.

Expand Down Expand Up @@ -99,7 +103,7 @@ def print_overview(config: Dict[str, Any]) -> None:
log.info(f" {len(config['Systematics'])} Systematic(s)")


def _setting_to_list(setting: Union[str, List[str]]) -> List[str]:
def _setting_to_list(setting: Union[T, List[T]]) -> List[T]:
"""Converts a configuration setting to a list.

The config allows for two ways of specifying some settings, for example samples. A
Expand All @@ -108,10 +112,12 @@ def _setting_to_list(setting: Union[str, List[str]]) -> List[str]:
converted to a list.

Args:
setting (Union[str, List[str]]): name of single setting value or list of values
setting (Union[Union[str, Dict[str, Any]], List[Union[str, Dict[str, Any]]]]):
single setting value (string or dictionary) or list of values (each being
strings or dictionaries)

Returns:
list: name(s) of sample(s)
list: values (strings or dictionaries) in list form
"""
if not isinstance(setting, list):
setting = [setting]
Expand Down
6 changes: 3 additions & 3 deletions src/cabinetry/route.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
log = logging.getLogger(__name__)


# type of a function processing templates, takes sample-region-systematic-template,
# returns None
# type of a function processing templates, takes general-region-sample-systematic-
# template, returns None
# template can be "Up" / "Down" for variations, or None for nominal
ProcessorFunc = Callable[
[Dict[str, Any], Dict[str, Any], Dict[str, Any], Optional[Literal["Up", "Down"]]],
None,
]

# type of a user-defined function for template processing, takes sample-region-
# type of a user-defined function for template processing, takes general-region-sample-
# systematic-template, returns a boost_histogram.Histogram
# template can be any string (to match "Up" / "Down"), or None / "*" to match nominal
UserTemplateFunc = Callable[
Expand Down
65 changes: 56 additions & 9 deletions src/cabinetry/schemas/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@
"description": "folder to save histograms to and read histograms from",
"type": "string"
},
"Filters": {
"description": "selection criteria to apply",
"$$description": "only affects ntuple inputs",
"$ref": "#/definitions/filters_setting"
},
"Fixed": {
"description": "list of parameters to treat as constant in fits",
"type": "array",
Expand Down Expand Up @@ -145,9 +150,10 @@
},
"uniqueItems": true
},
"Filter": {
"description": "selection criteria to apply",
"type": "string"
"Filters": {
"description": "selection criteria to apply (can override filters at general level)",
"$$description": "only affects ntuple inputs",
"$ref": "#/definitions/filters_setting"
},
"RegionPath": {
"description": "(part of) path to file containing region",
Expand Down Expand Up @@ -182,9 +188,10 @@
"description": "name of tree",
"type": "string"
},
"Filter": {
"description": "selection criteria to apply (override for region setting)",
"type": "string"
"Filters": {
"description": "selection criteria to apply (can override filters at general and region level)",
"$$description": "only affects ntuple inputs",
"$ref": "#/definitions/filters_setting"
},
"Weight": {
"description": "weight to apply to events",
Expand Down Expand Up @@ -307,9 +314,10 @@
"description": "variable to bin in (override for nominal setting)",
"type": "string"
},
"Filter": {
"description": "selection criteria to apply (override for region / sample setting)",
"type": "string"
"Filters": {
"description": "selection criteria to apply (can override filters at general, region and sample level)",
"$$description": "only affects ntuple inputs",
"$ref": "#/definitions/filters_setting"
},
"RegionPath": {
"description": "(part of) path to file containing region (override for nominal setting)",
Expand Down Expand Up @@ -420,6 +428,45 @@
}
},
"additionalProperties": false
},
"filters_setting": {
"title": "Filters setting",
"$$target": "#/definitions/filters_setting",
"description": "filter(s) to apply for histogram creation",
"oneOf": [
{
"description": "single filter",
"$ref": "#/definitions/filter_setting"
},
{
"description": "list of filters",
"type": "array",
"minItems": 1,
"items": {
"description": "single filter",
"$ref": "#/definitions/filter_setting"
},
"uniqueItems": true
}
]
},
"filter_setting": {
"title": "Filter setting",
"$$target": "#/definitions/filter_setting",
"description": "filter setting, filters with the same name override each other",
"type": "object",
"required": ["Name", "Filter"],
"properties": {
"Name": {
"description": "name of filter (filters can be overridden by other filters with the same name)",
"type": "string"
},
"Filter": {
"description": "filter to apply",
"type": "string"
}
},
"additionalProperties": false
}
},
"additionalProperties": false
Expand Down
5 changes: 4 additions & 1 deletion src/cabinetry/templates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ def build(
# create an instance of the class doing the template building
histogram_folder = pathlib.Path(config["General"]["HistogramFolder"])
general_path = config["General"]["InputPath"]
template_builder = builder._Builder(histogram_folder, general_path, method)
general_filters = config["General"].get("Filters", {})
template_builder = builder._Builder(
histogram_folder, general_path, general_filters, method
)

match_func: Optional[route.MatchFunc] = None
if router is not None:
Expand Down
47 changes: 34 additions & 13 deletions src/cabinetry/templates/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def _variable(


def _filter(
general: Dict[str, Any],
region: Dict[str, Any],
sample: Dict[str, Any],
systematic: Dict[str, Any],
Expand All @@ -148,6 +149,7 @@ def _filter(
the sample-specific filter if both are provided.

Args:
general (Dict[str, Any]): containing general configuration information
region (Dict[str, Any]): containing all region information
sample (Dict[str, Any]): containing all sample information
systematic (Dict[str, Any]): containing all systematic information
Expand All @@ -157,21 +159,32 @@ def _filter(
Returns:
Optional[str]: expression for the filter to be used, or None for no filtering
"""
selection_filter = region.get("Filter", None)
selection_filters = {}

# check for sample-specific overrides
selection_filter_override = sample.get("Filter", None)
if selection_filter_override is not None:
selection_filter = selection_filter_override
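# general options can set default filters
# NOTE(review): the caller passes self.general_filters (the value of the general
# "Filters" setting itself) as `general`, so this lookup of "Filters" presumably
# never finds them (and a list of filters has no .get) - confirm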
for filter in configuration._setting_to_list(general.get("Filters", [])):
selection_filters.update({filter["Name"]: filter["Filter"]})

# regions can append to and override filters set at the general level
for filter in configuration._setting_to_list(region.get("Filters", [])):
selection_filters.update({filter["Name"]: filter["Filter"]})

# samples can append to and override filters
for filter in configuration._setting_to_list(sample.get("Filters", [])):
selection_filters.update({filter["Name"]: filter["Filter"]})

# check whether a systematic is being processed
if template is not None:
# determine whether the template has an override specified
selection_filter_override = utils._check_for_override(
systematic, template, "Filter"
)
if selection_filter_override is not None:
selection_filter = selection_filter_override
# templates can append to and override filters
template_filters = systematic.get(template, {}).get("Filters", [])
for filter in configuration._setting_to_list(template_filters):
selection_filters.update({filter["Name"]: filter["Filter"]})

if selection_filters == {}:
return None

# combine all filters
selection_filter = " & ".join([f"({f})" for f in selection_filters.values()])
return selection_filter


Expand Down Expand Up @@ -261,17 +274,23 @@ class _Builder:
"""Handles the instructions for backends to create histograms."""

def __init__(
self, histogram_folder: pathlib.Path, general_path: str, method: str
self,
histogram_folder: pathlib.Path,
general_path: str,
general_filters: Dict[str, Any],
method: str,
) -> None:
"""Creates an instance, sets histogram folder, path template and method.

Args:
histogram_folder (pathlib.Path): folder to save the histograms to
general_path (str): template for paths to input files for histogram building
general_filters (Dict[str, Any]): general-level "Filters" setting to apply
method (str): backend to use for histogram production
"""
self.histogram_folder = histogram_folder
self.general_path = general_path
self.general_filters = general_filters
self.method = method

def _create_histogram(
Expand Down Expand Up @@ -303,7 +322,9 @@ def _create_histogram(
variable = _variable(region, sample, systematic, template)
bins = _binning(region)
weight = _weight(region, sample, systematic, template)
selection_filter = _filter(region, sample, systematic, template)
selection_filter = _filter(
self.general_filters, region, sample, systematic, template
)

# obtain the histogram
if self.method == "uproot":
Expand Down
12 changes: 9 additions & 3 deletions tests/templates/test_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,26 @@ def test_build(mock_builder, mock_apply):
config = {"General": {"HistogramFolder": "path/", "InputPath": "file.root"}}
method = "uproot"

# no router
# no router or filter
templates.build(config, method=method)
assert mock_builder.call_args_list == [
((pathlib.Path("path/"), "file.root", method), {})
((pathlib.Path("path/"), "file.root", {}, method), {})
]
assert mock_apply.call_count == 1
config_call, func_call = mock_apply.call_args[0]
assert config_call == config
assert func_call._extract_mock_name() == "_Builder()._create_histogram"
assert mock_apply.call_args[1] == {"match_func": None}

# including a router
# including a router and general filter
filter_dict = {"Name": "f", "Filter": "c"}
config["General"].update({"Filters": filter_dict})
mock_router = mock.MagicMock()
templates.build(config, method=method, router=mock_router)
assert mock_builder.call_args == (
(pathlib.Path("path/"), "file.root", filter_dict, method),
{},
)

# verify wrapper was set
assert (
Expand Down
Loading