From 2fd6da8653d8b706b5b9298a2821cbac3524e6d5 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 14:10:52 +0100 Subject: [PATCH 01/11] Implement proper truncation for prior distributions Currently, when sampled startpoints are outside the bounds, their value is set to the upper/lower bounds. This may put too much probability mass on the bounds. With these changes, we properly sample from the respective truncated distributions. Closes #330. --- doc/conf.py | 1 + doc/example/distributions.ipynb | 28 +++-- petab/v1/distributions.py | 181 +++++++++++++++++++++++++++----- petab/v1/priors.py | 46 ++++++-- tests/v1/test_distributions.py | 64 ++++++----- 5 files changed, 248 insertions(+), 72 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 4dbd3009..002a7bf0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -85,6 +85,7 @@ nb_execution_mode = "force" nb_execution_raise_on_error = True nb_execution_show_tb = True +nb_execution_timeout = 90 # max. seconds/cell source_suffix = { ".rst": "restructuredtext", diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index 86235fe1..06c08272 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -23,7 +23,10 @@ }, { "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "is_executing": true + } }, "cell_type": "code", "source": [ @@ -42,7 +45,7 @@ " if ax is None:\n", " fig, ax = plt.subplots()\n", "\n", - " sample = prior.sample(10000)\n", + " sample = prior.sample(20_000)\n", "\n", " # pdf\n", " xmin = min(sample.min(), prior.lb_scaled if prior.bounds is not None else sample.min())\n", @@ -138,11 +141,13 @@ "metadata": {}, "cell_type": "code", "source": [ + "# different, because transformation!=LIN\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LOG10))\n", "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LOG10))\n", "\n", + "# same, because transformation=LIN\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LIN))\n", - "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))\n" + "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))" ], "id": "5ca940bc24312fc6", "outputs": [], @@ -151,15 +156,18 @@ { "metadata": {}, "cell_type": "markdown", - "source": "To prevent the sampled parameters from exceeding the bounds, the sampled parameters are clipped to the bounds. The bounds are defined in the parameter table. Note that the current implementation does not support sampling from a truncated distribution. Instead, the samples are clipped to the bounds. This may introduce unwanted bias, and thus, should only be used with caution (i.e., the bounds should be chosen wide enough):", + "source": "The given distributions are truncated at the bounds defined in the parameter table:", "id": "b1a8b17d765db826" }, { "metadata": {}, "cell_type": "code", "source": [ - "plot(Prior(NORMAL, (0, 1), bounds=(-4, 4))) # negligible clipping-bias at 4 sigma\n", - "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9))) # significant clipping-bias" + "plot(Prior(NORMAL, (0, 1), bounds=(-2, 2)))\n", + "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9)))\n", + "plot(Prior(UNIFORM, (1e-8, 1), bounds=(0.1, 0.9), transformation=LOG10))\n", + "plot(Prior(LAPLACE, (0, 1), bounds=(-0.5, 0.5)))\n", + "plot(Prior(PARAMETER_SCALE_UNIFORM, (-3, 1), bounds=(1e-2, 1), transformation=LOG10))\n" ], "id": "4ac42b1eed759bdd", "outputs": [], @@ -175,9 +183,11 @@ "metadata": {}, "cell_type": "code", "source": [ - "plot(Prior(NORMAL, (10, 1), bounds=(6, 14), transformation=\"log10\"))\n", - "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 1), bounds=(10**6, 10**14), transformation=\"log10\"))\n", - "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))" + "plot(Prior(NORMAL, (10, 1), bounds=(6, 11), transformation=\"log10\"))\n", + "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 1), bounds=(10**9, 10**14), transformation=\"log10\"))\n", + "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))\n", + "plot(Prior(LOG_LAPLACE, (1, 0.5), bounds=(0.5, 8)))\n", + "plot(Prior(LOG_NORMAL, (2, 1), bounds=(0.5, 8)))" ], "id": "581e1ac431860419", "outputs": [], diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 418f5b44..e0e5c279 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -28,15 +28,65 @@ class Distribution(abc.ABC): If a float, the distribution is transformed to its corresponding log distribution with the given base (e.g., Normal -> Log10Normal). If ``False``, no transformation is applied. + :param trunc: The truncation points (lower, upper) of the distribution + or ``None`` if the distribution is not truncated. """ - def __init__(self, log: bool | float = False): + def __init__( + self, *, log: bool | float = False, trunc: tuple[float, float] = None + ): if log is True: log = np.exp(1) + + if trunc == (-np.inf, np.inf): + trunc = None + + if trunc is not None and trunc[0] > trunc[1]: + raise ValueError( + "The lower truncation limit must be smaller " + "than the upper truncation limit." + ) + self._logbase = log + self._trunc = trunc + + self._cd_low = None + self._cd_high = None + self._truncation_normalizer = 1 + + if self._trunc is not None: + try: + # the cumulative density of the transformed distribution at the + # truncation limits + self._cd_low = self._cdf_transformed_untruncated( + self.trunc_low + ) + self._cd_high = self._cdf_transformed_untruncated( + self.trunc_high + ) + # normalization factor for the PDF of the transformed + # distribution to account for truncation + self._truncation_normalizer = 1 / ( + self._cd_high - self._cd_low + ) + except NotImplementedError: + pass + + @property + def trunc_low(self) -> float: + """The lower truncation limit of the transformed distribution.""" + return self._trunc[0] if self._trunc else -np.inf + + @property + def trunc_high(self) -> float: + """The upper truncation limit of the transformed distribution.""" + return self._trunc[1] if self._trunc else np.inf - def _undo_log(self, x: np.ndarray | float) -> np.ndarray | float: - """Undo the log transformation. + def _exp(self, x: np.ndarray | float) -> np.ndarray | float: + """Exponentiate / undo the log transformation according. + + Exponentiate if a log transformation is applied to the distribution. + Otherwise, return the input. :param x: The sample to transform. :return: The transformed sample @@ -45,9 +95,12 @@ def _undo_log(self, x: np.ndarray | float) -> np.ndarray | float: return x return self._logbase**x - def _apply_log(self, x: np.ndarray | float) -> np.ndarray | float: + def _log(self, x: np.ndarray | float) -> np.ndarray | float: """Apply the log transformation. + Compute the log of x with the specified base if a log transformation + is applied to the distribution. Otherwise, return the input. + :param x: The value to transform. :return: The transformed value. """ @@ -61,12 +114,17 @@ def sample(self, shape=None) -> np.ndarray: :param shape: The shape of the sample. :return: A sample from the distribution. """ - sample = self._sample(shape) - return self._undo_log(sample) + sample = ( + self._exp(self._sample(shape)) + if self._trunc is None + else self._inverse_transform_sample(shape) + ) + + return sample @abc.abstractmethod def _sample(self, shape=None) -> np.ndarray: - """Sample from the underlying distribution. + """Sample from the underlying distribution, accounting for truncation. :param shape: The shape of the sample. :return: A sample from the underlying distribution, @@ -85,7 +143,11 @@ def pdf(self, x): chain_rule_factor = ( (1 / (x * np.log(self._logbase))) if self._logbase else 1 ) - return self._pdf(self._apply_log(x)) * chain_rule_factor + return ( + self._pdf(self._log(x)) + * chain_rule_factor + * self._truncation_normalizer + ) @abc.abstractmethod def _pdf(self, x): @@ -104,13 +166,71 @@ def logbase(self) -> bool | float: """ return self._logbase + def cdf(self, x): + """Cumulative distribution function at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + return self._cdf_transformed_untruncated(x) - self._cd_low + + def _cdf_transformed_untruncated(self, x): + """Cumulative distribution function of the transformed, but untruncated + distribution at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + return self._cdf_untransformed_untruncated(self._log(x)) + + def _cdf_untransformed_untruncated(self, x): + """Cumulative distribution function of the underlying + (untransformed, untruncated) distribution at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + raise NotImplementedError + + def _ppf_untransformed_untruncated(self, q): + """Percent point function of the underlying + (untransformed, untruncated) distribution at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + raise NotImplementedError + + def _ppf_transformed_untruncated(self, q): + """Percent point function of the transformed, but untruncated + distribution at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + return self._exp(self._ppf_untransformed_untruncated(q)) + + def _inverse_transform_sample(self, shape): + """Generate an inverse transform sample from the transformed and + truncated distribution. + + :param shape: The shape of the sample. + :return: The sample. + """ + uniform_sample = np.random.uniform( + low=self._cd_low, high=self._cd_high, size=shape + ) + return self._ppf_transformed_untruncated(uniform_sample) + class Normal(Distribution): """A (log-)normal distribution. :param loc: The location parameter of the distribution. :param scale: The scale parameter of the distribution. - :param truncation: The truncation limits of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. The truncation limits + are the truncation limits of the transformed distribution. :param log: If ``True``, the distribution is transformed to a log-normal distribution. If a float, the distribution is transformed to a log-normal distribution with the given base. @@ -124,19 +244,15 @@ def __init__( self, loc: float, scale: float, - truncation: tuple[float, float] | None = None, + trunc: tuple[float, float] | None = None, log: bool | float = False, ): - super().__init__(log=log) self._loc = loc self._scale = scale - self._truncation = truncation - - if truncation is not None: - raise NotImplementedError("Truncation is not yet implemented.") + super().__init__(log=log, trunc=trunc) def __repr__(self): - trunc = f", truncation={self._truncation}" if self._truncation else "" + trunc = f", trunc={self._trunc}" if self._trunc else "" log = f", log={self._logbase}" if self._logbase else "" return f"Normal(loc={self._loc}, scale={self._scale}{trunc}{log})" @@ -146,6 +262,12 @@ def _sample(self, shape=None): def _pdf(self, x): return norm.pdf(x, loc=self._loc, scale=self._scale) + def _cdf_untransformed_untruncated(self, x): + return norm.cdf(x, loc=self._loc, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q): + return norm.ppf(q, loc=self._loc, scale=self._scale) + @property def loc(self): """The location parameter of the underlying distribution.""" @@ -177,9 +299,9 @@ def __init__( *, log: bool | float = False, ): - super().__init__(log=log) self._low = low self._high = high + super().__init__(log=log) def __repr__(self): log = f", log={self._logbase}" if self._logbase else "" @@ -191,13 +313,21 @@ def _sample(self, shape=None): def _pdf(self, x): return uniform.pdf(x, loc=self._low, scale=self._high - self._low) + def _cdf_untransformed_untruncated(self, x): + return uniform.cdf(x, loc=self._low, scale=self._high - self._low) + + def _ppf_untransformed_untruncated(self, q): + return uniform.ppf(q, loc=self._low, scale=self._high - self._low) + class Laplace(Distribution): """A (log-)Laplace distribution. :param loc: The location parameter of the distribution. :param scale: The scale parameter of the distribution. - :param truncation: The truncation limits of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. The truncation limits + are the truncation limits of the transformed distribution. :param log: If ``True``, the distribution is transformed to a log-Laplace distribution. If a float, the distribution is transformed to a log-Laplace distribution with the given base. @@ -211,18 +341,15 @@ def __init__( self, loc: float, scale: float, - truncation: tuple[float, float] | None = None, + trunc: tuple[float, float] | None = None, log: bool | float = False, ): - super().__init__(log=log) self._loc = loc self._scale = scale - self._truncation = truncation - if truncation is not None: - raise NotImplementedError("Truncation is not yet implemented.") + super().__init__(log=log, trunc=trunc) def __repr__(self): - trunc = f", truncation={self._truncation}" if self._truncation else "" + trunc = f", trunc={self._trunc}" if self._trunc else "" log = f", log={self._logbase}" if self._logbase else "" return f"Laplace(loc={self._loc}, scale={self._scale}{trunc}{log})" @@ -232,6 +359,12 @@ def _sample(self, shape=None): def _pdf(self, x): return laplace.pdf(x, loc=self._loc, scale=self._scale) + def _cdf_untransformed_untruncated(self, x): + return laplace.cdf(x, loc=self._loc, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q): + return laplace.ppf(q, loc=self._loc, scale=self._scale) + @property def loc(self): """The location parameter of the underlying distribution.""" diff --git a/petab/v1/priors.py b/petab/v1/priors.py index f0f37f75..8f29bed3 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -40,6 +40,9 @@ __all__ = ["priors_to_measurements"] +# TODO: does anybody really rely on the old behavior? +USE_PROPER_TRUNCATION = True + class Prior: """A PEtab parameter prior. @@ -88,26 +91,49 @@ def __init__( self._bounds = bounds self._transformation = transformation + truncation = bounds if USE_PROPER_TRUNCATION else None + if truncation is not None: + # for uniform, we don't want to implement truncation and just + # adapt the distribution parameters + if type_ == C.PARAMETER_SCALE_UNIFORM: + parameters = ( + max(parameters[0], scale(truncation[0], transformation)), + min(parameters[1], scale(truncation[1], transformation)), + ) + elif type_ == C.UNIFORM: + parameters = ( + max(parameters[0], truncation[0]), + min(parameters[1], truncation[1]), + ) + # create the underlying distribution match type_, transformation: case (C.UNIFORM, _) | (C.PARAMETER_SCALE_UNIFORM, C.LIN): self.distribution = Uniform(*parameters) case (C.NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LIN): - self.distribution = Normal(*parameters) + self.distribution = Normal(*parameters, trunc=truncation) case (C.LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LIN): - self.distribution = Laplace(*parameters) + self.distribution = Laplace(*parameters, trunc=truncation) case (C.PARAMETER_SCALE_UNIFORM, C.LOG): self.distribution = Uniform(*parameters, log=True) case (C.LOG_NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LOG): - self.distribution = Normal(*parameters, log=True) + self.distribution = Normal( + *parameters, log=True, trunc=truncation + ) case (C.LOG_LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LOG): - self.distribution = Laplace(*parameters, log=True) + self.distribution = Laplace( + *parameters, log=True, trunc=truncation + ) case (C.PARAMETER_SCALE_UNIFORM, C.LOG10): self.distribution = Uniform(*parameters, log=10) case (C.PARAMETER_SCALE_NORMAL, C.LOG10): - self.distribution = Normal(*parameters, log=10) + self.distribution = Normal( + *parameters, log=10, trunc=truncation + ) case (C.PARAMETER_SCALE_LAPLACE, C.LOG10): - self.distribution = Laplace(*parameters, log=10) + self.distribution = Laplace( + *parameters, log=10, trunc=truncation + ) case _: raise ValueError( "Unsupported distribution type / transformation: " @@ -149,9 +175,8 @@ def sample(self, shape=None) -> np.ndarray: def _scale_sample(self, sample): """Scale the sample to the parameter space""" - # if self.on_parameter_scale: - # return sample - + # we also need to scale paramterScale* distributions, because + # internally, they are handled as (unscaled) log-distributions return scale(sample, self.transformation) def _clip_to_bounds(self, x): @@ -159,8 +184,7 @@ def _clip_to_bounds(self, x): :param x: The values to clip. Assumed to be on the parameter scale. """ - # TODO: replace this by proper truncation - if self.bounds is None: + if self.bounds is None or USE_PROPER_TRUNCATION: return x return np.maximum( diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py index 9df830fa..1f2ecf59 100644 --- a/tests/v1/test_distributions.py +++ b/tests/v1/test_distributions.py @@ -27,6 +27,11 @@ Uniform(2, 4, log=10), Laplace(1, 2), Laplace(1, 0.5, log=True), + Normal(2, 1, trunc=(1, 2)), + Normal(2, 1, log=True, trunc=(0.5, 8)), + Normal(2, 1, log=10), + Laplace(1, 2, trunc=(1, 2)), + Laplace(1, 0.5, log=True, trunc=(0.5, 8)), ], ) def test_sample_matches_pdf(distribution): @@ -53,34 +58,37 @@ def cdf(x): # Test samples match scipy CDFs reference_pdf = None - if isinstance(distribution, Normal) and distribution.logbase is False: - reference_pdf = norm.pdf(sample, distribution.loc, distribution.scale) - elif isinstance(distribution, Uniform) and distribution.logbase is False: - reference_pdf = uniform.pdf( - sample, distribution._low, distribution._high - distribution._low - ) - elif isinstance(distribution, Laplace) and distribution.logbase is False: - reference_pdf = laplace.pdf( - sample, distribution.loc, distribution.scale - ) - elif isinstance(distribution, Normal) and distribution.logbase == np.exp( - 1 - ): - reference_pdf = lognorm.pdf( - sample, scale=np.exp(distribution.loc), s=distribution.scale - ) - elif isinstance(distribution, Uniform) and distribution.logbase == np.exp( - 1 - ): - reference_pdf = loguniform.pdf( - sample, np.exp(distribution._low), np.exp(distribution._high) - ) - elif isinstance(distribution, Laplace) and distribution.logbase == np.exp( - 1 - ): - reference_pdf = loglaplace.pdf( - sample, c=1 / distribution.scale, scale=np.exp(distribution.loc) - ) + if distribution._trunc is None and distribution.logbase is False: + if isinstance(distribution, Normal): + reference_pdf = norm.pdf( + sample, distribution.loc, distribution.scale + ) + elif isinstance(distribution, Uniform): + reference_pdf = uniform.pdf( + sample, + distribution._low, + distribution._high - distribution._low, + ) + elif isinstance(distribution, Laplace): + reference_pdf = laplace.pdf( + sample, distribution.loc, distribution.scale + ) + + if distribution._trunc is None and distribution.logbase == np.exp(1): + if isinstance(distribution, Normal): + reference_pdf = lognorm.pdf( + sample, scale=np.exp(distribution.loc), s=distribution.scale + ) + elif isinstance(distribution, Uniform): + reference_pdf = loguniform.pdf( + sample, np.exp(distribution._low), np.exp(distribution._high) + ) + elif isinstance(distribution, Laplace): + reference_pdf = loglaplace.pdf( + sample, + c=1 / distribution.scale, + scale=np.exp(distribution.loc), + ) if reference_pdf is not None: assert_allclose( distribution.pdf(sample), reference_pdf, rtol=1e-10, atol=1e-14 From 65ef80fe5e07797556b436e669613f733b5c26d1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 18:56:44 +0100 Subject: [PATCH 02/11] optional --- petab/v1/priors.py | 25 +++++++++++++++++++------ petab/v1/sampling.py | 10 ++++++++-- tests/v1/test_priors.py | 4 +++- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 8f29bed3..71dfa98a 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -40,9 +40,6 @@ __all__ = ["priors_to_measurements"] -# TODO: does anybody really rely on the old behavior? -USE_PROPER_TRUNCATION = True - class Prior: """A PEtab parameter prior. @@ -61,6 +58,15 @@ class Prior: on the `parameter_scale` scale). :param bounds: The untransformed bounds of the sample (lower, upper). :param transformation: The transformation of the distribution. + :param bounds_truncate: Whether the generated prior will be truncated + at the bounds. + If ``True``, the probability density will be rescaled + accordingly and the sample is generated from the truncated + distribution. + If ``False``, the probability density will not account for the + bounds, but any parameter samples outside the bounds will be set to + the value of the closest bound. In this case, the PDF might not match + the sample. """ def __init__( @@ -69,6 +75,7 @@ def __init__( parameters: tuple, bounds: tuple = None, transformation: str = C.LIN, + bounds_truncate: bool = True, ): if transformation not in C.PARAMETER_SCALES: raise ValueError( @@ -90,8 +97,9 @@ def __init__( self._parameters = parameters self._bounds = bounds self._transformation = transformation + self._bounds_truncate = bounds_truncate - truncation = bounds if USE_PROPER_TRUNCATION else None + truncation = bounds if bounds_truncate else None if truncation is not None: # for uniform, we don't want to implement truncation and just # adapt the distribution parameters @@ -184,7 +192,7 @@ def _clip_to_bounds(self, x): :param x: The values to clip. Assumed to be on the parameter scale. """ - if self.bounds is None or USE_PROPER_TRUNCATION: + if self.bounds is None or self._bounds_truncate: return x return np.maximum( @@ -235,12 +243,16 @@ def neglogprior(self, x): @staticmethod def from_par_dict( - d, type_=Literal["initialization", "objective"] + d, + type_=Literal["initialization", "objective"], + bounds_truncate: bool = True, ) -> Prior: """Create a distribution from a row of the parameter table. :param d: A dictionary representing a row of the parameter table. :param type_: The type of the distribution. + :param bounds_truncate: Whether the generated prior will be truncated + at the bounds. :return: A distribution object. """ dist_type = d.get(f"{type_}PriorType", C.PARAMETER_SCALE_UNIFORM) @@ -268,6 +280,7 @@ def from_par_dict( parameters=params, bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]), transformation=pscale, + bounds_truncate=bounds_truncate, ) diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index a046879f..3849297b 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -28,7 +28,11 @@ def sample_from_prior( # unpack info p_type, p_params, scaling, bounds = prior prior = Prior( - p_type, tuple(p_params), bounds=tuple(bounds), transformation=scaling + p_type, + tuple(p_params), + bounds=tuple(bounds), + transformation=scaling, + bounds_truncate=True, ) return prior.sample(shape=(n_starts,)) @@ -74,7 +78,9 @@ def sample_parameter_startpoints( # get types and parameters of priors from dataframe return np.array( [ - Prior.from_par_dict(row, type_="initialization").sample(n_starts) + Prior.from_par_dict( + row, type_="initialization", bounds_truncate=True + ).sample(n_starts) for row in par_to_estimate.to_dict("records") ] ).T diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py index d98879e3..70d3df90 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -156,7 +156,9 @@ def apply_parameter_values(row): ] priors = [ Prior.from_par_dict( - petab_problem_priors.parameter_df.loc[par_id], type_="objective" + petab_problem_priors.parameter_df.loc[par_id], + type_="objective", + bounds_truncate=False, ) for par_id in parameter_ids ] From c01f2fb97b54dcc9f63c0f2c9add7e13fa64eb51 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 21:40:55 +0100 Subject: [PATCH 03/11] fix cdf normalization --- petab/v1/distributions.py | 8 ++++++-- tests/v1/test_distributions.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index e0e5c279..1f5fa24d 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -64,7 +64,7 @@ def __init__( self._cd_high = self._cdf_transformed_untruncated( self.trunc_high ) - # normalization factor for the PDF of the transformed + # normalization factor for the PDF/CDF of the transformed # distribution to account for truncation self._truncation_normalizer = 1 / ( self._cd_high - self._cd_low @@ -172,7 +172,11 @@ def cdf(self, x): :param x: The value at which to evaluate the CDF. :return: The value of the CDF at ``x``. """ - return self._cdf_transformed_untruncated(x) - self._cd_low + if self._trunc is None: + return self._cdf_transformed_untruncated(x) + return ( + self._cdf_transformed_untruncated(x) - self._cd_low + ) * self._truncation_normalizer def _cdf_transformed_untruncated(self, x): """Cumulative distribution function of the transformed, but untruncated diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py index 1f2ecf59..e06d9edc 100644 --- a/tests/v1/test_distributions.py +++ b/tests/v1/test_distributions.py @@ -1,3 +1,5 @@ +import sys + import numpy as np import pytest from numpy.testing import assert_allclose @@ -56,6 +58,21 @@ def cdf(x): assert p > 0.05, (p, distribution) + # check min/max of CDF at the bounds + assert np.isclose( + distribution.cdf( + distribution.trunc_low + if not distribution.logbase + else max(sys.float_info.min, distribution.trunc_low) + ), + 0, + atol=1e-16, + rtol=0, + ) + assert np.isclose( + distribution.cdf(distribution.trunc_high), 1, atol=1e-14, rtol=0 + ) + # Test samples match scipy CDFs reference_pdf = None if distribution._trunc is None and distribution.logbase is False: From d3b4e7ff64a8e9cf63e43f2112ac3409a9846d5d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 21:57:12 +0100 Subject: [PATCH 04/11] review --- petab/v1/distributions.py | 62 +++++++++++++++++++-------------------- petab/v1/priors.py | 20 ++++++------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 1f5fa24d..8240b180 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -41,7 +41,7 @@ def __init__( if trunc == (-np.inf, np.inf): trunc = None - if trunc is not None and trunc[0] > trunc[1]: + if trunc is not None and trunc[0] >= trunc[1]: raise ValueError( "The lower truncation limit must be smaller " "than the upper truncation limit." @@ -83,7 +83,7 @@ def trunc_high(self) -> float: return self._trunc[1] if self._trunc else np.inf def _exp(self, x: np.ndarray | float) -> np.ndarray | float: - """Exponentiate / undo the log transformation according. + """Exponentiate / undo the log transformation if applicable. Exponentiate if a log transformation is applied to the distribution. Otherwise, return the input. @@ -96,7 +96,7 @@ def _exp(self, x: np.ndarray | float) -> np.ndarray | float: return self._logbase**x def _log(self, x: np.ndarray | float) -> np.ndarray | float: - """Apply the log transformation. + """Apply the log transformation if enabled. Compute the log of x with the specified base if a log transformation is applied to the distribution. Otherwise, return the input. @@ -108,7 +108,7 @@ def _log(self, x: np.ndarray | float) -> np.ndarray | float: return x return np.log(x) / np.log(self._logbase) - def sample(self, shape=None) -> np.ndarray: + def sample(self, shape=None) -> np.ndarray | float: """Sample from the distribution. :param shape: The shape of the sample. @@ -123,16 +123,16 @@ def sample(self, shape=None) -> np.ndarray: return sample @abc.abstractmethod - def _sample(self, shape=None) -> np.ndarray: - """Sample from the underlying distribution, accounting for truncation. + def _sample(self, shape=None) -> np.ndarray | float: + """Sample from the underlying distribution. :param shape: The shape of the sample. :return: A sample from the underlying distribution, - before applying, e.g., the log transformation. + before applying, e.g., the log transformation or truncation. """ ... - def pdf(self, x): + def pdf(self, x) -> np.ndarray | float: """Probability density function at x. :param x: The value at which to evaluate the PDF. @@ -150,7 +150,7 @@ def pdf(self, x): ) @abc.abstractmethod - def _pdf(self, x): + def _pdf(self, x) -> np.ndarray | float: """Probability density function of the underlying distribution at x. :param x: The value at which to evaluate the PDF. @@ -166,7 +166,7 @@ def logbase(self) -> bool | float: """ return self._logbase - def cdf(self, x): + def cdf(self, x) -> np.ndarray | float: """Cumulative distribution function at x. :param x: The value at which to evaluate the CDF. @@ -178,7 +178,7 @@ def cdf(self, x): self._cdf_transformed_untruncated(x) - self._cd_low ) * self._truncation_normalizer - def _cdf_transformed_untruncated(self, x): + def _cdf_transformed_untruncated(self, x) -> np.ndarray | float: """Cumulative distribution function of the transformed, but untruncated distribution at x. @@ -187,7 +187,7 @@ def _cdf_transformed_untruncated(self, x): """ return self._cdf_untransformed_untruncated(self._log(x)) - def _cdf_untransformed_untruncated(self, x): + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: """Cumulative distribution function of the underlying (untransformed, untruncated) distribution at x. @@ -196,7 +196,7 @@ def _cdf_untransformed_untruncated(self, x): """ raise NotImplementedError - def _ppf_untransformed_untruncated(self, q): + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: """Percent point function of the underlying (untransformed, untruncated) distribution at q. @@ -205,7 +205,7 @@ def _ppf_untransformed_untruncated(self, q): """ raise NotImplementedError - def _ppf_transformed_untruncated(self, q): + def _ppf_transformed_untruncated(self, q) -> np.ndarray | float: """Percent point function of the transformed, but untruncated distribution at q. @@ -214,7 +214,7 @@ def _ppf_transformed_untruncated(self, q): """ return self._exp(self._ppf_untransformed_untruncated(q)) - def _inverse_transform_sample(self, shape): + def _inverse_transform_sample(self, shape) -> np.ndarray | float: """Generate an inverse transform sample from the transformed and truncated distribution. @@ -260,25 +260,25 @@ def __repr__(self): log = f", log={self._logbase}" if self._logbase else "" return f"Normal(loc={self._loc}, scale={self._scale}{trunc}{log})" - def _sample(self, shape=None): + def _sample(self, shape=None) -> np.ndarray | float: return np.random.normal(loc=self._loc, scale=self._scale, size=shape) - def _pdf(self, x): + def _pdf(self, x) -> np.ndarray | float: return norm.pdf(x, loc=self._loc, scale=self._scale) - def _cdf_untransformed_untruncated(self, x): + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: return norm.cdf(x, loc=self._loc, scale=self._scale) - def _ppf_untransformed_untruncated(self, q): + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return norm.ppf(q, loc=self._loc, scale=self._scale) @property - def loc(self): + def loc(self) -> float: """The location parameter of the underlying distribution.""" return self._loc @property - def scale(self): + def scale(self) -> float: """The scale parameter of the underlying distribution.""" return self._scale @@ -311,16 +311,16 @@ def __repr__(self): log = f", log={self._logbase}" if self._logbase else "" return f"Uniform(low={self._low}, high={self._high}{log})" - def _sample(self, shape=None): + def _sample(self, shape=None) -> np.ndarray | float: return np.random.uniform(low=self._low, high=self._high, size=shape) - def _pdf(self, x): + def _pdf(self, x) -> np.ndarray | float: return uniform.pdf(x, loc=self._low, scale=self._high - self._low) - def _cdf_untransformed_untruncated(self, x): + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: return uniform.cdf(x, loc=self._low, scale=self._high - self._low) - def _ppf_untransformed_untruncated(self, q): + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return uniform.ppf(q, loc=self._low, scale=self._high - self._low) @@ -357,24 +357,24 @@ def __repr__(self): log = f", log={self._logbase}" if self._logbase else "" return f"Laplace(loc={self._loc}, scale={self._scale}{trunc}{log})" - def _sample(self, shape=None): + def _sample(self, shape=None) -> np.ndarray | float: return np.random.laplace(loc=self._loc, scale=self._scale, size=shape) - def _pdf(self, x): + def _pdf(self, x) -> np.ndarray | float: return laplace.pdf(x, loc=self._loc, scale=self._scale) - def _cdf_untransformed_untruncated(self, x): + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: return laplace.cdf(x, loc=self._loc, scale=self._scale) - def _ppf_untransformed_untruncated(self, q): + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return laplace.ppf(q, loc=self._loc, scale=self._scale) @property - def loc(self): + def loc(self) -> float: """The location parameter of the underlying distribution.""" return self._loc @property - def scale(self): + def scale(self) -> float: """The scale parameter of the underlying distribution.""" return self._scale diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 71dfa98a..a6031521 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -157,19 +157,19 @@ def __repr__(self): ) @property - def type(self): + def type(self) -> str: return self._type @property - def parameters(self): + def parameters(self) -> tuple: return self._parameters @property - def bounds(self): + def bounds(self) -> tuple[float, float] | None: return self._bounds @property - def transformation(self): + def transformation(self) -> str: return self._transformation def sample(self, shape=None) -> np.ndarray: @@ -183,11 +183,11 @@ def sample(self, shape=None) -> np.ndarray: def _scale_sample(self, sample): """Scale the sample to the parameter space""" - # we also need to scale paramterScale* distributions, because + # we also need to scale parameterScale* distributions, because # internally, they are handled as (unscaled) log-distributions return scale(sample, self.transformation) - def _clip_to_bounds(self, x): + def _clip_to_bounds(self, x) -> np.ndarray | float: """Clip `x` values to bounds. :param x: The values to clip. Assumed to be on the parameter scale. @@ -201,16 +201,16 @@ def _clip_to_bounds(self, x): ) @property - def lb_scaled(self): + def lb_scaled(self) -> float: """The lower bound on the parameter scale.""" return scale(self.bounds[0], self.transformation) @property - def ub_scaled(self): + def ub_scaled(self) -> float: """The upper bound on the parameter scale.""" return scale(self.bounds[1], self.transformation) - def pdf(self, x): + def pdf(self, x) -> np.ndarray | float: """Probability density function at x. :param x: The value at which to evaluate the PDF. @@ -232,7 +232,7 @@ def pdf(self, x): return self.distribution.pdf(x) * coeff - def neglogprior(self, x): + def neglogprior(self, x) -> np.ndarray | float: """Negative log-prior at x. :param x: The value at which to evaluate the negative log-prior. From 1425d9c5c23df5799a48a42080d364bd2a84258e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 23:36:18 +0100 Subject: [PATCH 05/11] fix cdf/pdf outside bounds / <0 --- doc/example/distributions.ipynb | 8 ++--- petab/v1/distributions.py | 56 ++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index 06c08272..d083d2a7 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -23,10 +23,7 @@ }, { "metadata": { - "collapsed": true, - "jupyter": { - "is_executing": true - } + "collapsed": true }, "cell_type": "code", "source": [ @@ -50,6 +47,9 @@ " # pdf\n", " xmin = min(sample.min(), prior.lb_scaled if prior.bounds is not None else sample.min())\n", " xmax = max(sample.max(), prior.ub_scaled if prior.bounds is not None else sample.max())\n", + " padding = 0.1 * (xmax - xmin)\n", + " xmin -= padding\n", + " xmax += padding\n", " x = np.linspace(xmin, xmax, 500)\n", " y = prior.pdf(x)\n", " ax.plot(x, y, color='red', label='pdf')\n", diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 8240b180..36641650 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -138,19 +138,17 @@ def pdf(self, x) -> np.ndarray | float: :param x: The value at which to evaluate the PDF. :return: The value of the PDF at ``x``. """ - # handle the log transformation; see also: - # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar - chain_rule_factor = ( - (1 / (x * np.log(self._logbase))) if self._logbase else 1 - ) - return ( - self._pdf(self._log(x)) - * chain_rule_factor - * self._truncation_normalizer + if self._trunc is None: + return self._pdf_transformed_untruncated(x) + + return np.where( + (x >= self.trunc_low) & (x <= self.trunc_high), + self._pdf_transformed_untruncated(x) * self._truncation_normalizer, + 0, ) @abc.abstractmethod - def _pdf(self, x) -> np.ndarray | float: + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: """Probability density function of the underlying distribution at x. :param x: The value at which to evaluate the PDF. @@ -158,6 +156,30 @@ def _pdf(self, x) -> np.ndarray | float: """ ... + def _pdf_transformed_untruncated(self, x) -> np.ndarray | float: + """Probability density function of the transformed, but untruncated + distribution at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF at ``x``. + """ + if self.logbase is False: + return self._pdf_untransformed_untruncated(x) + + # handle the log transformation; see also: + # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar + chain_rule_factor = ( + (1 / (x * np.log(self._logbase))) if self._logbase else 1 + ) + + with np.errstate(invalid="ignore"): + return np.where( + x > 0, + self._pdf_untransformed_untruncated(self._log(x)) + * chain_rule_factor, + 0, + ) + @property def logbase(self) -> bool | float: """The base of the log transformation. @@ -185,7 +207,13 @@ def _cdf_transformed_untruncated(self, x) -> np.ndarray | float: :param x: The value at which to evaluate the CDF. :return: The value of the CDF at ``x``. """ - return self._cdf_untransformed_untruncated(self._log(x)) + if not self.logbase: + return self._cdf_untransformed_untruncated(x) + + with np.errstate(invalid="ignore"): + return np.where( + x < 0, 0, self._cdf_untransformed_untruncated(self._log(x)) + ) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: """Cumulative distribution function of the underlying @@ -263,7 +291,7 @@ def __repr__(self): def _sample(self, shape=None) -> np.ndarray | float: return np.random.normal(loc=self._loc, scale=self._scale, size=shape) - def _pdf(self, x) -> np.ndarray | float: + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: return norm.pdf(x, loc=self._loc, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: @@ -314,7 +342,7 @@ def __repr__(self): def _sample(self, shape=None) -> np.ndarray | float: return np.random.uniform(low=self._low, high=self._high, size=shape) - def _pdf(self, x) -> np.ndarray | float: + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: return uniform.pdf(x, loc=self._low, scale=self._high - self._low) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: @@ -360,7 +388,7 @@ def __repr__(self): def _sample(self, shape=None) -> np.ndarray | float: return np.random.laplace(loc=self._loc, scale=self._scale, size=shape) - def _pdf(self, x) -> np.ndarray | float: + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: return laplace.pdf(x, loc=self._loc, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: From 155853f0702f925ac5128a40214fdf2d460b8ecf Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 12 Dec 2024 00:09:29 +0100 Subject: [PATCH 06/11] Always sample correctly, but optionally use unscaled pdf for neglogprior --- petab/v1/priors.py | 50 ++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/petab/v1/priors.py b/petab/v1/priors.py index a6031521..8a2cf84e 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -63,10 +63,9 @@ class Prior: If ``True``, the probability density will be rescaled accordingly and the sample is generated from the truncated distribution. - If ``False``, the probability density will not account for the - bounds, but any parameter samples outside the bounds will be set to - the value of the closest bound. In this case, the PDF might not match - the sample. + If ``False``, the probability density will not be rescaled + accordingly, but the sample will be generated from the truncated + distribution. """ def __init__( @@ -99,7 +98,7 @@ def __init__( self._transformation = transformation self._bounds_truncate = bounds_truncate - truncation = bounds if bounds_truncate else None + truncation = bounds if truncation is not None: # for uniform, we don't want to implement truncation and just # adapt the distribution parameters @@ -179,7 +178,7 @@ def sample(self, shape=None) -> np.ndarray: :return: A sample from the distribution. """ raw_sample = self.distribution.sample(shape) - return self._clip_to_bounds(self._scale_sample(raw_sample)) + return self._scale_sample(raw_sample) def _scale_sample(self, sample): """Scale the sample to the parameter space""" @@ -187,19 +186,6 @@ def _scale_sample(self, sample): # internally, they are handled as (unscaled) log-distributions return scale(sample, self.transformation) - def _clip_to_bounds(self, x) -> np.ndarray | float: - """Clip `x` values to bounds. - - :param x: The values to clip. Assumed to be on the parameter scale. - """ - if self.bounds is None or self._bounds_truncate: - return x - - return np.maximum( - np.minimum(self.ub_scaled, x), - self.lb_scaled, - ) - @property def lb_scaled(self) -> float: """The lower bound on the parameter scale.""" @@ -215,8 +201,8 @@ def pdf(self, x) -> np.ndarray | float: :param x: The value at which to evaluate the PDF. ``x`` is assumed to be on the parameter scale. - :return: The value of the PDF at ``x``. Note that the PDF does - currently not account for the clipping at the bounds. + :return: The value of the PDF at ``x``. ``x`` is assumed to be on the + parameter scale. """ x = unscale(x, self.transformation) @@ -239,7 +225,27 @@ def neglogprior(self, x) -> np.ndarray | float: ``x`` is assumed to be on the parameter scale. :return: The negative log-prior at ``x``. """ - return -np.log(self.pdf(x)) + # FIXME: the prior is always defined on linear scale + if self._bounds_truncate: + # the truncation is handled by the distribution + return -np.log(self.pdf(x)) + + # we want to evaluate the prior on the untruncated distribution + x = unscale(x, self.transformation) + + # scale the PDF to the parameter scale + if self.transformation == C.LIN: + coeff = 1 + elif self.transformation == C.LOG10: + coeff = x * np.log(10) + elif self.transformation == C.LOG: + coeff = x + else: + raise ValueError(f"Unknown transformation: {self.transformation}") + + return -np.log( + self.distribution._pdf_transformed_untruncated(x) * coeff + ) @staticmethod def from_par_dict( From 2484a7f8d8038c7e260bb2c6e547daa2c50591d1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 12 Dec 2024 13:26:08 +0100 Subject: [PATCH 07/11] prior always on linear --- doc/example/distributions.ipynb | 71 +++++++++++++++------ petab/v1/C.py | 7 +++ petab/v1/distributions.py | 21 +++++-- petab/v1/priors.py | 89 +++++++++++++++++---------- petab/v1/sampling.py | 2 +- tests/v1/test_priors.py | 106 ++++++++++++++++++++++++++------ 6 files changed, 219 insertions(+), 77 deletions(-) diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index d083d2a7..fd251213 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -33,42 +33,73 @@ "\n", "from petab.v1.C import *\n", "from petab.v1.priors import Prior\n", + "from petab.v1.parameters import scale, unscale\n", + "\n", "\n", "sns.set_style(None)\n", "\n", "\n", - "def plot(prior: Prior, ax=None):\n", + "def plot(prior: Prior):\n", " \"\"\"Visualize a distribution.\"\"\"\n", + " fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))\n", + " sample = prior.sample(20_000, x_scaled=True)\n", + "\n", + " fig.suptitle(str(prior))\n", + "\n", + " plot_single(prior, ax=ax1, sample=sample, scaled=False)\n", + " plot_single(prior, ax=ax2, sample=sample, scaled=True)\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "def plot_single(prior: Prior, scaled: bool = False, ax=None, sample: np.array = None):\n", + " fig = None\n", " if ax is None:\n", " fig, ax = plt.subplots()\n", "\n", - " sample = prior.sample(20_000)\n", + " if sample is None:\n", + " sample = prior.sample(20_000)\n", + "\n", + " # assuming scaled sample\n", + " if not scaled:\n", + " sample = unscale(sample, prior.transformation)\n", + " bounds = prior.bounds\n", + " else:\n", + " bounds = (prior.lb_scaled, prior.ub_scaled) if prior.bounds is not None else None\n", "\n", - " # pdf\n", - " xmin = min(sample.min(), prior.lb_scaled if prior.bounds is not None else sample.min())\n", - " xmax = max(sample.max(), prior.ub_scaled if prior.bounds is not None else sample.max())\n", + " # plot pdf\n", + " xmin = min(sample.min(), bounds[0] if prior.bounds is not None else sample.min())\n", + " xmax = max(sample.max(), bounds[1] if prior.bounds is not None else sample.max())\n", " padding = 0.1 * (xmax - xmin)\n", " xmin -= padding\n", " xmax += padding\n", " x = np.linspace(xmin, xmax, 500)\n", - " y = prior.pdf(x)\n", + " y = prior.pdf(x, x_scaled=scaled, rescale=scaled)\n", " ax.plot(x, y, color='red', label='pdf')\n", "\n", " sns.histplot(sample, stat='density', ax=ax, label=\"sample\")\n", "\n", - " # bounds\n", + " # plot bounds\n", " if prior.bounds is not None:\n", - " for bound in (prior.lb_scaled, prior.ub_scaled):\n", + " for bound in bounds:\n", " if bound is not None and np.isfinite(bound):\n", " ax.axvline(bound, color='black', linestyle='--', label='bound')\n", "\n", - " ax.set_title(str(prior))\n", - " ax.set_xlabel('Parameter value on the parameter scale')\n", + " if fig is not None:\n", + " ax.set_title(str(prior))\n", + "\n", + " if scaled:\n", + " ax.set_xlabel(f'Parameter value on parameter scale ({prior.transformation})')\n", + " ax.set_ylabel(\"Rescaled density\")\n", + " else:\n", + " ax.set_xlabel('Parameter value')\n", + "\n", " ax.grid(False)\n", " handles, labels = ax.get_legend_handles_labels()\n", " unique_labels = dict(zip(labels, handles))\n", " ax.legend(unique_labels.values(), unique_labels.keys())\n", - " plt.show()" + "\n", + " if ax is None:\n", + " plt.show()\n" ], "id": "initial_id", "outputs": [], @@ -84,11 +115,11 @@ "metadata": {}, "cell_type": "code", "source": [ - "plot(Prior(UNIFORM, (0, 1)))\n", - "plot(Prior(NORMAL, (0, 1)))\n", - "plot(Prior(LAPLACE, (0, 1)))\n", - "plot(Prior(LOG_NORMAL, (0, 1)))\n", - "plot(Prior(LOG_LAPLACE, (1, 0.5)))" + "plot_single(Prior(UNIFORM, (0, 1)))\n", + "plot_single(Prior(NORMAL, (0, 1)))\n", + "plot_single(Prior(LAPLACE, (0, 1)))\n", + "plot_single(Prior(LOG_NORMAL, (0, 1)))\n", + "plot_single(Prior(LOG_LAPLACE, (1, 0.5)))" ], "id": "4f09e50a3db06d9f", "outputs": [], @@ -97,7 +128,7 @@ { "metadata": {}, "cell_type": "markdown", - "source": "If a parameter scale is specified (`parameterScale=lin|log|log10` not a `parameterScale*`-type distribution), the sample is transformed accordingly (but not the distribution parameters):\n", + "source": "If a parameter scale is specified (`parameterScale=lin|log|log10`) and the chosen distribution is not a `parameterScale*`-type distribution, then the distribution parameters are taken as is, i.e., the `parameterScale` is not applied to the distribution parameters. In the context of PEtab prior distributions, `parameterScale` will only be used for the start point sampling for optimization, where the sample will be transformed accordingly. This is demonstrated below. The left plot always shows the prior distribution for unscaled parameter values, and the right plot shows the prior distribution for scaled parameter values. Note that in the objective function, the prior is always on the unscaled parameters.\n", "id": "dab4b2d1e0f312d8" }, { @@ -134,7 +165,7 @@ { "metadata": {}, "cell_type": "markdown", - "source": "Prior distributions can also be defined on the parameter scale by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, 1) the distribution parameter are interpreted on the transformed parameter scale, and 2) a sample from the given distribution is used directly, without applying any transformation according to `parameterScale` (this implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`):", + "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameter are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`.", "id": "263c9fd31156a4d5" }, { @@ -167,7 +198,7 @@ "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9)))\n", "plot(Prior(UNIFORM, (1e-8, 1), bounds=(0.1, 0.9), transformation=LOG10))\n", "plot(Prior(LAPLACE, (0, 1), bounds=(-0.5, 0.5)))\n", - "plot(Prior(PARAMETER_SCALE_UNIFORM, (-3, 1), bounds=(1e-2, 1), transformation=LOG10))\n" + "plot(Prior(PARAMETER_SCALE_UNIFORM, (-3, 1), bounds=(1e-2, 1), transformation=LOG10))" ], "id": "4ac42b1eed759bdd", "outputs": [], @@ -184,7 +215,7 @@ "cell_type": "code", "source": [ "plot(Prior(NORMAL, (10, 1), bounds=(6, 11), transformation=\"log10\"))\n", - "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 1), bounds=(10**9, 10**14), transformation=\"log10\"))\n", + "plot(Prior(PARAMETER_SCALE_NORMAL, (2, 1), bounds=(10**0, 10**3), transformation=\"log10\"))\n", "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))\n", "plot(Prior(LOG_LAPLACE, (1, 0.5), bounds=(0.5, 8)))\n", "plot(Prior(LOG_NORMAL, (2, 1), bounds=(0.5, 8)))" diff --git a/petab/v1/C.py b/petab/v1/C.py index be044a5c..c7e7cbdd 100644 --- a/petab/v1/C.py +++ b/petab/v1/C.py @@ -207,6 +207,13 @@ PARAMETER_SCALE_LAPLACE, ] +#: parameterScale*-type prior distributions +PARAMETER_SCALE_PRIOR_TYPES = [ + PARAMETER_SCALE_UNIFORM, + PARAMETER_SCALE_NORMAL, + PARAMETER_SCALE_LAPLACE, +] + #: Supported noise distributions NOISE_MODELS = [NORMAL, LAPLACE] diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 36641650..0bac259a 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -168,11 +168,11 @@ def _pdf_transformed_untruncated(self, x) -> np.ndarray | float: # handle the log transformation; see also: # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar - chain_rule_factor = ( - (1 / (x * np.log(self._logbase))) if self._logbase else 1 - ) + with np.errstate(invalid="ignore", divide="ignore"): + chain_rule_factor = ( + (1 / (x * np.log(self._logbase))) if self._logbase else 1 + ) - with np.errstate(invalid="ignore"): return np.where( x > 0, self._pdf_untransformed_untruncated(self._log(x)) @@ -242,6 +242,19 @@ def _ppf_transformed_untruncated(self, q) -> np.ndarray | float: """ return self._exp(self._ppf_untransformed_untruncated(q)) + def ppf(self, q) -> np.ndarray | float: + """Percent point function at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + if self._trunc is None: + return self._ppf_transformed_untruncated(q) + + # Adjust quantiles to account for truncation + adjusted_q = self._cd_low + q * (self._cd_high - self._cd_low) + return self._ppf_transformed_untruncated(adjusted_q) + def _inverse_transform_sample(self, shape) -> np.ndarray | float: """Generate an inverse transform sample from the transformed and truncated distribution. diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 8a2cf84e..9ec811fc 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -161,24 +161,31 @@ def type(self) -> str: @property def parameters(self) -> tuple: + """The parameters of the distribution.""" return self._parameters @property def bounds(self) -> tuple[float, float] | None: + """The non-scaled bounds of the distribution.""" return self._bounds @property def transformation(self) -> str: + """The `parameterScale`.""" return self._transformation - def sample(self, shape=None) -> np.ndarray: + def sample(self, shape=None, x_scaled=False) -> np.ndarray | float: """Sample from the distribution. :param shape: The shape of the sample. + :param x_scaled: Whether the sample should be on the parameter scale. :return: A sample from the distribution. """ raw_sample = self.distribution.sample(shape) - return self._scale_sample(raw_sample) + if x_scaled: + return self._scale_sample(raw_sample) + else: + return raw_sample def _scale_sample(self, sample): """Scale the sample to the parameter space""" @@ -196,14 +203,8 @@ def ub_scaled(self) -> float: """The upper bound on the parameter scale.""" return scale(self.bounds[1], self.transformation) - def pdf(self, x) -> np.ndarray | float: - """Probability density function at x. - - :param x: The value at which to evaluate the PDF. - ``x`` is assumed to be on the parameter scale. - :return: The value of the PDF at ``x``. ``x`` is assumed to be on the - parameter scale. - """ + def _chain_rule_coeff(self, x) -> np.ndarray | float: + """The chain rule coefficient for the transformation at x.""" x = unscale(x, self.transformation) # scale the PDF to the parameter scale @@ -216,36 +217,50 @@ def pdf(self, x) -> np.ndarray | float: else: raise ValueError(f"Unknown transformation: {self.transformation}") - return self.distribution.pdf(x) * coeff + return coeff + + def pdf( + self, x, x_scaled: bool = False, rescale=False + ) -> np.ndarray | float: + """Probability density function at x. + + This accounts for truncation, independent of the `bounds_truncate` + parameter. - def neglogprior(self, x) -> np.ndarray | float: + :param x: The value at which to evaluate the PDF. + ``x`` is assumed to be on the parameter scale. + :param x_scaled: Whether ``x`` is on the parameter scale. + :param rescale: Whether to rescale the PDF to integrate to 1 on the + parameter scale. Only used if ``x_scaled`` is ``True``. + :return: The value of the PDF at ``x``. + """ + if x_scaled: + coeff = self._chain_rule_coeff(x) if rescale else 1 + x = unscale(x, self.transformation) + return self.distribution.pdf(x) * coeff + + return self.distribution.pdf(x) + + def neglogprior( + self, x: np.array | float, x_scaled: bool = False + ) -> np.ndarray | float: """Negative log-prior at x. :param x: The value at which to evaluate the negative log-prior. - ``x`` is assumed to be on the parameter scale. + :param x_scaled: Whether ``x`` is on the parameter scale. + Note that the prior is always evaluated on the non-scaled + parameters. :return: The negative log-prior at ``x``. """ - # FIXME: the prior is always defined on linear scale if self._bounds_truncate: # the truncation is handled by the distribution - return -np.log(self.pdf(x)) + # the prior is always evaluated on the non-scaled parameters + return -np.log(self.pdf(x, x_scaled=x_scaled, rescale=False)) # we want to evaluate the prior on the untruncated distribution - x = unscale(x, self.transformation) - - # scale the PDF to the parameter scale - if self.transformation == C.LIN: - coeff = 1 - elif self.transformation == C.LOG10: - coeff = x * np.log(10) - elif self.transformation == C.LOG: - coeff = x - else: - raise ValueError(f"Unknown transformation: {self.transformation}") - - return -np.log( - self.distribution._pdf_transformed_untruncated(x) * coeff - ) + if x_scaled: + x = unscale(x, self.transformation) + return -np.log(self.distribution._pdf_transformed_untruncated(x)) @staticmethod def from_par_dict( @@ -339,6 +354,7 @@ def priors_to_measurements(problem: Problem): return new_problem def scaled_observable_formula(parameter_id, parameter_scale): + # The location parameter of the prior if parameter_scale == LIN: return parameter_id if parameter_scale == LOG: @@ -367,6 +383,12 @@ def scaled_observable_formula(parameter_id, parameter_scale): # offset raise NotImplementedError("Uniform priors are not supported.") + if prior_type not in (C.NORMAL, C.LAPLACE): + # we can't (easily) handle parameterScale* priors or log*-priors + raise NotImplementedError( + f"Objective prior type {prior_type} is not implemented." + ) + parameter_id = row.name prior_parameters = tuple( map( @@ -391,7 +413,9 @@ def scaled_observable_formula(parameter_id, parameter_scale): OBSERVABLE_ID: new_obs_id, OBSERVABLE_FORMULA: scaled_observable_formula( parameter_id, - parameter_scale if "parameterScale" in prior_type else LIN, + parameter_scale + if prior_type in C.PARAMETER_SCALE_PRIOR_TYPES + else LIN, ), NOISE_FORMULA: f"noiseParameter1_{new_obs_id}", } @@ -400,12 +424,13 @@ def scaled_observable_formula(parameter_id, parameter_scale): elif OBSERVABLE_TRANSFORMATION in new_problem.observable_df: # only set default if the column is already present new_observable[OBSERVABLE_TRANSFORMATION] = LIN - + # type of the underlying distribution if prior_type in (NORMAL, PARAMETER_SCALE_NORMAL, LOG_NORMAL): new_observable[NOISE_DISTRIBUTION] = NORMAL elif prior_type in (LAPLACE, PARAMETER_SCALE_LAPLACE, LOG_LAPLACE): new_observable[NOISE_DISTRIBUTION] = LAPLACE else: + # we can't (easily) handle uniform priors in PEtab v1 raise NotImplementedError( f"Objective prior type {prior_type} is not implemented." ) diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index 3849297b..f96bd1a0 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -80,7 +80,7 @@ def sample_parameter_startpoints( [ Prior.from_par_dict( row, type_="initialization", bounds_truncate=True - ).sample(n_starts) + ).sample(n_starts, x_scaled=True) for row in par_to_estimate.to_dict("records") ] ).T diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py index 70d3df90..5cb2ad3a 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pytest -from scipy.integrate import cumulative_trapezoid +from scipy.integrate import cumulative_trapezoid, quad from scipy.stats import kstest import petab.v1 @@ -20,9 +20,39 @@ get_simulation_conditions, get_simulation_df, ) +from petab.v1.calculate import calculate_single_llh from petab.v1.priors import Prior, priors_to_measurements +def test_priors_to_measurements_simple(): + """Test the conversion of priors to measurements. + + Illustrates & tests the conversion of a prior to a measurement. + """ + # parameter value at which we evaluate the prior + par_value = 2.5 + # location and scale parameters of the prior + prior_loc = 3 + prior_scale = 3 + + for prior_type in [C.NORMAL, C.LAPLACE]: + # evaluate the orignal prior + prior = Prior( + prior_type, (prior_loc, prior_scale), transformation=C.LIN + ) + logprior = -prior.neglogprior(par_value, x_scaled=False) + + # evaluate the alternative implementation as a measurement + llh = calculate_single_llh( + measurement=prior_loc, + simulation=par_value, + scale=C.LIN, + noise_distribution=prior_type, + noise_value=prior_scale, + ) + assert np.isclose(llh, logprior, rtol=1e-12, atol=1e-16) + + @pytest.mark.parametrize( "problem_id", ["Schwen_PONE2014", "Isensee_JCB2018", "Raimundez_PCB2020"] ) @@ -59,8 +89,13 @@ def test_priors_to_measurements(problem_id): ) ) - # convert priors to measurements - petab_problem_measurements = priors_to_measurements(petab_problem_priors) + try: + # convert priors to measurements + petab_problem_measurements = priors_to_measurements( + petab_problem_priors + ) + except NotImplementedError as e: + pytest.skip(str(e)) # check that the original problem is not modified for attr in [ @@ -121,9 +156,12 @@ def apply_parameter_values(row): # apply the parameter values to the observable formula for the prior if row[OBSERVABLE_ID].startswith("prior_"): parameter_id = row[OBSERVABLE_ID].removeprefix("prior_") - if original_problem.parameter_df.loc[ - parameter_id, OBJECTIVE_PRIOR_TYPE - ].startswith("parameterScale"): + if ( + original_problem.parameter_df.loc[ + parameter_id, OBJECTIVE_PRIOR_TYPE + ] + in C.PARAMETER_SCALE_PRIOR_TYPES + ): row[SIMULATION] = x_scaled_dict[parameter_id] else: row[SIMULATION] = x_unscaled_dict[parameter_id] @@ -164,7 +202,9 @@ def apply_parameter_values(row): ] prior_contrib = 0 for parameter_id, prior in zip(parameter_ids, priors, strict=True): - prior_contrib -= prior.neglogprior(x_scaled_dict[parameter_id]) + prior_contrib -= prior.neglogprior( + x_scaled_dict[parameter_id], x_scaled=True + ) assert np.isclose( llh_priors + prior_contrib, llh_measurements, rtol=1e-8, atol=1e-16 @@ -196,21 +236,47 @@ def test_sample_matches_pdf(prior_args, transform): """Test that the sample matches the PDF.""" np.random.seed(1) N_SAMPLES = 10_000 + prior = Prior(*prior_args, transformation=transform) - sample = prior.sample(N_SAMPLES) - # pdf -> cdf - def cdf(x): - return cumulative_trapezoid(prior.pdf(x), x) + for x_scaled in [False, True]: + sample = prior.sample(N_SAMPLES, x_scaled=x_scaled) + + # pdf -> cdf + def cdf(x): + return cumulative_trapezoid( + prior.pdf( + x, + x_scaled=x_scaled, # noqa B208 + rescale=x_scaled, # noqa B208 + ), + x, + ) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) - # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF - _, p = kstest(sample, cdf) + if p < 0.05: + import matplotlib.pyplot as plt - # if p < 0.05: - # import matplotlib.pyplot as plt - # plt.hist(sample, bins=100, density=True) - # x = np.linspace(min(sample), max(sample), 100) - # plt.plot(x, distribution.pdf(x)) - # plt.show() + plt.hist(sample, bins=100, density=True) + x = np.linspace(min(sample), max(sample), 100) + plt.plot(x, prior.pdf(x, x_scaled=x_scaled, rescale=x_scaled)) + plt.xlabel(("scaled" if x_scaled else "unscaled") + " x") + plt.ylabel(("rescaled " if x_scaled else "") + "density") + plt.title(str(prior)) + plt.show() + print() - assert p > 0.05, (p, prior) + assert p > 0.05, (p, prior) + + # check that the integral of the PDF is 1 on the parameter scale + integral, abserr = quad( + lambda x: prior.pdf(x, x_scaled=False), + -np.inf, + np.inf, + limit=100, + epsabs=1e-10, + epsrel=0, + ) + assert np.isclose(integral, 1, rtol=0, atol=10 * abserr) From a17aa62023425aaaa3deb7e4cf54a0717e125f8e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 12 Dec 2024 10:40:43 +0100 Subject: [PATCH 08/11] Fix Prior.from_par_dict for missing priorParameters columns (#341) Previously, missing `*PriorParameters` would have resulted in a KeyError. --- petab/v1/priors.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 9ec811fc..b66ec484 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -281,10 +281,8 @@ def from_par_dict( dist_type = C.PARAMETER_SCALE_UNIFORM pscale = d.get(C.PARAMETER_SCALE, C.LIN) - if ( - pd.isna(d[f"{type_}PriorParameters"]) - and dist_type == C.PARAMETER_SCALE_UNIFORM - ): + params = d.get(f"{type_}PriorParameters", None) + if pd.isna(params) and dist_type == C.PARAMETER_SCALE_UNIFORM: params = ( scale(d[C.LOWER_BOUND], pscale), scale(d[C.UPPER_BOUND], pscale), @@ -293,7 +291,7 @@ def from_par_dict( params = tuple( map( float, - d[f"{type_}PriorParameters"].split(C.PARAMETER_SEPARATOR), + params.split(C.PARAMETER_SEPARATOR), ) ) return Prior( From a5d2a3d6f560c93b8756628ada0a41a18b248652 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 11 Feb 2025 12:12:03 +0100 Subject: [PATCH 09/11] Update doc/example/distributions.ipynb Co-authored-by: Maren Philipps <55318391+m-philipps@users.noreply.github.com> --- doc/example/distributions.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index fd251213..690714da 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -165,7 +165,7 @@ { "metadata": {}, "cell_type": "markdown", - "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameter are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`.", + "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameters are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`.", "id": "263c9fd31156a4d5" }, { From 7ae0f40b828d6660e65256810b10698a14395f63 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 10 Mar 2025 15:26:40 +0100 Subject: [PATCH 10/11] tuncation/transformation --- petab/v1/distributions.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 0bac259a..ebb35930 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -75,12 +75,12 @@ def __init__( @property def trunc_low(self) -> float: """The lower truncation limit of the transformed distribution.""" - return self._trunc[0] if self._trunc else -np.inf + return self._exp(self._trunc[0]) if self._trunc else -np.inf @property def trunc_high(self) -> float: """The upper truncation limit of the transformed distribution.""" - return self._trunc[1] if self._trunc else np.inf + return self._exp(self._trunc[1]) if self._trunc else np.inf def _exp(self, x: np.ndarray | float) -> np.ndarray | float: """Exponentiate / undo the log transformation if applicable. @@ -274,15 +274,17 @@ class Normal(Distribution): :param loc: The location parameter of the distribution. :param scale: The scale parameter of the distribution. :param trunc: The truncation limits of the distribution. - ``None`` if the distribution is not truncated. The truncation limits - are the truncation limits of the transformed distribution. + ``None`` if the distribution is not truncated. :param log: If ``True``, the distribution is transformed to a log-normal distribution. If a float, the distribution is transformed to a - log-normal distribution with the given base. + log-normal distribution with the given log-base. If ``False``, no transformation is applied. - If a transformation is applied, the location and scale parameters - and the truncation limits are the location, scale and truncation limits - of the underlying normal distribution. + + .. note:: + + If a transformation is applied, the location and scale parameters + and the truncation limits are the location, scale and truncation + limits of the underlying normal distribution. """ def __init__( @@ -333,8 +335,11 @@ class Uniform(Distribution): distribution. If a float, the distribution is transformed to a log-uniform distribution with the given base. If ``False``, no transformation is applied. - If a transformation is applied, the lower and upper bounds are the - lower and upper bounds of the underlying uniform distribution. + + .. note:: + + If a transformation is applied, the lower and upper bounds are the + lower and upper bounds of the underlying uniform distribution. """ def __init__( @@ -371,15 +376,17 @@ class Laplace(Distribution): :param loc: The location parameter of the distribution. :param scale: The scale parameter of the distribution. :param trunc: The truncation limits of the distribution. - ``None`` if the distribution is not truncated. The truncation limits - are the truncation limits of the transformed distribution. + ``None`` if the distribution is not truncated. :param log: If ``True``, the distribution is transformed to a log-Laplace distribution. If a float, the distribution is transformed to a log-Laplace distribution with the given base. If ``False``, no transformation is applied. - If a transformation is applied, the location and scale parameters - and the truncation limits are the location, scale and truncation limits - of the underlying Laplace distribution. + + .. note:: + + If a transformation is applied, the location and scale parameters + and the truncation limits are the location, scale and truncation + limits of the underlying Laplace distribution. """ def __init__( From 51367dbc103785197e5b39555b58cd34cb5feda1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 28 Mar 2025 09:25:12 +0100 Subject: [PATCH 11/11] reruff --- .pre-commit-config.yaml | 4 +- doc/example/distributions.ipynb | 171 ++++++++++-------- petab/__init__.py | 1 + petab/petablint.py | 6 +- petab/v1/C.py | 1 + petab/v1/composite_problem.py | 1 + petab/v1/core.py | 1 + petab/v1/distributions.py | 1 + petab/v1/format_version.py | 1 + petab/v1/lint.py | 12 +- petab/v1/mapping.py | 1 + petab/v1/math/SympyVisitor.py | 1 + petab/v1/math/__init__.py | 1 + petab/v1/models/__init__.py | 1 + petab/v1/models/model.py | 10 +- petab/v1/models/pysb_model.py | 4 +- petab/v1/models/sbml_model.py | 1 + petab/v1/parameter_mapping.py | 8 +- petab/v1/parameters.py | 2 +- petab/v1/priors.py | 17 +- petab/v1/problem.py | 5 +- petab/v1/sbml.py | 6 +- petab/v1/simplify.py | 1 + petab/v1/simulate.py | 1 + petab/v1/visualize/__init__.py | 1 + petab/v1/visualize/cli.py | 1 + petab/v1/visualize/helper_functions.py | 1 - petab/v1/visualize/lint.py | 1 + .../v1/visualize/plot_data_and_simulation.py | 5 +- petab/v1/visualize/plot_residuals.py | 1 + petab/v1/visualize/plotter.py | 1 + petab/v1/visualize/plotting.py | 15 +- petab/v1/yaml.py | 1 + petab/v2/C.py | 1 + petab/v2/__init__.py | 1 + petab/v2/_helpers.py | 1 + petab/v2/conditions.py | 1 + petab/v2/experiments.py | 1 + petab/v2/lint.py | 1 + petab/v2/models/__init__.py | 1 + petab/v2/models/model.py | 1 + petab/v2/models/pysb_model.py | 1 + petab/v2/models/sbml_model.py | 1 + petab/v2/petab1to2.py | 1 + petab/v2/problem.py | 7 +- petab/version.py | 1 + petab/versions.py | 1 + tests/v1/math/test_math.py | 12 +- tests/v1/test_calculate.py | 2 +- tests/v1/test_combine.py | 1 + tests/v1/test_conditions.py | 1 + tests/v1/test_deprecated.py | 1 + tests/v1/test_lint.py | 6 +- tests/v1/test_measurements.py | 1 + tests/v1/test_model_pysb.py | 1 + tests/v1/test_observables.py | 1 + tests/v1/test_parameters.py | 1 + tests/v1/test_petab.py | 6 +- tests/v1/test_sbml.py | 6 +- tests/v1/test_simplify.py | 1 + tests/v1/test_simulate.py | 1 + tests/v1/test_yaml.py | 1 + tests/v2/test_experiments.py | 1 + tests/v2/test_mapping.py | 1 + 64 files changed, 208 insertions(+), 141 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d32e5b68..4ba9c6fb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-added-large-files - id: check-merge-conflict @@ -12,7 +12,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.11 + rev: v0.9.10 hooks: # Run the linter. - id: ruff diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index 690714da..984278fc 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -1,8 +1,9 @@ { "cells": [ { - "metadata": {}, "cell_type": "markdown", + "id": "372289411a2aa7b3", + "metadata": {}, "source": [ "# Prior distributions in PEtab\n", "\n", @@ -18,23 +19,24 @@ "* *Initialization priors* can be used as a hint for the optimization algorithm. They will not enter the objective function. They are specified in the `initializationPriorType` and `initializationPriorParameters` columns of the parameter table.\n", "\n", "\n" - ], - "id": "372289411a2aa7b3" + ] }, { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", "metadata": { "collapsed": true }, - "cell_type": "code", + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import seaborn as sns\n", "\n", "from petab.v1.C import *\n", + "from petab.v1.parameters import unscale\n", "from petab.v1.priors import Prior\n", - "from petab.v1.parameters import scale, unscale\n", - "\n", "\n", "sns.set_style(None)\n", "\n", @@ -51,7 +53,10 @@ " plt.tight_layout()\n", " plt.show()\n", "\n", - "def plot_single(prior: Prior, scaled: bool = False, ax=None, sample: np.array = None):\n", + "\n", + "def plot_single(\n", + " prior: Prior, scaled: bool = False, ax=None, sample: np.array = None\n", + "):\n", " fig = None\n", " if ax is None:\n", " fig, ax = plt.subplots()\n", @@ -64,113 +69,124 @@ " sample = unscale(sample, prior.transformation)\n", " bounds = prior.bounds\n", " else:\n", - " bounds = (prior.lb_scaled, prior.ub_scaled) if prior.bounds is not None else None\n", + " bounds = (\n", + " (prior.lb_scaled, prior.ub_scaled)\n", + " if prior.bounds is not None\n", + " else None\n", + " )\n", "\n", " # plot pdf\n", - " xmin = min(sample.min(), bounds[0] if prior.bounds is not None else sample.min())\n", - " xmax = max(sample.max(), bounds[1] if prior.bounds is not None else sample.max())\n", + " xmin = min(\n", + " sample.min(), bounds[0] if prior.bounds is not None else sample.min()\n", + " )\n", + " xmax = max(\n", + " sample.max(), bounds[1] if prior.bounds is not None else sample.max()\n", + " )\n", " padding = 0.1 * (xmax - xmin)\n", " xmin -= padding\n", " xmax += padding\n", " x = np.linspace(xmin, xmax, 500)\n", " y = prior.pdf(x, x_scaled=scaled, rescale=scaled)\n", - " ax.plot(x, y, color='red', label='pdf')\n", + " ax.plot(x, y, color=\"red\", label=\"pdf\")\n", "\n", - " sns.histplot(sample, stat='density', ax=ax, label=\"sample\")\n", + " sns.histplot(sample, stat=\"density\", ax=ax, label=\"sample\")\n", "\n", " # plot bounds\n", " if prior.bounds is not None:\n", " for bound in bounds:\n", " if bound is not None and np.isfinite(bound):\n", - " ax.axvline(bound, color='black', linestyle='--', label='bound')\n", + " ax.axvline(bound, color=\"black\", linestyle=\"--\", label=\"bound\")\n", "\n", " if fig is not None:\n", " ax.set_title(str(prior))\n", "\n", " if scaled:\n", - " ax.set_xlabel(f'Parameter value on parameter scale ({prior.transformation})')\n", + " ax.set_xlabel(\n", + " f\"Parameter value on parameter scale ({prior.transformation})\"\n", + " )\n", " ax.set_ylabel(\"Rescaled density\")\n", " else:\n", - " ax.set_xlabel('Parameter value')\n", + " ax.set_xlabel(\"Parameter value\")\n", "\n", " ax.grid(False)\n", " handles, labels = ax.get_legend_handles_labels()\n", - " unique_labels = dict(zip(labels, handles))\n", + " unique_labels = dict(zip(labels, handles, strict=False))\n", " ax.legend(unique_labels.values(), unique_labels.keys())\n", "\n", " if ax is None:\n", - " plt.show()\n" - ], - "id": "initial_id", - "outputs": [], - "execution_count": null + " plt.show()" + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n", - "id": "db36a4a93622ccb8" + "id": "db36a4a93622ccb8", + "metadata": {}, + "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "4f09e50a3db06d9f", + "metadata": {}, + "outputs": [], "source": [ "plot_single(Prior(UNIFORM, (0, 1)))\n", "plot_single(Prior(NORMAL, (0, 1)))\n", "plot_single(Prior(LAPLACE, (0, 1)))\n", "plot_single(Prior(LOG_NORMAL, (0, 1)))\n", "plot_single(Prior(LOG_LAPLACE, (1, 0.5)))" - ], - "id": "4f09e50a3db06d9f", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "If a parameter scale is specified (`parameterScale=lin|log|log10`) and the chosen distribution is not a `parameterScale*`-type distribution, then the distribution parameters are taken as is, i.e., the `parameterScale` is not applied to the distribution parameters. In the context of PEtab prior distributions, `parameterScale` will only be used for the start point sampling for optimization, where the sample will be transformed accordingly. This is demonstrated below. The left plot always shows the prior distribution for unscaled parameter values, and the right plot shows the prior distribution for scaled parameter values. Note that in the objective function, the prior is always on the unscaled parameters.\n", - "id": "dab4b2d1e0f312d8" + "id": "dab4b2d1e0f312d8", + "metadata": {}, + "source": "If a parameter scale is specified (`parameterScale=lin|log|log10`) and the chosen distribution is not a `parameterScale*`-type distribution, then the distribution parameters are taken as is, i.e., the `parameterScale` is not applied to the distribution parameters. In the context of PEtab prior distributions, `parameterScale` will only be used for the start point sampling for optimization, where the sample will be transformed accordingly. This is demonstrated below. The left plot always shows the prior distribution for unscaled parameter values, and the right plot shows the prior distribution for scaled parameter values. Note that in the objective function, the prior is always on the unscaled parameters.\n" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "f6192c226f179ef9", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(NORMAL, (10, 2), transformation=LIN))\n", "plot(Prior(NORMAL, (10, 2), transformation=LOG))\n", "\n", - "# Note that the log-normal distribution is different from a log-transformed normal distribution:\n", + "# Note that the log-normal distribution is different\n", + "# from a log-transformed normal distribution:\n", "plot(Prior(LOG_NORMAL, (10, 2), transformation=LIN))" - ], - "id": "f6192c226f179ef9", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "On the log-transformed parameter scale, `Log*` and `parameterScale*` distributions are equivalent:", - "id": "4281ed48859e6431" + "id": "4281ed48859e6431", + "metadata": {}, + "source": "On the log-transformed parameter scale, `Log*` and `parameterScale*` distributions are equivalent:" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "34c95268e8921070", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(LOG_NORMAL, (10, 2), transformation=LOG))\n", "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 2)))" - ], - "id": "34c95268e8921070", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameters are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`.", - "id": "263c9fd31156a4d5" + "id": "263c9fd31156a4d5", + "metadata": {}, + "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameters are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`." }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "5ca940bc24312fc6", + "metadata": {}, + "outputs": [], "source": [ "# different, because transformation!=LIN\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LOG10))\n", @@ -179,50 +195,61 @@ "# same, because transformation=LIN\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LIN))\n", "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))" - ], - "id": "5ca940bc24312fc6", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "The given distributions are truncated at the bounds defined in the parameter table:", - "id": "b1a8b17d765db826" + "id": "b1a8b17d765db826", + "metadata": {}, + "source": "The given distributions are truncated at the bounds defined in the parameter table:" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "4ac42b1eed759bdd", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(NORMAL, (0, 1), bounds=(-2, 2)))\n", "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9)))\n", "plot(Prior(UNIFORM, (1e-8, 1), bounds=(0.1, 0.9), transformation=LOG10))\n", "plot(Prior(LAPLACE, (0, 1), bounds=(-0.5, 0.5)))\n", - "plot(Prior(PARAMETER_SCALE_UNIFORM, (-3, 1), bounds=(1e-2, 1), transformation=LOG10))" - ], - "id": "4ac42b1eed759bdd", - "outputs": [], - "execution_count": null + "plot(\n", + " Prior(\n", + " PARAMETER_SCALE_UNIFORM,\n", + " (-3, 1),\n", + " bounds=(1e-2, 1),\n", + " transformation=LOG10,\n", + " )\n", + ")" + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "Further distribution examples:", - "id": "45ffce1341483f24" + "id": "45ffce1341483f24", + "metadata": {}, + "source": "Further distribution examples:" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "581e1ac431860419", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(NORMAL, (10, 1), bounds=(6, 11), transformation=\"log10\"))\n", - "plot(Prior(PARAMETER_SCALE_NORMAL, (2, 1), bounds=(10**0, 10**3), transformation=\"log10\"))\n", + "plot(\n", + " Prior(\n", + " PARAMETER_SCALE_NORMAL,\n", + " (2, 1),\n", + " bounds=(10**0, 10**3),\n", + " transformation=\"log10\",\n", + " )\n", + ")\n", "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))\n", "plot(Prior(LOG_LAPLACE, (1, 0.5), bounds=(0.5, 8)))\n", "plot(Prior(LOG_NORMAL, (2, 1), bounds=(0.5, 8)))" - ], - "id": "581e1ac431860419", - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/petab/__init__.py b/petab/__init__.py index 36110069..dd30d186 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -8,6 +8,7 @@ PEtab should use for operations that can be performed in parallel. By default, all operations are performed sequentially. """ + import importlib import sys from functools import partial diff --git a/petab/petablint.py b/petab/petablint.py index 43796c42..244b7536 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -161,7 +161,7 @@ def main(): validate(args.yaml_file_name) except SchemaValidationError as e: logger.error( - "Provided YAML file does not adhere to PEtab " f"schema: {e}" + f"Provided YAML file does not adhere to PEtab schema: {e}" ) sys.exit(1) @@ -205,9 +205,7 @@ def main(): if args.parameter_file_name: logger.debug(f"\tParameter table: {args.parameter_file_name}") if args.visualization_file_name: - logger.debug( - "\tVisualization table: " f"{args.visualization_file_name}" - ) + logger.debug(f"\tVisualization table: {args.visualization_file_name}") try: problem = petab.Problem.from_files( diff --git a/petab/v1/C.py b/petab/v1/C.py index c7e7cbdd..09e94c20 100644 --- a/petab/v1/C.py +++ b/petab/v1/C.py @@ -2,6 +2,7 @@ """ This file contains constant definitions. """ + import math as _math import sys diff --git a/petab/v1/composite_problem.py b/petab/v1/composite_problem.py index 5f07d523..f887ec03 100644 --- a/petab/v1/composite_problem.py +++ b/petab/v1/composite_problem.py @@ -1,4 +1,5 @@ """PEtab problems consisting of multiple models""" + import os import pandas as pd diff --git a/petab/v1/core.py b/petab/v1/core.py index 3b4a4082..1149c67e 100644 --- a/petab/v1/core.py +++ b/petab/v1/core.py @@ -1,4 +1,5 @@ """PEtab core functions (or functions that don't fit anywhere else)""" + import logging import os import re diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index ebb35930..826a4a56 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -1,4 +1,5 @@ """Probability distributions used by PEtab.""" + from __future__ import annotations import abc diff --git a/petab/v1/format_version.py b/petab/v1/format_version.py index a8d63484..f303237e 100644 --- a/petab/v1/format_version.py +++ b/petab/v1/format_version.py @@ -1,2 +1,3 @@ """PEtab file format version""" + __format_version__ = 1 diff --git a/petab/v1/lint.py b/petab/v1/lint.py index e970bfde..b2260b83 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -556,7 +556,7 @@ def check_parameter_bounds(parameter_df: pd.DataFrame) -> None: ] in [LOG, LOG10]: raise AssertionError( f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " - f"{ row.name} must be positive." + f"{row.name} must be positive." ) if ( row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10] @@ -586,8 +586,7 @@ def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None: for _, row in parameter_df.iterrows(): if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]): raise AssertionError( - f"{col} must be one of {PRIOR_TYPES} but is " - f"'{row[col]}'." + f"{col} must be one of {PRIOR_TYPES} but is '{row[col]}'." ) @@ -945,7 +944,7 @@ def lint_problem(problem: "petab.Problem") -> bool: for obs_id in problem.observable_df.index: if problem.model.has_entity_with_id(obs_id): logger.error( - f"Observable ID {obs_id} shadows model " "entity." + f"Observable ID {obs_id} shadows model entity." ) errors_occurred = True else: @@ -1002,8 +1001,7 @@ def lint_problem(problem: "petab.Problem") -> bool: or problem.observable_df is None ): logger.warning( - "Not all files of the PEtab problem definition could " - "be checked." + "Not all files of the PEtab problem definition could be checked." ) else: logger.info("PEtab format check completed successfully.") @@ -1213,7 +1211,7 @@ def check_ids(ids: Iterable[str], kind: str = "") -> None: offset = 2 error_output = "\n".join( [ - f"Line {index+offset}: " + f"Line {index + offset}: " + ("Missing ID" if pd.isna(_id) else _id) for index, _id in invalids ] diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py index 813cf7d3..81f77017 100644 --- a/petab/v1/mapping.py +++ b/petab/v1/mapping.py @@ -1,4 +1,5 @@ """Functionality related to the PEtab entity mapping table""" + # TODO: Move to petab.v2.mapping from pathlib import Path diff --git a/petab/v1/math/SympyVisitor.py b/petab/v1/math/SympyVisitor.py index 016e872c..b8154301 100644 --- a/petab/v1/math/SympyVisitor.py +++ b/petab/v1/math/SympyVisitor.py @@ -1,4 +1,5 @@ """PEtab-math to sympy conversion.""" + import sympy as sp from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue diff --git a/petab/v1/math/__init__.py b/petab/v1/math/__init__.py index 27ebacd2..b9a4f59b 100644 --- a/petab/v1/math/__init__.py +++ b/petab/v1/math/__init__.py @@ -1,2 +1,3 @@ """Functions for parsing and evaluating mathematical expressions.""" + from .sympify import sympify_petab # noqa: F401 diff --git a/petab/v1/models/__init__.py b/petab/v1/models/__init__.py index 938f55fb..a35ad432 100644 --- a/petab/v1/models/__init__.py +++ b/petab/v1/models/__init__.py @@ -1,4 +1,5 @@ """Handling of different model types supported by PEtab.""" + #: SBML model type as used in a PEtab v2 yaml file as `language`. MODEL_TYPE_SBML = "sbml" #: PySB model type as used in a PEtab v2 yaml file as `language`. diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py index 795c7f0b..e25ca0b2 100644 --- a/petab/v1/models/model.py +++ b/petab/v1/models/model.py @@ -1,4 +1,5 @@ """PEtab model abstraction""" + from __future__ import annotations import abc @@ -13,8 +14,7 @@ class Model(abc.ABC): """Base class for wrappers for any PEtab-supported model type""" @abc.abstractmethod - def __init__(self): - ... + def __init__(self): ... def __repr__(self): return f"<{self.__class__.__name__} {self.model_id!r}>" @@ -41,13 +41,11 @@ def to_file(self, filename: [str, Path]): @classmethod @property @abc.abstractmethod - def type_id(cls): - ... + def type_id(cls): ... @property @abc.abstractmethod - def model_id(self): - ... + def model_id(self): ... @abc.abstractmethod def get_parameter_value(self, id_: str) -> float: diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py index f0147990..0b69d797 100644 --- a/petab/v1/models/pysb_model.py +++ b/petab/v1/models/pysb_model.py @@ -192,7 +192,7 @@ def parse_species_name( match = complex_constituent_pattern.match(complex_constituent) if not match: raise ValueError( - f"Invalid species name: '{name}' " f"('{complex_constituent}')" + f"Invalid species name: '{name}' ('{complex_constituent}')" ) monomer = match.groupdict()["monomer"] site_config_str = match.groupdict()["site_config"] @@ -208,7 +208,7 @@ def parse_species_name( elif config.startswith("'"): if not config.endswith("'"): raise ValueError( - f"Invalid species name: '{name}' " f"('{config}')" + f"Invalid species name: '{name}' ('{config}')" ) # strip quotes config = config[1:-1] diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index 55cd7b4d..8e8cf498 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -1,4 +1,5 @@ """Functions for handling SBML models""" + from __future__ import annotations import itertools diff --git a/petab/v1/parameter_mapping.py b/petab/v1/parameter_mapping.py index 014b4a8e..a438c6b2 100644 --- a/petab/v1/parameter_mapping.py +++ b/petab/v1/parameter_mapping.py @@ -133,7 +133,7 @@ def get_optimization_to_simulation_parameter_mapping( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) @@ -383,7 +383,7 @@ def get_parameter_mapping_for_condition( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) @@ -404,7 +404,7 @@ def get_parameter_mapping_for_condition( # initialize mapping dicts # for the case of matching simulation and optimization parameter vector par_mapping = simulation_parameters.copy() - scale_mapping = {par_id: LIN for par_id in par_mapping.keys()} + scale_mapping = dict.fromkeys(par_mapping.keys(), LIN) _output_parameters_to_nan(par_mapping) # not strictly necessary for preequilibration, be we do it to have @@ -495,7 +495,7 @@ def _apply_overrides_for_observable( overrides: list of overrides for noise or observable parameters """ for i, override in enumerate(overrides): - overridee_id = f"{override_type}Parameter{i+1}_{observable_id}" + overridee_id = f"{override_type}Parameter{i + 1}_{observable_id}" mapping[overridee_id] = override diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index c2f37f62..82e23669 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -201,7 +201,7 @@ def create_parameter_df( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) if include_optional: diff --git a/petab/v1/priors.py b/petab/v1/priors.py index b66ec484..45309a94 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -1,4 +1,5 @@ """Functions related to prior handling.""" + from __future__ import annotations import copy @@ -452,17 +453,17 @@ def scaled_observable_formula(parameter_id, parameter_scale): ].iloc[0], } if PREEQUILIBRATION_CONDITION_ID in new_problem.measurement_df: - new_measurement[ - PREEQUILIBRATION_CONDITION_ID - ] = new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[ - 0 - ] + new_measurement[PREEQUILIBRATION_CONDITION_ID] = ( + new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[ + 0 + ] + ) new_measurement_dicts.append(new_measurement) # remove prior from parameter table - new_problem.parameter_df.loc[ - parameter_id, OBJECTIVE_PRIOR_TYPE - ] = np.nan + new_problem.parameter_df.loc[parameter_id, OBJECTIVE_PRIOR_TYPE] = ( + np.nan + ) new_problem.parameter_df.loc[ parameter_id, OBJECTIVE_PRIOR_PARAMETERS ] = np.nan diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 91bbcd64..2405f5c0 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -1,4 +1,5 @@ """PEtab Problem class""" + from __future__ import annotations import os @@ -123,7 +124,7 @@ def __getattr__(self, name): if name in {"sbml_model", "sbml_reader", "sbml_document"}: return getattr(self.model, name) if self.model else None raise AttributeError( - f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" + f"'{self.__class__.__name__}' object has no attribute '{name}'" ) def __setattr__(self, name, value): @@ -486,7 +487,7 @@ def to_files_generic( if self.model: if not isinstance(self.model, SbmlModel): raise NotImplementedError( - "Saving non-SBML models is " "currently not supported." + "Saving non-SBML models is currently not supported." ) filenames["model_file"] = "model.xml" diff --git a/petab/v1/sbml.py b/petab/v1/sbml.py index 6395e41b..9e5549d2 100644 --- a/petab/v1/sbml.py +++ b/petab/v1/sbml.py @@ -258,9 +258,9 @@ def get_model_for_condition( condition_dict = {petab.SIMULATION_CONDITION_ID: sim_condition_id} if preeq_condition_id: - condition_dict[ - petab.PREEQUILIBRATION_CONDITION_ID - ] = preeq_condition_id + condition_dict[petab.PREEQUILIBRATION_CONDITION_ID] = ( + preeq_condition_id + ) cur_measurement_df = petab.measurements.get_rows_for_condition( measurement_df=petab_problem.measurement_df, condition=condition_dict, diff --git a/petab/v1/simplify.py b/petab/v1/simplify.py index c4cdeb91..78c039b1 100644 --- a/petab/v1/simplify.py +++ b/petab/v1/simplify.py @@ -1,4 +1,5 @@ """Functionality for simplifying PEtab problems""" + from math import nan import pandas as pd diff --git a/petab/v1/simulate.py b/petab/v1/simulate.py index 682c470f..334929ad 100644 --- a/petab/v1/simulate.py +++ b/petab/v1/simulate.py @@ -1,4 +1,5 @@ """PEtab simulator base class and related functions.""" + from __future__ import annotations import abc diff --git a/petab/v1/visualize/__init__.py b/petab/v1/visualize/__init__.py index 924be86a..15385697 100644 --- a/petab/v1/visualize/__init__.py +++ b/petab/v1/visualize/__init__.py @@ -6,6 +6,7 @@ ``import petab.visualize``. """ + # ruff: noqa: F401 import importlib.util diff --git a/petab/v1/visualize/cli.py b/petab/v1/visualize/cli.py index 72074936..1416cae0 100644 --- a/petab/v1/visualize/cli.py +++ b/petab/v1/visualize/cli.py @@ -1,4 +1,5 @@ """Command-line interface for visualization.""" + import argparse from pathlib import Path diff --git a/petab/v1/visualize/helper_functions.py b/petab/v1/visualize/helper_functions.py index b1a6f1b1..85b5d936 100644 --- a/petab/v1/visualize/helper_functions.py +++ b/petab/v1/visualize/helper_functions.py @@ -4,7 +4,6 @@ hence not be directly visible/usable when using `import petab.visualize`. """ - import pandas as pd from ..C import * diff --git a/petab/v1/visualize/lint.py b/petab/v1/visualize/lint.py index b5de74bc..29ea4f7d 100644 --- a/petab/v1/visualize/lint.py +++ b/petab/v1/visualize/lint.py @@ -1,4 +1,5 @@ """Validation of PEtab visualization files""" + from __future__ import annotations import logging diff --git a/petab/v1/visualize/plot_data_and_simulation.py b/petab/v1/visualize/plot_data_and_simulation.py index c76bcd43..5ca8c6fb 100644 --- a/petab/v1/visualize/plot_data_and_simulation.py +++ b/petab/v1/visualize/plot_data_and_simulation.py @@ -2,7 +2,6 @@ the same format. """ - import matplotlib.pyplot as plt import pandas as pd @@ -73,7 +72,7 @@ def plot_with_vis_spec( plotter = MPLPlotter(figure, dataprovider) else: raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." + "Currently, only visualization with matplotlib is possible." ) return plotter.generate_figure(subplot_dir, format_=format_) @@ -150,7 +149,7 @@ def plot_without_vis_spec( plotter = MPLPlotter(figure, dataprovider) else: raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." + "Currently, only visualization with matplotlib is possible." ) return plotter.generate_figure(subplot_dir, format_=format_) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 90298154..a45fcde3 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -1,6 +1,7 @@ """ Functions for plotting residuals. """ + from pathlib import Path import matplotlib diff --git a/petab/v1/visualize/plotter.py b/petab/v1/visualize/plotter.py index 2a1eaaa9..14af5650 100644 --- a/petab/v1/visualize/plotter.py +++ b/petab/v1/visualize/plotter.py @@ -1,4 +1,5 @@ """PEtab visualization plotter classes""" + import os from abc import ABC, abstractmethod diff --git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py index b607350b..e474c4c8 100644 --- a/petab/v1/visualize/plotting.py +++ b/petab/v1/visualize/plotting.py @@ -1,4 +1,5 @@ """PEtab visualization data selection and visualization settings classes""" + import warnings from numbers import Number, Real from pathlib import Path @@ -609,9 +610,9 @@ def get_data_series( isinstance(tmp_noise, Number) or tmp_noise.dtype == "float64" ): - measurements_to_plot.at[ - var_cond_id, "noise_model" - ] = tmp_noise + measurements_to_plot.at[var_cond_id, "noise_model"] = ( + tmp_noise + ) # standard error of mean measurements_to_plot.at[var_cond_id, "sem"] = np.std( @@ -619,9 +620,9 @@ def get_data_series( ) / np.sqrt(len(data_measurements)) # single replicates - measurements_to_plot.at[ - var_cond_id, "repl" - ] = data_measurements.values + measurements_to_plot.at[var_cond_id, "repl"] = ( + data_measurements.values + ) data_series = DataSeries(conditions_, measurements_to_plot) data_series.add_offsets(dataplot.xOffset, dataplot.yOffset) @@ -964,7 +965,7 @@ def _get_vis_spec_dependent_columns_dict( # get number of plots and create plotId-lists plot_id_column = [ - "plot%s" % str(ind + 1) + f"plot{ind + 1}" for ind, inner_list in enumerate(dataset_id_list) for _ in inner_list ] diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index ac134d5b..b8330028 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -1,4 +1,5 @@ """Code regarding the PEtab YAML config files""" + from __future__ import annotations import os diff --git a/petab/v2/C.py b/petab/v2/C.py index 1ab6f795..0a406fac 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -2,6 +2,7 @@ """ This file contains constant definitions. """ + import math as _math import sys diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index adeb0e84..4d147828 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -2,6 +2,7 @@ Contains all functionality related to handling PEtab 2.0 problems. """ + from warnings import warn # TODO: remove v1 star imports diff --git a/petab/v2/_helpers.py b/petab/v2/_helpers.py index a7522f35..3201769a 100644 --- a/petab/v2/_helpers.py +++ b/petab/v2/_helpers.py @@ -1,2 +1,3 @@ """Various internal helper functions.""" + from ..v1.core import to_float_if_float # noqa: F401, E402 diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 7bb6d262..8d5a3067 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -1,4 +1,5 @@ """Functions operating on the PEtab condition table""" + from __future__ import annotations from pathlib import Path diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py index 17137b5c..9837b953 100644 --- a/petab/v2/experiments.py +++ b/petab/v2/experiments.py @@ -1,4 +1,5 @@ """Functions operating on the PEtab experiments table.""" + from pathlib import Path import pandas as pd diff --git a/petab/v2/lint.py b/petab/v2/lint.py index c5cf5eb9..2deb0ebd 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -1,4 +1,5 @@ """Validation of PEtab problems""" + from __future__ import annotations import logging diff --git a/petab/v2/models/__init__.py b/petab/v2/models/__init__.py index a387c27b..79ec7639 100644 --- a/petab/v2/models/__init__.py +++ b/petab/v2/models/__init__.py @@ -1,2 +1,3 @@ """Handling of different model types supported by PEtab.""" + from ...v1.models import * # noqa: F401, F403 diff --git a/petab/v2/models/model.py b/petab/v2/models/model.py index 403a03e2..345247eb 100644 --- a/petab/v2/models/model.py +++ b/petab/v2/models/model.py @@ -1,2 +1,3 @@ """PEtab model abstraction""" + from ...v1.models.model import * # noqa: F401, F403 diff --git a/petab/v2/models/pysb_model.py b/petab/v2/models/pysb_model.py index 111c9864..4da866e7 100644 --- a/petab/v2/models/pysb_model.py +++ b/petab/v2/models/pysb_model.py @@ -1,2 +1,3 @@ """Functions for handling PySB models""" + from ...v1.models.pysb_model import * # noqa: F401, F403 diff --git a/petab/v2/models/sbml_model.py b/petab/v2/models/sbml_model.py index 2a0eadc7..b696ce31 100644 --- a/petab/v2/models/sbml_model.py +++ b/petab/v2/models/sbml_model.py @@ -1,2 +1,3 @@ """Functions for handling SBML models""" + from ...v1.models.sbml_model import * # noqa: F401, F403 diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 0d48d9cf..dc1b2b8c 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,4 +1,5 @@ """Convert PEtab version 1 problems to version 2.""" + import shutil from contextlib import suppress from itertools import chain diff --git a/petab/v2/problem.py b/petab/v2/problem.py index d07c7f2e..32684d0b 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -1,4 +1,5 @@ """PEtab v2 problems.""" + from __future__ import annotations import logging @@ -87,9 +88,9 @@ def __init__( self.mapping_df: pd.DataFrame | None = mapping_df self.model: Model | None = model self.extensions_config = extensions_config or {} - self.validation_tasks: list[ - ValidationTask - ] = default_validation_tasks.copy() + self.validation_tasks: list[ValidationTask] = ( + default_validation_tasks.copy() + ) self.config = config def __str__(self): diff --git a/petab/version.py b/petab/version.py index c59cab99..ca57250b 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,3 @@ """PEtab library version""" + __version__ = "0.5.0" diff --git a/petab/versions.py b/petab/versions.py index e19d0cd0..b1fdecf4 100644 --- a/petab/versions.py +++ b/petab/versions.py @@ -1,4 +1,5 @@ """Handling of PEtab version numbers.""" + from __future__ import annotations import re diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 4b350d4e..828aac88 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -61,13 +61,13 @@ def test_parse_cases(expr_str, expected): else: try: result = float(result.evalf()) - assert np.isclose( - result, expected - ), f"{expr_str}: Expected {expected}, got {result}" + assert np.isclose(result, expected), ( + f"{expr_str}: Expected {expected}, got {result}" + ) except TypeError: - assert ( - result == expected - ), f"{expr_str}: Expected {expected}, got {result}" + assert result == expected, ( + f"{expr_str}: Expected {expected}, got {result}" + ) def test_ids(): diff --git a/tests/v1/test_calculate.py b/tests/v1/test_calculate.py index ca93c33a..526ea7c9 100644 --- a/tests/v1/test_calculate.py +++ b/tests/v1/test_calculate.py @@ -15,7 +15,7 @@ def model_simple(): - "Simple model." "" + "Simple model." measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["obs_a", "obs_a", "obs_b", "obs_b"], diff --git a/tests/v1/test_combine.py b/tests/v1/test_combine.py index 4fca4105..e685bf2b 100644 --- a/tests/v1/test_combine.py +++ b/tests/v1/test_combine.py @@ -1,4 +1,5 @@ """Test COMBINE archive""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_conditions.py b/tests/v1/test_conditions.py index b240241d..45059ba1 100644 --- a/tests/v1/test_conditions.py +++ b/tests/v1/test_conditions.py @@ -1,4 +1,5 @@ """Tests related to petab.conditions""" + import os import tempfile from pathlib import Path diff --git a/tests/v1/test_deprecated.py b/tests/v1/test_deprecated.py index b78e7856..ef96f2e9 100644 --- a/tests/v1/test_deprecated.py +++ b/tests/v1/test_deprecated.py @@ -1,4 +1,5 @@ """Check that deprecated functionality raises but still works.""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_lint.py b/tests/v1/test_lint.py index d75bdcea..4ad2e9b1 100644 --- a/tests/v1/test_lint.py +++ b/tests/v1/test_lint.py @@ -202,9 +202,9 @@ def test_assert_overrides_match_parameter_count(): # 3 observable parameters given, 2 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[ - 1, OBSERVABLE_PARAMETERS - ] = "override1;override2;oneTooMuch" + measurement_df.loc[1, OBSERVABLE_PARAMETERS] = ( + "override1;override2;oneTooMuch" + ) with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( measurement_df, observable_df diff --git a/tests/v1/test_measurements.py b/tests/v1/test_measurements.py index ac3e59a3..10f5ba98 100644 --- a/tests/v1/test_measurements.py +++ b/tests/v1/test_measurements.py @@ -1,4 +1,5 @@ """Tests related to petab.measurements""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_model_pysb.py b/tests/v1/test_model_pysb.py index 922dab2f..57371c79 100644 --- a/tests/v1/test_model_pysb.py +++ b/tests/v1/test_model_pysb.py @@ -1,4 +1,5 @@ """Test related to petab.models.model_pysb""" + import pysb import pytest diff --git a/tests/v1/test_observables.py b/tests/v1/test_observables.py index e870ac12..c9932b0d 100644 --- a/tests/v1/test_observables.py +++ b/tests/v1/test_observables.py @@ -1,4 +1,5 @@ """Tests for petab.observables""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_parameters.py b/tests/v1/test_parameters.py index c28528fe..33e7c97d 100644 --- a/tests/v1/test_parameters.py +++ b/tests/v1/test_parameters.py @@ -1,4 +1,5 @@ """Tests for petab/parameters.py""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index ff9621fa..564dcb7f 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -331,9 +331,9 @@ def test_create_parameter_df( assert parameter_df.index.values.tolist() == expected # test with condition parameter override: - condition_df_2_conditions.loc[ - "condition2", "fixedParameter1" - ] = "overrider" + condition_df_2_conditions.loc["condition2", "fixedParameter1"] = ( + "overrider" + ) expected = ["p3", "p4", "p1", "p2", "p5", "overrider"] parameter_df = petab.create_parameter_df( diff --git a/tests/v1/test_sbml.py b/tests/v1/test_sbml.py index 5c262d43..b29f1ea7 100644 --- a/tests/v1/test_sbml.py +++ b/tests/v1/test_sbml.py @@ -85,9 +85,9 @@ def check_model(condition_model): condition_model.getSpecies("species_4").getInitialConcentration() == 3.25 ) - assert ( - len(condition_model.getListOfInitialAssignments()) == 0 - ), "InitialAssignment not removed" + assert len(condition_model.getListOfInitialAssignments()) == 0, ( + "InitialAssignment not removed" + ) assert condition_model.getCompartment("compartment_1").getSize() == 2.0 assert condition_model.getParameter("parameter_1").getValue() == 1.25 assert condition_model.getParameter("parameter_2").getValue() == 2.25 diff --git a/tests/v1/test_simplify.py b/tests/v1/test_simplify.py index 9aa25f8f..1724f8bb 100644 --- a/tests/v1/test_simplify.py +++ b/tests/v1/test_simplify.py @@ -1,4 +1,5 @@ """Tests for petab.simplify.*""" + from math import nan import pandas as pd diff --git a/tests/v1/test_simulate.py b/tests/v1/test_simulate.py index e23b63cb..7945b1bb 100644 --- a/tests/v1/test_simulate.py +++ b/tests/v1/test_simulate.py @@ -1,4 +1,5 @@ """Tests for petab/simulate.py.""" + import functools from collections.abc import Callable from pathlib import Path diff --git a/tests/v1/test_yaml.py b/tests/v1/test_yaml.py index 82ab242c..168d7697 100644 --- a/tests/v1/test_yaml.py +++ b/tests/v1/test_yaml.py @@ -1,4 +1,5 @@ """Test for petab.yaml""" + import tempfile from pathlib import Path diff --git a/tests/v2/test_experiments.py b/tests/v2/test_experiments.py index 234552f2..205f200d 100644 --- a/tests/v2/test_experiments.py +++ b/tests/v2/test_experiments.py @@ -1,4 +1,5 @@ """Tests related to ``petab.v2.experiments``.""" + from tempfile import TemporaryDirectory import pandas as pd diff --git a/tests/v2/test_mapping.py b/tests/v2/test_mapping.py index 60ba6b49..e60e9082 100644 --- a/tests/v2/test_mapping.py +++ b/tests/v2/test_mapping.py @@ -1,4 +1,5 @@ """Tests related to petab.v2.mapping""" + import tempfile import pandas as pd