diff --git a/doc/api.rst b/doc/api.rst index fd0f5047..9d5b1f0b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -6,14 +6,32 @@ API Documentation .. currentmodule:: pyglmnet -Classes -======= +GLM Classes +=========== -.. autoclass:: GLM - :members: +.. currentmodule:: pyglmnet + +.. autosummary:: + :toctree: generated/ + + GLM + GLMCV + +Distribution Classes +==================== + +.. currentmodule:: pyglmnet.distributions + +.. autosummary:: + :toctree: generated/ -.. autoclass:: GLMCV - :members: + BaseDistribution + Poisson + PoissonSoftplus + NegBinomialSoftplus + Binomial + Probit + GammaSoftplus Datasets diff --git a/examples/plot_custom_distributions.py b/examples/plot_custom_distributions.py new file mode 100644 index 00000000..a13d38f6 --- /dev/null +++ b/examples/plot_custom_distributions.py @@ -0,0 +1,76 @@ +""" +====================== +Rolling out custom GLM +====================== + +This is an example demonstrating rolling out your custom +GLM class using Pyglmnet. +""" +######################################################## + +# Author: Pavan Ramkumar +# License: MIT + +from sklearn.model_selection import train_test_split +from pyglmnet import GLMCV, datasets + +######################################################## +# Download and preprocess data files + +X, y = datasets.fetch_community_crime_data() +n_samples, n_features = X.shape + +######################################################## +# Split the data into training and test sets + +X_train, X_test, y_train, y_test = \ + train_test_split(X, y, test_size=0.33, random_state=0) + +######################################################## +# Now we define our own distribution class. This must +# inherit from BaseDistribution. The BaseDistribution +# class requires defining the following methods: +# - mu: inverse link function +# - grad_mu: gradient of the inverse link function +# - log_likelihood: the log likelihood function +# - grad_log_likelihood: the gradient of the log +# likelihood. +# All distributions in pyglmnet inherit from BaseDistribution +# +# This is really powerful. For instance, we can start from +# the existing Binomial distribution and override mu and grad_mu +# if we want to use the cloglog link function. + +import numpy as np # noqa: E402 +from pyglmnet.distributions import Binomial # noqa: E402 + + +class CustomBinomial(Binomial): + """Custom binomial distribution.""" + + def mu(self, z): + """clogclog inverse link""" + mu = 1 - np.exp(-np.exp(z)) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = np.exp(1 - np.exp(z)) + return grad_mu + + +distr = CustomBinomial() + +######################################################## +# Now we pass it to the GLMCV class just as before. 
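For reference, the cloglog inverse link used by CustomBinomial above is mu(z) = 1 - exp(-exp(z)), whose analytic derivative is exp(z) * exp(-exp(z)) = exp(z - exp(z)). A minimal sketch, assuming only NumPy and SciPy (the helper names here are illustrative, not part of pyglmnet), that checks such a grad_mu against a finite-difference estimate:

import numpy as np
from scipy.optimize import approx_fprime


def cloglog_mu(z):
    """Inverse cloglog link: mu(z) = 1 - exp(-exp(z))."""
    return 1 - np.exp(-np.exp(z))


def cloglog_grad_mu(z):
    """Analytic derivative: d mu / d z = exp(z - exp(z))."""
    return np.exp(z - np.exp(z))


def check_cloglog_grad():
    """Compare the analytic gradient with a forward-difference estimate."""
    for z0 in (-2.0, 0.0, 1.5):
        numeric = approx_fprime(np.array([z0]),
                                lambda z: cloglog_mu(z)[0], 1e-8)[0]
        analytic = cloglog_grad_mu(np.array([z0]))[0]
        assert np.allclose(numeric, analytic, rtol=1e-4)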
+ +# use the default value for reg_lambda +glm = GLMCV(distr=distr, alpha=0.05, score_metric='pseudo_R2', cv=3, + tol=1e-4) + +# fit model +glm.fit(X_train, y_train) + +# score the test set prediction +y_test_hat = glm.predict_proba(X_test) +print("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test)) diff --git a/pyglmnet/__init__.py b/pyglmnet/__init__.py index 89ae6e23..028a9030 100644 --- a/pyglmnet/__init__.py +++ b/pyglmnet/__init__.py @@ -19,7 +19,7 @@ __version__ = '1.2.dev0' -from .pyglmnet import GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm, _gradhess_logloss_1d, _loss, ALLOWED_DISTRS +from .pyglmnet import GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm, _loss, ALLOWED_DISTRS from .utils import softmax, label_binarizer, set_log_level from .datasets import fetch_tikhonov_data, fetch_rgc_spike_trains, fetch_community_crime_data, fetch_group_lasso_data from . import externals diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py new file mode 100644 index 00000000..ffdc7279 --- /dev/null +++ b/pyglmnet/distributions.py @@ -0,0 +1,442 @@ +"""Exponential family distributions for GLM estimators.""" + +from abc import ABC, abstractmethod +import numpy as np +from scipy.special import expit, log1p, loggamma +from scipy.stats import norm + +from .externals.sklearn.utils import check_random_state + + +def softplus(z): + """Numerically stable version of log(1 + exp(z)).""" + # see stabilizing softplus: http://sachinashanbhag.blogspot.com/2014/05/numerically-approximation-of-log-1-expy.html # noqa + mu = z.copy() + mu[z > 35] = z[z > 35] + mu[z < -10] = np.exp(z[z < -10]) + mu[(z >= -10) & (z <= 35)] = log1p(np.exp(z[(z >= -10) & (z <= 35)])) + return mu + + +class BaseDistribution(ABC): + """Base class for distributions.""" + + def __init__(self): + """init.""" + pass + + @abstractmethod + def mu(self, z): + """Inverse link function.""" + pass + + @abstractmethod + def grad_mu(self, z): + """Gradient of inverse link.""" + pass + + @abstractmethod + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + pass + + def grad_log_likelihood(self): + """Gradient of L2-penalized log likelihood.""" + msg = (f"""Gradients of log likelihood are not specified for {self.__class__} distribution""") # noqa + raise NotImplementedError(msg) + + def gradhess_log_likelihood_1d(self): + """One-dimensional Gradient and Hessian of log likelihood.""" + msg = (f"""Gradients and Hessians of 1-d log likelihood are not specified for {self.__class__} distribution""") # noqa + raise NotImplementedError(msg) + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + msg = (f"""Simulation of model is not implemented for {self.__class__} distribution""") # noqa + raise NotImplementedError(msg) + + def _z(self, beta0, beta, X): + """Compute z to be passed through non-linearity.""" + return beta0 + np.dot(X, beta) + + +class SoftplusLink: + + def mu(self, z): + """Inverse link function.""" + mu = softplus(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) + return grad_mu + + +class Gaussian(BaseDistribution): + """Class for Gaussian distribution.""" + + def mu(self, z): + """Inverse link function.""" + mu = z + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = np.ones_like(z) + return grad_mu + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + log_likelihood = -0.5 * np.sum((y - y_hat) ** 2) + return log_likelihood 
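Stepping back to the softplus helper defined at the top of this new distributions module: the piecewise form matters numerically, because the naive log(1 + exp(z)) overflows for large z and rounds 1 + exp(z) to exactly 1.0 for very negative z. A minimal sketch, assuming only NumPy, of the same strategy:

import numpy as np

z = np.array([-50.0, 0.0, 800.0])

# naive formula: exp(800) overflows to inf, and 1 + exp(-50) rounds to 1.0
naive = np.log(1.0 + np.exp(z))           # -> [0.0, 0.693..., inf]

# piecewise strategy used by softplus() above
stable = z.copy()
stable[z > 35] = z[z > 35]                # log(1 + exp(z)) ~= z for large z
stable[z < -10] = np.exp(z[z < -10])      # log(1 + exp(z)) ~= exp(z) for very negative z
mid = (z >= -10) & (z <= 35)
stable[mid] = np.log1p(np.exp(z[mid]))    # -> [1.9e-22, 0.693..., 800.0]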
+ + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + grad_beta0 = np.sum((mu - y) * grad_mu) + grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + gk = np.sum((z - y) * xk) + hk = np.sum(xk * xk) + return gk, hk + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.normal(mu) + + +class Poisson(BaseDistribution): + """Class for Poisson distribution.""" + + def __init__(self): + """init.""" + self.eta = None + + def mu(self, z): + """Inverse link function.""" + mu = z.copy() + mu0 = (1 - self.eta) * np.exp(self.eta) + mu[z > self.eta] = z[z > self.eta] * np.exp(self.eta) + mu0 + mu[z <= self.eta] = np.exp(z[z <= self.eta]) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = z.copy() + grad_mu[z > self.eta] = \ + np.ones_like(z)[z > self.eta] * np.exp(self.eta) + grad_mu[z <= self.eta] = np.exp(z[z <= self.eta]) + return grad_mu + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + eps = np.spacing(1) + log_likelihood = np.sum(y * np.log(y_hat + eps) - y_hat) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) + grad_beta = ((np.dot(grad_mu.T, X) - + np.dot((y * grad_mu / mu).T, X)).T) + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + mu = self.mu(z) + gk = np.sum((mu[z <= self.eta] - y[z <= self.eta]) * + xk[z <= self.eta]) + \ + np.exp(self.eta) * \ + np.sum((1 - y[z > self.eta] / mu[z > self.eta]) * + xk[z > self.eta]) + hk = np.sum(mu[z <= self.eta] * xk[z <= self.eta] ** 2) + \ + np.exp(self.eta) ** 2 * \ + np.sum(y[z > self.eta] / (mu[z > self.eta] ** 2) * + (xk[z > self.eta] ** 2)) + return gk, hk + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.poisson(mu) + + +class PoissonSoftplus(SoftplusLink, Poisson): + """Class for Poisson distribution with softplus inverse link.""" + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + mu = self.mu(z) + s = expit(z) + gk = np.sum(s * xk) - np.sum(y * s / mu * xk) + + grad_s = s * (1 - s) + grad_s_by_mu = grad_s / mu - s / (mu ** 2) + hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) + return gk, hk + + +class NegBinomialSoftplus(SoftplusLink, BaseDistribution): + """Class for Negative binomial distribution with softplus inverse link.""" + + def __init__(self): + """init.""" + self.theta = None + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + theta = self.theta + log_likelihood = \ + np.sum(loggamma(y + theta) - loggamma(theta) - loggamma(y + 1) + + theta * np.log(theta) + y * 
np.log(y_hat) - (theta + y) * + np.log(y_hat + theta)) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + theta = self.theta + partial_beta_0 = grad_mu * ((theta + y) / (mu + theta) - y / mu) + grad_beta0 = np.sum(partial_beta_0) + grad_beta = np.dot(partial_beta_0.T, X) + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + mu = self.mu(z) + grad_mu = expit(z) + hess_mu = np.exp(-z) * expit(z) ** 2 + theta = self.theta + gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) + partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) + partial_beta_0_2 = grad_mu ** 2 * \ + ((y + theta) / (mu + theta) ** 2 - y / mu ** 2) + partial_beta_0 = -(partial_beta_0_1 + partial_beta_0_2) + gk = np.dot(gradient_beta_j.T, xk) + hk = np.dot(partial_beta_0.T, xk ** 2) + return gk, hk + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + p = self.theta / (self.theta + mu) # Probability of success + return _random_state.negative_binomial(self.theta, p) + + +class Binomial(BaseDistribution): + """Class for binomial distribution.""" + + def mu(self, z): + """Inverse link function.""" + mu = expit(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) * (1 - expit(z)) + return grad_mu + + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + # prevents underflow + if z is not None: + log_likelihood = np.sum(y * z - np.log(1 + np.exp(z))) + # for scoring + else: + log_likelihood = \ + np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_beta0 = np.sum(mu - y) + grad_beta = np.dot((mu - y).T, X).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + mu = self.mu(z) + gk = np.sum((mu - y) * xk) + hk = np.sum(mu * (1.0 - mu) * xk * xk) + return gk, hk + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.binomial(1, mu) + + +def _probit_g1(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) + res[np.abs(z) <= thresh] = np.log(cdfz[np.abs(z) <= thresh]) + res[z > thresh] = -pdfz[z > thresh] / z[z > thresh] + return res + + +def _probit_g2(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = pdfz[z < -thresh] / z[z < -thresh] + res[np.abs(z) <= thresh] = np.log(1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = np.log(pdfz[z > thresh] / z[z > thresh]) + return res + + +def _probit_g3(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = -z[z < -thresh] + res[np.abs(z) <= thresh] = \ + pdfz[np.abs(z) <= thresh] / cdfz[np.abs(z) <= thresh] + res[z > thresh] = pdfz[z > thresh] + return res + + +def 
_probit_g4(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = pdfz[z < -thresh] + res[np.abs(z) <= thresh] = \ + pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = z[z > thresh] + return res + + +def _probit_g5(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = 0 * z[z < -thresh] + res[np.abs(z) <= thresh] = \ + z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ + cdfz[np.abs(z) <= thresh] + (pdfz[np.abs(z) <= thresh] / + cdfz[np.abs(z) <= thresh]) ** 2 + res[z > thresh] = z[z > thresh] * pdfz[z > thresh] + pdfz[z > thresh] ** 2 + return res + + +def _probit_g6(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = \ + pdfz[z < -thresh] ** 2 - z[z < -thresh] * pdfz[z < -thresh] + res[np.abs(z) <= thresh] = \ + (pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh])) ** 2 - \ + z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ + (1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = 0 * z[z > thresh] + return res + + +class Probit(BaseDistribution): + """Class for probit distribution.""" + + def mu(self, z): + """Inverse link function.""" + mu = norm.cdf(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = norm.pdf(z) + return grad_mu + + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + if z is not None: + pdfz, cdfz = norm.pdf(z), norm.cdf(z) + log_likelihood = \ + np.sum(y * _probit_g1(z, pdfz, cdfz) + + (1 - y) * _probit_g2(z, pdfz, cdfz)) + else: + log_likelihood = \ + np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_beta0 = np.sum(mu - y) + grad_beta = np.dot((mu - y).T, X).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + pdfz = norm.pdf(z) + cdfz = norm.cdf(z) + gk = -np.sum((y * _probit_g3(z, pdfz, cdfz) - + (1 - y) * _probit_g4(z, pdfz, cdfz)) * xk) + hk = np.sum((y * _probit_g5(z, pdfz, cdfz) + + (1 - y) * _probit_g6(z, pdfz, cdfz)) * (xk * xk)) + return gk, hk + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.binomial(1, mu) + + +class GammaSoftplus(SoftplusLink, BaseDistribution): + """Class for Gamma distribution with softplus inverse link.""" + + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + # see + # https://www.statistics.ma.tum.de/fileadmin/w00bdb/www/czado/lec8.pdf + nu = 1. # shape parameter, exponential for now + log_likelihood = np.sum(nu * (-y / y_hat - np.log(y_hat))) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + nu = 1. 
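        # Derivation: for the exponential special case nu = 1, the log
        # likelihood above is sum(-y / mu - log(mu)), so d logL / d mu =
        # y / mu ** 2 - 1 / mu, and the chain rule multiplies by
        # grad_mu = d mu / d z. The minus signs below return the gradient
        # of the *negative* log likelihood, which is what _grad_L2loss
        # expects from every grad_log_likelihood implementation.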
+ grad_logl = (y / mu ** 2 - 1 / mu) * grad_mu + grad_beta0 = -nu * np.sum(grad_logl) + grad_beta = -nu * np.dot(grad_logl.T, X).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + raise NotImplementedError('cdfast is not implemented for Gamma ' + 'distribution') + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + return np.exp(mu) diff --git a/pyglmnet/externals/sklearn/utils/validation.py b/pyglmnet/externals/sklearn/utils/validation.py index 7cda34e4..3e09c504 100644 --- a/pyglmnet/externals/sklearn/utils/validation.py +++ b/pyglmnet/externals/sklearn/utils/validation.py @@ -119,7 +119,7 @@ def check_consistent_length(*arrays): uniques = np.unique(lengths) if len(uniques) > 1: raise ValueError("Found input variables with inconsistent numbers of" - " samples: %r" % [int(l) for l in lengths]) + " samples: %r" % [int(ll) for ll in lengths]) def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, diff --git a/pyglmnet/metrics.py b/pyglmnet/metrics.py index 27bd1080..d97754bc 100644 --- a/pyglmnet/metrics.py +++ b/pyglmnet/metrics.py @@ -1,7 +1,7 @@ -""" A set of scoring functions. """ +"""A set of scoring functions.""" import numpy as np -from .pyglmnet import _logL +from .distributions import Poisson, PoissonSoftplus, NegBinomialSoftplus def deviance(y, yhat, distr, theta): @@ -23,12 +23,13 @@ def deviance(y, yhat, distr, theta): score : float Deviance of the predicted labels. """ - if distr in ['softplus', 'poisson', 'neg-binomial']: - LS = _logL(distr, y, y, theta=theta) + if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): + LS = distr.log_likelihood(y, y) else: LS = 0 - L1 = _logL(distr, y, yhat, theta=theta) + # L1 = _logL(distr, y, yhat, theta=theta) + L1 = distr.log_likelihood(y, yhat) score = -2 * (L1 - LS) return score @@ -55,15 +56,15 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): score : float Pseudo-R2 score. 
""" - if distr in ['softplus', 'poisson', 'neg-binomial']: - LS = _logL(distr, y, y, theta=theta) + if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): + LS = distr.log_likelihood(y, y) else: LS = 0 - L0 = _logL(distr, y, ynull_, theta=theta) - L1 = _logL(distr, y, yhat, theta=theta) + L0 = distr.log_likelihood(y, ynull_) + L1 = distr.log_likelihood(y, yhat) - if distr in ['softplus', 'poisson', 'neg-binomial']: + if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): score = (1 - (LS - L1) / (LS - L0)) else: score = (1 - L1 / L0) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 9e00a9c1..b3fe8b40 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -4,161 +4,21 @@ from copy import deepcopy import numpy as np -from scipy.special import expit, loggamma -from scipy.stats import norm from .utils import logger, set_log_level, _check_params, \ - _verbose_iterable, _tqdm_log, softplus + _verbose_iterable, _tqdm_log from .base import BaseEstimator, is_classifier, check_version from .externals.sklearn.utils import check_random_state, check_array, check_X_y from .externals.sklearn.utils.validation import check_is_fitted +from .distributions import BaseDistribution, Gaussian, Poisson, \ + PoissonSoftplus, NegBinomialSoftplus, Binomial, Probit, GammaSoftplus + ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] -def _probit_g1(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) - res[np.abs(z) <= thresh] = np.log(cdfz[np.abs(z) <= thresh]) - res[z > thresh] = -pdfz[z > thresh] / z[z > thresh] - return res - - -def _probit_g2(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = pdfz[z < -thresh] / z[z < -thresh] - res[np.abs(z) <= thresh] = np.log(1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = np.log(pdfz[z > thresh] / z[z > thresh]) - return res - - -def _probit_g3(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = -z[z < -thresh] - res[np.abs(z) <= thresh] = \ - pdfz[np.abs(z) <= thresh] / cdfz[np.abs(z) <= thresh] - res[z > thresh] = pdfz[z > thresh] - return res - - -def _probit_g4(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = pdfz[z < -thresh] - res[np.abs(z) <= thresh] = \ - pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = z[z > thresh] - return res - - -def _probit_g5(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = 0 * z[z < -thresh] - res[np.abs(z) <= thresh] = \ - z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ - cdfz[np.abs(z) <= thresh] + (pdfz[np.abs(z) <= thresh] / - cdfz[np.abs(z) <= thresh]) ** 2 - res[z > thresh] = z[z > thresh] * pdfz[z > thresh] + pdfz[z > thresh] ** 2 - return res - - -def _probit_g6(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = \ - pdfz[z < -thresh] ** 2 - z[z < -thresh] * pdfz[z < -thresh] - res[np.abs(z) <= thresh] = \ - (pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh])) ** 2 - \ - z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ - (1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = 0 * z[z > thresh] - return res - - -def _z(beta0, beta, X, fit_intercept): - """Compute z to be passed through non-linearity.""" - if fit_intercept: - z = beta0 + np.dot(X, beta) - else: - z = np.dot(X, np.r_[beta0, beta]) - return z - - -def _lmb(distr, beta0, beta, X, eta, fit_intercept=True): - """Conditional intensity function.""" - z = 
_z(beta0, beta, X, fit_intercept) - return _mu(distr, z, eta, fit_intercept) - - -def _mu(distr, z, eta, fit_intercept): - """The non-linearity (inverse link).""" - if distr in ['softplus', 'gamma', 'neg-binomial']: - mu = softplus(z) - elif distr == 'poisson': - mu = z.copy() - beta0 = (1 - eta) * np.exp(eta) if fit_intercept else 0. - mu[z > eta] = z[z > eta] * np.exp(eta) + beta0 - mu[z <= eta] = np.exp(z[z <= eta]) - elif distr == 'gaussian': - mu = z - elif distr == 'binomial': - mu = expit(z) - elif distr == 'probit': - mu = norm.cdf(z) - return mu - - -def _grad_mu(distr, z, eta): - """Derivative of the non-linearity.""" - if distr in ['softplus', 'gamma', 'neg-binomial']: - grad_mu = expit(z) - elif distr == 'poisson': - grad_mu = z.copy() - grad_mu[z > eta] = np.ones_like(z)[z > eta] * np.exp(eta) - grad_mu[z <= eta] = np.exp(z[z <= eta]) - elif distr == 'gaussian': - grad_mu = np.ones_like(z) - elif distr == 'binomial': - grad_mu = expit(z) * (1 - expit(z)) - elif distr in 'probit': - grad_mu = norm.pdf(z) - return grad_mu - - -def _logL(distr, y, y_hat, z=None, theta=1.0): - """The log likelihood.""" - if distr in ['softplus', 'poisson']: - eps = np.spacing(1) - logL = np.sum(y * np.log(y_hat + eps) - y_hat) - elif distr == 'gaussian': - logL = -0.5 * np.sum((y - y_hat)**2) - elif distr == 'binomial': - - # prevents underflow - if z is not None: - logL = np.sum(y * z - np.log(1 + np.exp(z))) - # for scoring - else: - logL = np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) - elif distr == 'probit': - if z is not None: - pdfz, cdfz = norm.pdf(z), norm.cdf(z) - logL = np.sum(y * _probit_g1(z, pdfz, cdfz) + - (1 - y) * _probit_g2(z, pdfz, cdfz)) - else: - logL = np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) - elif distr == 'gamma': - # see - # https://www.statistics.ma.tum.de/fileadmin/w00bdb/www/czado/lec8.pdf - nu = 1. # shape parameter, exponential for now - logL = np.sum(nu * (-y / y_hat - np.log(y_hat))) - elif distr == 'neg-binomial': - logL = np.sum(loggamma(y + theta) - loggamma(theta) - loggamma(y + 1) + - theta * np.log(theta) + y * np.log(y_hat) - (theta + y) * - np.log(y_hat + theta)) - return logL - - def _penalty(alpha, beta, Tau, group): """The penalty.""" # Combine L1 and L2 penalty terms @@ -205,9 +65,15 @@ def _loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, group, beta, fit_intercept=True): """Define the objective function for elastic net.""" n_samples, n_features = X.shape - z = _z(beta[0], beta[1:], X, fit_intercept) - y_hat = _mu(distr, z, eta, fit_intercept) - L = 1. / n_samples * _logL(distr, y, y_hat, z, theta) + if fit_intercept: + z = distr._z(beta[0], beta[1:], X) + else: + z = distr._z(0., beta, X) + y_hat = distr.mu(z) + if isinstance(distr, (Binomial, Probit)): + L = 1. / n_samples * distr.log_likelihood(y, y_hat, z) + else: + L = 1. / n_samples * distr.log_likelihood(y, y_hat) if fit_intercept: P = _penalty(alpha, beta[1:], Tau, group) else: @@ -220,9 +86,15 @@ def _L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, group, beta, fit_intercept=True): """Define the objective function for elastic net.""" n_samples, n_features = X.shape - z = _z(beta[0], beta[1:], X, fit_intercept) - y_hat = _mu(distr, z, eta, fit_intercept) - L = 1. / n_samples * _logL(distr, y, y_hat, z, theta) + if fit_intercept: + z = distr._z(beta[0], beta[1:], X) + else: + z = distr._z(0., beta, X) + y_hat = distr.mu(z) + if isinstance(distr, (Binomial, Probit)): + L = 1. / n_samples * distr.log_likelihood(y, y_hat, z) + else: + L = 1. 
/ n_samples * distr.log_likelihood(y, y_hat) if fit_intercept: P = 0.5 * (1 - alpha) * _L2penalty(beta[1:], Tau) else: @@ -244,51 +116,16 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, Tau = np.eye(beta.shape[0]) InvCov = np.dot(Tau.T, Tau) - z = _z(beta[0], beta[1:], X, fit_intercept) - mu = _mu(distr, z, eta, fit_intercept) - grad_mu = _grad_mu(distr, z, eta) - - grad_beta0 = 0. - if distr in ['poisson', 'softplus']: - if fit_intercept: - grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) - grad_beta = ((np.dot(grad_mu.T, X) - - np.dot((y * grad_mu / mu).T, X)).T) - - elif distr == 'gaussian': - if fit_intercept: - grad_beta0 = np.sum((mu - y) * grad_mu) - grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T - - elif distr == 'binomial': - if fit_intercept: - grad_beta0 = np.sum(mu - y) - grad_beta = np.dot((mu - y).T, X).T - - elif distr == 'probit': - grad_logl = (y * _probit_g3(z, grad_mu, mu) - - (1 - y) * _probit_g4(z, grad_mu, mu)) - if fit_intercept: - grad_beta0 = -np.sum(grad_logl) - grad_beta = -np.dot(grad_logl.T, X).T - - elif distr == 'gamma': - nu = 1. - grad_logl = (y / mu ** 2 - 1 / mu) * grad_mu - if fit_intercept: - grad_beta0 = -nu * np.sum(grad_logl) - grad_beta = -nu * np.dot(grad_logl.T, X).T - - elif distr == 'neg-binomial': - partial_beta_0 = grad_mu * ((theta + y) / (mu + theta) - y / mu) - - if fit_intercept: - grad_beta0 = np.sum(partial_beta_0) - - grad_beta = np.dot(partial_beta_0.T, X) + if fit_intercept: + beta0_, beta_ = beta[0], beta[1:] + else: + beta0_, beta_ = 0., beta + grad_beta0, grad_beta = distr.grad_log_likelihood(X, y, beta0_, beta_) + grad_beta0 = grad_beta0 if fit_intercept else 0. grad_beta0 *= 1. / n_samples grad_beta *= 1. / n_samples + if fit_intercept: grad_beta += reg_lambda * (1 - alpha) * np.dot(InvCov, beta[1:]) g = np.zeros((n_features + 1, )) @@ -301,105 +138,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, return g -def _gradhess_logloss_1d(distr, xk, y, z, eta, theta, fit_intercept=True): - """ - Compute gradient (1st derivative) - and Hessian (2nd derivative) - of log likelihood for a single coordinate. 
- - Parameters - ---------- - xk: float - (n_samples,) - y: float - (n_samples,) - z: float - (n_samples,) - - Returns - ------- - gk: gradient, float: - (n_features + 1,) - hk: float: - (n_features + 1,) - """ - n_samples = xk.shape[0] - - if distr == 'softplus': - mu = _mu(distr, z, eta, fit_intercept) - s = expit(z) - gk = np.sum(s * xk) - np.sum(y * s / mu * xk) - - grad_s = s * (1 - s) - grad_s_by_mu = grad_s / mu - s / (mu ** 2) - hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) - - elif distr == 'poisson': - mu = _mu(distr, z, eta, fit_intercept) - s = expit(z) - gk = np.sum((mu[z <= eta] - y[z <= eta]) * - xk[z <= eta]) + \ - np.exp(eta) * \ - np.sum((1 - y[z > eta] / mu[z > eta]) * - xk[z > eta]) - hk = np.sum(mu[z <= eta] * xk[z <= eta] ** 2) + \ - np.exp(eta) ** 2 * \ - np.sum(y[z > eta] / (mu[z > eta] ** 2) * - (xk[z > eta] ** 2)) - - elif distr == 'gaussian': - gk = np.sum((z - y) * xk) - hk = np.sum(xk * xk) - - elif distr == 'binomial': - mu = _mu(distr, z, eta, fit_intercept) - gk = np.sum((mu - y) * xk) - hk = np.sum(mu * (1.0 - mu) * xk * xk) - - elif distr == 'probit': - pdfz = norm.pdf(z) - cdfz = norm.cdf(z) - gk = -np.sum((y * _probit_g3(z, pdfz, cdfz) - - (1 - y) * _probit_g4(z, pdfz, cdfz)) * xk) - hk = np.sum((y * _probit_g5(z, pdfz, cdfz) + - (1 - y) * _probit_g6(z, pdfz, cdfz)) * (xk * xk)) - - elif distr == 'neg-binomial': - mu = _mu(distr, z, eta, fit_intercept) - grad_mu = _grad_mu(distr, z, eta) - hess_mu = np.exp(-z) * expit(z)**2 - - gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_2 = grad_mu**2 * \ - ((y + theta) / (mu + theta)**2 - y / mu**2) - partial_beta_0 = -(partial_beta_0_1 + partial_beta_0_2) - gk = np.dot(gradient_beta_j.T, xk) - hk = np.dot(partial_beta_0.T, xk**2) - - elif distr == 'gamma': - raise NotImplementedError('cdfast is not implemented for Gamma ' - 'distribution') - elif distr == 'neg-binomial': - mu = _mu(distr, z, eta, fit_intercept) - grad_mu = _grad_mu(distr, z, eta) - hess_mu = np.exp(-z) * expit(z) ** 2 - - gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_2 = grad_mu ** 2 * \ - ((y + theta) / (mu + theta) ** 2 - y / mu ** 2) - partial_beta_0 = -(partial_beta_0_1 + partial_beta_0_2) - gk = np.dot(gradient_beta_j.T, xk) - hk = np.dot(partial_beta_0.T, xk ** 2) - - elif distr == 'gamma': - raise NotImplementedError('cdfast is not implemented for Gamma ' - 'distribution') - - return 1. / n_samples * gk, 1. / n_samples * hk - - def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, sample=False, theta=1.0, fit_intercept=True): """Simulate target data under a generative model. @@ -427,9 +165,15 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, y: array simulated target data of shape (n_samples,) """ - if distr not in ALLOWED_DISTRS: - raise ValueError("'distr' must be in %s, got %s" - % (repr(ALLOWED_DISTRS), distr)) + err_msg = ('distr must be one of %s or a subclass of BaseDistribution. 
' + 'Got %s' % (', '.join(ALLOWED_DISTRS), distr)) + if isinstance(distr, str) and distr not in ALLOWED_DISTRS: + raise ValueError(err_msg) + if not isinstance(distr, str) and not isinstance(distr, BaseDistribution): + raise TypeError(err_msg) + + glm = GLM(distr=distr) + glm._set_distr() if not isinstance(beta0, float): raise ValueError("'beta0' must be float, got %s" % type(beta0)) @@ -437,30 +181,10 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, if beta.ndim != 1: raise ValueError("'beta' must be 1D, got %dD" % beta.ndim) - if not sample: - return _lmb(distr, beta0, beta, X, eta, fit_intercept=fit_intercept) - - _random_state = check_random_state(random_state) - if distr == 'softplus' or distr == 'poisson': - y = _random_state.poisson( - _lmb(distr, beta0, beta, X, eta, - fit_intercept=fit_intercept)) - if distr == 'gaussian': - y = _random_state.normal( - _lmb(distr, beta0, beta, X, eta, - fit_intercept=fit_intercept)) - if distr == 'binomial' or distr == 'probit': - y = _random_state.binomial( - 1, - _lmb(distr, beta0, beta, X, eta, - fit_intercept=fit_intercept)) - if distr == 'gamma': - mu = _lmb(distr, beta0, beta, X, eta, fit_intercept=fit_intercept) - y = np.exp(mu) - if distr == 'neg-binomial': - mu = _lmb(distr, beta0, beta, X, eta, fit_intercept=fit_intercept) - p = theta / (theta + mu) # Probability of success - y = _random_state.negative_binomial(theta, p) + if not fit_intercept: + beta0 = 0. + + y = glm.distr_.simulate(beta0, beta, X, random_state, sample) return y @@ -495,8 +219,8 @@ class GLM(BaseEstimator): Parameters ---------- - distr: str - distribution family can be one of the following + distr: str or object subclassed from BaseDistribution + if str, distribution family can be one of the following 'gaussian' | 'binomial' | 'poisson' | 'softplus' | 'probit' | 'gamma' | 'neg-binomial' default: 'poisson'. @@ -604,7 +328,6 @@ def __init__(self, distr='poisson', alpha=0.5, _check_params(distr=distr, max_iter=max_iter, fit_intercept=fit_intercept) - self.distr = distr self.alpha = alpha self.reg_lambda = reg_lambda @@ -623,9 +346,27 @@ def __init__(self, distr='poisson', alpha=0.5, self.theta = theta set_log_level(verbose) + def _set_distr(self): + distr_lookup = { + 'gaussian': Gaussian(), + 'poisson': Poisson(), + 'softplus': PoissonSoftplus(), + 'neg-binomial': NegBinomialSoftplus(), + 'binomial': Binomial(), + 'probit': Probit(), + 'gamma': GammaSoftplus() + } + self.distr_ = distr_lookup[self.distr] + if isinstance(self.distr_, Poisson): + self.distr_.eta = self.eta + if isinstance(self.distr_, NegBinomialSoftplus): + self.distr_.theta = self.theta + def _set_cv(cv, estimator=None, X=None, y=None): - """Set the default CV depending on whether clf - is classifier/regressor.""" + """Set the default CV. + + Depends on whether clf is classifier/regressor. 
+ """ # Detect whether classification or regression if estimator in ['classifier', 'regressor']: est_is_classifier = estimator == 'classifier' @@ -757,7 +498,10 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): """ n_samples, n_features = X.shape reg_scale = rl * (1 - self.alpha) - z = _z(beta[0], beta[1:], X, fit_intercept) + if fit_intercept: + z = self.distr_._z(beta[0], beta[1:], X) + else: + z = self.distr_._z(0., beta, X) for k in range(0, n_features + int(fit_intercept)): # Only update parameters in active set if ActiveSet[k] != 0: @@ -770,8 +514,11 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): xk = X[:, k] # Calculate grad and hess of log likelihood term - gk, hk = _gradhess_logloss_1d(self.distr, xk, y, z, self.eta, - self.theta, fit_intercept) + # gk, hk = _gradhess_logloss_1d(self.distr, xk, y, z, self.eta, + # self.theta, fit_intercept) + gk, hk = self.distr_.gradhess_log_likelihood_1d(xk, y, z) + gk = 1. / n_samples * gk + hk = 1. / n_samples * hk # Add grad and hess of regularization term if self.Tau is None: @@ -795,6 +542,7 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): # Update parameters, z beta[k], z = beta[k] - update, z - update * xk + return beta def fit(self, X, y): @@ -814,7 +562,7 @@ def fit(self, X, y): The fitted model. """ X, y = check_X_y(X, y, accept_sparse=False) - + self._set_distr() self.beta0_ = None self.beta_ = None self.ynull_ = None @@ -887,7 +635,7 @@ def fit(self, X, y): self.n_iter_ += 1 beta_old = beta.copy() if self.solver == 'batch-gradient': - grad = _grad_L2loss(self.distr, + grad = _grad_L2loss(self.distr_, alpha, self.Tau, reg_lambda, X, y, self.eta, self.theta, beta, self.fit_intercept) @@ -998,10 +746,10 @@ def predict(self, X): raise ValueError('Input data should be of type ndarray (got %s).' % type(X)) - yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, - fit_intercept=True) + z = self.distr_._z(self.beta0_, self.beta_, X) + yhat = self.distr_.mu(z) - if self.distr == 'binomial': + if isinstance(self.distr_, Binomial): yhat = (yhat > 0.5).astype(int) yhat = np.asarray(yhat) return yhat @@ -1020,7 +768,7 @@ def _predict_proba(self, X): The predicted targets of shape (n_samples,). """ - if self.distr not in ['binomial', 'probit']: + if not isinstance(self.distr_, (Binomial, Probit)): raise ValueError('This is only applicable for \ the binomial distribution.') @@ -1028,8 +776,8 @@ def _predict_proba(self, X): raise ValueError('Input data should be of type ndarray (got %s).' 
% type(X)) - yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, - fit_intercept=True) + z = self.distr_._z(self.beta0_, self.beta_, X) + yhat = self.distr_.mu(z) yhat = np.asarray(yhat) return yhat @@ -1055,7 +803,7 @@ def predict_proba(self, X): X = check_array(X, accept_sparse=False) check_is_fitted(self, 'is_fitted_') - if self.distr in ['binomial', 'probit']: + if isinstance(self.distr_, (Binomial, Probit)): return self._predict_proba(X) else: warnings.warn('This is only applicable for \ @@ -1110,14 +858,14 @@ def score(self, X, y): # For f1 as well if self.score_metric in ['accuracy']: - if self.distr not in ['binomial', 'multinomial']: + if not isinstance(self.distr_, (Binomial, Probit)): raise ValueError(self.score_metric + ' is only defined for binomial ' + 'or multinomial distributions') y = np.asarray(y).ravel() - if self.distr in ['binomial', 'probit'] and \ + if isinstance(self.distr_, (Binomial, Probit)) and \ self.score_metric != 'accuracy': yhat = self.predict_proba(X) else: @@ -1125,10 +873,10 @@ def score(self, X, y): # Check whether we have a list of estimators or a single estimator if self.score_metric == 'deviance': - return metrics.deviance(y, yhat, self.distr, self.theta) + return metrics.deviance(y, yhat, self.distr_, self.theta) elif self.score_metric == 'pseudo_R2': return metrics.pseudo_R2(X, y, yhat, self.ynull_, - self.distr, self.theta) + self.distr_, self.theta) if self.score_metric == 'accuracy': return metrics.accuracy(y, yhat) diff --git a/pyglmnet/utils.py b/pyglmnet/utils.py index ce8bc864..6f9046c0 100644 --- a/pyglmnet/utils.py +++ b/pyglmnet/utils.py @@ -1,11 +1,8 @@ -""" -A few miscellaneous helper functions for pyglmnet.py -""" +"""A few miscellaneous helper functions for pyglmnet.""" import numpy as np from copy import copy import logging -from scipy.special import log1p logger = logging.getLogger('pyglmnet') @@ -21,16 +18,6 @@ has_tqdm = False -def softplus(z): - """Numerically stable version of log(1 + exp(z)).""" - # see stabilizing softplus: http://sachinashanbhag.blogspot.com/2014/05/numerically-approximation-of-log-1-expy.html # noqa - mu = z.copy() - mu[z > 35] = z[z > 35] - mu[z < -10] = np.exp(z[z < -10]) - mu[(z >= -10) & (z <= 35)] = log1p(np.exp(z[(z >= -10) & (z <= 35)])) - return mu - - def softmax(w): """Softmax function of given array of number w. @@ -113,9 +100,10 @@ def tikhonov_from_prior(prior_cov, n_samples, threshold=0.0001): def _check_params(distr, max_iter, fit_intercept): from .pyglmnet import ALLOWED_DISTRS + err_msg = ('distr must be one of %s or a subclass of BaseDistribution. 
' + 'Got %s' % (', '.join(ALLOWED_DISTRS), distr)) if distr not in ALLOWED_DISTRS: - raise ValueError('distr must be one of %s, Got ' - '%s' % (', '.join(ALLOWED_DISTRS), distr)) + raise ValueError(err_msg) if not isinstance(max_iter, int): raise ValueError('max_iter must be of type int') diff --git a/tests/test_distributions.py b/tests/test_distributions.py new file mode 100644 index 00000000..ef4fe3c4 --- /dev/null +++ b/tests/test_distributions.py @@ -0,0 +1,71 @@ +"""Tests for distributions.""" + +from functools import partial + +import pytest + +import numpy as np +from numpy.testing import assert_allclose + +import scipy.sparse as sps +from scipy.optimize import approx_fprime +from sklearn.preprocessing import StandardScaler + +from pyglmnet import ALLOWED_DISTRS, simulate_glm, GLM, _grad_L2loss, _L2loss +from pyglmnet.distributions import BaseDistribution + + +def test_base_distribution(): + """Test the base distribution.""" + class TestDistr(BaseDistribution): + def mu(): + pass + + def grad_mu(): + pass + + class TestDistr2(TestDistr): + def log_likelihood(): + pass + + with pytest.raises(TypeError, match='abstract methods log_likelihood'): + distr = TestDistr() + + msg = 'Gradients of log likelihood are not specified' + with pytest.raises(NotImplementedError, match=msg): + distr = TestDistr2() + distr.grad_log_likelihood() + + msg = 'distr must be one of' + with pytest.raises(ValueError, match=msg): + GLM(distr='blah') + + +@pytest.mark.parametrize("distr", ALLOWED_DISTRS) +def test_gradients(distr): + """Test gradient accuracy.""" + # data + scaler = StandardScaler() + n_samples, n_features = 1000, 100 + X = np.random.normal(0.0, 1.0, [n_samples, n_features]) + X = scaler.fit_transform(X) + + density = 0.1 + beta_ = np.zeros(n_features + 1) + beta_[0] = np.random.rand() + beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0] + + reg_lambda = 0.1 + + glm = GLM(distr=distr, reg_lambda=reg_lambda) + glm._set_distr() + y = simulate_glm(glm.distr, beta_[0], beta_[1:], X) + + func = partial(_L2loss, glm.distr_, glm.alpha, + glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group) + grad = partial(_grad_L2loss, glm.distr_, glm.alpha, glm.Tau, + reg_lambda, X, y, + glm.eta, glm.theta) + approx_grad = approx_fprime(beta_, func, 1.5e-8) + analytical_grad = grad(beta_) + assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3) diff --git a/tests/test_pyglmnet.py b/tests/test_pyglmnet.py index b9401a63..7f10e689 100644 --- a/tests/test_pyglmnet.py +++ b/tests/test_pyglmnet.py @@ -3,14 +3,12 @@ import subprocess import os.path as op -from functools import partial import pytest import numpy as np from numpy.testing import assert_allclose, assert_array_equal from pytest import raises import scipy.sparse as sps -from scipy.optimize import approx_fprime from sklearn.datasets import make_regression from sklearn.preprocessing import StandardScaler @@ -18,8 +16,8 @@ from sklearn.linear_model import ElasticNet from sklearn.utils.estimator_checks import check_estimator -from pyglmnet import (GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm, - _gradhess_logloss_1d, _loss, datasets, ALLOWED_DISTRS) +from pyglmnet import (GLM, GLMCV, simulate_glm, + _loss, datasets, ALLOWED_DISTRS) matplotlib.use('agg') @@ -29,35 +27,6 @@ def test_glm_estimator(): check_estimator(GLM) -@pytest.mark.parametrize("distr", ALLOWED_DISTRS) -def test_gradients(distr): - """Test gradient accuracy.""" - # data - scaler = StandardScaler() - n_samples, n_features = 1000, 100 - X = np.random.normal(0.0, 
1.0, [n_samples, n_features]) - X = scaler.fit_transform(X) - - density = 0.1 - beta_ = np.zeros(n_features + 1) - beta_[0] = np.random.rand() - beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0] - - reg_lambda = 0.1 - - glm = GLM(distr=distr, reg_lambda=reg_lambda) - y = simulate_glm(glm.distr, beta_[0], beta_[1:], X) - - func = partial(_L2loss, distr, glm.alpha, - glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group) - grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau, - reg_lambda, X, y, - glm.eta, glm.theta) - approx_grad = approx_fprime(beta_, func, 1.5e-8) - analytical_grad = grad(beta_) - assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3) - - def test_tikhonov(): """Tikhonov regularization test.""" n_samples, n_features = 100, 10 @@ -203,10 +172,13 @@ def test_glmnet(distr, reg_lambda, fit_intercept, solver): group = None Tau = None + glm_ = GLM(distr=distr) + glm_._set_distr() + def callback(beta): Tau = None loss_trace.append( - _loss(distr, alpha, Tau, reg_lambda, + _loss(glm_.distr_, alpha, Tau, reg_lambda, X_train, y_train, eta, theta, group, beta, fit_intercept=fit_intercept)) @@ -225,7 +197,7 @@ def callback(beta): # true loss and beta should be recovered when reg_lambda == 0 if reg_lambda == 0.: # verify loss at convergence = loss when beta=beta_ - l_true = _loss(distr, alpha, Tau, reg_lambda, + l_true = _loss(glm_.distr_, alpha, Tau, reg_lambda, X_train, y_train, eta, theta, group, np.concatenate(([beta0], beta))) assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5) @@ -316,7 +288,7 @@ def test_compare_sklearn(solver): def rmse(a, b): return np.sqrt(np.mean((a - b) ** 2)) - X, Y, coef_ = make_regression( + X, y, coef_ = make_regression( n_samples=1000, n_features=500, noise=0.1, n_informative=10, coef=True, random_state=42) @@ -325,18 +297,18 @@ def rmse(a, b): l1_ratio = 0.5 clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5) - clf.fit(X, Y) + clf.fit(X, y) glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha, solver=solver, tol=1e-6, max_iter=500) - glm.fit(X, Y) + glm.fit(X, y) y_sk = clf.predict(X) y_pg = glm.predict(X) - assert abs(rmse(Y, y_sk) - rmse(Y, y_pg)) < 0.5 + assert abs(rmse(y, y_sk) - rmse(y, y_pg)) < 0.5 glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha, solver=solver, tol=1e-6, max_iter=5, fit_intercept=False) - glm.fit(X, Y) + glm.fit(X, y) assert glm.beta0_ == 0. glm.predict(X) @@ -356,6 +328,7 @@ def test_cdfast(distr): return glm = GLM(distr, solver='cdfast') + glm._set_distr() np.random.seed(glm.random_state) @@ -374,7 +347,9 @@ def test_cdfast(distr): z = beta_[0] + np.dot(X, beta_[1:]) k = 1 xk = X[:, k - 1] - gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, glm.eta, glm.theta) + gk, hk = glm.distr_.gradhess_log_likelihood_1d(xk, y, z) + gk = 1. / n_samples * gk + hk = 1. 
/ n_samples * hk # test grad and hess if distr != 'multinomial': @@ -438,7 +413,6 @@ def test_random_state_consistency(): @pytest.mark.parametrize("distr", ALLOWED_DISTRS) def test_simulate_glm(distr): """Test that every generative model can be simulated from.""" - random_state = 1 state = np.random.RandomState(random_state) n_samples, n_features = 10, 3 @@ -458,13 +432,12 @@ def test_simulate_glm(distr): # If the distribution name is garbage it will fail distr = 'multivariate_gaussian_poisson' - with pytest.raises(ValueError, match="'distr' must be in"): + with pytest.raises(ValueError): simulate_glm(distr, 1.0, 1.0, np.array([[1.0]])) def test_api_input(): """Test that the input value of y can be of different types.""" - random_state = 1 state = np.random.RandomState(random_state) n_samples, n_features = 100, 5
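Taken together, the distribution objects introduced in this patch can also be exercised on their own, outside of GLM. A minimal sketch, assuming the pyglmnet.distributions layout above, that mirrors the spirit of the new gradient test but checks a distribution's grad_log_likelihood directly against a finite-difference estimate of its (negative) log likelihood:

import numpy as np
from scipy.optimize import approx_fprime
from pyglmnet.distributions import Gaussian

rng = np.random.RandomState(42)
n_samples, n_features = 500, 10
X = rng.normal(size=(n_samples, n_features))
beta_true = rng.normal(scale=0.5, size=n_features)

distr = Gaussian()
y = distr.simulate(0.3, beta_true, X, random_state=42, sample=True)


def neg_log_likelihood(params):
    """Negative log likelihood as a function of [beta0, beta]."""
    z = distr._z(params[0], params[1:], X)
    return -distr.log_likelihood(y, distr.mu(z))


beta = np.zeros(n_features + 1)
# grad_log_likelihood returns the gradient of the *negative* log likelihood,
# which is the quantity _grad_L2loss builds the loss gradient from
grad0, grad = distr.grad_log_likelihood(X, y, beta[0], beta[1:])
numeric = approx_fprime(beta, neg_log_likelihood, 1.5e-8)
assert np.allclose(numeric, np.r_[grad0, grad], rtol=1e-4, atol=1e-3)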