Add machine learning method IsotonicRegression #87

Merged · 4 commits · Apr 22, 2024
3 changes: 2 additions & 1 deletion docs/source/autoapi/skmodel.rst
@@ -11,6 +11,7 @@ Classes
skscope.skmodel.NonlinearSelection
skscope.skmodel.RobustRegression
skscope.skmodel.MultivariateFailure
skscope.skmodel.IsotonicRegression

.. autoapimodule:: skscope.skmodel
-   :members: PortfolioSelection, NonlinearSelection, RobustRegression, MultivariateFailure
+   :members: PortfolioSelection, NonlinearSelection, RobustRegression, MultivariateFailure, IsotonicRegression
16 changes: 16 additions & 0 deletions pytest/test_skmodel.py
@@ -7,6 +7,7 @@
    NonlinearSelection,
    RobustRegression,
    MultivariateFailure,
    IsotonicRegression,
)
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
@@ -198,3 +199,18 @@ def make_Clayton2_data(n, theta=15, lambda1=1, lambda2=1, c1=1, c2=1):


test_MultivariateFailure()

def test_IsotonicRegression():
    # check_estimator(IsotonicRegression())
    np.random.seed(0)
    n = 200
    X = np.arange(n) + 1
    y = 2 * np.log1p(np.arange(n)) + np.random.normal(size=n)
    model = IsotonicRegression(sparsity=10)
    model = model.fit(X, y)
    score = model.score(X, y)
    assert score >= 0.8
    X_new = model.transform(X)
    assert X_new.shape == y.shape
    print("IsotonicRegression passed test!")


test_IsotonicRegression()
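The test exercises the core trick behind the new estimator: a non-decreasing fit can be written as the cumulative sum of the absolute values of a parameter vector, so a sparsity constraint on that vector caps the number of jump points. A minimal standalone sketch of the reparameterization (plain NumPy, independent of skscope; the names params and step_fit are illustrative):

import numpy as np

# A sparse parameter vector: nonzero entries mark jump points.
params = np.zeros(10)
params[2], params[6] = 1.5, -0.5  # the sign is irrelevant after abs()

# Cumulative sum of absolute values => a non-decreasing step function.
step_fit = np.cumsum(np.abs(params))
print(step_fit)  # [0.  0.  1.5 1.5 1.5 1.5 2.  2.  2.  2. ]
assert np.all(np.diff(step_fit) >= 0)  # monotone by construction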
155 changes: 155 additions & 0 deletions skscope/skmodel.py
@@ -4,15 +4,18 @@
from skscope import ScopeSolver
from sklearn.base import BaseEstimator
from sklearn.covariance import LedoitWolf
from sklearn.metrics import r2_score
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.utils.validation import (
    check_array,
    check_random_state,
    check_X_y,
    check_is_fitted,
    check_consistent_length,
)
from sklearn.utils._param_validation import Hidden, Interval, StrOptions
from numbers import Integral, Real
from scipy import interpolate


def check_data(X, y=None, sample_weight=None):
@@ -543,3 +546,155 @@ def score(self, X, y, delta, sample_weight=None):
        )
        score = np.mean(tmp * delta)
        return score

class IsotonicRegression(BaseEstimator):
    r"""
    Isotonic regression with a sparsity constraint.

    The fitted values are parameterized as the cumulative sum of the
    absolute values of a parameter vector with one entry per training
    sample, so the fit is a non-decreasing step function whose number
    of jump points is bounded by the sparsity level.

    Parameters
    ----------
    sparsity : int, default=5
        The maximum number of jump points of the fitted step function,
        i.e., the number of nonzero entries in the parameter vector.
    """

    _parameter_constraints: dict = {
        "sparsity": [Interval(Integral, 1, None, closed="left")],
    }

    def __init__(
        self,
        sparsity=5,
    ):
        self.sparsity = sparsity

    def _check_input_data_shape(self, X):
        if not (X.ndim == 1 or (X.ndim == 2 and X.shape[1] == 1)):
            msg = (
                "Isotonic regression input X should be a 1d array or "
                "2d array with 1 feature"
            )
            raise ValueError(msg)

    def fit(
        self,
        X,
        y,
        sample_weight=None,
    ):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples,) or (n_samples, 1)
            Training data.

        y : array-like of shape (n_samples,)
            Training target.

        sample_weight : array-like of shape (n_samples,), default=None
            Weights. If set to None, all weights will be set to 1 (equal
            weights).

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        self._validate_params()
        X = check_array(X, ensure_2d=False)
        self._check_input_data_shape(X)
        X = X.reshape(-1)
        y = check_array(y, ensure_2d=False, input_name="y")
        check_consistent_length(X, y, sample_weight)
        n = len(y)
        if sample_weight is None:
            sample_weight = np.ones(n)
        else:
            sample_weight = np.asarray(sample_weight, dtype=float)

        # cumsum(|params|) is non-decreasing in sample order, so sort by X.
        order = np.argsort(X)
        X, y, sample_weight = X[order], y[order], sample_weight[order]

        def isotonic_loss(params):
            # Weighted squared error against a non-decreasing step function,
            # parameterized as the cumulative sum of |params|.
            return jnp.sum(
                sample_weight * jnp.square(y - jnp.cumsum(jnp.abs(params)))
            )

        solver = ScopeSolver(n, sparsity=self.sparsity)
        self.params_ = solver.solve(isotonic_loss)
        y_pred = np.cumsum(np.abs(self.params_))
        self.f_ = interpolate.interp1d(X, y_pred, kind="linear")
        return self

    def transform(
        self,
        X,
    ):
        """Transform new data by linear interpolation.

        Parameters
        ----------
        X : array-like of shape (n_samples,) or (n_samples, 1)
            Data to transform.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            The transformed data.
        """
        check_is_fitted(self)
        X = check_array(X, ensure_2d=False)
        self._check_input_data_shape(X)
        X = X.reshape(-1)
        y_pred = self.f_(X)
        return y_pred

    def score(self, X, y, sample_weight=None):
        """Return the coefficient of determination of the prediction.

        The coefficient of determination :math:`R^2` is defined as
        :math:`(1 - \\frac{u}{v})`, where :math:`u` is the residual
        sum of squares ``((y_true - y_pred) ** 2).sum()`` and :math:`v`
        is the total sum of squares ``((y_true - y_true.mean()) ** 2).sum()``.
        The best possible score is 1.0 and it can be negative (because the
        model can be arbitrarily worse). A constant model that always predicts
        the expected value of `y`, disregarding the input features, would get
        an :math:`R^2` score of 0.0.

        Parameters
        ----------
        X : array-like of shape (n_samples,) or (n_samples, 1)
            Test samples.

        y : array-like of shape (n_samples,)
            True values for `X`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            :math:`R^2` of ``self.predict(X)`` w.r.t. `y`.
        """
        check_is_fitted(self)

        y_pred = self.predict(X)
        score = r2_score(y, y_pred, sample_weight=sample_weight)
        return score

    def predict(self, X):
        """Predict new data by linear interpolation.

        Parameters
        ----------
        X : array-like of shape (n_samples,) or (n_samples, 1)
            Data to predict.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.
        """
        y_pred = self.transform(X)
        return y_pred
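For reference, a usage sketch of the new estimator, mirroring the test above (assuming skscope is installed with this change applied; the data is illustrative):

import numpy as np
from skscope.skmodel import IsotonicRegression

rng = np.random.default_rng(0)
X = np.arange(1, 201)  # 1d inputs
y = 2 * np.log1p(np.arange(200)) + rng.normal(size=200)

# Allow at most 10 jump points in the fitted non-decreasing step function.
model = IsotonicRegression(sparsity=10).fit(X, y)
print(model.score(X, y))       # R^2 on the training data
y_new = model.predict(X[:5])   # interpolated predictions for in-range inputs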