
Commit e8dc899

ENH: line search for step size
1 parent 3bf03d9 commit e8dc899

4 files changed: +32 -77 lines
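For orientation: "line search for step size" here means that, instead of fixing the learning rate up front, the step is shrunk until a sufficient-decrease condition on the loss holds. Below is a minimal backtracking (Armijo) sketch of the idea, with hypothetical loss/grad_loss callables; the commit itself delegates this to scipy.optimize.line_search rather than hand-rolling the loop.

# Minimal Armijo backtracking sketch (illustration only; `loss`, `grad_loss`
# and `beta` are hypothetical placeholders, not pyglmnet internals).
import numpy as np

def backtracking_step(loss, grad_loss, beta, step=1.0, shrink=0.5, c1=1e-4):
    """Shrink `step` until the Armijo sufficient-decrease condition holds."""
    g = grad_loss(beta)
    f0 = loss(beta)
    # descent direction is -g; accept the step once
    # loss(beta - step * g) <= f0 - c1 * step * ||g||^2
    while loss(beta - step * g) > f0 - c1 * step * np.dot(g, g):
        step *= shrink
    return step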


examples/plot_community_crime.py (+1 -3)

@@ -43,16 +43,14 @@
 # Fit a gaussian distributed GLM with elastic net regularization
 
 # use the default value for reg_lambda
-glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2',
-            learning_rate=1e-2)
+glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')
 
 # fit model
 glm.fit(X_train, y_train)
 
 # score the test set prediction
 y_test_hat = glm.predict(X_test)
 print ("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))
-sdfd
 ########################################################
 # Now use plain grid search cv to compare

pyglmnet/pyglmnet.py (+31 -32)

@@ -3,10 +3,13 @@
 from copy import deepcopy
 
 import numpy as np
+from scipy import optimize
 from scipy.special import expit
 from scipy.stats import norm
 
-from .utils import logger, set_log_level, power_iteration
+from functools import partial
+
+from .utils import logger, set_log_level
 from .base import BaseEstimator, is_classifier, check_version
 
 
@@ -192,17 +195,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, beta):
     return g
 
 
-def _learning_rate(distr, X, reg_lambda, alpha):
-    if distr == 'gaussian':
-        s = power_iteration(X.T.dot(X)) + reg_lambda * (1 - alpha)
-        return 0.99 / s
-    elif distr == 'binomial':
-        s = (np.linalg.norm(X.T.dot(X)) ** 2) / 4
-        return 0.99 / s
-    else:
-        return 1e-4
-
-
 def _gradhess_logloss_1d(distr, xk, y, z, eta):
     """
     Compute gradient (1st derivative)
@@ -380,8 +372,8 @@ class GLM(BaseEstimator):
         'cdfast' (Newton coordinate gradient descent).
         default: 'batch-gradient'
     learning_rate : float | 'auto'
-        learning rate for gradient descent. If "auto", it is 0.95 / L
-        where the differentiable part of the loss function is L-smooth.
+        learning rate for gradient descent. If "auto", backtracking line
+        search is performed using scipy.optimize.line_search.
         default: "auto"
     max_iter : int
         maximum iterations for the model.
@@ -627,12 +619,6 @@ def fit(self, X, y):
         self : instance of GLM
             The fitted model.
         """
-        if self.learning_rate == 'auto':
-            step_size = _learning_rate(self.distr, X,
-                                       self.reg_lambda, self.alpha)
-            print('Step size calculated as %f' % step_size)
-        else:
-            step_size = self.learning_rate
         np.random.RandomState(self.random_state)
 
         # checks for group
@@ -675,13 +661,27 @@ def fit(self, X, y):
 
         # Initialize loss accumulators
         L, DL = list(), list()
+        # Compute and save loss
+        L.append(_loss(self.distr, alpha, self.Tau, reg_lambda,
+                       X, y, self.eta, self.group, beta))
         for t in range(0, self.max_iter):
             if self.solver == 'batch-gradient':
                 grad = _grad_L2loss(self.distr,
                                     alpha, self.Tau,
                                     reg_lambda, X, y, self.eta,
                                     beta)
 
+                if self.learning_rate == 'auto':
+                    func = partial(_loss, self.distr, alpha, self.Tau,
+                                   reg_lambda, X, y, self.eta, self.group)
+                    fprime = partial(_grad_L2loss, self.distr, alpha, self.Tau,
+                                     reg_lambda, X, y, self.eta)
+                    step_size, _, _, _, _, _ = optimize.linesearch.line_search(
+                        func, fprime, beta, -grad, grad, L, c1=1e-4)
+                    if step_size is None:
+                        step_size = 1e-4
+                else:
+                    step_size = self.learning_rate
                 beta = beta - step_size * grad
             elif self.solver == 'cdfast':
                 beta, z = \
@@ -698,16 +698,15 @@ def fit(self, X, y):
             # Compute and save loss
             L.append(_loss(self.distr, alpha, self.Tau, reg_lambda,
                            X, y, self.eta, self.group, beta))
-            print(L[-1])
-            # if t > 1:
-            #     DL.append(L[-1] - L[-2])
-            #     if np.abs(DL[-1] / L[-1]) < tol:
-            #         msg = ('\tConverged. Loss function:'
-            #                ' {0:.2f}').format(L[-1])
-            #         logger.info(msg)
-            #         msg = ('\tdL/L: {0:.6f}\n'.format(DL[-1] / L[-1]))
-            #         logger.info(msg)
-            #         break
+            if t > 1:
+                DL.append(L[-1] - L[-2])
+                if np.abs(DL[-1] / L[-1]) < tol:
+                    msg = ('\tConverged. Loss function:'
+                           ' {0:.2f}').format(L[-1])
+                    logger.info(msg)
+                    msg = ('\tdL/L: {0:.6f}\n'.format(DL[-1] / L[-1]))
+                    logger.info(msg)
+                    break
 
         # Update the estimated variables
         self.beta0_ = beta[0]
@@ -906,8 +905,8 @@ class GLMCV(object):
         'cdfast' (Newton coordinate gradient descent).
         default: 'batch-gradient'
     learning_rate : float | 'auto'
-        learning rate for gradient descent. If "auto", it is 0.95 / L
-        where the differentiable part of the loss function is L-smooth.
+        learning rate for gradient descent. If "auto", backtracking line
+        search is performed using scipy.optimize.line_search.
         default: "auto"
     max_iter : int
         maximum iterations for the model.
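The updated docstrings point at scipy.optimize.line_search (the same routine reached via optimize.linesearch.line_search above). Its first return value is the accepted step size, and it is None when no point satisfying the Wolfe conditions was found, which is why the code above falls back to 1e-4. A standalone sketch of that call pattern on a toy quadratic loss (a stand-in for pyglmnet's _loss/_grad_L2loss, not the library code itself):

# Sketch of the scipy.optimize.line_search call pattern; the quadratic
# objective is a toy stand-in for pyglmnet's penalized loss.
import numpy as np
from scipy import optimize

def f(beta):
    return 0.5 * np.dot(beta, beta)   # toy convex loss

def fprime(beta):
    return beta                       # its gradient

beta = np.array([3.0, -2.0])
grad = fprime(beta)

# line_search returns a 6-tuple; the first element is the step size,
# or None if the Wolfe conditions could not be satisfied.
result = optimize.line_search(f, fprime, beta, -grad, gfk=grad, c1=1e-4)
step_size = result[0] if result[0] is not None else 1e-4
beta = beta - step_size * grad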

pyglmnet/utils.py (-33)

@@ -4,7 +4,6 @@
 
 import numpy as np
 from copy import copy
-from scipy import linalg
 import logging
 
 
@@ -91,38 +90,6 @@ def tikhonov_from_prior(prior_cov, n_samples, threshold=0.0001):
     return Tau
 
 
-def power_iteration(A, max_iter=1000, tol=1e-7, random_state=None):
-    """Estimate dominant eigenvalue of matrix A.
-    Parameters
-    ----------
-    A : array, shape (n_points, n_points)
-        The matrix whose largest eigenvalue is to be found.
-    b_hat_0 : array, shape (n_points, )
-        init vector
-    Returns
-    -------
-    mu_hat : float
-        The largest eigenvalue
-    """
-    rng = np.random.RandomState(random_state)
-    b_hat = rng.rand((A.shape[1]))
-
-    Ab_hat = A.dot(b_hat)
-    mu_hat = np.nan
-    for ii in range(max_iter):
-        b_hat = A.dot(b_hat)
-        b_hat /= linalg.norm(b_hat)
-        Ab_hat = A.dot(b_hat)
-        mu_old = mu_hat
-        mu_hat = np.dot(b_hat, Ab_hat)
-        # note, we might exit the loop before b_hat converges
-        # since we care only about mu_hat converging
-        if (mu_hat - mu_old) / mu_old < tol:
-            break
-
-    return mu_hat
-
-
 def set_log_level(verbose):
     """Convenience function for setting the log level.
 
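The removed power_iteration helper estimated the dominant eigenvalue of X.T.dot(X), which the old 'auto' mode converted into a fixed 0.99 / L step; with the line search above, that Lipschitz constant is no longer needed. For reference, the same quantity can be read off numpy's dense eigensolver (a sketch with a hypothetical design matrix, ignoring the reg_lambda * (1 - alpha) term the old code added):

# What power_iteration used to estimate, computed directly with numpy.
# X here is a hypothetical design matrix, not one from the package.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
A = X.T.dot(X)                            # symmetric positive semi-definite
lipschitz = np.linalg.eigvalsh(A).max()   # dominant eigenvalue
old_auto_step = 0.99 / lipschitz          # the heuristic this commit retires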

tests/test_pyglmnet.py (-9)

@@ -15,7 +15,6 @@
 
 from pyglmnet import (GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm,
                       _gradhess_logloss_1d, _loss, datasets)
-from pyglmnet.utils import power_iteration
 
 
 def test_gradients():
@@ -313,11 +312,3 @@ def test_cdfast():
 def test_fetch_datasets():
     """Test fetching datasets."""
     datasets.fetch_community_crime_data('/tmp/glm-tools')
-
-
-def test_power_iterations():
-    """Test power iteration."""
-    A = np.diag((1, 2, 3))
-    mu, b = np.linalg.eig(A)
-    mu_hat = power_iteration(A)
-    assert_allclose(mu_hat, mu.max())
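With test_power_iterations removed, the 'auto' learning-rate path is only exercised indirectly by the remaining tests. A hypothetical sanity check, not part of this commit, could fit a gaussian GLM with learning_rate='auto' on simulated data and assert rough coefficient recovery; it assumes the simulate_glm(distr, beta0, beta, X) signature already imported at the top of this file:

# Hypothetical convergence check for the line-search path (not in this commit).
import numpy as np

def test_auto_learning_rate_converges():
    """Fit a gaussian GLM with learning_rate='auto' on simulated data."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 10
    beta0, beta = 0.5, rng.randn(n_features)
    X = rng.randn(n_samples, n_features)
    y = simulate_glm('gaussian', beta0, beta, X)
    glm = GLM(distr='gaussian', learning_rate='auto', max_iter=500)
    glm.fit(X, y)
    # coefficients should be recovered to within a loose tolerance
    assert np.linalg.norm(glm.beta_ - beta) / np.linalg.norm(beta) < 0.2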
