@@ -3,10 +3,13 @@
 from copy import deepcopy

 import numpy as np
+from scipy import optimize
 from scipy.special import expit
 from scipy.stats import norm

-from .utils import logger, set_log_level, power_iteration
+from functools import partial
+
+from .utils import logger, set_log_level
 from .base import BaseEstimator, is_classifier, check_version


@@ -192,17 +195,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, beta):
     return g


-def _learning_rate(distr, X, reg_lambda, alpha):
-    if distr == 'gaussian':
-        s = power_iteration(X.T.dot(X)) + reg_lambda * (1 - alpha)
-        return 0.99 / s
-    elif distr == 'binomial':
-        s = (np.linalg.norm(X.T.dot(X)) ** 2) / 4
-        return 0.99 / s
-    else:
-        return 1e-4
-
-
 def _gradhess_logloss_1d(distr, xk, y, z, eta):
     """
     Compute gradient (1st derivative)
@@ -380,8 +372,8 @@ class GLM(BaseEstimator):
         'cdfast' (Newton coordinate gradient descent).
         default: 'batch-gradient'
     learning_rate : float | 'auto'
-        learning rate for gradient descent. If "auto", it is 0.95 / L
-        where the differentiable part of the loss function is L-smooth.
+        learning rate for gradient descent. If "auto", the step size is
+        chosen by a line search using scipy.optimize.line_search.
         default: "auto"
     max_iter : int
         maximum iterations for the model.
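As context for the new 'auto' behaviour, here is a minimal, self-contained sketch of how scipy.optimize.line_search picks a step size along a descent direction. The quadratic objective below is illustrative only and is not part of this patch:

    import numpy as np
    from scipy import optimize

    # Toy smooth objective f(x) = 0.5 * ||x||^2 and its gradient.
    def f(x):
        return 0.5 * np.dot(x, x)

    def fprime(x):
        return x

    xk = np.array([3.0, -2.0])   # current iterate
    pk = -fprime(xk)             # descent direction (negative gradient)

    # line_search returns (alpha, fc, gc, new_fval, old_fval, new_slope);
    # alpha is None when no step satisfying the Wolfe conditions is found.
    alpha, _, _, _, _, _ = optimize.line_search(f, fprime, xk, pk,
                                                gfk=fprime(xk),
                                                old_fval=f(xk), c1=1e-4)
    print(alpha)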
@@ -627,12 +619,6 @@ def fit(self, X, y):
         self : instance of GLM
             The fitted model.
         """
-        if self.learning_rate == 'auto':
-            step_size = _learning_rate(self.distr, X,
-                                       self.reg_lambda, self.alpha)
-            print('Step size calculated as %f' % step_size)
-        else:
-            step_size = self.learning_rate
         np.random.RandomState(self.random_state)

         # checks for group
@@ -675,13 +661,27 @@ def fit(self, X, y):

         # Initialize loss accumulators
         L, DL = list(), list()
+        # Compute and save loss
+        L.append(_loss(self.distr, alpha, self.Tau, reg_lambda,
+                       X, y, self.eta, self.group, beta))
         for t in range(0, self.max_iter):
             if self.solver == 'batch-gradient':
                 grad = _grad_L2loss(self.distr,
                                     alpha, self.Tau,
                                     reg_lambda, X, y, self.eta,
                                     beta)

+                if self.learning_rate == 'auto':
+                    func = partial(_loss, self.distr, alpha, self.Tau,
+                                   reg_lambda, X, y, self.eta, self.group)
+                    fprime = partial(_grad_L2loss, self.distr, alpha,
+                                     self.Tau, reg_lambda, X, y, self.eta)
+                    step_size, _, _, _, _, _ = optimize.linesearch.line_search(
+                        func, fprime, beta, -grad, grad, L[-1], c1=1e-4)
+                    if step_size is None:
+                        step_size = 1e-4
+                else:
+                    step_size = self.learning_rate
                 beta = beta - step_size * grad
             elif self.solver == 'cdfast':
                 beta, z = \
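One note on the construction above: functools.partial freezes every argument of _loss / _grad_L2loss except the trailing coefficient vector, so the line search sees plain functions of beta. A small stand-alone sketch of the same pattern (toy_loss is a hypothetical stand-in, not a pyglmnet function):

    import numpy as np
    from functools import partial

    def toy_loss(reg_lambda, X, y, beta):
        # Same calling convention as _loss: data and hyperparameters first,
        # the free parameter vector `beta` last.
        return np.sum((y - X.dot(beta)) ** 2) + reg_lambda * np.sum(beta ** 2)

    rng = np.random.RandomState(0)
    X, y = rng.randn(20, 3), rng.randn(20)

    func = partial(toy_loss, 0.1, X, y)  # func(beta) == toy_loss(0.1, X, y, beta)
    print(func(np.zeros(3)))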
@@ -698,16 +698,15 @@ def fit(self, X, y):
             # Compute and save loss
             L.append(_loss(self.distr, alpha, self.Tau, reg_lambda,
                            X, y, self.eta, self.group, beta))
-            print(L[-1])
-            # if t > 1:
-            #     DL.append(L[-1] - L[-2])
-            #     if np.abs(DL[-1] / L[-1]) < tol:
-            #         msg = ('\tConverged. Loss function:'
-            #                ' {0:.2f}').format(L[-1])
-            #         logger.info(msg)
-            #         msg = ('\tdL/L: {0:.6f}\n'.format(DL[-1] / L[-1]))
-            #         logger.info(msg)
-            #         break
+            if t > 1:
+                DL.append(L[-1] - L[-2])
+                if np.abs(DL[-1] / L[-1]) < tol:
+                    msg = ('\tConverged. Loss function:'
+                           ' {0:.2f}').format(L[-1])
+                    logger.info(msg)
+                    msg = ('\tdL/L: {0:.6f}\n'.format(DL[-1] / L[-1]))
+                    logger.info(msg)
+                    break

         # Update the estimated variables
         self.beta0_ = beta[0]
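The re-enabled convergence test stops the solver once the relative change in loss falls below tol. A stand-alone sketch of that criterion, with made-up loss values:

    import numpy as np

    tol = 1e-6
    L = [105.2, 101.7, 101.69995]   # made-up loss history
    DL = [L[-1] - L[-2]]
    if np.abs(DL[-1] / L[-1]) < tol:
        print('Converged. Loss function: {0:.2f}'.format(L[-1]))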
@@ -906,8 +905,8 @@ class GLMCV(object):
         'cdfast' (Newton coordinate gradient descent).
         default: 'batch-gradient'
     learning_rate : float | 'auto'
-        learning rate for gradient descent. If "auto", it is 0.95 / L
-        where the differentiable part of the loss function is L-smooth.
+        learning rate for gradient descent. If "auto", the step size is
+        chosen by a line search using scipy.optimize.line_search.
         default: "auto"
     max_iter : int
         maximum iterations for the model.