@@ -5870,68 +5870,107 @@ def _entropy(self, s):
5870
5870
this function uses explicit formulas for the maximum likelihood
5871
5871
estimation of the log-normal shape and scale parameters, so the
5872
5872
`optimizer`, `loc` and `scale` keyword arguments are ignored.
5873
+ If the location is free, a likelihood maximum is found by
5874
+ setting its partial derivative wrt to location to 0, and
5875
+ solving by substituting the analytical expressions of shape
5876
+ and scale (or provided parameters).
5877
+ See, e.g., equation 3.1 in
5878
+ A. Clifford Cohen & Betty Jones Whitten (1980)
5879
+ Estimation in the Three-Parameter Lognormal Distribution,
5880
+ Journal of the American Statistical Association, 75:370, 399-404
5881
+ https://doi.org/10.2307/2287466
5873
5882
\n \n """ )
5874
5883
def fit (self , data , * args , ** kwds ):
5875
- floc = kwds .get ('floc' , None )
5876
-
5877
- if floc is None :
5878
- # loc is not fixed. Use the default fit method.
5884
+ if kwds .pop ('superfit' , False ):
5879
5885
return super ().fit (data , * args , ** kwds )
5880
5886
5881
- f0 = (kwds .get ('f0' , None ) or kwds .get ('fs' , None ) or
5882
- kwds .get ('fix_s' , None ))
5883
- fscale = kwds .get ('fscale' , None )
5884
-
5885
- if len (args ) > 1 :
5886
- raise TypeError ("Too many input arguments." )
5887
- for name in ['f0' , 'fs' , 'fix_s' , 'floc' , 'fscale' , 'loc' , 'scale' ,
5888
- 'optimizer' , 'method' ]:
5889
- kwds .pop (name , None )
5890
- if kwds :
5891
- raise TypeError ("Unknown arguments: %s." % kwds )
5887
+ parameters = _check_fit_input_parameters (self , data , args , kwds )
5888
+ data , fshape , floc , fscale = parameters
5889
+ data_min = np .min (data )
5890
+
5891
+ def get_shape_scale (loc ):
5892
+ # Calculate maximum likelihood scale and shape with analytical
5893
+ # formulas unless provided by the user
5894
+ if fshape is None or fscale is None :
5895
+ lndata = np .log (data - loc )
5896
+ scale = fscale or np .exp (lndata .mean ())
5897
+ shape = fshape or np .sqrt (np .mean ((lndata - np .log (scale ))** 2 ))
5898
+ return shape , scale
5899
+
5900
+ def dL_dLoc (loc ):
5901
+ # Derivative of (positive) LL w.r.t. loc
5902
+ shape , scale = get_shape_scale (loc )
5903
+ shifted = data - loc
5904
+ return np .sum ((1 + np .log (shifted / scale )/ shape ** 2 )/ shifted )
5905
+
5906
+ def ll (loc ):
5907
+ # (Positive) log-likelihood
5908
+ shape , scale = get_shape_scale (loc )
5909
+ return - self .nnlf ((shape , loc , scale ), data )
5892
5910
5893
- # Special case: loc is fixed. Use the maximum likelihood formulas
5894
- # instead of the numerical solver.
5911
+ if floc is None :
5912
+ # The location must be less than the minimum of the data.
5913
+ # Back off a bit to avoid numerical issues.
5914
+ spacing = np .spacing (data_min )
5915
+ rbrack = data_min - spacing
5916
+
5917
+ # Find the right end of the bracket by successive doubling of the
5918
+ # distance to data_min. We're interested in a maximum LL, so the
5919
+ # slope dL_dLoc_rbrack should be negative at the right end.
5920
+ # optimization for later: share shape, scale
5921
+ dL_dLoc_rbrack = dL_dLoc (rbrack )
5922
+ ll_rbrack = ll (rbrack )
5923
+ delta = 2 * spacing # 2 * (data_min - rbrack)
5924
+ while dL_dLoc_rbrack >= - 1e-6 :
5925
+ rbrack = data_min - delta
5926
+ dL_dLoc_rbrack = dL_dLoc (rbrack )
5927
+ delta *= 2
5895
5928
5896
- if f0 is not None and fscale is not None :
5897
- # This check is for consistency with `rv_continuous.fit`.
5898
- raise ValueError ("All parameters fixed. There is nothing to "
5899
- "optimize." )
5929
+ if not np .isfinite (rbrack ) or not np .isfinite (dL_dLoc_rbrack ):
5930
+ # If we never find a negative slope, either we missed it or the
5931
+ # slope is always positive. It's usually the latter,
5932
+ # which means
5933
+ # loc = data_min - spacing
5934
+ # But sometimes when shape and/or scale are fixed there are
5935
+ # other issues, so be cautious.
5936
+ return super ().fit (data , * args , ** kwds )
5900
5937
5901
- data = np .asarray (data )
5938
+ # Now find the left end of the bracket. Guess is `rbrack-1`
5939
+ # unless that is too small of a difference to resolve. Double
5940
+ # the size of the interval until the left end is found.
5941
+ lbrack = np .minimum (np .nextafter (rbrack , - np .inf ), rbrack - 1 )
5942
+ dL_dLoc_lbrack = dL_dLoc (lbrack )
5943
+ delta = 2 * (rbrack - lbrack )
5944
+ while (np .isfinite (lbrack ) and np .isfinite (dL_dLoc_lbrack )
5945
+ and np .sign (dL_dLoc_lbrack ) == np .sign (dL_dLoc_rbrack )):
5946
+ lbrack = rbrack - delta
5947
+ dL_dLoc_lbrack = dL_dLoc (lbrack )
5948
+ delta *= 2
5902
5949
5903
- if not np .isfinite (data ).all ():
5904
- raise ValueError ("The data contains non-finite values." )
5950
+ # I don't recall observing this, but just in case...
5951
+ if not np .isfinite (lbrack ) or not np .isfinite (dL_dLoc_lbrack ):
5952
+ return super ().fit (data , * args , ** kwds )
5905
5953
5906
- floc = float (floc )
5907
- if floc != 0 :
5908
- # Shifting the data by floc. Don't do the subtraction in-place,
5909
- # because `data` might be a view of the input array.
5910
- data = data - floc
5911
- if np .any (data <= 0 ):
5912
- raise FitDataError ("lognorm" , lower = floc , upper = np .inf )
5913
- lndata = np .log (data )
5954
+ # If we have a valid bracket, find the root
5955
+ res = root_scalar (dL_dLoc , bracket = (lbrack , rbrack ))
5956
+ if not res .converged :
5957
+ return super ().fit (data , * args , ** kwds )
5914
5958
5915
- # Three cases to handle:
5916
- # * shape and scale both free
5917
- # * shape fixed, scale free
5918
- # * shape free, scale fixed
5959
+ # If the slope was positive near the minimum of the data,
5960
+ # the maximum LL could be there instead of at the root. Compare
5961
+ # the LL of the two points to decide.
5962
+ ll_root = ll (res .root )
5963
+ loc = res .root if ll_root > ll_rbrack else data_min - spacing
5919
5964
5920
- if fscale is None :
5921
- # scale is free.
5922
- scale = np .exp (lndata .mean ())
5923
- if f0 is None :
5924
- # shape is free.
5925
- shape = lndata .std ()
5926
- else :
5927
- # shape is fixed.
5928
- shape = float (f0 )
5929
5965
else :
5930
- # scale is fixed, shape is free
5931
- scale = float ( fscale )
5932
- shape = np . sqrt ((( lndata - np . log ( scale )) ** 2 ). mean ())
5966
+ if floc >= data_min :
5967
+ raise FitDataError ( "lognorm" , lower = 0. , upper = np . inf )
5968
+ loc = floc
5933
5969
5934
- return shape , floc , scale
5970
+ shape , scale = get_shape_scale (loc )
5971
+ if not (self ._argcheck (shape ) and scale > 0 ):
5972
+ return super ().fit (data , * args , ** kwds )
5973
+ return shape , loc , scale
5935
5974
5936
5975
5937
5976
lognorm = lognorm_gen (a = 0.0 , name = 'lognorm' )
0 commit comments