|
| 1 | +// Copyright ©2015 The gonum Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +package optimize |
| 6 | + |
| 7 | +import "math" |
| 8 | + |
| 9 | +// MoreThuente is a Linesearcher that finds steps that satisfy both the |
| 10 | +// sufficient decrease and curvature conditions (the strong Wolfe conditions). |
| 11 | +// |
| 12 | +// References: |
| 13 | +// - More, J.J. and D.J. Thuente: Line Search Algorithms with Guaranteed Sufficient |
| 14 | +// Decrease. ACM Transactions on Mathematical Software 20(3) (1994), 286-307 |
| 15 | +type MoreThuente struct { |
| 16 | + // DecreaseFactor is the constant factor in the sufficient decrease |
| 17 | + // (Armijo) condition. |
| 18 | + // It must be in the interval [0, 1). The default value is 0. |
| 19 | + DecreaseFactor float64 |
| 20 | + // CurvatureFactor is the constant factor in the Wolfe conditions. Smaller |
| 21 | + // values result in a more exact line search. |
| 22 | + // A set value must be in the interval (0, 1). If it is zero, it will be |
| 23 | + // defaulted to 0.9. |
| 24 | + CurvatureFactor float64 |
| 25 | + // StepTolerance sets the minimum acceptable width for the linesearch |
| 26 | + // interval. If the relative interval length is less than this value, |
| 27 | + // ErrLinesearcherFailure is returned. |
| 28 | + // It must be non-negative. If it is zero, it will be defaulted to 1e-10. |
| 29 | + StepTolerance float64 |
| 30 | + |
| 31 | + // MinimumStep is the minimum step that the linesearcher will take. |
| 32 | + // It must be non-negative and less than MaximumStep. Defaults to no |
| 33 | + // minimum (a value of 0). |
| 34 | + MinimumStep float64 |
| 35 | + // MaximumStep is the maximum step that the linesearcher will take. |
| 36 | + // It must be greater than MinimumStep. If it is zero, it will be defaulted |
| 37 | + // to 1e20. |
| 38 | + MaximumStep float64 |
| 39 | + |
| 40 | + bracketed bool // Indicates if a minimum has been bracketed. |
| 41 | + fInit float64 // Function value at step = 0. |
| 42 | + gInit float64 // Derivative value at step = 0. |
| 43 | + |
| 44 | + // When stage is 1, the algorithm updates the interval given by x and y |
| 45 | + // so that it contains a minimizer of the modified function |
| 46 | + // psi(step) = f(step) - f(0) - DecreaseFactor * step * f'(0). |
| 47 | + // When stage is 2, the interval is updated so that it contains a minimizer |
| 48 | + // of f. |
| 49 | + stage int |
| 50 | + |
| 51 | + step float64 // Current step. |
| 52 | + lower, upper float64 // Lower and upper bounds on the next step. |
| 53 | + x float64 // Endpoint of the interval with a lower function value. |
| 54 | + fx, gx float64 // Data at x. |
| 55 | + y float64 // The other endpoint. |
| 56 | + fy, gy float64 // Data at y. |
| 57 | + width [2]float64 // Width of the interval at two previous iterations. |
| 58 | +} |
| 59 | + |
| 60 | +const ( |
| 61 | + mtMinGrowthFactor float64 = 1.1 |
| 62 | + mtMaxGrowthFactor float64 = 4 |
| 63 | +) |
| 64 | + |
| 65 | +func (mt *MoreThuente) Init(f, g float64, step float64) Operation { |
| 66 | + // Based on the original Fortran code that is available, for example, from |
| 67 | + // http://ftp.mcs.anl.gov/pub/MINPACK-2/csrch/ |
| 68 | + // as part of |
| 69 | + // MINPACK-2 Project. November 1993. |
| 70 | + // Argonne National Laboratory and University of Minnesota. |
| 71 | + // Brett M. Averick, Richard G. Carter, and Jorge J. Moré. |
| 72 | + |
| 73 | + if g >= 0 { |
| 74 | + panic("morethuente: initial derivative is non-negative") |
| 75 | + } |
| 76 | + if step <= 0 { |
| 77 | + panic("morethuente: invalid initial step") |
| 78 | + } |
| 79 | + |
| 80 | + if mt.CurvatureFactor == 0 { |
| 81 | + mt.CurvatureFactor = 0.9 |
| 82 | + } |
| 83 | + if mt.StepTolerance == 0 { |
| 84 | + mt.StepTolerance = 1e-10 |
| 85 | + } |
| 86 | + if mt.MaximumStep == 0 { |
| 87 | + mt.MaximumStep = 1e20 |
| 88 | + } |
| 89 | + |
| 90 | + if mt.MinimumStep < 0 { |
| 91 | + panic("morethuente: minimum step is negative") |
| 92 | + } |
| 93 | + if mt.MaximumStep <= mt.MinimumStep { |
| 94 | + panic("morethuente: maximum step is not greater than minimum step") |
| 95 | + } |
| 96 | + if mt.DecreaseFactor < 0 || mt.DecreaseFactor >= 1 { |
| 97 | + panic("morethuente: invalid decrease factor") |
| 98 | + } |
| 99 | + if mt.CurvatureFactor <= 0 || mt.CurvatureFactor >= 1 { |
| 100 | + panic("morethuente: invalid curvature factor") |
| 101 | + } |
| 102 | + if mt.StepTolerance <= 0 { |
| 103 | + panic("morethuente: step tolerance is not positive") |
| 104 | + } |
| 105 | + |
| 106 | + if step < mt.MinimumStep { |
| 107 | + step = mt.MinimumStep |
| 108 | + } |
| 109 | + if step > mt.MaximumStep { |
| 110 | + step = mt.MaximumStep |
| 111 | + } |
| 112 | + |
| 113 | + mt.bracketed = false |
| 114 | + mt.stage = 1 |
| 115 | + mt.fInit = f |
| 116 | + mt.gInit = g |
| 117 | + |
| 118 | + mt.x, mt.fx, mt.gx = 0, f, g |
| 119 | + mt.y, mt.fy, mt.gy = 0, f, g |
| 120 | + |
| 121 | + mt.lower = 0 |
| 122 | + mt.upper = step + mtMaxGrowthFactor*step |
| 123 | + |
| 124 | + mt.width[0] = mt.MaximumStep - mt.MinimumStep |
| 125 | + mt.width[1] = 2 * mt.width[0] |
| 126 | + |
| 127 | + mt.step = step |
| 128 | + return FuncEvaluation | GradEvaluation |
| 129 | +} |
| 130 | + |
| 131 | +func (mt *MoreThuente) Iterate(f, g float64) (Operation, float64, error) { |
| 132 | + if mt.stage == 0 { |
| 133 | + panic("morethuente: Init has not been called") |
| 134 | + } |
| 135 | + |
| 136 | + gTest := mt.DecreaseFactor * mt.gInit |
| 137 | + fTest := mt.fInit + mt.step*gTest |
| 138 | + |
| 139 | + if mt.bracketed { |
| 140 | + if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper { |
| 141 | + // step contains the best step found (see below). |
| 142 | + return NoOperation, mt.step, ErrLinesearcherFailure |
| 143 | + } |
| 144 | + } |
| 145 | + if mt.step == mt.MaximumStep && f <= fTest && g <= gTest { |
| 146 | + return NoOperation, mt.step, ErrLinesearcherBound |
| 147 | + } |
| 148 | + if mt.step == mt.MinimumStep && (f > fTest || g >= gTest) { |
| 149 | + return NoOperation, mt.step, ErrLinesearcherFailure |
| 150 | + } |
| 151 | + |
| 152 | + // Test for convergence. |
| 153 | + if f <= fTest && math.Abs(g) <= mt.CurvatureFactor*(-mt.gInit) { |
| 154 | + mt.stage = 0 |
| 155 | + return MajorIteration, mt.step, nil |
| 156 | + } |
| 157 | + |
| 158 | + if mt.stage == 1 && f <= fTest && g >= 0 { |
| 159 | + mt.stage = 2 |
| 160 | + } |
| 161 | + |
| 162 | + if mt.stage == 1 && f <= mt.fx && f > fTest { |
| 163 | + // Lower function value but the decrease is not sufficient . |
| 164 | + |
| 165 | + // Compute values and derivatives of the modified function at step, x, y. |
| 166 | + fm := f - mt.step*gTest |
| 167 | + fxm := mt.fx - mt.x*gTest |
| 168 | + fym := mt.fy - mt.y*gTest |
| 169 | + gm := g - gTest |
| 170 | + gxm := mt.gx - gTest |
| 171 | + gym := mt.gy - gTest |
| 172 | + // Update x, y and step. |
| 173 | + mt.nextStep(fxm, gxm, fym, gym, fm, gm) |
| 174 | + // Recover values and derivates of the non-modified function at x and y. |
| 175 | + mt.fx = fxm + mt.x*gTest |
| 176 | + mt.fy = fym + mt.y*gTest |
| 177 | + mt.gx = gxm + gTest |
| 178 | + mt.gy = gym + gTest |
| 179 | + } else { |
| 180 | + // Update x, y and step. |
| 181 | + mt.nextStep(mt.fx, mt.gx, mt.fy, mt.gy, f, g) |
| 182 | + } |
| 183 | + |
| 184 | + if mt.bracketed { |
| 185 | + // Monitor the length of the bracketing interval. If the interval has |
| 186 | + // not been reduced sufficiently after two steps, use bisection to |
| 187 | + // force its length to zero. |
| 188 | + width := mt.y - mt.x |
| 189 | + if math.Abs(width) >= 2.0/3*mt.width[1] { |
| 190 | + mt.step = mt.x + 0.5*width |
| 191 | + } |
| 192 | + mt.width[0], mt.width[1] = math.Abs(width), mt.width[0] |
| 193 | + } |
| 194 | + |
| 195 | + if mt.bracketed { |
| 196 | + mt.lower = math.Min(mt.x, mt.y) |
| 197 | + mt.upper = math.Max(mt.x, mt.y) |
| 198 | + } else { |
| 199 | + mt.lower = mt.step + mtMinGrowthFactor*(mt.step-mt.x) |
| 200 | + mt.upper = mt.step + mtMaxGrowthFactor*(mt.step-mt.x) |
| 201 | + } |
| 202 | + |
| 203 | + // Force the step to be in [MinimumStep, MaximumStep]. |
| 204 | + mt.step = math.Max(mt.MinimumStep, math.Min(mt.step, mt.MaximumStep)) |
| 205 | + |
| 206 | + if mt.bracketed { |
| 207 | + if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper { |
| 208 | + // If further progress is not possible, set step to the best step |
| 209 | + // obtained during the search. |
| 210 | + mt.step = mt.x |
| 211 | + } |
| 212 | + } |
| 213 | + |
| 214 | + return FuncEvaluation | GradEvaluation, mt.step, nil |
| 215 | +} |
| 216 | + |
| 217 | +// nextStep computes the next safeguarded step and updates the interval that |
| 218 | +// contains a step that satisfies the sufficient decrease and curvature |
| 219 | +// conditions. |
| 220 | +func (mt *MoreThuente) nextStep(fx, gx, fy, gy, f, g float64) { |
| 221 | + x := mt.x |
| 222 | + y := mt.y |
| 223 | + step := mt.step |
| 224 | + |
| 225 | + gNeg := g < 0 |
| 226 | + if gx < 0 { |
| 227 | + gNeg = !gNeg |
| 228 | + } |
| 229 | + |
| 230 | + var next float64 |
| 231 | + var bracketed bool |
| 232 | + switch { |
| 233 | + case f > fx: |
| 234 | + // A higher function value. The minimum is bracketed between x and step. |
| 235 | + // We want the next step to be closer to x because the function value |
| 236 | + // there is lower. |
| 237 | + |
| 238 | + theta := 3*(fx-f)/(step-x) + gx + g |
| 239 | + s := math.Max(math.Abs(gx), math.Abs(g)) |
| 240 | + s = math.Max(s, math.Abs(theta)) |
| 241 | + gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s)) |
| 242 | + if step < x { |
| 243 | + gamma *= -1 |
| 244 | + } |
| 245 | + p := gamma - gx + theta |
| 246 | + q := gamma - gx + gamma + g |
| 247 | + r := p / q |
| 248 | + stpc := x + r*(step-x) |
| 249 | + stpq := x + gx/((fx-f)/(step-x)+gx)/2*(step-x) |
| 250 | + |
| 251 | + if math.Abs(stpc-x) < math.Abs(stpq-x) { |
| 252 | + // The cubic step is closer to x than the quadratic step. |
| 253 | + // Take the cubic step. |
| 254 | + next = stpc |
| 255 | + } else { |
| 256 | + // If f is much larger than fx, then the quadratic step may be too |
| 257 | + // close to x. Therefore heuristically take the average of the |
| 258 | + // cubic and quadratic steps. |
| 259 | + next = stpc + (stpq-stpc)/2 |
| 260 | + } |
| 261 | + bracketed = true |
| 262 | + |
| 263 | + case gNeg: |
| 264 | + // A lower function value and derivatives of opposite sign. The minimum |
| 265 | + // is bracketed between x and step. If we choose a step that is far |
| 266 | + // from step, the next iteration will also likely fall in this case. |
| 267 | + |
| 268 | + theta := 3*(fx-f)/(step-x) + gx + g |
| 269 | + s := math.Max(math.Abs(gx), math.Abs(g)) |
| 270 | + s = math.Max(s, math.Abs(theta)) |
| 271 | + gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s)) |
| 272 | + if step > x { |
| 273 | + gamma *= -1 |
| 274 | + } |
| 275 | + p := gamma - g + theta |
| 276 | + q := gamma - g + gamma + gx |
| 277 | + r := p / q |
| 278 | + stpc := step + r*(x-step) |
| 279 | + stpq := step + g/(g-gx)*(x-step) |
| 280 | + |
| 281 | + if math.Abs(stpc-step) > math.Abs(stpq-step) { |
| 282 | + // The cubic step is farther from x than the quadratic step. |
| 283 | + // Take the cubic step. |
| 284 | + next = stpc |
| 285 | + } else { |
| 286 | + // Take the quadratic step. |
| 287 | + next = stpq |
| 288 | + } |
| 289 | + bracketed = true |
| 290 | + |
| 291 | + case math.Abs(g) < math.Abs(gx): |
| 292 | + // A lower function value, derivatives of the same sign, and the |
| 293 | + // magnitude of the derivative decreases. Extrapolate function values |
| 294 | + // at x and step so that the next step lies between step and y. |
| 295 | + |
| 296 | + theta := 3*(fx-f)/(step-x) + gx + g |
| 297 | + s := math.Max(math.Abs(gx), math.Abs(g)) |
| 298 | + s = math.Max(s, math.Abs(theta)) |
| 299 | + gamma := s * math.Sqrt(math.Max(0, (theta/s)*(theta/s)-(gx/s)*(g/s))) |
| 300 | + if step > x { |
| 301 | + gamma *= -1 |
| 302 | + } |
| 303 | + p := gamma - g + theta |
| 304 | + q := gamma + gx - g + gamma |
| 305 | + r := p / q |
| 306 | + var stpc float64 |
| 307 | + switch { |
| 308 | + case r < 0 && gamma != 0: |
| 309 | + stpc = step + r*(x-step) |
| 310 | + case step > x: |
| 311 | + stpc = mt.upper |
| 312 | + default: |
| 313 | + stpc = mt.lower |
| 314 | + } |
| 315 | + stpq := step + g/(g-gx)*(x-step) |
| 316 | + |
| 317 | + if mt.bracketed { |
| 318 | + // We are extrapolating so be cautious and take the step that |
| 319 | + // is closer to step. |
| 320 | + if math.Abs(stpc-step) < math.Abs(stpq-step) { |
| 321 | + next = stpc |
| 322 | + } else { |
| 323 | + next = stpq |
| 324 | + } |
| 325 | + // Modify next if it is close to or beyond y. |
| 326 | + if step > x { |
| 327 | + next = math.Min(step+2.0/3*(y-step), next) |
| 328 | + } else { |
| 329 | + next = math.Max(step+2.0/3*(y-step), next) |
| 330 | + } |
| 331 | + } else { |
| 332 | + // Minimum has not been bracketed so take the larger step... |
| 333 | + if math.Abs(stpc-step) > math.Abs(stpq-step) { |
| 334 | + next = stpc |
| 335 | + } else { |
| 336 | + next = stpq |
| 337 | + } |
| 338 | + // ...but within reason. |
| 339 | + next = math.Max(mt.lower, math.Min(next, mt.upper)) |
| 340 | + } |
| 341 | + |
| 342 | + default: |
| 343 | + // A lower function value, derivatives of the same sign, and the |
| 344 | + // magnitude of the derivative does not decrease. The function seems to |
| 345 | + // decrease rapidly in the direction of the step. |
| 346 | + |
| 347 | + switch { |
| 348 | + case mt.bracketed: |
| 349 | + theta := 3*(f-fy)/(y-step) + gy + g |
| 350 | + s := math.Max(math.Abs(gy), math.Abs(g)) |
| 351 | + s = math.Max(s, math.Abs(theta)) |
| 352 | + gamma := s * math.Sqrt((theta/s)*(theta/s)-(gy/s)*(g/s)) |
| 353 | + if step > y { |
| 354 | + gamma *= -1 |
| 355 | + } |
| 356 | + p := gamma - g + theta |
| 357 | + q := gamma - g + gamma + gy |
| 358 | + r := p / q |
| 359 | + next = step + r*(y-step) |
| 360 | + case step > x: |
| 361 | + next = mt.upper |
| 362 | + default: |
| 363 | + next = mt.lower |
| 364 | + } |
| 365 | + } |
| 366 | + |
| 367 | + if f > fx { |
| 368 | + // x is still the best step. |
| 369 | + mt.y = step |
| 370 | + mt.fy = f |
| 371 | + mt.gy = g |
| 372 | + } else { |
| 373 | + // step is the new best step. |
| 374 | + if gNeg { |
| 375 | + mt.y = x |
| 376 | + mt.fy = fx |
| 377 | + mt.gy = gx |
| 378 | + } |
| 379 | + mt.x = step |
| 380 | + mt.fx = f |
| 381 | + mt.gx = g |
| 382 | + } |
| 383 | + mt.bracketed = bracketed |
| 384 | + mt.step = next |
| 385 | +} |
0 commit comments