Merge branch 'morethuente'

vladimir-ch · vladimir-ch · commit 43586070b041 · 2016-01-22T16:04:37.000+09:00
diff --git a/cg.go b/cg.go
@@ -103,7 +103,7 @@ func (cg *CG) Init(loc *Location) (Operation, error) {
 	}
 
 	if cg.Linesearcher == nil {
-		cg.Linesearcher = &Bisection{GradConst: 0.1}
+		cg.Linesearcher = &MoreThuente{CurvatureFactor: 0.1}
 	}
 	if cg.Variant == nil {
 		cg.Variant = &HestenesStiefel{}
diff --git a/errors.go b/errors.go
@@ -27,6 +27,10 @@ var (
 	// progress because there is no change in location after Linesearcher step
 	// due to floating-point arithmetic.
 	ErrNoProgress = errors.New("linesearch: no change in location after Linesearcher step")
+
+	// ErrLinesearcherBound signifies that a Linesearcher reached a step that
+	// lies out of allowed bounds.
+	ErrLinesearcherBound = errors.New("linesearch: step out of bounds")
 )
 
 // ErrFunc is returned when an initial function value is invalid. The error
diff --git a/functions/functions.go b/functions/functions.go
@@ -1627,3 +1627,152 @@ func (Wood) Minima() []Minimum {
 		},
 	}
 }
+
+// ConcaveRight is a univariate test function that is concave to the right of
+// its minimizer, which is located at x=sqrt(2).
+//
+// References:
+//  More, J.J., and Thuente, D.J.: Line Search Algorithms with Guaranteed Sufficient Decrease.
+//  ACM Transactions on Mathematical Software 20(3) (1994), 286–307, eq. (5.1)
+type ConcaveRight struct{}
+
+// Func returns -x/(x²+2) evaluated at x[0]. It panics if len(x) != 1.
+func (ConcaveRight) Func(x []float64) float64 {
+	if len(x) != 1 {
+		panic("dimension of the problem must be 1")
+	}
+	v := x[0]
+	return -v / (v*v + 2)
+}
+
+// Grad stores the derivative (x²-2)/(x²+2)² evaluated at x[0] into grad[0].
+// It panics if len(x) != 1 or if grad and x differ in length.
+func (ConcaveRight) Grad(grad, x []float64) {
+	switch {
+	case len(x) != 1:
+		panic("dimension of the problem must be 1")
+	case len(grad) != len(x):
+		panic("incorrect size of the gradient")
+	}
+	sq := x[0] * x[0]
+	denom := sq + 2
+	// Divide twice rather than by denom*denom to keep the original rounding.
+	grad[0] = (sq - 2) / denom / denom
+}
+
+// ConcaveLeft is a univariate test function that is concave to the left of
+// its minimizer, which is located at x=399/250=1.596.
+//
+// References:
+//  More, J.J., and Thuente, D.J.: Line Search Algorithms with Guaranteed Sufficient Decrease.
+//  ACM Transactions on Mathematical Software 20(3) (1994), 286–307, eq. (5.2)
+type ConcaveLeft struct{}
+
+// Func returns (x+0.004)⁴ (x-1.996) evaluated at x[0]. It panics if
+// len(x) != 1.
+func (ConcaveLeft) Func(x []float64) float64 {
+	if len(x) != 1 {
+		panic("dimension of the problem must be 1")
+	}
+	a := x[0]
+	shifted := a + 0.004
+	return math.Pow(shifted, 4) * (a - 1.996)
+}
+
+// Grad stores the derivative (x+0.004)³ (5x-7.98) evaluated at x[0] into
+// grad[0]. It panics if len(x) != 1 or if grad and x differ in length.
+func (ConcaveLeft) Grad(grad, x []float64) {
+	switch {
+	case len(x) != 1:
+		panic("dimension of the problem must be 1")
+	case len(grad) != len(x):
+		panic("incorrect size of the gradient")
+	}
+	a := x[0]
+	shifted := a + 0.004
+	grad[0] = math.Pow(shifted, 3) * (5*a - 7.98)
+}
+
+// Plassmann implements a univariate oscillatory function where the value of L
+// controls the number of oscillations. The value of Beta controls the size of
+// the derivative at zero and the size of the interval where the strong Wolfe
+// conditions can hold. For small values of Beta this function is a difficult
+// test problem for linesearchers, in part because the information based on
+// the derivative is unreliable due to the oscillations.
+//
+// References:
+//  More, J.J., and Thuente, D.J.: Line Search Algorithms with Guaranteed Sufficient Decrease.
+//  ACM Transactions on Mathematical Software 20(3) (1994), 286–307, eq. (5.3)
+type Plassmann struct {
+	L    float64 // Number of oscillations for |x-1| ≥ Beta.
+	Beta float64 // Size of the derivative at zero, f'(0) = -Beta.
+}
+
+// Func returns, evaluated at x[0], the sum of the oscillatory term
+//  2(1-Beta)/(L*π) * sin(L*π/2 * x)
+// and a piecewise term equal to 1-x for x <= 1-Beta, to
+// ((x-1)²/Beta + Beta)/2 for 1-Beta < x <= 1+Beta, and to x-1 otherwise.
+// It panics if len(x) != 1.
+func (f Plassmann) Func(x []float64) float64 {
+	if len(x) != 1 {
+		panic("dimension of the problem must be 1")
+	}
+	a, beta, l := x[0], f.Beta, f.L
+	wiggle := 2 * (1 - beta) / l / math.Pi * math.Sin(l*math.Pi/2*a)
+	if a <= 1-beta {
+		return wiggle + (1 - a)
+	}
+	if 1-beta < a && a <= 1+beta {
+		return wiggle + 0.5*((a-1)*(a-1)/beta+beta)
+	}
+	// a > 1+beta
+	return wiggle + (a - 1)
+}
+
+// Grad stores the derivative of the function at x[0] into grad[0]: the
+// oscillatory part (1-Beta) * cos(L*π/2 * x) plus -1 for x <= 1-Beta,
+// (x-1)/Beta for 1-Beta < x <= 1+Beta, and +1 otherwise. It panics if
+// len(x) != 1 or if grad and x differ in length.
+func (f Plassmann) Grad(grad, x []float64) {
+	switch {
+	case len(x) != 1:
+		panic("dimension of the problem must be 1")
+	case len(grad) != len(x):
+		panic("incorrect size of the gradient")
+	}
+	a, beta, l := x[0], f.Beta, f.L
+	grad[0] = (1 - beta) * math.Cos(l*math.Pi/2*a)
+	if a <= 1-beta {
+		grad[0] -= 1
+	} else if 1-beta < a && a <= 1+beta {
+		grad[0] += (a - 1) / beta
+	} else { // a > 1+beta
+		grad[0] += 1
+	}
+}
+
+// YanaiOzawaKaneko is a univariate convex function where the values of Beta1
+// and Beta2 control the curvature around the minimum. Far away from the
+// minimum the function approximates an absolute value function. Near the
+// minimum, the function can either be sharply curved or flat, controlled by
+// the parameter values.
+//
+// References:
+//  - More, J.J., and Thuente, D.J.: Line Search Algorithms with Guaranteed Sufficient Decrease.
+//    ACM Transactions on Mathematical Software 20(3) (1994), 286–307, eq. (5.4)
+//  - Yanai, H., Ozawa, M., and Kaneko, S.: Interpolation methods in one dimensional
+//    optimization. Computing 27 (1981), 155–163
+type YanaiOzawaKaneko struct {
+	Beta1 float64
+	Beta2 float64
+}
+
+// Func returns, evaluated at x[0],
+//  γ(Beta1) * sqrt((x-1)² + Beta2²) + γ(Beta2) * sqrt(x² + Beta1²)
+// where γ(β) = sqrt(1+β²) - β. It panics if len(x) != 1.
+func (f YanaiOzawaKaneko) Func(x []float64) float64 {
+	if len(x) != 1 {
+		panic("dimension of the problem must be 1")
+	}
+	gamma := func(beta float64) float64 {
+		return math.Sqrt(1+beta*beta) - beta
+	}
+	a, beta1, beta2 := x[0], f.Beta1, f.Beta2
+	w1 := gamma(beta1)
+	w2 := gamma(beta2)
+	return w1*math.Sqrt((a-1)*(a-1)+beta2*beta2) + w2*math.Sqrt(a*a+beta1*beta1)
+}
+
+// Grad stores into grad[0] the derivative at x[0],
+//  γ(Beta1) * (x-1)/sqrt(Beta2² + (x-1)²) + γ(Beta2) * x/sqrt(Beta1² + x²)
+// where γ(β) = sqrt(1+β²) - β. It panics if len(x) != 1 or if grad and x
+// differ in length.
+func (f YanaiOzawaKaneko) Grad(grad, x []float64) {
+	switch {
+	case len(x) != 1:
+		panic("dimension of the problem must be 1")
+	case len(grad) != len(x):
+		panic("incorrect size of the gradient")
+	}
+	gamma := func(beta float64) float64 {
+		return math.Sqrt(1+beta*beta) - beta
+	}
+	a, beta1, beta2 := x[0], f.Beta1, f.Beta2
+	w1 := gamma(beta1)
+	w2 := gamma(beta2)
+	grad[0] = w1*(a-1)/math.Sqrt(beta2*beta2+(a-1)*(a-1)) + w2*a/math.Sqrt(beta1*beta1+a*a)
+}
diff --git a/linesearcher_test.go b/linesearcher_test.go
@@ -0,0 +1,136 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+	"fmt"
+	"math"
+	"reflect"
+	"testing"
+
+	"github.com/gonum/optimize/functions"
+)
+
+// TestMoreThuente runs the shared linesearcher checks on MoreThuente, using
+// the same decrease and curvature factors for the searcher and for the
+// verification, with the curvature check enabled.
+func TestMoreThuente(t *testing.T) {
+	const (
+		decrease  = 0.001
+		curvature = 0.001
+	)
+	searcher := &MoreThuente{DecreaseFactor: decrease, CurvatureFactor: curvature}
+	testLinesearcher(t, searcher, decrease, curvature, true)
+}
+
+// TestBisection runs the shared linesearcher checks on Bisection with the
+// curvature check enabled. The decrease factor passed to the checker is 0,
+// so the accepted step only has to satisfy f <= f0.
+func TestBisection(t *testing.T) {
+	const curvature = 0.1
+	searcher := &Bisection{GradConst: curvature}
+	testLinesearcher(t, searcher, 0, curvature, true)
+}
+
+// TestBacktracking runs the shared linesearcher checks on Backtracking. Only
+// the sufficient decrease condition is verified; the curvature check is
+// switched off.
+func TestBacktracking(t *testing.T) {
+	const decrease = 0.001
+	searcher := &Backtracking{FuncConst: decrease}
+	testLinesearcher(t, searcher, decrease, 0, false)
+}
+
+// funcGrader is the set of methods a test function must provide: evaluation
+// of the function value and of its gradient.
+type funcGrader interface {
+	Func(x []float64) float64
+	Grad(grad, x []float64)
+}
+
+// linesearcherTest is a named univariate test problem expressed with scalar
+// callbacks.
+type linesearcherTest struct {
+	name string
+	f    func(float64) float64 // function value at the given point
+	g    func(float64) float64 // derivative at the given point
+}
+
+// newLinesearcherTest wraps fg into scalar callbacks by passing each argument
+// to fg as a one-element slice. The derivative callback reads its result from
+// element 0 of a buffer that is shared between calls.
+func newLinesearcherTest(name string, fg funcGrader) linesearcherTest {
+	deriv := make([]float64, 1)
+	test := linesearcherTest{name: name}
+	test.f = func(x float64) float64 {
+		return fg.Func([]float64{x})
+	}
+	test.g = func(x float64) float64 {
+		fg.Grad(deriv, []float64{x})
+		return deriv[0]
+	}
+	return test
+}
+
+// testLinesearcher drives ls on every univariate test problem from a range of
+// initial steps and checks the step at which ls reports MajorIteration.
+//
+// decrease is the factor in the sufficient decrease check
+// f <= f0 + step*decrease*g0. When strongWolfe is true the curvature check
+// |g| <= curvature*|g0| is applied as well. An error returned by Iterate, an
+// operation that is not handled below, or 1000 evaluations without reaching
+// MajorIteration is reported as a test failure.
+func testLinesearcher(t *testing.T, ls Linesearcher, decrease, curvature float64, strongWolfe bool) {
+	for i, prob := range []linesearcherTest{
+		newLinesearcherTest("Concave-to-the-right function", functions.ConcaveRight{}),
+		newLinesearcherTest("Concave-to-the-left function", functions.ConcaveLeft{}),
+		newLinesearcherTest("Plassmann wiggly function (l=39, beta=0.01)", functions.Plassmann{L: 39, Beta: 0.01}),
+		newLinesearcherTest("Yanai-Ozawa-Kaneko function (beta1=0.001, beta2=0.001)", functions.YanaiOzawaKaneko{Beta1: 0.001, Beta2: 0.001}),
+		newLinesearcherTest("Yanai-Ozawa-Kaneko function (beta1=0.01, beta2=0.001)", functions.YanaiOzawaKaneko{Beta1: 0.01, Beta2: 0.001}),
+		newLinesearcherTest("Yanai-Ozawa-Kaneko function (beta1=0.001, beta2=0.01)", functions.YanaiOzawaKaneko{Beta1: 0.001, Beta2: 0.01}),
+	} {
+		for _, initStep := range []float64{0.001, 0.1, 1, 10, 1000} {
+			prefix := fmt.Sprintf("test %d (%v started from %v)", i, prob.name, initStep)
+
+			// Every problem must be decreasing at zero, otherwise it is
+			// not usable as a line search test.
+			f0 := prob.f(0)
+			g0 := prob.g(0)
+			if g0 >= 0 {
+				panic("bad test function")
+			}
+
+			op := ls.Init(f0, g0, initStep)
+			if !op.isEvaluation() {
+				t.Errorf("%v: Linesearcher.Init returned non-evaluating operation %v", prefix, op)
+				continue
+			}
+
+			// NOTE(review): step still holds its zero value when the
+			// operation returned by Init is carried out, so the first
+			// evaluation happens at 0 rather than at initStep. Confirm
+			// whether step should start at initStep.
+			var (
+				err  error
+				k    int
+				f, g float64
+				step float64
+			)
+		loop:
+			for {
+				switch op {
+				case MajorIteration:
+					if f > f0+step*decrease*g0 {
+						t.Errorf("%v: %v found step %v that does not satisfy the sufficient decrease condition",
+							prefix, reflect.TypeOf(ls), step)
+					}
+					if strongWolfe && math.Abs(g) > curvature*(-g0) {
+						t.Errorf("%v: %v found step %v that does not satisfy the curvature condition",
+							prefix, reflect.TypeOf(ls), step)
+					}
+					break loop
+				case FuncEvaluation:
+					f = prob.f(step)
+				case GradEvaluation:
+					g = prob.g(step)
+				case FuncEvaluation | GradEvaluation:
+					f = prob.f(step)
+					g = prob.g(step)
+				default:
+					t.Errorf("%v: Linesearcher returned an invalid operation %v", prefix, op)
+					break loop
+				}
+
+				// Guard against a linesearcher that never finishes.
+				k++
+				if k == 1000 {
+					t.Errorf("%v: %v did not finish", prefix, reflect.TypeOf(ls))
+					break loop
+				}
+
+				op, step, err = ls.Iterate(f, g)
+				if err != nil {
+					t.Errorf("%v: %v failed at step %v with %v", prefix, reflect.TypeOf(ls), step, err)
+					break loop
+				}
+			}
+		}
+	}
+}
diff --git a/morethuente.go b/morethuente.go

Original file line number	Diff line number	Diff line change
`@@ -103,7 +103,7 @@ func (cg *CG) Init(loc *Location) (Operation, error) {`
`103`	`103`	`}`
`104`	`104`
`105`	`105`	`if cg.Linesearcher == nil {`
`106`		`- cg.Linesearcher = &Bisection{GradConst: 0.1}`
	`106`	`+ cg.Linesearcher = &MoreThuente{CurvatureFactor: 0.1}`
`107`	`107`	`}`
`108`	`108`	`if cg.Variant == nil {`
`109`	`109`	`cg.Variant = &HestenesStiefel{}`