From 1ca41250d79aebab93ba52e209c37510018a13e0 Mon Sep 17 00:00:00 2001
From: bbayukari <40588568+bbayukari@users.noreply.github.com>
Date: Sun, 12 May 2024 12:45:22 +0800
Subject: [PATCH] feat: remove dependence on nlopt and use L-BFGS-B in scipy as
 default numeric solver (#95)

* feat: use L-BFGS-B in scipy as default numeric solver

remove dependence on nlopt

* chore: debug jax[cpu]

* format code style
---
 docs/source/autoapi/numeric_solver.rst        |  2 +-
 docs/source/feature/Variants.rst              |  6 +-
 .../Inverse-gaussian-regression.ipynb         |  3 +-
 .../gamma-regression.ipynb                    |  4 +-
 docs/source/userguide/install.rst             |  3 +-
 pytest/test_skmodel.py                        |  2 +-
 setup.py                                      |  5 +-
 skscope/__init__.py                           |  4 +-
 skscope/base_solver.py                        |  6 +-
 skscope/layer.py                              |  4 +-
 skscope/numeric_solver.py                     | 66 ++++---------------
 skscope/solver.py                             | 26 ++++----
 src/Algorithm.cpp                             | 14 ++--
 13 files changed, 51 insertions(+), 94 deletions(-)

diff --git a/docs/source/autoapi/numeric_solver.rst b/docs/source/autoapi/numeric_solver.rst
index 53dd132..75c56b3 100644
--- a/docs/source/autoapi/numeric_solver.rst
+++ b/docs/source/autoapi/numeric_solver.rst
@@ -17,7 +17,7 @@ Functions
 
 .. autoapisummary::
 
-    skscope.numeric_solver.convex_solver_nlopt
+    skscope.numeric_solver.convex_solver_LBFGS
 
 .. autoapimodule:: skscope.numeric_solver
     :members: 
\ No newline at end of file
diff --git a/docs/source/feature/Variants.rst b/docs/source/feature/Variants.rst
index 4cd2629..2786abc 100644
--- a/docs/source/feature/Variants.rst
+++ b/docs/source/feature/Variants.rst
@@ -168,16 +168,16 @@ where
 
 - :math:`f(\theta)` is the objective function; 
 
-All solvers in :ref:`skscope <skscope_package>` use `nlopt <https://nlopt.readthedocs.io/en/latest/>`_ as the default numeric optimization solver for this problem. 
+All solvers in :ref:`skscope <skscope_package>` use `L-BFGS-B` algorithm in `scipy.optimize <https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html>`_ as the default numeric optimization solver for this problem. 
 
 In some cases, there may be additional constraints on the intrinsic structure of :math:`\theta`, which can be formulated as a set :math:`\mathcal{C}`:
 
 .. math::
     \arg\min_{\theta \in R^s, \theta \in \mathcal{C}} f(\theta).
 
-A typical example is the Gaussian graphical model for continuous random variables, which constrains :math:`\theta` on symmetric positive-definite spaces (see this example `<../userguide/examples/GraphicalModels/sparse-gaussian-precision-matrix.html>`__). Although ``nlopt`` cannot solve this problem, ``skscope`` provides a flexible interface that allows for its replacement. Specifically, users can change the default numerical optimization solver by properly setting the ``numeric_solver`` in the solver. 
+A typical example is the Gaussian graphical model for continuous random variables, which constrains :math:`\theta` on symmetric positive-definite spaces (see this example `<../userguide/examples/GraphicalModels/sparse-gaussian-precision-matrix.html>`__). Although the default numeric solver cannot solve this problem, ``skscope`` provides a flexible interface that allows for its replacement. Specifically, users can change the default numerical optimization solver by properly setting the ``numeric_solver`` in the solver. 
 
-    > Notice that, the accepted input of ``numeric_solver`` should have the same interface as ``skscope.numeric_solver.convex_solver_nlopt``.
+    > Notice that, the accepted input of ``numeric_solver`` should have the same interface as ``skscope.numeric_solver.convex_solver_LBFGS``.
 
 
 .. code-block:: python
diff --git a/docs/source/gallery/GeneralizedLinearModels/Inverse-gaussian-regression.ipynb b/docs/source/gallery/GeneralizedLinearModels/Inverse-gaussian-regression.ipynb
index 472b6b9..df49eaf 100644
--- a/docs/source/gallery/GeneralizedLinearModels/Inverse-gaussian-regression.ipynb
+++ b/docs/source/gallery/GeneralizedLinearModels/Inverse-gaussian-regression.ipynb
@@ -269,6 +269,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from skscope.numeric_solver import convex_solver_LBFGS\n",
     "def convex_solver_inverse_gaussian(\n",
     "    objective_func,\n",
     "    value_and_grad,\n",
@@ -282,7 +283,7 @@
     "    m = np.min(X @ params)\n",
     "    if m <= 0.0:\n",
     "        params[0] -= m\n",
-    "    return convex_solver_nlopt(objective_func, value_and_grad, params, optim_variable_set, data)"
+    "    return convex_solver_LBFGS(objective_func, value_and_grad, params, optim_variable_set, data)"
    ]
   },
   {
diff --git a/docs/source/gallery/GeneralizedLinearModels/gamma-regression.ipynb b/docs/source/gallery/GeneralizedLinearModels/gamma-regression.ipynb
index a28f535..b17c6f9 100644
--- a/docs/source/gallery/GeneralizedLinearModels/gamma-regression.ipynb
+++ b/docs/source/gallery/GeneralizedLinearModels/gamma-regression.ipynb
@@ -80,7 +80,7 @@
     "import numpy as np\n",
     "import jax.numpy as jnp\n",
     "from skscope import ScopeSolver, HTPSolver, GraspSolver\n",
-    "from skscope.numeric_solver import convex_solver_nlopt"
+    "from skscope.numeric_solver import convex_solver_LBFGS"
    ]
   },
   {
@@ -284,7 +284,7 @@
     "    m = np.min(X @ params)\n",
     "    if m <= 0.0:\n",
     "        params[0] -= m\n",
-    "    return convex_solver_nlopt(objective_func, value_and_grad, params, optim_variable_set, data)\n"
+    "    return convex_solver_LBFGS(objective_func, value_and_grad, params, optim_variable_set, data)\n"
    ]
   },
   {
diff --git a/docs/source/userguide/install.rst b/docs/source/userguide/install.rst
index a3b3c24..db22c4e 100644
--- a/docs/source/userguide/install.rst
+++ b/docs/source/userguide/install.rst
@@ -72,6 +72,5 @@ Dependencies
 The current minimum dependencies to run ``skscope`` are:
 
 - ``numpy``
-- ``nlopt``
 - ``scikit-learn>=1.2.2``
-- ``"jax[cpu]"``
\ No newline at end of file
+- ``jax[cpu]``
\ No newline at end of file
diff --git a/pytest/test_skmodel.py b/pytest/test_skmodel.py
index 21f646d..ab998a1 100644
--- a/pytest/test_skmodel.py
+++ b/pytest/test_skmodel.py
@@ -45,7 +45,7 @@ def test_PortfolioSelection():
     # fit and test
     port = port.fit(X_train)
     score = port.score(X_test)
-    assert score > 0.05
+    assert score > 0.049
 
     # gridsearch with time-series splitting
     tscv = TimeSeriesSplit(n_splits=5)
diff --git a/setup.py b/setup.py
index 29fb6e6..9e23c7c 100644
--- a/setup.py
+++ b/setup.py
@@ -100,9 +100,7 @@ def build_extension(self, ext):
 
             # Multi-config generators have a different way to specify configs
             if not single_config:
-                cmake_args += [
-                    f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"
-                ]
+                cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
                 build_args += ["--config", cfg]
 
         if sys.platform.startswith("darwin"):
@@ -151,7 +149,6 @@ def build_extension(self, ext):
         "numpy",
         "scikit-learn>=1.2.2",
         "jax[cpu]",
-        "nlopt",
         "scipy",
     ],
     license="MIT",
diff --git a/skscope/__init__.py b/skscope/__init__.py
index 5bcda60..21cc458 100644
--- a/skscope/__init__.py
+++ b/skscope/__init__.py
@@ -18,7 +18,7 @@
     OMPSolver,
 )
 from .base_solver import BaseSolver
-from .numeric_solver import convex_solver_nlopt
+from .numeric_solver import convex_solver_LBFGS
 from .skmodel import PortfolioSelection, NonlinearSelection, RobustRegression
 
 __all__ = [
@@ -30,7 +30,7 @@
     "FobaSolver",
     "ForwardSolver",
     "OMPSolver",
-    "convex_solver_nlopt",
+    "convex_solver_LBFGS",
     "PortfolioSelection",
     "NonlinearSelection",
     "RobustRegression",
diff --git a/skscope/base_solver.py b/skscope/base_solver.py
index c25d23d..fcdcbbd 100644
--- a/skscope/base_solver.py
+++ b/skscope/base_solver.py
@@ -8,7 +8,7 @@
 from sklearn.model_selection import KFold
 import numpy as np
 import jax
-from .numeric_solver import convex_solver_nlopt
+from .numeric_solver import convex_solver_LBFGS
 import math
 from . import utilities
 
@@ -32,7 +32,7 @@ class BaseSolver(BaseEstimator):
         An array contains the indexes of variables which must be selected.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``BaseSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -78,7 +78,7 @@ def __init__(
         sample_size=1,
         *,
         preselect=[],
-        numeric_solver=convex_solver_nlopt,
+        numeric_solver=convex_solver_LBFGS,
         max_iter=100,
         group=None,
         ic_method=None,
diff --git a/skscope/layer.py b/skscope/layer.py
index ac184a6..c6f4689 100644
--- a/skscope/layer.py
+++ b/skscope/layer.py
@@ -93,7 +93,7 @@ def __init__(self, dimensionality, coef=None):
     @jax.jit
     def transform_params(self, params):
         x = jnp.dot(params, self.coef)
-        return params / jnp.where(x == 0.0, 1.0, x)
+        return params / jnp.where(jnp.abs(x) < 1e-5, 1.0, x)
 
     def tree_flatten(self):
         children = (self.coef,)
@@ -133,7 +133,7 @@ def __init__(self, dimensionality, coef=None):
     def transform_params(self, params):
         p = jnp.abs(params)
         x = jnp.dot(p, self.coef)
-        return p / jnp.where(x == 0.0, 1.0, x)
+        return p / jnp.where(jnp.abs(x) < 1e-5, 1.0, x)
 
     def tree_flatten(self):
         children = (self.coef,)
diff --git a/skscope/numeric_solver.py b/skscope/numeric_solver.py
index d8c224f..982e50f 100644
--- a/skscope/numeric_solver.py
+++ b/skscope/numeric_solver.py
@@ -6,73 +6,31 @@
 
 import numpy as np
 import math
-import nlopt
 from scipy.optimize import minimize
 
 
-def convex_solver_nlopt(
+def convex_solver_BFGS(
     objective_func,
     value_and_grad,
     init_params,
     optim_variable_set,
     data,
 ):
-    """
-    A wrapper of ``nlopt`` solver for convex optimization.
-
-    Parameters
-    ----------
-    objective_func: callable
-        The objective function.
-        ``objective_func(params, data) -> loss``, where ``params`` is a 1-D array with shape (dimensionality,).
-    value_and_grad: callable
-        The function to compute the loss and gradient.
-        ``value_and_grad(params, data) -> (loss, grad)``, where ``params`` is a 1-D array with shape (dimensionality,).
-    init_params: array of shape (dimensionality,)
-        The initial value of the parameters to be optimized.
-    optim_variable_set: array of int
-        The index of variables to be optimized, others are fixed to the initial value.
-    data:
-        The data passed to objective_func and value_and_grad.
-
-    Returns
-    -------
-    loss: float
-        The loss of the optimized parameters, i.e., `objective_func(params, data)`.
-    optimized_params: array of shape (dimensionality,)
-        The optimized parameters.
-    """
-    best_loss = math.inf
-    best_params = None
-
-    def cache_opt_fn(x, grad):
-        nonlocal best_loss, best_params
-        # update the nonlocal variable: params
+    def fun(x):
         init_params[optim_variable_set] = x
-        if grad.size > 0:
-            loss, full_grad = value_and_grad(init_params, data)
-            grad[:] = full_grad[optim_variable_set]
-        else:
-            loss = objective_func(init_params, data)
-        if loss < best_loss:
-            best_loss = loss
-            best_params = np.copy(x)
-        return loss
+        return objective_func(init_params, data)
 
-    nlopt_solver = nlopt.opt(nlopt.LD_LBFGS, optim_variable_set.size)
-    nlopt_solver.set_min_objective(cache_opt_fn)
+    def jac(x):
+        init_params[optim_variable_set] = x
+        _, grad = value_and_grad(init_params, data)
+        return grad[optim_variable_set]
 
-    try:
-        init_params[optim_variable_set] = nlopt_solver.optimize(
-            init_params[optim_variable_set]
-        )
-        return nlopt_solver.last_optimum_value(), init_params
-    except RuntimeError:
-        init_params[optim_variable_set] = best_params
-        return best_loss, init_params
+    res = minimize(fun, init_params[optim_variable_set], method="BFGS", jac=jac)
+    init_params[optim_variable_set] = res.x
+    return res.fun, init_params
 
 
-def convex_solver_BFGS(
+def convex_solver_LBFGS(
     objective_func,
     value_and_grad,
     init_params,
@@ -88,6 +46,6 @@ def jac(x):
         _, grad = value_and_grad(init_params, data)
         return grad[optim_variable_set]
 
-    res = minimize(fun, init_params[optim_variable_set], method="BFGS", jac=jac)
+    res = minimize(fun, init_params[optim_variable_set], method="L-BFGS-B", jac=jac)
     init_params[optim_variable_set] = res.x
     return res.fun, init_params
diff --git a/skscope/solver.py b/skscope/solver.py
index 85c925f..a46a1eb 100644
--- a/skscope/solver.py
+++ b/skscope/solver.py
@@ -11,7 +11,7 @@
 import jax
 from jax import numpy as jnp
 from . import _scope, utilities
-from .numeric_solver import convex_solver_nlopt
+from .numeric_solver import convex_solver_LBFGS
 
 
 class ScopeSolver(BaseEstimator):
@@ -33,7 +33,7 @@ class ScopeSolver(BaseEstimator):
         An array contains the indexes of variables which must be selected.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``ScopeSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=20
         Maximum number of iterations taken for converging.
     ic_method : callable, optional
@@ -122,7 +122,7 @@ def __init__(
         sample_size=1,
         *,
         preselect=[],
-        numeric_solver=convex_solver_nlopt,
+        numeric_solver=convex_solver_LBFGS,
         max_iter=20,
         ic_method=None,
         cv=1,
@@ -699,7 +699,7 @@ class HTPSolver(BaseSolver):
         Step size of gradient descent.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``HTPSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -750,7 +750,7 @@ def __init__(
         *,
         preselect=[],
         step_size=0.005,
-        numeric_solver=convex_solver_nlopt,
+        numeric_solver=convex_solver_LBFGS,
         max_iter=100,
         group=None,
         ic_method=None,
@@ -860,7 +860,7 @@ class IHTSolver(HTPSolver):
         Step size of gradient descent.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``IHTSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -980,7 +980,7 @@ class GraspSolver(BaseSolver):
         An array contains the indexes of variables which must be selected.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``GraspSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -1141,7 +1141,7 @@ class FobaSolver(BaseSolver):
         An array contains the indexes of variables which must be selected.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``FobaSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -1194,7 +1194,7 @@ def __init__(
         foba_threshold_ratio=0.5,
         strict_sparsity=True,
         preselect=[],
-        numeric_solver=convex_solver_nlopt,
+        numeric_solver=convex_solver_LBFGS,
         max_iter=100,
         group=None,
         ic_method=None,
@@ -1416,7 +1416,7 @@ class ForwardSolver(FobaSolver):
         An array contains the indexes of variables which must be selected.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``ForwardSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -1464,7 +1464,7 @@ def __init__(
         threshold=0.0,
         strict_sparsity=True,
         preselect=[],
-        numeric_solver=convex_solver_nlopt,
+        numeric_solver=convex_solver_LBFGS,
         max_iter=100,
         group=None,
         ic_method=None,
@@ -1564,7 +1564,7 @@ class OMPSolver(ForwardSolver):
         An array contains the indexes of variables which must be selected.
     numeric_solver : callable, optional
         A solver for the convex optimization problem. ``OMPSolver`` will call this function to solve the convex optimization problem in each iteration.
-        It should have the same interface as ``skscope.convex_solver_nlopt``.
+        It should have the same interface as ``skscope.convex_solver_LBFGS``.
     max_iter : int, default=100
         Maximum number of iterations taken for converging.
     group : array of shape (dimensionality,), default=range(dimensionality)
@@ -1615,7 +1615,7 @@ def __init__(
         threshold=0.0,
         strict_sparsity=True,
         preselect=[],
-        numeric_solver=convex_solver_nlopt,
+        numeric_solver=convex_solver_LBFGS,
         max_iter=100,
         group=None,
         ic_method=None,
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index 68b740c..34059ae 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -320,10 +320,10 @@ bool Algorithm::splicing(UniversalData &X, MatrixXd &y, VectorXi &A, VectorXi &I
 
     for (int k = C_max; k >= 1;)
     {
-        //SPDLOG_INFO("exchange num is {}", k);
+        // SPDLOG_INFO("exchange num is {}", k);
         A_exchange = diff_union(A, s1, s2);
         A_ind_exchage = find_ind(A_exchange, g_index, g_size, (this->beta).rows(), N);
-        X_A_exchage = X.slice_by_para(A_ind_exchage); 
+        X_A_exchage = X.slice_by_para(A_ind_exchage);
         beta_A_exchange = beta(A_ind_exchage);
         coef0_A_exchange = coef0;
         bool success = this->primary_model_fit(X_A_exchage, y, weights, beta_A_exchange, coef0_A_exchange,
@@ -356,7 +356,7 @@ bool Algorithm::splicing(UniversalData &X, MatrixXd &y, VectorXi &A, VectorXi &I
     coef0 = best_coef0_A_exchange;
     if (this->is_dynamic_exchange_num)
         C_max = best_exchange_num;
-    //SPDLOG_INFO("best exchange num is {}", best_exchange_num);
+    // SPDLOG_INFO("best exchange num is {}", best_exchange_num);
     return true;
 };
 
@@ -444,7 +444,7 @@ bool Algorithm::primary_model_fit(UniversalData &active_data, MatrixXd &y, Vecto
 
     bool success = result > 0;
     if(!success){
-        SPDLOG_WARN("nlopt failed to optimize, state: {} ", nlopt_result_to_string(result));
+        SPDLOG_WARN("failed to optimize, state: {} ", nlopt_result_to_string(result));
     }
     */
 }
@@ -454,10 +454,12 @@ void Algorithm::sacrifice(UniversalData &data, UniversalData &XA, MatrixXd &y, V
     SPDLOG_DEBUG("sacrifice begin");
     VectorXd gradient_full;
     MatrixXd hessian_full;
-    if (this->use_hessian){
+    if (this->use_hessian)
+    {
         data.gradient_and_hessian(para, gradient_full, hessian_full);
     }
-    else{
+    else
+    {
         data.loss_and_gradient(para, gradient_full);
         hessian_full = MatrixXd::Identity(para.size(), para.size());
     }