
Commit 28fef84

ddbourgin committed: add first sketch of the factorization module

1 parent 5ffd87b · commit 28fef84

File tree

5 files changed: +36 −11 lines changed


Diff for: docs/index.rst (+2)

@@ -38,6 +38,8 @@ that in mind, don't just read the docs -- read the source!
 
    numpy_ml.nonparametric
 
+   numpy_ml.factorization
+
    numpy_ml.trees
 
    numpy_ml.neural_nets

Diff for: docs/numpy_ml.factorization.factors.rst (+11)

@@ -0,0 +1,11 @@
+``VanillaALS``
+--------------
+.. autoclass:: numpy_ml.factorization.VanillaALS
+    :members:
+    :undoc-members:
+
+``NMF``
+--------
+.. autoclass:: numpy_ml.factorization.NMF
+    :members:
+    :undoc-members:

Diff for: docs/numpy_ml.factorization.rst (+7)

@@ -0,0 +1,7 @@
+Matrix factorization
+####################
+
+.. toctree::
+   :maxdepth: 3
+
+   numpy_ml.factorization.factors

Diff for: numpy_ml/__init__.py (+4)

@@ -1,3 +1,6 @@
+# noqa
+"""Common ML and ML-adjacent algorithms implemented in NumPy"""
+
 from . import utils
 from . import preprocessing
 
@@ -11,3 +14,4 @@
 from . import rl_models
 from . import trees
 from . import bandits
+from . import factorization
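Since the top-level __init__.py now re-exports the subpackage, the two classes documented above become importable directly. A rough usage sketch follows; the VanillaALS constructor signature is copied from the hunk header further down, but the NMF constructor arguments and the VanillaALS.fit call are assumptions, since this diff only shows the signature of NMF.fit:

import numpy as np
from numpy_ml.factorization import NMF, VanillaALS

rng = np.random.default_rng(0)
X = rng.random((100, 50))  # nonnegative data, as NMF requires

# Constructor signature taken from the `__init__` hunk header below
als = VanillaALS(K=10, alpha=1, max_iter=200, tol=1e-4)
als.fit(X)  # assumed entry point; not shown in this diff

nmf = NMF(K=10)  # `K` as a constructor argument is an assumption
nmf.fit(X, n_initializations=10, verbose=False)  # signature from the hunk header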

Diff for: numpy_ml/factorization/factors.py (+12 −11)

@@ -25,7 +25,7 @@ def __init__(self, K, alpha=1, max_iter=200, tol=1e-4):
         where :math:`||\cdot||` denotes the Frobenius norm, **X** is the
         :math:`N \times M` data matrix, :math:`\mathbf{W}` and
         :math:`\mathbf{H}` are learned factor matrices with dimensions :math:`N
-        \times K` and :math:`K \times M`, respectively. :math:`\alpha` is a
+        \times K` and :math:`K \times M`, respectively, and :math:`\alpha` is a
         user-defined regularization weight.
 
         ALS proceeds by alternating between fixing **W** and optimizing for
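The alternating update this docstring describes has a closed form. Below is a minimal NumPy sketch of a single regularized ALS round under the conventions stated above (X is N x M, W is N x K, H is K x M), assuming the objective is the squared Frobenius reconstruction error plus alpha times the squared factor norms; it is an illustration of the idea, not the implementation in this commit:

import numpy as np

def als_step(X, W, H, alpha):
    """One regularized ALS round: solve for W with H fixed, then for H."""
    I_K = np.eye(W.shape[1])
    # Fixing H, the ridge solution is W = X H^T (H H^T + alpha I)^{-1}
    W = np.linalg.solve(H @ H.T + alpha * I_K, H @ X.T).T
    # Fixing the new W, the solution is H = (W^T W + alpha I)^{-1} W^T X
    H = np.linalg.solve(W.T @ W + alpha * I_K, W.T @ X)
    return W, H

Iterating als_step until the reconstruction error stops improving (the role of the tol parameter in the signature above) recovers the basic fitting loop.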
@@ -36,8 +36,8 @@ def __init__(self, K, alpha=1, max_iter=200, tol=1e-4):
         References
         ----------
         .. [1] Gillis, N. (2014). The why and how of nonnegative matrix
-           factorization. *Regularization, optimization, kernels, and support
-           vector machines*, 12(257), 257-291.
+           factorization. *Regularization, optimization, kernels, and support
+           vector machines, 12(257)*, 257-291.
 
         Parameters
         ----------
@@ -294,14 +294,15 @@ def fit(self, X, W=None, H=None, n_initializations=10, verbose=False):
             \mathbf{X}^{(j)} :=
                 \mathbf{X} - \mathbf{WH}^\top + \mathbf{w}_j \mathbf{h}_j^\top
 
-        where :math:`\mathbf{X}^{(j)}` is the `j`th residue, **X** is the input
-        data matrix, and :math:`\mathbf{w}_j` and :math:`\mathbf{h}_j` are the
-        `j`th columns of the current factor matrices **W** and **H**. HALS
-        proceeds by minimizing the cost for each residue, first with respect to
-        :math:`\mathbf{w}_j` holding :math:`\mathbf{h}_j` fixed, and then with
-        respect to :math:`\mathbf{h}_j`, holding the newly updated
-        :math:`\mathbf{w}_j` fixed. The residue cost :math:`\mathcal{L}^{(j)}`
-        for :math:`\mathbf{X}^{j}` is simply:
+        where :math:`\mathbf{X}^{(j)}` is the :math:`j^{th}` residue, **X** is
+        the input data matrix, and :math:`\mathbf{w}_j` and
+        :math:`\mathbf{h}_j` are the :math:`j^{th}` columns of the current
+        factor matrices **W** and **H**. HALS proceeds by minimizing the cost
+        for each residue, first with respect to :math:`\mathbf{w}_j` holding
+        :math:`\mathbf{h}_j` fixed, and then with respect to
+        :math:`\mathbf{h}_j`, holding the newly updated :math:`\mathbf{w}_j`
+        fixed. The residue cost :math:`\mathcal{L}^{(j)}` for
+        :math:`\mathbf{X}^{j}` is simply:
 
         .. math::
 
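Because the j-th residue depends on neither w_j nor h_j, each of the two minimizations is a rank-one least-squares fit whose solution is clipped at zero to preserve nonnegativity. Here is a minimal sketch of one HALS column update under this docstring's conventions (X is N x M, W is N x K, H is M x K, reconstruction W H^T); again an illustration rather than the code in this commit:

import numpy as np

def hals_update(X, W, H, j, eps=1e-12):
    """One HALS update of column j of the nonnegative factors W and H."""
    # j-th residue: the data minus every rank-one term except the j-th
    R = X - W @ H.T + np.outer(W[:, j], H[:, j])
    # Minimize ||R - w_j h_j^T||^2 over w_j with h_j held fixed, clipped at zero
    W[:, j] = np.clip(R @ H[:, j] / max(H[:, j] @ H[:, j], eps), 0, None)
    # ...then over h_j, holding the newly updated w_j fixed
    H[:, j] = np.clip(R.T @ W[:, j] / max(W[:, j] @ W[:, j], eps), 0, None)
    return W, H

Sweeping j over all K columns and repeating until the cost plateaus gives the full HALS loop.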
