[MRG] Uniformize initialization for all algorithms #195
metric_learn/_util.py
@@ -1,10 +1,13 @@
 import warnings
 import numpy as np
 import six
 from numpy.linalg import LinAlgError
 from sklearn.decomposition import PCA
 from sklearn.utils import check_array
-from sklearn.utils.validation import check_X_y
+from sklearn.utils.validation import check_X_y, check_random_state
 from metric_learn.exceptions import PreprocessorError
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+import sys
+import time

 # hack around lack of axis kwarg in older numpy versions
 try:
@@ -405,3 +408,119 @@ def validate_vector(u, dtype=None):
   if u.ndim > 1:
     raise ValueError("Input vector should be 1-D.")
   return u
+
+
+def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False,
+                            random_state=None):
+  """Returns the initial transformer to be used depending on the arguments.
+
+  Parameters
+  ----------
+  num_dims : int
+    The number of components to take. (Note: it should have been checked
+    before, meaning it should not be None and it should be a value in
+    [1, X.shape[1]].)
+
+  X : array-like
+    The input samples.
+
+  y : array-like or None
+    The input labels (or None if there are no labels).
+
+  init : array-like or None or str
+    The initial matrix.
+
+  verbose : bool
+    Whether to print the details of the initialization or not.
+
+  random_state : int or `numpy.RandomState` or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``init='random'``, ``random_state`` is used to initialize the random
+    transformation. If ``init='pca'``, ``random_state`` is passed as an
+    argument to PCA when initializing the transformation.
+
+  Returns
+  -------
+  init_transformer : `numpy.ndarray`
+    The initial transformer to use.
+  """
+  if isinstance(init, np.ndarray):
+    init = check_array(init)
+
+    # Assert that init.shape[1] == X.shape[1]
+    if init.shape[1] != X.shape[1]:
+      raise ValueError('The input dimensionality ({}) of the given '
+                       'linear transformation `init` must match the '
+                       'dimensionality of the given inputs `X` ({}).'
+                       .format(init.shape[1], X.shape[1]))
+
+    # Assert that init.shape[0] <= init.shape[1]
+    if init.shape[0] > init.shape[1]:
+      raise ValueError('The output dimensionality ({}) of the given '
+                       'linear transformation `init` cannot be '
+                       'greater than its input dimensionality ({}).'
+                       .format(init.shape[0], init.shape[1]))
+
+    if num_dims is not None:
+      # Assert that num_dims == init.shape[0]
+      if num_dims != init.shape[0]:
+        raise ValueError('The preferred dimensionality of the '
+                         'projected space `num_dims` ({}) does'
+                         ' not match the output dimensionality of '
+                         'the given linear transformation '
+                         '`init` ({})!'
+                         .format(num_dims, init.shape[0]))
+  elif init in ['auto', 'pca', 'lda', 'identity', 'random']:
+    pass
+  else:
+    raise ValueError(
+        "`init` must be 'auto', 'pca', 'lda', 'identity', 'random' "
+        "or a numpy array of shape (num_dims, n_features).")
+
+  random_state = check_random_state(random_state)
+  transformation = init
+  if isinstance(init, np.ndarray):
+    pass
+  else:
+    n_samples, n_features = X.shape
+    num_dims = num_dims or n_features
+    if init == 'auto':

Review comment: This might be simpler to test if we broke out pieces into standalone functions. For example, the "auto-select" logic could be its own function.
Reply: I agree, done, and tested the function
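The extracted helper mentioned in this thread is not shown in this revision of the diff; a minimal sketch of what a standalone auto-selection function could look like (the name `_auto_select_init` and its exact signature are hypothetical):

import numpy as np

def _auto_select_init(num_dims, n_features, n_samples, n_classes):
  # Hypothetical standalone version of the 'auto' branch below:
  # prefer LDA when the target dimension fits within the label structure,
  # fall back to PCA for plain dimensionality reduction, else identity.
  if num_dims <= min(n_features, n_classes - 1):
    return 'lda'
  elif num_dims < min(n_features, n_samples):
    return 'pca'
  return 'identity'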
+      n_classes = len(np.unique(y))
+      if num_dims <= min(n_features, n_classes - 1):
+        init = 'lda'
+      elif num_dims < min(n_features, n_samples):
+        init = 'pca'
+      else:
+        init = 'identity'
+    if init == 'identity':
+      transformation = np.eye(num_dims, X.shape[1])
+    elif init == 'random':
+      transformation = random_state.randn(num_dims, X.shape[1])
+    elif init in {'pca', 'lda'}:
+      init_time = time.time()
+      if init == 'pca':
+        pca = PCA(n_components=num_dims, random_state=random_state)
+        if verbose:
+          print('Finding principal components... ')
+          sys.stdout.flush()
+        pca.fit(X)
+        transformation = pca.components_
+      elif init == 'lda':
+        lda = LinearDiscriminantAnalysis(n_components=num_dims)
+        if verbose:
+          print('Finding most discriminative components... ')
+          sys.stdout.flush()
+        lda.fit(X, y)
+        transformation = lda.scalings_.T[:num_dims]
+      if verbose:
+        print('done in {:5.2f}s'.format(time.time() - init_time))
+  return transformation
+
+
+def _initialize_metric_mahalanobis():
+  """Returns the initial metric from arguments"""
+  raise NotImplementedError
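
As a usage sketch of the new helper (toy data, import path as in the lmnn.py diff below), the 'auto' branch resolves like this:

import numpy as np
from metric_learn._util import _initialize_transformer

X = np.random.RandomState(42).randn(100, 5)  # 100 samples, 5 features
y = np.repeat(np.arange(4), 25)              # 4 classes

# num_dims=3 <= min(n_features=5, n_classes - 1=3), so 'auto' resolves to LDA
L = _initialize_transformer(3, X, y, init='auto', random_state=42)
print(L.shape)  # expected (3, 5)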
metric_learn/base_metric.py
@@ -288,7 +288,7 @@ def get_mahalanobis_matrix(self):

     Returns
     -------
-    M : `numpy.ndarray`, shape=(n_components, n_features)
+    M : `numpy.ndarray`, shape=(num_dims, n_features)

Review comment: shouldn't this always be
Reply: that's right, I didn't pay attention, thanks

       The copy of the learned Mahalanobis matrix.
     """
     return self.transformer_.T.dot(self.transformer_)
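
The reviewer's point: since ``transformer_`` has shape (num_dims, n_features), ``L.T.dot(L)`` is always (n_features, n_features), whatever ``num_dims`` is. A quick standalone check:

import numpy as np

L = np.random.randn(2, 5)   # num_dims=2, n_features=5
M = L.T.dot(L)
print(M.shape)              # (5, 5): square in n_features, not (2, 5)
print(np.allclose(M, M.T))  # True: symmetric, as a Mahalanobis matrix must be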
metric_learn/lmnn.py
@@ -16,18 +16,61 @@
 from six.moves import xrange
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin

+from metric_learn._util import _initialize_transformer

Reply: Thanks, done

+
 from .base_metric import MahalanobisMixin


 # commonality between LMNN implementations
 class _base_LMNN(MahalanobisMixin, TransformerMixin):
-  def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
-               regularization=0.5, convergence_tol=0.001, use_pca=True,
-               verbose=False, preprocessor=None):
+  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+               learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
+               use_pca=True, num_dims=None,
+               verbose=False, preprocessor=None, random_state=None):
     """Initialize the LMNN object.

     Parameters
     ----------
+    init : string or numpy array, optional (default='auto')
+      Initialization of the linear transformation. Possible options are
+      'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape
+      (n_features_a, n_features_b).
+
+      'auto'
+        Depending on ``num_dims``, the most reasonable initialization
+        will be chosen. If ``num_dims <= min(n_features, n_classes - 1)``
+        we use 'lda', as it uses labels information. If not, but
+        ``num_dims < min(n_features, n_samples)``, we use 'pca', as
+        it projects data in meaningful directions (those of higher
+        variance). Otherwise, we just use 'identity'.
+
+      'pca'
+        ``num_dims`` principal components of the inputs passed
+        to :meth:`fit` will be used to initialize the transformation.
+        (See `sklearn.decomposition.PCA`)
+
+      'lda'
+        ``min(num_dims, n_classes)`` most discriminative
+        components of the inputs passed to :meth:`fit` will be used to
+        initialize the transformation. (If ``num_dims > n_classes``,
+        the rest of the components will be zero.) (See
+        `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
+
+      'identity'
+        If ``num_dims`` is strictly smaller than the
+        dimensionality of the inputs passed to :meth:`fit`, the identity
+        matrix will be truncated to the first ``num_dims`` rows.
+
+      'random'
+        The initial transformation will be a random array of shape
+        `(num_dims, n_features)`. Each value is sampled from the
+        standard normal distribution.
+
+      numpy array
+        n_features_b must match the dimensionality of the inputs passed to
+        :meth:`fit` and n_features_a must be less than or equal to that.
+        If ``num_dims`` is not None, n_features_a must match it.
+
     k : int, optional
       Number of neighbors to consider, not including self-edges.

@@ -37,15 +80,24 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
     preprocessor : array-like, shape=(n_samples, n_features) or callable
       The preprocessor to call to get tuples from indices. If array-like,
       tuples will be formed like this: X[indices].
+
+    random_state : int or numpy.RandomState or None, optional (default=None)
+      A pseudo random number generator object or a seed for it if int. If
+      ``init='random'``, ``random_state`` is used to initialize the random
+      transformation. If ``init='pca'``, ``random_state`` is passed as an
+      argument to PCA when initializing the transformation.
     """
+    self.init = init
     self.k = k
     self.min_iter = min_iter
     self.max_iter = max_iter
     self.learn_rate = learn_rate
     self.regularization = regularization
     self.convergence_tol = convergence_tol
     self.use_pca = use_pca
+    self.num_dims = num_dims  # FIXME Tmp fix waiting for #167 to be merged
     self.verbose = verbose
+    self.random_state = random_state
     super(_base_LMNN, self).__init__(preprocessor)
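
A usage sketch under the new signature (toy data, assuming the PR's 'auto' behaviour described above):

import numpy as np
from metric_learn import LMNN

X = np.random.RandomState(0).randn(60, 4)
y = np.repeat(np.arange(3), 20)

# 'auto' with num_dims=None keeps the full dimensionality: here
# num_dims = n_features = 4 exceeds both the LDA and PCA thresholds,
# so the transformation starts from the identity, as before this PR
lmnn = LMNN(k=3, random_state=0)
lmnn.fit(X, y)
print(lmnn.transformer_.shape)  # (4, 4)

# an explicit PCA start with a fixed seed
lmnn_pca = LMNN(init='pca', k=3, random_state=0)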
@@ -60,13 +112,15 @@ def fit(self, X, y):
     X, y = self._prepare_inputs(X, y, dtype=float,
                                 ensure_min_samples=2)
     num_pts, num_dims = X.shape
+    # FIXME Tmp fix waiting for #167 to be merged:
+    n_dims = self.num_dims if self.num_dims is not None else num_dims
     unique_labels, label_inds = np.unique(y, return_inverse=True)
     if len(label_inds) != num_pts:
       raise ValueError('Must have one label per point.')
     self.labels_ = np.arange(len(unique_labels))
     if self.use_pca:
       warnings.warn('use_pca does nothing for the python_LMNN implementation')
-    self.transformer_ = np.eye(num_dims)
+    self.transformer_ = _initialize_transformer(n_dims, X, y, self.init,
+                                                self.verbose,
+                                                self.random_state)
     required_k = np.bincount(label_inds).min()
     if self.k > required_k:
       raise ValueError('not enough class labels for specified k'
@@ -99,6 +153,8 @@ def fit(self, X, y):
         self._loss_grad(X, L, dfG, impostors, 1, k, reg, target_neighbors, df,
                         a1, a2))

+    it = 1  # we already made one iteration
+
     # main loop
     for it in xrange(2, self.max_iter):
       # then at each iteration, we try to find a value of L that has better

Review comment: This seems like a no-op line. Maybe just update the "main loop" comment?
Reply: I am not sure I understand, I think the
Reply: Ah, I didn't see that we're referencing
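
For context on the `it = 1` exchange: in Python the loop variable survives the loop, and (per the thread) `it` is referenced again after this one, so the assignment only matters when ``xrange(2, self.max_iter)`` is empty. A minimal illustration of the pattern:

max_iter = 1
it = 1  # we already made one iteration before the loop
for it in range(2, max_iter):  # empty range: body never runs
  pass
print(it)  # 1: still bound, thanks to the assignment above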
Review comment: Especially for long functions with lots of nesting like this one, I prefer the "no else" style:
Reply: That's right, it's better, done
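
The reviewer's snippet is not captured above; purely as an illustration, the "no else" style applied to an array-vs-string dispatch like the one in `_initialize_transformer` could look like this (`_resolve_init` is a made-up name):

import numpy as np

def _resolve_init(init, X):
  if isinstance(init, np.ndarray):
    return init  # early return: one nesting level saved
  # from here on, init is a string; no `else:` block needed
  n_samples, n_features = X.shape
  if init == 'identity':
    return np.eye(n_features)
  raise ValueError("unsupported init: {}".format(init))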