[MRG] Uniformize initialization for all algorithms #195
metric_learn/_util.py
@@ -1,10 +1,13 @@
 import warnings
 import numpy as np
 import six
 from numpy.linalg import LinAlgError
 from sklearn.decomposition import PCA
 from sklearn.utils import check_array
-from sklearn.utils.validation import check_X_y
+from sklearn.utils.validation import check_X_y, check_random_state
 from metric_learn.exceptions import PreprocessorError
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+import sys
+import time

 # hack around lack of axis kwarg in older numpy versions
 try:
@@ -405,3 +408,119 @@ def validate_vector(u, dtype=None):
   if u.ndim > 1:
     raise ValueError("Input vector should be 1-D.")
   return u
+
+
+def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False,
+                            random_state=None):
+  """Returns the initial transformer to be used depending on the arguments.
+
+  Parameters
+  ----------
+  num_dims : int
+    The number of components to take. (Note: it should have been checked
+    before, meaning it should not be None and it should be a value in
+    [1, X.shape[1]].)
+
+  X : array-like
+    The input samples.
+
+  y : array-like or None
+    The input labels (or None if there are no labels).
+
+  init : array-like or None or str
+    The initial matrix.
+
+  verbose : bool
+    Whether to print the details of the initialization or not.
+
+  random_state : int or `numpy.RandomState` or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``init='random'``, ``random_state`` is used to initialize the random
+    transformation. If ``init='pca'``, ``random_state`` is passed as an
+    argument to PCA when initializing the transformation.
+
+  Returns
+  -------
+  init_transformer : `numpy.ndarray`
+    The initial transformer to use.
+  """
+  if isinstance(init, np.ndarray):
+    init = check_array(init)
+
+    # Assert that init.shape[1] == X.shape[1]
+    if init.shape[1] != X.shape[1]:
+      raise ValueError('The input dimensionality ({}) of the given '
+                       'linear transformation `init` must match the '
+                       'dimensionality of the given inputs `X` ({}).'
+                       .format(init.shape[1], X.shape[1]))
+
+    # Assert that init.shape[0] <= init.shape[1]
+    if init.shape[0] > init.shape[1]:
+      raise ValueError('The output dimensionality ({}) of the given '
+                       'linear transformation `init` cannot be '
+                       'greater than its input dimensionality ({}).'
+                       .format(init.shape[0], init.shape[1]))
+
+    if num_dims is not None:
+      # Assert that num_dims == init.shape[0]
+      if num_dims != init.shape[0]:
+        raise ValueError('The preferred dimensionality of the '
+                         'projected space `num_dims` ({}) does'
+                         ' not match the output dimensionality of '
+                         'the given linear transformation '
+                         '`init` ({})!'
+                         .format(num_dims, init.shape[0]))
+  elif init in ['auto', 'pca', 'lda', 'identity', 'random']:
+    pass
+  else:
+    raise ValueError(
+        "`init` must be 'auto', 'pca', 'lda', 'identity', 'random' "
+        "or a numpy array of shape (num_dims, n_features).")
+
+  random_state = check_random_state(random_state)
+  transformation = init
+  if isinstance(init, np.ndarray):
+    pass
+  else:
+    n_samples, n_features = X.shape
+    num_dims = num_dims or n_features
+    if init == 'auto':

Review comment: This might be simpler to test if we broke out pieces into standalone functions. For example, the "auto-select" logic could be its own function.
Reply: I agree, done, and tested the function
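The extracted helper mentioned in this thread is not shown in this revision of the diff; a minimal sketch of what a standalone auto-selection function could look like (the name `_auto_select_init` and its exact signature are hypothetical):

import numpy as np

def _auto_select_init(num_dims, n_features, n_samples, n_classes):
  # Hypothetical standalone version of the 'auto' branch below:
  # prefer LDA when the target dimension fits within the label structure,
  # fall back to PCA for plain dimensionality reduction, else identity.
  if num_dims <= min(n_features, n_classes - 1):
    return 'lda'
  elif num_dims < min(n_features, n_samples):
    return 'pca'
  return 'identity'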
+      n_classes = len(np.unique(y))
+      if num_dims <= min(n_features, n_classes - 1):
+        init = 'lda'
+      elif num_dims < min(n_features, n_samples):
+        init = 'pca'
+      else:
+        init = 'identity'
+    if init == 'identity':
+      transformation = np.eye(num_dims, X.shape[1])
+    elif init == 'random':
+      transformation = random_state.randn(num_dims, X.shape[1])
+    elif init in {'pca', 'lda'}:
+      init_time = time.time()
+      if init == 'pca':
+        pca = PCA(n_components=num_dims, random_state=random_state)
+        if verbose:
+          print('Finding principal components... ')
+          sys.stdout.flush()
+        pca.fit(X)
+        transformation = pca.components_
+      elif init == 'lda':
+        lda = LinearDiscriminantAnalysis(n_components=num_dims)
+        if verbose:
+          print('Finding most discriminative components... ')
+          sys.stdout.flush()
+        lda.fit(X, y)
+        transformation = lda.scalings_.T[:num_dims]
+      if verbose:
+        print('done in {:5.2f}s'.format(time.time() - init_time))
+  return transformation
+
+
+def _initialize_metric_mahalanobis():
+  """Returns the initial metric from arguments"""
+  raise NotImplementedError
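
As a usage sketch of the new helper (toy data, import path as in the lmnn.py diff below), the 'auto' branch resolves like this:

import numpy as np
from metric_learn._util import _initialize_transformer

X = np.random.RandomState(42).randn(100, 5)  # 100 samples, 5 features
y = np.repeat(np.arange(4), 25)              # 4 classes

# num_dims=3 <= min(n_features=5, n_classes - 1=3), so 'auto' resolves to LDA
L = _initialize_transformer(3, X, y, init='auto', random_state=42)
print(L.shape)  # expected (3, 5)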
metric_learn/base_metric.py
@@ -288,7 +288,7 @@ def get_mahalanobis_matrix(self):

     Returns
     -------
-    M : `numpy.ndarray`, shape=(n_components, n_features)
+    M : `numpy.ndarray`, shape=(num_dims, n_features)

Review comment: shouldn't this always be
Reply: that's right, I didn't pay attention, thanks

       The copy of the learned Mahalanobis matrix.
     """
     return self.transformer_.T.dot(self.transformer_)
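
The reviewer's point: since ``transformer_`` has shape (num_dims, n_features), ``L.T.dot(L)`` is always (n_features, n_features), whatever ``num_dims`` is. A quick standalone check:

import numpy as np

L = np.random.randn(2, 5)   # num_dims=2, n_features=5
M = L.T.dot(L)
print(M.shape)              # (5, 5): square in n_features, not (2, 5)
print(np.allclose(M, M.T))  # True: symmetric, as a Mahalanobis matrix must be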
metric_learn/lmnn.py
@@ -16,18 +16,61 @@
 from six.moves import xrange
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin

+from metric_learn._util import _initialize_transformer

Reply: Thanks, done

+
 from .base_metric import MahalanobisMixin


 # commonality between LMNN implementations
 class _base_LMNN(MahalanobisMixin, TransformerMixin):
-  def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
-               regularization=0.5, convergence_tol=0.001, use_pca=True,
-               verbose=False, preprocessor=None):
+  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+               learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
+               use_pca=True, num_dims=None,
+               verbose=False, preprocessor=None, random_state=None):
     """Initialize the LMNN object.

     Parameters
     ----------
+    init : string or numpy array, optional (default='auto')
+      Initialization of the linear transformation. Possible options are
+      'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape
+      (n_features_a, n_features_b).
+
+      'auto'
+        Depending on ``num_dims``, the most reasonable initialization
+        will be chosen. If ``num_dims <= min(n_features, n_classes - 1)``
+        we use 'lda', as it uses labels information. If not, but
+        ``num_dims < min(n_features, n_samples)``, we use 'pca', as
+        it projects data in meaningful directions (those of higher
+        variance). Otherwise, we just use 'identity'.
+
+      'pca'
+        ``num_dims`` principal components of the inputs passed
+        to :meth:`fit` will be used to initialize the transformation.
+        (See `sklearn.decomposition.PCA`)
+
+      'lda'
+        ``min(num_dims, n_classes)`` most discriminative
+        components of the inputs passed to :meth:`fit` will be used to
+        initialize the transformation. (If ``num_dims > n_classes``,
+        the rest of the components will be zero.) (See
+        `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
+
+      'identity'
+        If ``num_dims`` is strictly smaller than the
+        dimensionality of the inputs passed to :meth:`fit`, the identity
+        matrix will be truncated to the first ``num_dims`` rows.
+
+      'random'
+        The initial transformation will be a random array of shape
+        `(num_dims, n_features)`. Each value is sampled from the
+        standard normal distribution.
+
+      numpy array
+        n_features_b must match the dimensionality of the inputs passed to
+        :meth:`fit` and n_features_a must be less than or equal to that.
+        If ``num_dims`` is not None, n_features_a must match it.
+
     k : int, optional
       Number of neighbors to consider, not including self-edges.

@@ -37,15 +80,24 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
     preprocessor : array-like, shape=(n_samples, n_features) or callable
       The preprocessor to call to get tuples from indices. If array-like,
       tuples will be formed like this: X[indices].
+
+    random_state : int or numpy.RandomState or None, optional (default=None)
+      A pseudo random number generator object or a seed for it if int. If
+      ``init='random'``, ``random_state`` is used to initialize the random
+      transformation. If ``init='pca'``, ``random_state`` is passed as an
+      argument to PCA when initializing the transformation.
     """
+    self.init = init
     self.k = k
     self.min_iter = min_iter
     self.max_iter = max_iter
     self.learn_rate = learn_rate
     self.regularization = regularization
     self.convergence_tol = convergence_tol
     self.use_pca = use_pca
+    self.num_dims = num_dims  # FIXME Tmp fix waiting for #167 to be merged
     self.verbose = verbose
+    self.random_state = random_state
     super(_base_LMNN, self).__init__(preprocessor)
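
A usage sketch under the new signature (toy data, assuming the PR's 'auto' behaviour described above):

import numpy as np
from metric_learn import LMNN

X = np.random.RandomState(0).randn(60, 4)
y = np.repeat(np.arange(3), 20)

# 'auto' with num_dims=None keeps the full dimensionality: here
# num_dims = n_features = 4 exceeds both the LDA and PCA thresholds,
# so the transformation starts from the identity, as before this PR
lmnn = LMNN(k=3, random_state=0)
lmnn.fit(X, y)
print(lmnn.transformer_.shape)  # (4, 4)

# an explicit PCA start with a fixed seed
lmnn_pca = LMNN(init='pca', k=3, random_state=0)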
@@ -60,13 +112,15 @@ def fit(self, X, y):
     X, y = self._prepare_inputs(X, y, dtype=float,
                                 ensure_min_samples=2)
     num_pts, num_dims = X.shape
+    # FIXME Tmp fix waiting for #167 to be merged:
+    n_dims = self.num_dims if self.num_dims is not None else num_dims
     unique_labels, label_inds = np.unique(y, return_inverse=True)
     if len(label_inds) != num_pts:
       raise ValueError('Must have one label per point.')
     self.labels_ = np.arange(len(unique_labels))
     if self.use_pca:
       warnings.warn('use_pca does nothing for the python_LMNN implementation')
-    self.transformer_ = np.eye(num_dims)
+    self.transformer_ = _initialize_transformer(n_dims, X, y, self.init,
+                                                self.verbose,
+                                                self.random_state)
     required_k = np.bincount(label_inds).min()
     if self.k > required_k:
       raise ValueError('not enough class labels for specified k'
@@ -99,6 +153,8 @@ def fit(self, X, y):
         self._loss_grad(X, L, dfG, impostors, 1, k, reg, target_neighbors, df,
                         a1, a2))

+    it = 1  # we already made one iteration
+
     # main loop
     for it in xrange(2, self.max_iter):
       # then at each iteration, we try to find a value of L that has better

Review comment: This seems like a no-op line. Maybe just update the "main loop" comment?
Reply: I am not sure I understand, I think the
Reply: Ah, I didn't see that we're referencing
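
For context on the `it = 1` exchange: in Python the loop variable survives the loop, and (per the thread) `it` is referenced again after this one, so the assignment only matters when ``xrange(2, self.max_iter)`` is empty. A minimal illustration of the pattern:

max_iter = 1
it = 1  # we already made one iteration before the loop
for it in range(2, max_iter):  # empty range: body never runs
  pass
print(it)  # 1: still bound, thanks to the assignment above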
Review comment: Especially for long functions with lots of nesting like this one, I prefer the "no else" style:
Reply: That's right, it's better, done
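
The reviewer's snippet is not captured above; purely as an illustration, the "no else" style applied to an array-vs-string dispatch like the one in `_initialize_transformer` could look like this (`_resolve_init` is a made-up name):

import numpy as np

def _resolve_init(init, X):
  if isinstance(init, np.ndarray):
    return init  # early return: one nesting level saved
  # from here on, init is a string; no `else:` block needed
  n_samples, n_features = X.shape
  if init == 'identity':
    return np.eye(n_features)
  raise ValueError("unsupported init: {}".format(init))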