
Added MLKR algorithm #28

Status: Merged (4 commits, Oct 28, 2016)

1 change: 1 addition & 0 deletions metric_learn/__init__.py
@@ -9,3 +9,4 @@
from .nca import NCA
from .lfda import LFDA
from .rca import RCA, RCA_Supervised
from .mlkr import MLKR
132 changes: 132 additions & 0 deletions metric_learn/mlkr.py
@@ -0,0 +1,132 @@
"""
Metric Learning for Kernel Regression (MLKR), Weinberger et al.

MLKR is an algorithm for supervised metric learning, which learns a distance
function by directly minimising the leave-one-out regression error. This
algorithm can also be viewed as a supervised variation of PCA and can be used
for dimensionality reduction and high dimensional data visualization.
"""
from __future__ import division
import numpy as np
from six.moves import xrange
from scipy.spatial.distance import pdist, squareform

from .base_metric import BaseMetricLearner

class MLKR(BaseMetricLearner):
  """Metric Learning for Kernel Regression (MLKR)"""
  def __init__(self, A0=None, epsilon=0.01, alpha=0.0001):
    """
    MLKR initialization

    Parameters
    ----------
    A0: Initialization of matrix A. Defaults to the identity matrix.
    epsilon: Step size for gradient descent.
    alpha: Stopping criterion for loss function in gradient descent.
    """
    self.params = {
      "A0": A0,
      "epsilon": epsilon,
      "alpha": alpha
    }

  def _process_inputs(self, X, y):
    self.X = X = np.array(X, copy=False)  # accept lists and other sequences
    y = np.array(y, copy=False)
    if X.ndim == 1:
Review comment (Contributor): It's usually a good idea to convert inputs to
numpy arrays here, just in case the user provided a plain Python list, or
some other sequence.

  self.X = np.array(X, copy=False)
  y = np.array(y, copy=False)

      X = self.X = X[:, np.newaxis]
    if y.ndim == 1:
      y = y[:, np.newaxis]
    n, d = X.shape
    if y.shape[0] != n:
      raise ValueError('Data and label lengths mismatch: %d != %d'
                       % (n, y.shape[0]))
    return y, n, d

  def fit(self, X, y):
    """
    Fit MLKR model

    Parameters
    ----------
    X : (n x d) array of samples
    y : (n) data labels

    Returns
    -------
    self: Instance of self
    """
    y, n, d = self._process_inputs(X, y)
    X = self.X  # validated ndarray version of the input (see _process_inputs)
    if self.params['A0'] is None:
      A = np.identity(d)  # Initialize A as eye matrix
    else:
      A = self.params['A0']
      if A.shape != (d, d):
        raise ValueError('A0 should be a square matrix of dimension'
                         ' %d. %s shape was provided' % (d, A.shape))
    cost = np.inf
    # Gradient descent procedure
    alpha = self.params['alpha']
    epsilon = self.params['epsilon']
    while cost > alpha:
      K = self._computeK(X, A)
      yhat = self._computeyhat(y, K)
      cost = np.sum(np.square(yhat - y))
      # Compute gradient
      sum_i = 0
      for i in xrange(n):
        sum_j = 0
        for j in xrange(n):
          diffK = (yhat[j] - y[j]) * K[i, j]
          x_ij = (X[i, :] - X[j, :])[:, np.newaxis]
          x_ijT = x_ij.T
          sum_j += diffK * x_ij.dot(x_ijT)
        sum_i += (yhat[i] - y[i]) * sum_j
      gradient = 4 * A.dot(sum_i)
      A -= epsilon * gradient
    self._transformer = A
    return self
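
A side note on the gradient step above (not part of the diff): the double loop transcribes the MLKR gradient of the leave-one-out cost sum_i (yhat_i - y_i)^2 directly, which costs O(n^2) Python iterations per descent step. The same accumulation can be written with matrix operations; a minimal vectorized sketch, assuming X is the (n, d) ndarray and y, yhat the (n, 1) columns used inside fit:

d_res = (yhat - y).ravel()        # leave-one-out residuals, shape (n,)
W = K * np.outer(d_res, d_res)    # W_ij = (yhat_i - y_i) * (yhat_j - y_j) * K_ij, symmetric
lap = np.diag(W.sum(axis=1)) - W  # graph Laplacian of W
sum_i = 2 * X.T.dot(lap).dot(X)   # same matrix the double loop accumulates
gradient = 4 * A.dot(sum_i)

This relies on the identity sum_ij W_ij (x_i - x_j)(x_i - x_j)^T = 2 X^T (diag(W 1) - W) X, which holds because W is symmetric.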

  @staticmethod
  def _computeK(X, A):
    """
    Internal helper function to compute K matrix.
Review comment (Contributor): I suspect that this function could be replaced
with a call to scipy.spatial.pdist with the 'mahalanobis' metric.


    Parameters
    ----------
    X: (n x d) array of samples
    A: (d x d) 'A' matrix

    Returns
    -------
    K: (n x n) K matrix where K_ij = exp(-d(x_i, x_j)), with d(x_i, x_j)
       the squared Mahalanobis distance under A, i.e. the squared L2 norm
       of A(x_i - x_j)
    """
    dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A))
    return np.exp(squareform(-(dist_mat ** 2)))
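
The pdist call follows the reviewer's suggestion above: with VI = A^T A, the 'mahalanobis' metric returns ||A(x_i - x_j)||, which is squared before the exponential. For reference, a direct (slower) construction of the same K that can serve as a sanity check; a sketch assuming X is (n, d) and A is (d, d), not part of the diff:

# explicit O(n^2 d^2) construction of K, for comparison with the pdist version
diffs = X[:, np.newaxis, :] - X[np.newaxis, :, :]             # (n, n, d) pairwise differences
sq_mahal = np.einsum('ijk,kl,ijl->ij', diffs, A.T.dot(A), diffs)
K_direct = np.exp(-sq_mahal)                                  # should match _computeK(X, A)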

  @staticmethod
  def _computeyhat(y, K):
    """
    Internal helper function to compute yhat, the leave-one-out kernel
    regression predictions: each y_i is predicted from all other points,
    weighted by K with the diagonal zeroed out.

    Parameters
    ----------
    y: (n x 1) array of targets (labels)
    K: (n x n) K matrix

    Returns
    -------
    yhat: (n x 1) yhat matrix
    """
    K_mod = np.copy(K)
    np.fill_diagonal(K_mod, 0)
    numerator = K_mod.dot(y)
    denominator = np.sum(K_mod, 1)[:, np.newaxis]
    denominator[denominator == 0] = 2.2204e-16  # double-precision eps (Octave's eps), avoids division by zero
    yhat = numerator / denominator
    return yhat
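
In effect, _computeyhat is the Nadaraya-Watson estimator in leave-one-out form: yhat_i = sum_{j != i} K_ij y_j / sum_{j != i} K_ij. A small numeric illustration, with values chosen arbitrarily (not from the test suite):

import numpy as np
from metric_learn import MLKR

K = np.array([[1.0, 0.5, 0.2],
              [0.5, 1.0, 0.4],
              [0.2, 0.4, 1.0]])
y = np.array([[1.0], [2.0], [4.0]])
# first entry: (0.5 * 2.0 + 0.2 * 4.0) / (0.5 + 0.2) = 1.8 / 0.7 ~= 2.571
print(MLKR._computeyhat(y, K))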

  def transformer(self):
    return self._transformer
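
For context, a minimal end-to-end sketch of the new estimator on the same iris data the test below uses (not part of the diff; the loose epsilon and alpha mirror the test so the gradient loop stops quickly, and transform()/transformer() are the base-class helpers exercised by that test):

from sklearn.datasets import load_iris
from metric_learn import MLKR

iris = load_iris()
X, y = iris.data, iris.target

mlkr = MLKR(epsilon=10, alpha=10)  # loose settings, as in the unit test
mlkr.fit(X, y)

A = mlkr.transformer()             # the learned (d x d) matrix
X_embedded = mlkr.transform()      # the training points mapped through A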
9 changes: 8 additions & 1 deletion test/metric_learn_test.py
@@ -7,7 +7,7 @@

from metric_learn import (
    LMNN, NCA, LFDA, Covariance,
    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised)
    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MLKR)
# Import this specially for testing.
from metric_learn.lmnn import python_LMNN

@@ -113,6 +113,13 @@ def test_iris(self):
    csep = class_separation(rca.transform(), self.iris_labels)
    self.assertLess(csep, 0.25)

class TestMLKR(MetricTestCase):
  def test_iris(self):
    mlkr = MLKR(epsilon=10, alpha=10)  # for faster testing
    mlkr.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mlkr.transform(), self.iris_labels)
    self.assertLess(csep, 0.25)


if __name__ == '__main__':
unittest.main()