From 544f6d2523629edbc97d9e10e3cf0358bee9f449 Mon Sep 17 00:00:00 2001
From: dsquareindia
Date: Sat, 10 Sep 2016 01:32:01 +0530
Subject: [PATCH 1/4] Added MLKR algorithm

---
 metric_learn/mlkr.py | 124 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 metric_learn/mlkr.py

diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
new file mode 100644
index 00000000..900130de
--- /dev/null
+++ b/metric_learn/mlkr.py
@@ -0,0 +1,124 @@
+"""
+Metric Learning for Kernel Regression (MLKR), Weinberger et al.
+
+MLKR is an algorithm for supervised metric learning, which learns a distance
+function by directly minimising the leave-one-out regression error. This
+algorithm can also be viewed as a supervised variant of PCA and can be used
+for dimensionality reduction and high-dimensional data visualization.
+"""
+from __future__ import division
+import numpy as np
+from six.moves import xrange
+
+from .base_metric import BaseMetricLearner
+
+class MLKR(BaseMetricLearner):
+  """Metric Learning for Kernel Regression (MLKR)"""
+  def __init__(self, A=None, epsilon=0.01):
+    """
+    MLKR initialization
+
+    Parameters
+    ----------
+    A: Initialization of matrix A. Defaults to the identity matrix.
+    epsilon: Step size for gradient descent
+    """
+    self.params = {
+      "A": A,
+      "epsilon": epsilon
+    }
+
+  def _process_inputs(self, X, y):
+    if X.ndim == 1:
+      X = X[:, np.newaxis]
+    if y.ndim == 1:
+      y == y[:, np.newaxis]
+    self.X = X
+    n, d = X.shape
+    assert y.shape[0] == n
+    return y, n, d
+
+  def fit(self, X, y):
+    """
+    Fit MLKR model
+
+    Parameters:
+    ----------
+    X : (n x d) array of samples
+    y : (n) data labels
+
+    Returns:
+    -------
+    self: Instance of self
+    """
+    y, n, d = self._process_inputs(X, y)
+    alpha = 0.0001 # Stopping criterion
+    if self.params['A'] is None:
+      A = np.identity(d) # Initialize A as eye matrix
+    else:
+      A = self.params['A']
+    assert A.shape == (d, d)
+    cost = np.Inf
+    # Gradient descent procedure
+    while cost > alpha:
+      K = self._computeK(X, A, n)
+      yhat = self._computeyhat(y, K)
+      sum_i = 0
+      for i in xrange(n):
+        sum_j = 0
+        for j in xrange(n):
+          sum_j += (yhat[j] - y[j]) * K[i][j] * \
+                   (X[i, :] - X[j, :])[:, np.newaxis].dot \
+                   ((X[i, :] - X[j, :])[:, np.newaxis].T)
+        sum_i += (yhat[i] - y[i]) * sum_j
+      gradient = 4 * A.dot(sum_i)
+      A -= self.params['epsilon'] * gradient
+      cost = np.sum(np.square(yhat - y))
+    self._transformer = A
+    return self
+
+  def _computeK(self, X, A, n):
+    """
+    Internal helper function to compute K matrix.
+
+    Parameters:
+    ----------
+    X: (n x d) array of samples
+    A: (d x d) 'A' matrix
+    n: number of rows in X
+
+    Returns:
+    -------
+    K: (n x n) K matrix where Kij = exp(-distance(x_i, x_j)) where
+       distance is defined as the squared L2 norm of A(x_i - x_j)
+    """
+    dist_mat = np.zeros(shape=(n, n))
+    for i in xrange(n):
+      for j in xrange(n):
+        if i == j:
+          dist = 0
+        else:
+          dist = np.sum(np.square(A.dot((X[i, :] - X[j, :]))))
+        dist_mat[i, j] = dist
+    return np.exp(-dist_mat)
+
+  def _computeyhat(self, y, K):
+    """
+    Internal helper function to compute yhat matrix.
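+    Computes the kernel regression estimate for each training point:
+    yhat_i = sum_j(K_ij * y_j) / sum_j(K_ij).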
+
+    Parameters:
+    ----------
+    y: (n) data labels
+    K: (n x n) K matrix
+
+    Returns:
+    -------
+    yhat: (n x 1) yhat matrix
+    """
+    numerator = K.dot(y)
+    denominator = np.sum(K, 1)[:, np.newaxis]
+    yhat = numerator / denominator
+    return yhat
+
+  def transformer(self):
+    return self._transformer

From 1e78faab93a28a7b6e5e894f1fc8650e1317eb79 Mon Sep 17 00:00:00 2001
From: dsquareindia
Date: Sat, 17 Sep 2016 20:47:55 +0530
Subject: [PATCH 2/4] Addressed initial comments, changed to pdist

---
 metric_learn/__init__.py  |  1 +
 metric_learn/mlkr.py      | 52 +++++++++++++++++++--------------------
 test/metric_learn_test.py |  9 ++++++-
 3 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index cc60049d..5a7508c0 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -9,3 +9,4 @@
 from .nca import NCA
 from .lfda import LFDA
 from .rca import RCA, RCA_Supervised
+from .mlkr import MLKR

diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 900130de..58a601d3 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -9,33 +9,39 @@
 from __future__ import division
 import numpy as np
 from six.moves import xrange
+from scipy.spatial.distance import pdist, squareform
 
 from .base_metric import BaseMetricLearner
 
 class MLKR(BaseMetricLearner):
   """Metric Learning for Kernel Regression (MLKR)"""
-  def __init__(self, A=None, epsilon=0.01):
+  def __init__(self, A0=None, epsilon=0.01, alpha=0.0001):
     """
     MLKR initialization
 
     Parameters
     ----------
-    A: Initialization of matrix A. Defaults to the identity matrix.
-    epsilon: Step size for gradient descent
+    A0: Initialization of matrix A. Defaults to the identity matrix.
+    epsilon: Step size for gradient descent.
+    alpha: Stopping criterion for loss function in gradient descent.
     """
     self.params = {
-      "A": A,
-      "epsilon": epsilon
+      "A0": A0,
+      "epsilon": epsilon,
+      "alpha": alpha
     }
 
   def _process_inputs(self, X, y):
+    self.X = np.array(X, copy=False)
+    y = np.array(y, copy=False)
     if X.ndim == 1:
       X = X[:, np.newaxis]
     if y.ndim == 1:
-      y == y[:, np.newaxis]
-    self.X = X
+      y = y[:, np.newaxis]
     n, d = X.shape
-    assert y.shape[0] == n
+    if y.shape[0] != n:
+      raise ValueError('Data and label lengths mismatch: %d != %d'
+                       % (n, y.shape[0]))
     return y, n, d
 
   def fit(self, X, y):
@@ -52,24 +58,24 @@ def fit(self, X, y):
     self: Instance of self
     """
     y, n, d = self._process_inputs(X, y)
-    alpha = 0.0001 # Stopping criterion
-    if self.params['A'] is None:
+    if self.params['A0'] is None:
       A = np.identity(d) # Initialize A as eye matrix
     else:
-      A = self.params['A']
+      A = self.params['A0']
     assert A.shape == (d, d)
     cost = np.Inf
     # Gradient descent procedure
-    while cost > alpha:
-      K = self._computeK(X, A, n)
+    while cost > self.params['alpha']:
+      K = self._computeK(X, A)
       yhat = self._computeyhat(y, K)
       sum_i = 0
       for i in xrange(n):
         sum_j = 0
         for j in xrange(n):
-          sum_j += (yhat[j] - y[j]) * K[i][j] * \
-                   (X[i, :] - X[j, :])[:, np.newaxis].dot \
-                   ((X[i, :] - X[j, :])[:, np.newaxis].T)
+          diffK = (yhat[j] - y[j]) * K[i, j]
+          x_ij = (X[i, :] - X[j, :])[:, np.newaxis]
+          x_ijT = x_ij.T
+          sum_j += diffK * x_ij.dot(x_ijT)
         sum_i += (yhat[i] - y[i]) * sum_j
       gradient = 4 * A.dot(sum_i)
       A -= self.params['epsilon'] * gradient
@@ -77,7 +83,7 @@ def fit(self, X, y):
     self._transformer = A
     return self
 
-  def _computeK(self, X, A, n):
+  def _computeK(self, X, A):
     """
     Internal helper function to compute K matrix.
 
@@ -85,21 +91,15 @@ def _computeK(self, X, A, n):
     ----------
     X: (n x d) array of samples
     A: (d x d) 'A' matrix
-    n: number of rows in X
 
     Returns:
     -------
     K: (n x n) K matrix where Kij = exp(-distance(x_i, x_j)) where
        distance is defined as the squared L2 norm of A(x_i - x_j)
     """
-    dist_mat = np.zeros(shape=(n, n))
-    for i in xrange(n):
-      for j in xrange(n):
-        if i == j:
-          dist = 0
-        else:
-          dist = np.sum(np.square(A.dot((X[i, :] - X[j, :]))))
-        dist_mat[i, j] = dist
+    dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A))
+    dist_mat = np.square(dist_mat)
+    dist_mat = squareform(dist_mat)
     return np.exp(-dist_mat)
 
   def _computeyhat(self, y, K):

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 2ef97237..c56bbb99 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -7,7 +7,7 @@
 from metric_learn import (
     LMNN, NCA, LFDA, Covariance,
-    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised)
+    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MLKR)
 # Import this specially for testing.
 from metric_learn.lmnn import python_LMNN
 
@@ -113,6 +113,13 @@ def test_iris(self):
     csep = class_separation(rca.transform(), self.iris_labels)
     self.assertLess(csep, 0.25)
 
+class TestMLKR(MetricTestCase):
+  def test_iris(self):
+    mlkr = MLKR(epsilon=10, alpha=10) # for faster testing
+    mlkr.fit(self.iris_points, self.iris_labels)
+    csep = class_separation(mlkr.transform(), self.iris_labels)
+    self.assertLess(csep, 0.25)
+
 if __name__ == '__main__':
   unittest.main()

From b5a9e99ba684e18b8d0394b5af8e6313178f9602 Mon Sep 17 00:00:00 2001
From: dsquareindia
Date: Sat, 17 Sep 2016 23:29:16 +0530
Subject: [PATCH 3/4] addressed 2nd review

---
 metric_learn/mlkr.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 58a601d3..c76a664a 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -62,10 +62,14 @@ def fit(self, X, y):
       A = np.identity(d) # Initialize A as eye matrix
     else:
       A = self.params['A0']
-    assert A.shape == (d, d)
+    if A.shape != (d, d):
+      raise ValueError('A0 should be a square matrix of dimension'
+                       ' %d. %s shape was provided' % (d, A.shape))
     cost = np.Inf
     # Gradient descent procedure
-    while cost > self.params['alpha']:
+    alpha = self.params['alpha']
+    epsilon = self.params['epsilon']
+    while cost > alpha:
       K = self._computeK(X, A)
       yhat = self._computeyhat(y, K)
       sum_i = 0
@@ -78,7 +82,7 @@ def fit(self, X, y):
           sum_j += diffK * x_ij.dot(x_ijT)
         sum_i += (yhat[i] - y[i]) * sum_j
       gradient = 4 * A.dot(sum_i)
-      A -= self.params['epsilon'] * gradient
+      A -= epsilon * gradient
       cost = np.sum(np.square(yhat - y))
     self._transformer = A
     return self
@@ -98,9 +102,7 @@ def _computeK(self, X, A):
        distance is defined as the squared L2 norm of A(x_i - x_j)
     """
     dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A))
-    dist_mat = np.square(dist_mat)
-    dist_mat = squareform(dist_mat)
-    return np.exp(-dist_mat)
+    return squareform(np.exp(-dist_mat ** 2))
 
   def _computeyhat(self, y, K):
     """

From 39e4ba816e9c9f178e4331c97ec05348dce73886 Mon Sep 17 00:00:00 2001
From: dsquareindia
Date: Thu, 27 Oct 2016 12:51:47 +0530
Subject: [PATCH 4/4] Made changes in computeyhat

---
 metric_learn/mlkr.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index c76a664a..7c279cc8 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -72,6 +72,8 @@ def fit(self, X, y):
     while cost > alpha:
       K = self._computeK(X, A)
       yhat = self._computeyhat(y, K)
+      cost = np.sum(np.square(yhat - y))
+      # Compute gradient
       sum_i = 0
       for i in xrange(n):
         sum_j = 0
@@ -83,11 +85,11 @@ def fit(self, X, y):
         sum_i += (yhat[i] - y[i]) * sum_j
       gradient = 4 * A.dot(sum_i)
       A -= epsilon * gradient
-      cost = np.sum(np.square(yhat - y))
     self._transformer = A
     return self
 
-  def _computeK(self, X, A):
+  @staticmethod
+  def _computeK(X, A):
     """
     Internal helper function to compute K matrix.
 
@@ -102,9 +104,10 @@ def _computeK(self, X, A):
        distance is defined as the squared L2 norm of A(x_i - x_j)
     """
     dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A))
-    return squareform(np.exp(-dist_mat ** 2))
+    return np.exp(squareform(-(dist_mat ** 2)))
 
-  def _computeyhat(self, y, K):
+  @staticmethod
+  def _computeyhat(y, K):
     """
     Internal helper function to compute yhat matrix.
 
@@ -117,8 +120,11 @@ def _computeyhat(self, y, K):
     -------
     yhat: (n x 1) yhat matrix
     """
-    numerator = K.dot(y)
-    denominator = np.sum(K, 1)[:, np.newaxis]
+    K_mod = np.copy(K)
+    np.fill_diagonal(K_mod, 0)
+    numerator = K_mod.dot(y)
+    denominator = np.sum(K_mod, 1)[:, np.newaxis]
+    denominator[denominator == 0] = 2.2204e-16 # eps val in octave
     yhat = numerator / denominator
     return yhat
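
For quick reference, a minimal usage sketch of the estimator added by this
series (illustrative only, not part of the patches; it assumes scikit-learn
is available for the iris data and mirrors the loose settings used in
test/metric_learn_test.py):

    from sklearn.datasets import load_iris
    from metric_learn import MLKR

    iris = load_iris()
    X, y = iris.data, iris.target

    # epsilon is the gradient descent step size and alpha the stopping
    # tolerance on the leave-one-out cost; both are set loosely here for
    # speed, as in the test suite (defaults: 0.01 and 0.0001).
    mlkr = MLKR(epsilon=10, alpha=10)
    mlkr.fit(X, y)

    # transform() with no argument projects the training data through the
    # learned matrix A; transformer() returns A itself.
    X_mlkr = mlkr.transform()
    A = mlkr.transformer()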