from __future__ import division
import numpy as np
from six.moves import xrange
+from scipy.spatial.distance import pdist, squareform

from .base_metric import BaseMetricLearner


class MLKR(BaseMetricLearner):
  """Metric Learning for Kernel Regression (MLKR)"""
-  def __init__(self, A=None, epsilon=0.01):
+  def __init__(self, A0=None, epsilon=0.01, alpha=0.0001):
    """
    MLKR initialization

    Parameters
    ----------
-    A: Initialization of matrix A. Defaults to the identity matrix.
-    epsilon: Step size for gradient descent
+    A0: Initialization of matrix A. Defaults to the identity matrix.
+    epsilon: Step size for gradient descent.
+    alpha: Stopping criterion for loss function in gradient descent.
    """
    self.params = {
-      "A": A,
-      "epsilon": epsilon
+      "A0": A0,
+      "epsilon": epsilon,
+      "alpha": alpha
    }

  def _process_inputs(self, X, y):
+    self.X = np.array(X, copy=False)
+    y = np.array(y, copy=False)
    if X.ndim == 1:
      X = X[:, np.newaxis]
    if y.ndim == 1:
-      y == y[:, np.newaxis]
-    self.X = X
+      y = y[:, np.newaxis]
    n, d = X.shape
-    assert y.shape[0] == n
+    if y.shape[0] != n:
+      raise ValueError('Data and label lengths mismatch: %d != %d'
+                       % (n, y.shape[0]))
    return y, n, d

  def fit(self, X, y):
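
The hunk above renames the initialization matrix to A0 and exposes the
stopping criterion as an alpha parameter instead of hard-coding it inside
fit(), as the next hunk shows. A minimal sketch of the new calling
convention (the import path and toy data here are assumptions for
illustration, not part of the diff):

import numpy as np
from metric_learn.mlkr import MLKR  # assumed module path

X = np.random.rand(20, 3)  # 20 samples, 3 features
y = np.random.rand(20)     # real-valued regression targets

mlkr = MLKR(A0=None, epsilon=0.01, alpha=0.0001)  # A0=None -> identity init
mlkr.fit(X, y)

Note that the only exit condition of the descent loop is the cost
threshold, so termination depends on epsilon, alpha and the data.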
@@ -52,54 +58,48 @@ def fit(self, X, y):
    self: Instance of self
    """
    y, n, d = self._process_inputs(X, y)
-    alpha = 0.0001  # Stopping criterion
-    if self.params['A'] is None:
+    if self.params['A0'] is None:
      A = np.identity(d)  # Initialize A as the identity matrix
    else:
-      A = self.params['A']
+      A = self.params['A0']
    assert A.shape == (d, d)
    cost = np.Inf
    # Gradient descent procedure
-    while cost > alpha:
-      K = self._computeK(X, A, n)
+    while cost > self.params['alpha']:
+      K = self._computeK(X, A)
      yhat = self._computeyhat(y, K)
      sum_i = 0
      for i in xrange(n):
        sum_j = 0
        for j in xrange(n):
-          sum_j += (yhat[j] - y[j]) * K[i][j] * \
-                   (X[i, :] - X[j, :])[:, np.newaxis].dot \
-                   ((X[i, :] - X[j, :])[:, np.newaxis].T)
+          diffK = (yhat[j] - y[j]) * K[i, j]
+          x_ij = (X[i, :] - X[j, :])[:, np.newaxis]
+          x_ijT = x_ij.T
+          sum_j += diffK * x_ij.dot(x_ijT)
        sum_i += (yhat[i] - y[i]) * sum_j
      gradient = 4 * A.dot(sum_i)
      A -= self.params['epsilon'] * gradient
      cost = np.sum(np.square(yhat - y))
    self._transformer = A
    return self

-  def _computeK(self, X, A, n):
+  def _computeK(self, X, A):
    """
    Internal helper function to compute K matrix.

    Parameters:
    ----------
    X: (n x d) array of samples
    A: (d x d) 'A' matrix
-    n: number of rows in X

    Returns:
    -------
    K: (n x n) K matrix where K_ij = exp(-distance(x_i, x_j)), where
       distance is the squared L2 norm of A(x_i - x_j)
    """
-    dist_mat = np.zeros(shape=(n, n))
-    for i in xrange(n):
-      for j in xrange(n):
-        if i == j:
-          dist = 0
-        else:
-          dist = np.sum(np.square(A.dot((X[i, :] - X[j, :]))))
-        dist_mat[i, j] = dist
+    dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A))
+    dist_mat = np.square(dist_mat)
+    dist_mat = squareform(dist_mat)
    return np.exp(-dist_mat)

  def _computeyhat(self, y, K):
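
For reference, the descent loop in fit() implements the MLKR gradient: with
k_ij = exp(-||A(x_i - x_j)||^2) and yhat_i the kernel-regression estimate,
the nested loops accumulate

    dL/dA = 4A * sum_i (yhat_i - y_i) * sum_j [(yhat_j - y_j) * k_ij
            * (x_i - x_j)(x_i - x_j)^T]

for the loss L = sum_i (yhat_i - y_i)^2; the line
gradient = 4 * A.dot(sum_i) applies the leading 4A factor once, outside
the double sum.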
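
The _computeK() rewrite is a pure vectorization: pdist with VI=A.T.dot(A)
computes sqrt((x_i - x_j)^T A^T A (x_i - x_j)), which is the plain L2 norm
of A(x_i - x_j), so squaring the condensed distances and expanding them
with squareform reproduces exactly what the removed double loop built. A
small self-contained check of that equivalence (names and toy data are
illustrative):

import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.RandomState(0)
X = rng.rand(5, 3)   # 5 samples, 3 features
A = rng.rand(3, 3)   # arbitrary transformation matrix

# Removed approach: explicit loop over all pairs of samples.
n = X.shape[0]
loop_dists = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        loop_dists[i, j] = np.sum(np.square(A.dot(X[i] - X[j])))

# Added approach: condensed Mahalanobis distances with VI = A^T A,
# squared elementwise, then expanded to a full symmetric matrix
# (squareform puts zeros on the diagonal, matching the i == j branch).
vec_dists = squareform(np.square(pdist(X, metric='mahalanobis',
                                       VI=A.T.dot(A))))

assert np.allclose(loop_dists, vec_dists)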