From 84b7156d23c85aafb8d713ee0758e381e539aef4 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Wed, 28 Aug 2013 01:19:43 +0200 Subject: [PATCH] Implemented MLP gradients with respect to X. --- GPy/examples/dimensionality_reduction.py | 18 +++++----- GPy/kern/parts/mlp.py | 46 ++++++++---------------- GPy/kern/parts/poly.py | 2 +- GPy/util/visualize.py | 12 ++++--- 4 files changed, 32 insertions(+), 46 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index f4862842..6be043e0 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -363,18 +363,18 @@ def stick_play(range=None, frame_rate=15): GPy.util.visualize.data_play(Y, data_show, frame_rate) return Y -def stick(): +def stick(kernel=None): data = GPy.util.datasets.osu_run1() # optimize - m = GPy.models.GPLVM(data['Y'], 2) + m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) m.optimize(messages=1, max_f_eval=10000) - m._set_params(m._get_params()) - plt.clf - ax = m.plot_latent() - y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) - raw_input('Press enter to finish') + if GPy.util.visualize.visual_available: + plt.clf + ax = m.plot_latent() + y = m.likelihood.Y[0, :] + data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) + lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + raw_input('Press enter to finish') return m diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/parts/mlp.py index 41eca708..72fd376c 100644 --- a/GPy/kern/parts/mlp.py +++ b/GPy/kern/parts/mlp.py @@ -27,7 +27,6 @@ class MLP(Kernpart): """ def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False): - ARD = False self.input_dim = input_dim self.ARD = ARD if not ARD: @@ -44,6 +43,7 @@ class MLP(Kernpart): assert 
weight_variance.size == self.input_dim, "bad number of weight variances" else: weight_variance = np.ones(self.input_dim) + raise NotImplementedError self.name='mlp' self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance))) @@ -104,47 +104,27 @@ class MLP(Kernpart): target[0] += np.sum(self._K_dvar*dL_dK) - def dK_dX(self, dL_dK, X, X2, target): """Derivative of the covariance matrix with respect to X""" self._K_computations(X, X2) arg = self._K_asin_arg - post_div = np.sqrt(1-arg*arg) numer = self._K_numer denom = self._K_denom vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1. denom3 = denom*denom*denom - target += (((X2[None,:, :]/denom[None, :, None]+vec2[None, None, :]*X[:, :, None]*numer/denom)/post_div[:, :, None]) * dL_dK[:, :, None]).sum(1) - target *= four_over_tau*self.weight_variance*self.variance - raise NotImplementedError - - - gX = np.zeros((X2.shape[0], X.shape[1], X.shape[0])) - - for i in range(X.shape[0]): - gX[:, :, i] = self._dK_dX_point(dL_dK, X, X2, target, i) + target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1) - - def _dK_dX_point(self, dL_dK, X, X2, target, i): - """Gradient with respect to one point of X""" - - inner_prod = self._K_inner_prod[i, :].T - numer = self._K_numer[i, :].T - denom = self._K_denom[i, :].T - arg = self._K_asin_arg[i, :].T - vec1 = (X[i, :]*X[i, :]).sum()*self.weight_variance + self.bias_variance + 1. - vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1. 
- #denom = np.sqrt(np.outer(vec2,vec1)) - #arg = numer/denom - gX = np.zeros(X2.shape) - denom3 = denom*denom*denom - gX = np.zeros((X2.shape[0], X2.shape[1])) - for j in range(X2.shape[1]): - gX[:, j] = X2[:, j]/denom - vec2*X[i, j]*numer/denom3 - gX[:, j] = four_over_tau*self.weight_variance*self.variance*gX[:, j]/np.sqrt(1-arg*arg) - + def dKdiag_dX(self, dL_dKdiag, X, target): + """Gradient of diagonal of covariance with respect to X""" + self._K_diag_computations(X) + arg = self._K_diag_asin_arg + denom = self._K_diag_denom + numer = self._K_diag_numer + target += four_over_tau*2.*self.weight_variance*self.variance*X*(1/denom*(1 - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None] + def _K_computations(self, X, X2): + """Pre-computations for the covariance matrix (used for computing the covariance and its gradients).""" if self.ARD: pass else: @@ -165,9 +145,11 @@ class MLP(Kernpart): self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg) def _K_diag_computations(self, X): + """Pre-computations concerning the diagonal terms (used for computation of the diagonal and its gradients).""" if self.ARD: pass else: self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance self._K_diag_denom = self._K_diag_numer+1. 
- self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_numer/self._K_diag_denom) + self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom + self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg) diff --git a/GPy/kern/parts/poly.py b/GPy/kern/parts/poly.py index 783f8386..542e20e0 100644 --- a/GPy/kern/parts/poly.py +++ b/GPy/kern/parts/poly.py @@ -39,7 +39,6 @@ class POLY(Kernpart): """ def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False): - ARD = False self.input_dim = input_dim self.ARD = ARD if not ARD: @@ -56,6 +55,7 @@ class POLY(Kernpart): assert weight_variance.size == self.input_dim, "bad number of weight variances" else: weight_variance = np.ones(self.input_dim) + raise NotImplementedError self.degree=degree self.name='poly_deg' + str(self.degree) self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance))) diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 8d6b9837..4c3dbe2b 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -5,7 +5,13 @@ import numpy as np import matplotlib as mpl import time import Image -# import visual +try: + import visual + visual_available = True + +except ImportError: + visual_available = False + class data_show: """ @@ -24,7 +30,6 @@ class data_show: def close(self): raise NotImplementedError, "this needs to be implemented to use the data_show class" - class vpython_show(data_show): """ the vpython_show class is a base class for all visualization methods that use vpython to display. It is initialized with a scene. If the scene is set to None it creates a scene window. 
@@ -318,7 +323,7 @@ class mocap_data_show_vpython(vpython_show): for i in range(self.vals.shape[0]): self.spheres.append(visual.sphere(pos=(self.vals[i, 0], self.vals[i, 2], self.vals[i, 1]), radius=self.radius)) self.scene.visible=True - + def draw_edges(self): self.rods = [] self.line_handle = [] @@ -435,7 +440,6 @@ class mocap_data_show(matplotlib_show): self.axes.set_ylim(self.y_lim) self.axes.set_zlim(self.z_lim) - class stick_show(mocap_data_show_vpython): """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect.""" def __init__(self, vals, connect=None, scene=None):