From 4a3db2bef763fda7af8583d1f2547cf1c1e92632 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 22 Sep 2014 22:07:35 +0100 Subject: [PATCH 01/19] removed more more imports --- GPy/likelihoods/poisson.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py index 862f873f..088fc478 100644 --- a/GPy/likelihoods/poisson.py +++ b/GPy/likelihoods/poisson.py @@ -5,7 +5,6 @@ from __future__ import division import numpy as np from scipy import stats,special import scipy as sp -from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf import link_functions from likelihood import Likelihood From 1765e017dd6d305fdf69837487cf0c159bf45fb3 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 22 Sep 2014 22:09:25 +0100 Subject: [PATCH 02/19] removed more more more imports --- GPy/likelihoods/skew_exponential.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GPy/likelihoods/skew_exponential.py b/GPy/likelihoods/skew_exponential.py index 98489a3e..43708d36 100644 --- a/GPy/likelihoods/skew_exponential.py +++ b/GPy/likelihoods/skew_exponential.py @@ -5,8 +5,6 @@ import sympy as sym from GPy.util.symbolic import normcdfln import numpy as np -from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf -#from GPy.util.functions import clip_exp import link_functions from symbolic import Symbolic from scipy import stats From 9081c8ee96cd852b4c9977d729ca77587faa7f8c Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 29 Sep 2014 10:09:28 +0100 Subject: [PATCH 03/19] optionally unweaved the coregionalize kernel coregionalize shoudl now work without weave. Added kernel tests also. --- GPy/kern/_src/coregionalize.py | 61 ++++++++++++++++++++++++++++++---- GPy/testing/kernel_tests.py | 44 ++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 6 deletions(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index fc4a2f33..dcafeb88 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -6,6 +6,7 @@ import numpy as np from scipy import weave from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp +from ...util.config import config # for assesing whether to use weave class Coregionalize(Kern): """ @@ -56,6 +57,27 @@ class Coregionalize(Kern): self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) def K(self, X, X2=None): + if config.getboolean('weave', 'working'): + try: + return self._K_weave(X, X2) + except: + print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + config.set('weave', 'working', False) + return self._K_numpy(X, X2) + else: + return self._K_numpy(X, X2) + + + def _K_numpy(self, X, X2=None): + index = np.asarray(X, dtype=np.int) + if X2 is None: + return self.B[index,index.T] + else: + index2 = np.asarray(X2, dtype=np.int) + return self.B[index,index2.T] + + def _K_weave(self, X, X2=None): + """compute the kernel function using scipy.weave""" index = np.asarray(X, dtype=np.int) if X2 is None: @@ -91,12 +113,33 @@ class Coregionalize(Kern): def update_gradients_full(self, dL_dK, X, X2=None): index = np.asarray(X, dtype=np.int) - dL_dK_small = np.zeros_like(self.B) if X2 is None: index2 = index else: index2 = np.asarray(X2, dtype=np.int) + #attempt to use weave for a nasty double indexing loop: fall back to numpy + if config.getboolean('weave', 'working'): + try: + dL_dK_small = self._gradient_reduce_weave(dL_dK, index1, index2) + except: + print "\n Weave compilation failed. 
Falling back to (slower) numpy implementation\n" + config.set('weave', 'working', False) + dL_dK_small = self._gradient_reduce_weave(dL_dK, index1, index2) + else: + dL_dK_small = self._gradient_reduce_weave(dL_dK, index1, index2) + + + + dkappa = np.diag(dL_dK_small) + dL_dK_small += dL_dK_small.T + dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0) + + self.W.gradient = dW + self.kappa.gradient = dkappa + + def _gradient_reduce_weave(self, dL_dK, index, index2): + dL_dK_small = np.zeros_like(self.B) code=""" for(int i=0; i Date: Tue, 30 Sep 2014 16:44:55 +0100 Subject: [PATCH 04/19] edited coregionalize implementation --- GPy/kern/_src/coregionalize.py | 17 ++++------ GPy/testing/kernel_tests.py | 59 +++++++++++++++++----------------- 2 files changed, 37 insertions(+), 39 deletions(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index dcafeb88..3fcf1c98 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -62,7 +62,7 @@ class Coregionalize(Kern): return self._K_weave(X, X2) except: print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" - config.set('weave', 'working', False) + config.set('weave', 'working', 'False') return self._K_numpy(X, X2) else: return self._K_numpy(X, X2) @@ -121,13 +121,13 @@ class Coregionalize(Kern): #attempt to use weave for a nasty double indexing loop: fall back to numpy if config.getboolean('weave', 'working'): try: - dL_dK_small = self._gradient_reduce_weave(dL_dK, index1, index2) + dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2) except: print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" - config.set('weave', 'working', False) - dL_dK_small = self._gradient_reduce_weave(dL_dK, index1, index2) + config.set('weave', 'working', 'False') + dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2) else: - dL_dK_small = self._gradient_reduce_weave(dL_dK, index1, index2) + dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2) @@ -150,19 +150,16 @@ class Coregionalize(Kern): N, num_inducing, output_dim = index.size, index2.size, self.output_dim weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2']) return dL_dK_small -1 + def _gradient_reduce_numpy(self, dL_dK, index, index2): index, index2 = index[:,0], index2[:,0] - for i in range(k.output_dim): dL_dK_small = np.zeros_like(self.B) + for i in range(k.output_dim): tmp1 = dL_dK[index==i] for j in range(k.output_dim): dL_dK_small[j,i] = tmp1[:,index2==j].sum() return dL_dK_small - - - def update_gradients_diag(self, dL_dKdiag, X): index = np.asarray(X, dtype=np.int).flatten() dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)]) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index cde024f6..dcfb9de4 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -360,43 +360,44 @@ class Coregionalize_weave_test(unittest.TestCase): """ Make sure that the coregionalize kernel work with and without weave enabled """ - k = GPy.kern.coregionalize(1, output_dim=12) - N1, N2 = 100, 200 - X = np.random.randint(0,12,(N1,1)) - X2 = np.random.randint(0,12,(N2,1)) + def setUp(self): + self.k = GPy.kern.Coregionalize(1, output_dim=12) + self.N1, self.N2 = 100, 200 + self.X = np.random.randint(0,12,(self.N1,1)) + self.X2 = np.random.randint(0,12,(self.N2,1)) - #symmetric case - dL_dK = np.random.randn(N1, N1) - GPy.util.config.config.set('weave', 
'working', True) - K_weave = k.K(X) - k.update_gradients_full(dL_dK, X) - grads_weave = k.gradient.copy() + def test_sym(self): + dL_dK = np.random.randn(self.N1, self.N1) + GPy.util.config.config.set('weave', 'working', 'True') + K_weave = self.k.K(self.X) + self.k.update_gradients_full(dL_dK, self.X) + grads_weave = self.k.gradient.copy() - GPy.util.config.config.set('weave', 'working', False) - K_numpy = k.K(X) - k.update_gradients_full(dL_dK, X) - grads_numpy = k.gradient.copy() + GPy.util.config.config.set('weave', 'working', 'False') + K_numpy = self.k.K(self.X) + self.k.update_gradients_full(dL_dK, self.X) + grads_numpy = self.k.gradient.copy() - self.assertTrue(np.allclose(K_numpy, K_weave)) - self.assertTrue(np.allclose(grads_numpy, grads_weave)) + self.assertTrue(np.allclose(K_numpy, K_weave)) + self.assertTrue(np.allclose(grads_numpy, grads_weave)) - #non-symmetric case - dL_dK = np.random.randn(N1, N2) - GPy.util.config.config.set('weave', 'working', True) - K_weave = k.K(X, X2) - k.update_gradients_full(dL_dK, X, X2) - grads_weave = k.gradient.copy() + def test_nonsym(self): + dL_dK = np.random.randn(self.N1, self.N2) + GPy.util.config.config.set('weave', 'working', 'True') + K_weave = self.k.K(self.X, self.X2) + self.k.update_gradients_full(dL_dK, self.X, self.X2) + grads_weave = self.k.gradient.copy() - GPy.util.config.config.set('weave', 'working', False) - K_numpy = k.K(X, X2) - k.update_gradients_full(dL_dK, X, X2) - grads_numpy = k.gradient.copy() + GPy.util.config.config.set('weave', 'working', 'False') + K_numpy = self.k.K(self.X, self.X2) + self.k.update_gradients_full(dL_dK, self.X, self.X2) + grads_numpy = self.k.gradient.copy() - self.assertTrue(np.allclose(K_numpy, K_weave)) - self.assertTrue(np.allclose(grads_numpy, grads_weave)) + self.assertTrue(np.allclose(K_numpy, K_weave)) + self.assertTrue(np.allclose(grads_numpy, grads_weave)) #reset the weave state for any other tests - GPy.util.config.config.set('weave', 'working', False) + GPy.util.config.config.set('weave', 'working', 'False') From 657c78ba348500ecdecc99d5a25398371ea5f087 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 3 Oct 2014 10:07:58 +0100 Subject: [PATCH 05/19] Half_t prior (Martin's contribution) --- GPy/core/parameterization/priors.py | 60 +++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index ddc4d02f..e488d86b 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -290,3 +290,63 @@ class inverse_gamma(Prior): def rvs(self, n): return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n) + +class half_t(Prior): + """ + Implementation of the half student t probability function, coupled with random variables. 
+ + :param A: scale parameter + :param nu: degrees of freedom + + """ + domain = _POSITIVE + _instances = [] + def __new__(cls, A, nu): # Singleton: + if cls._instances: + cls._instances[:] = [instance for instance in cls._instances if instance()] + for instance in cls._instances: + if instance().A == A and instance().nu == nu: + return instance() + o = super(Prior, cls).__new__(cls, A, nu) + cls._instances.append(weakref.ref(o)) + return cls._instances[-1]() + def __init__(self, A, nu): + self.A = float(A) + self.nu = float(nu) + #self.constant = -gammaln(self.a) + a * np.log(b) + + def __str__(self): + return "half_t(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')' + + def lnpdf(self,theta): + theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) + lnpdfs = np.zeros_like(theta) + theta = np.array([theta]) + above_zero = theta.flatten()>1e-6 + v = self.nu + sigma2=self.A + lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5) + - gammaln(v * 0.5) + - 0.5*np.log(sigma2 * v * np.pi) + - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2)) + ) + #return np.sum(objective) + return lnpdfs + + def lnpdf_grad(self,theta): + theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) + grad = np.zeros_like(theta) + above_zero = theta>1e-6 + v = self.nu + sigma2=self.A + grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2) + return grad + + def rvs(self, n): + #return np.random.randn(n) * self.sigma + self.mu + from scipy.stats import t + #[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)]) + ret = t.rvs(self.nu,loc=0,scale=self.A, size=n) + ret[ret<0] = 0 + return ret + From 94b19242f0ad56cb5467d5f982b0cfa48e7effa2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 3 Oct 2014 11:37:09 +0100 Subject: [PATCH 06/19] HalfT prior is working --- GPy/core/parameterization/priors.py | 37 ++++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index e488d86b..b41cef3d 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -254,7 +254,7 @@ class Gamma(Prior): b = E / V return Gamma(a, b) -class inverse_gamma(Prior): +class InverseGamma(Prior): """ Implementation of the inverse-Gamma probability function, coupled with random variables. @@ -265,6 +265,7 @@ class inverse_gamma(Prior): """ domain = _POSITIVE + _instances = [] def __new__(cls, a, b): # Singleton: if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] @@ -291,7 +292,7 @@ class inverse_gamma(Prior): def rvs(self, n): return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n) -class half_t(Prior): +class HalfT(Prior): """ Implementation of the half student t probability function, coupled with random variables. 
@@ -313,25 +314,27 @@ class half_t(Prior): def __init__(self, A, nu): self.A = float(A) self.nu = float(nu) - #self.constant = -gammaln(self.a) + a * np.log(b) + self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu) def __str__(self): - return "half_t(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')' + return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')' def lnpdf(self,theta): - theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) - lnpdfs = np.zeros_like(theta) - theta = np.array([theta]) - above_zero = theta.flatten()>1e-6 - v = self.nu - sigma2=self.A - lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5) - - gammaln(v * 0.5) - - 0.5*np.log(sigma2 * v * np.pi) - - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2)) - ) - #return np.sum(objective) - return lnpdfs + return self.constant*(theta>0) -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) + + #theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) + #lnpdfs = np.zeros_like(theta) + #theta = np.array([theta]) + #above_zero = theta.flatten()>1e-6 + #v = self.nu + #sigma2=self.A + #stop + #lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5) + # - gammaln(v * 0.5) + # - 0.5*np.log(sigma2 * v * np.pi) + # - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2)) + #) + #return lnpdfs def lnpdf_grad(self,theta): theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) From 45f87517690dacaf07c81a1739e08ffab4a83646 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 3 Oct 2014 11:55:58 +0100 Subject: [PATCH 07/19] Bug fixed --- GPy/core/parameterization/priors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index b41cef3d..f3e0c9d7 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -320,7 +320,8 @@ class HalfT(Prior): return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')' def lnpdf(self,theta): - return self.constant*(theta>0) -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) + stop + return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) ) #theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) #lnpdfs = np.zeros_like(theta) From c1d998e27252a18a3e464518e957f842670fa017 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 3 Oct 2014 12:02:58 +0100 Subject: [PATCH 08/19] Another bug fixed --- GPy/core/parameterization/priors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index f3e0c9d7..453bdfb0 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -320,7 +320,6 @@ class HalfT(Prior): return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')' def lnpdf(self,theta): - stop return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) ) #theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) From 6a260409fa3edbdb450d53d90a4d6bc6def6eaaa Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 6 Oct 2014 08:59:24 +0100 Subject: [PATCH 09/19] [param_to_array] deprecated and removed param_to_array from code, use param.values instead --- GPy/examples/dimensionality_reduction.py | 78 +++----- .../expectation_propagation_dtc.py | 5 +- .../latent_function_inference/laplace.py | 
6 +- .../latent_function_inference/var_dtc.py | 5 +- .../latent_function_inference/var_dtc_gpu.py | 127 +++++++------ .../var_dtc_parallel.py | 129 +++++++------- GPy/kern/_src/poly.py | 1 - GPy/models/sparse_gp_regression.py | 3 +- .../matplot_dep/dim_reduction_plots.py | 7 +- GPy/plotting/matplot_dep/kernel_plots.py | 3 +- GPy/plotting/matplot_dep/models_plots.py | 2 - GPy/plotting/matplot_dep/variational_plots.py | 7 +- GPy/plotting/matplot_dep/visualize.py | 15 +- GPy/util/data_resources.json | 23 ++- GPy/util/datasets.py | 167 ++++++++++++++++-- GPy/util/misc.py | 2 + 16 files changed, 349 insertions(+), 231 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a640e360..f79f4e6f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -23,9 +23,6 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan X = _np.random.rand(num_inputs, input_dim) lengthscales = _np.random.rand(input_dim) k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) - ##+ GPy.kern.white(input_dim, 0.01) - #) - #k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T @@ -159,7 +156,6 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy from matplotlib import pyplot as plt - from ..util.misc import param_to_array import numpy as np _np.random.seed(0) @@ -177,7 +173,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes, labels=m.data_labels) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) raw_input('Press enter to finish') plt.close(fig) @@ -186,8 +182,6 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy from matplotlib import pyplot as plt - from ..util.misc import param_to_array - import numpy as np _np.random.seed(0) data = GPy.util.datasets.oil() @@ -204,7 +198,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40 fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes, labels=m.data_labels) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) raw_input('Press enter to finish') plt.close(fig) @@ -228,10 +222,10 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False): Ylist = [Y1, Y2, Y3] if plot_sim: - import pylab + from matplotlib import pyplot as plt import matplotlib.cm as cm import itertools - fig = 
pylab.figure("MRD Simulation Data", figsize=(8, 6)) + fig = plt.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() ax = fig.add_subplot(2, 1, 1) labls = slist_names @@ -242,29 +236,11 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) - pylab.draw() - pylab.tight_layout() + plt.draw() + plt.tight_layout() return slist, [S1, S2, S3], Ylist -def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS): - S1 = _np.hstack([s1, sS]) - S2 = _np.hstack([s2, s3, sS]) - S3 = _np.hstack([s3, sS]) - Y1 = S1.dot(_np.random.randn(S1.shape[1], D1)) - Y2 = S2.dot(_np.random.randn(S2.shape[1], D2)) - Y3 = S3.dot(_np.random.randn(S3.shape[1], D3)) - Y1 += .3 * _np.random.randn(*Y1.shape) - Y2 += .2 * _np.random.randn(*Y2.shape) - Y3 += .25 * _np.random.randn(*Y3.shape) - Y1 -= Y1.mean(0) - Y2 -= Y2.mean(0) - Y3 -= Y3.mean(0) - Y1 /= Y1.std(0) - Y2 /= Y2.std(0) - Y3 /= Y3.std(0) - return Y1, Y2, Y3, S1, S2, S3 - def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): _np.random.seed(1234) @@ -291,10 +267,10 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): Ylist = [Y1, Y2, Y3] if plot_sim: - import pylab + from matplotlib import pyplot as plt import matplotlib.cm as cm import itertools - fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) + fig = plt.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() ax = fig.add_subplot(2, 1, 1) labls = slist_names @@ -305,28 +281,28 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) - pylab.draw() - pylab.tight_layout() + plt.draw() + plt.tight_layout() return slist, [S1, S2, S3], Ylist -# def bgplvm_simulation_matlab_compare(): -# from GPy.util.datasets import simulation_BGPLVM -# from GPy import kern -# from GPy.models import BayesianGPLVM -# -# sim_data = simulation_BGPLVM() -# Y = sim_data['Y'] -# mu = sim_data['mu'] -# num_inducing, [_, Q] = 3, mu.shape -# -# k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) -# m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, -# _debug=False) -# m.auto_scale_factor = True -# m['noise'] = Y.var() / 100. 
-# m['linear_variance'] = .01 -# return m +def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS): + S1 = _np.hstack([s1, sS]) + S2 = _np.hstack([s2, s3, sS]) + S3 = _np.hstack([s3, sS]) + Y1 = S1.dot(_np.random.randn(S1.shape[1], D1)) + Y2 = S2.dot(_np.random.randn(S2.shape[1], D2)) + Y3 = S3.dot(_np.random.randn(S3.shape[1], D3)) + Y1 += .3 * _np.random.randn(*Y1.shape) + Y2 += .2 * _np.random.randn(*Y2.shape) + Y3 += .25 * _np.random.randn(*Y3.shape) + Y1 -= Y1.mean(0) + Y2 -= Y2.mean(0) + Y3 -= Y3.mean(0) + Y1 /= Y1.std(0) + Y2 /= Y2.std(0) + Y3 /= Y3.std(0) + return Y1, Y2, Y3, S1, S2, S3 def bgplvm_simulation(optimize=True, verbose=1, plot=True, plot_sim=False, diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 3aeb4fbb..7c8041ce 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -1,7 +1,6 @@ import numpy as np from ...util import diag from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR -from ...util.misc import param_to_array from ...core.parameterization.variational import VariationalPosterior from . import LatentFunctionInference from posterior import Posterior @@ -23,7 +22,7 @@ class EPDTC(LatentFunctionInference): self.get_YYTfactor.limit = limit def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) + return np.sum(np.square(Y)) def __getstate__(self): # has to be overridden, as Cacher objects cannot be pickled. @@ -44,7 +43,7 @@ class EPDTC(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y else: return jitchol(tdot(Y)) diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 1c153518..a815d433 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -12,7 +12,6 @@ import numpy as np from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv -from ...util.misc import param_to_array from posterior import Posterior import warnings from scipy import optimize @@ -39,9 +38,6 @@ class Laplace(LatentFunctionInference): Returns a Posterior class containing essential quantities of the posterior """ - #make Y a normal array! - Y = param_to_array(Y) - # Compute K K = kern.K(X) @@ -153,7 +149,7 @@ class Laplace(LatentFunctionInference): #compute vital matrices C = np.dot(LiW12, K) - Ki_W_i = K - C.T.dot(C) + Ki_W_i = K - C.T.dot(C) #compute the log marginal log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 4f21bc29..64ee30c4 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -6,7 +6,6 @@ from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrt from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np -from ...util.misc import param_to_array from . 
import LatentFunctionInference log_2_pi = np.log(2*np.pi) import logging, itertools @@ -35,7 +34,7 @@ class VarDTC(LatentFunctionInference): self.get_YYTfactor.limit = limit def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) + return np.sum(np.square(Y)) def __getstate__(self): # has to be overridden, as Cacher objects cannot be pickled. @@ -56,7 +55,7 @@ class VarDTC(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y.view(np.ndarray) else: return jitchol(tdot(Y)) diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py index 3bd5c347..f7da9080 100644 --- a/GPy/inference/latent_function_inference/var_dtc_gpu.py +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np -from ...util.misc import param_to_array from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) @@ -32,18 +31,18 @@ class VarDTC_GPU(LatentFunctionInference): """ const_jitter = np.float64(1e-6) def __init__(self, batchsize=None, gpu_memory=4., limit=1): - + self.batchsize = batchsize self.gpu_memory = gpu_memory - + self.midRes = {} self.batch_pos = 0 # the starting position of the current mini-batch - + self.cublas_handle = gpu_init.cublas_handle - + # Initialize GPU caches self.gpuCache = None - + def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y): ndata = Y.shape[0] if self.batchsize==None: @@ -75,10 +74,10 @@ class VarDTC_GPU(LatentFunctionInference): 'psi2p_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), } self.gpuCache['ones_gpu'].fill(1.0) - + YT_gpu = self.gpuCache['YT_gpu'] self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1) - + def _estimateMemoryOccupation(self, N, M, D): """ Estimate the best batch size. @@ -89,7 +88,7 @@ class VarDTC_GPU(LatentFunctionInference): unit: GB """ return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024. - + def _estimateBatchSize(self, kern, N, M, Q, D): """ Estimate the best batch size. @@ -104,11 +103,11 @@ class VarDTC_GPU(LatentFunctionInference): else: x0, x1 = 0.,0. y0, y1 = self._estimateMemoryOccupation(N, M, D) - + opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N) - + return opt_batchsize - + def _get_YYTfactor(self, Y): """ find a matrix L which satisfies LLT = YYT. @@ -117,10 +116,10 @@ class VarDTC_GPU(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y.view(np.ndarray) else: return jitchol(tdot(Y)) - + def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise): num_inducing, input_dim = Z.shape[0], Z.shape[1] num_data, output_dim = Y.shape @@ -130,7 +129,7 @@ class VarDTC_GPU(LatentFunctionInference): beta_gpu = self.gpuCache['beta_gpu'] YT_gpu = self.gpuCache['YT_gpu'] betaYT_gpu = self.gpuCache['betaYT_gpu'] - + beta_gpu.fill(beta) betaYT_gpu.fill(0.) cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1) @@ -140,7 +139,7 @@ class VarDTC_GPU(LatentFunctionInference): psi1Y_gpu.fill(0.) psi2_gpu.fill(0.) 
psi0_full = 0 - + for n_start in xrange(0,num_data,self.batchsize): n_end = min(self.batchsize+n_start, num_data) ndata = n_end - n_start @@ -156,35 +155,35 @@ class VarDTC_GPU(LatentFunctionInference): psi1p_gpu = kern.K(X_slice, Z) cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing) - + psi0_full += psi0.sum() - + if uncertain_inputs: sum_axis(psi2_gpu,psi2p_gpu,1,1) else: cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing) - + psi0_full *= beta if uncertain_inputs: cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1) - - else: + + else: psi2_full = np.zeros((num_inducing,num_inducing)) psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi0_full = 0. YRY_full = 0. - - for n_start in xrange(0,num_data,self.batchsize): + + for n_start in xrange(0,num_data,self.batchsize): n_end = min(self.batchsize+n_start, num_data) Y_slice = Y[n_start:n_end] X_slice = X[n_start:n_end] - + if het_noise: b = beta[n_start] YRY_full += np.inner(Y_slice, Y_slice)*b else: b = beta - + if uncertain_inputs: psi0 = kern.psi0(Z, X_slice) psi1 = kern.psi1(Z, X_slice) @@ -193,50 +192,50 @@ class VarDTC_GPU(LatentFunctionInference): psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) psi2_full += np.dot(psi1.T,psi1)*b - + psi0_full += psi0.sum()*b - psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM - + psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM + if not het_noise: YRY_full = trYYT*beta psi1Y_gpu.set(psi1Y_full) psi2_gpu.set(psi2_full) - + return psi0_full, YRY_full - + def inference_likelihood(self, kern, X, Z, likelihood, Y): """ The first phase of inference: Compute: log-likelihood, dL_dKmm - + Cached intermediate results: Kmm, KmmInv, """ - + num_inducing, input_dim = Z.shape[0], Z.shape[1] num_data, output_dim = Y.shape - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 if het_noise: self.batchsize=0 - + self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y) if isinstance(X, VariationalPosterior): uncertain_inputs = True else: uncertain_inputs = False - + psi1Y_gpu = self.gpuCache['psi1Y_gpu'] psi2_gpu = self.gpuCache['psi2_gpu'] - + psi0_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise) - + #====================================================================== # Compute Common Components #====================================================================== - + Kmm = kern.K(Z).copy() Kmm_gpu = self.gpuCache['Kmm_gpu'] Kmm_gpu.set(np.asfortranarray(Kmm)) @@ -244,14 +243,14 @@ class VarDTC_GPU(LatentFunctionInference): ones_gpu = self.gpuCache['ones_gpu'] cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1) # assert np.allclose(Kmm, Kmm_gpu.get()) - + # Lm = jitchol(Kmm) # Lm_gpu = self.gpuCache['Lm_gpu'] cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1) culinalg.cho_factor(Lm_gpu,'L') # print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max() - + # Lambda = Kmm+psi2_full # LL = jitchol(Lambda) # @@ -261,7 +260,7 @@ class VarDTC_GPU(LatentFunctionInference): LL_gpu = Lambda_gpu culinalg.cho_factor(LL_gpu,'L') # print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max() - + # b,_ = 
dtrtrs(LL, psi1Y_full) # bbt_cpu = np.square(b).sum() # @@ -270,7 +269,7 @@ class VarDTC_GPU(LatentFunctionInference): cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing) bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1) # print np.abs(bbt-bbt_cpu) - + # v,_ = dtrtrs(LL.T,b,lower=False) # vvt = np.einsum('md,od->mo',v,v) # LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') @@ -288,7 +287,7 @@ class VarDTC_GPU(LatentFunctionInference): tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get()) # print np.abs(vvt-vvt_gpu.get()).max() # print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT) - + # Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T # LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] # KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] @@ -303,7 +302,7 @@ class VarDTC_GPU(LatentFunctionInference): cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1) cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing) # print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max() - + # dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 # dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] @@ -311,7 +310,7 @@ class VarDTC_GPU(LatentFunctionInference): cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1) # print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max() - + #====================================================================== # Compute log-likelihood #====================================================================== @@ -320,7 +319,7 @@ class VarDTC_GPU(LatentFunctionInference): else: logL_R = -num_data*np.log(beta) # logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) - + logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get()) logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get()) logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda) @@ -329,7 +328,7 @@ class VarDTC_GPU(LatentFunctionInference): #====================================================================== # Compute dL_dKmm #====================================================================== - + # dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. 
# dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu'] @@ -341,24 +340,24 @@ class VarDTC_GPU(LatentFunctionInference): #====================================================================== # Compute the Posterior distribution of inducing points p(u|Y) #====================================================================== - + post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get()) #====================================================================== # Compute dL_dthetaL for uncertian input and non-heter noise - #====================================================================== - + #====================================================================== + if not het_noise: dL_dthetaL = (YRY_full + output_dim*psi0_full - num_data*output_dim)/-2. dL_dthetaL += cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1) dL_dthetaL += cublas.cublasDdot(self.cublas_handle,v_gpu.size, v_gpu.gpudata,1,psi1Y_gpu.gpudata,1) self.midRes['dL_dthetaL'] = -beta*dL_dthetaL - + return logL, dL_dKmm_gpu.get(), post def inference_minibatch(self, kern, X, Z, likelihood, Y): """ - The second phase of inference: Computing the derivatives over a minibatch of Y + The second phase of inference: Computing the derivatives over a minibatch of Y Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL return a flag showing whether it reached the end of Y (isEnd) """ @@ -370,10 +369,10 @@ class VarDTC_GPU(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 - + n_start = self.batch_pos n_end = min(self.batchsize+n_start, num_data) if n_end==num_data: @@ -382,12 +381,12 @@ class VarDTC_GPU(LatentFunctionInference): else: isEnd = False self.batch_pos = n_end - + nSlice = n_end-n_start X_slice = X[n_start:n_end] if het_noise: beta = beta[n_start] # nSlice==1 - + if kern.useGPU: if not uncertain_inputs: psi0p_gpu = kern.Kdiag(X_slice) @@ -416,28 +415,28 @@ class VarDTC_GPU(LatentFunctionInference): psi1p_gpu.set(np.asfortranarray(psi1)) if uncertain_inputs: psi2p_gpu.set(np.asfortranarray(psi2)) - + #====================================================================== # Compute dL_dpsi #====================================================================== dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] - v_gpu = self.gpuCache['v_gpu'] + v_gpu = self.gpuCache['v_gpu'] dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu'] dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu'] dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu'] betaYT_gpu = self.gpuCache['betaYT_gpu'] betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end] - + # Adjust to the batch size if dL_dpsi0_gpu.shape[0] > nSlice: dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice] dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) - + dL_dpsi0_gpu.fill(-output_dim *beta/2.) 
- + cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice) - + if uncertain_inputs: cublas.cublasDcopy(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, dL_dpsi2_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, dL_dpsi2_gpu.size, beta, dL_dpsi2_gpu.gpudata, 1) @@ -458,7 +457,7 @@ class VarDTC_GPU(LatentFunctionInference): dL_dpsi1 = dL_dpsi1_gpu else: dL_dpsi0 = dL_dpsi0_gpu.get() - dL_dpsi1 = dL_dpsi1_gpu.get() + dL_dpsi1 = dL_dpsi1_gpu.get() if uncertain_inputs: if kern.useGPU: dL_dpsi2 = dL_dpsi2_gpu @@ -480,4 +479,4 @@ class VarDTC_GPU(LatentFunctionInference): 'dL_dthetaL':dL_dthetaL} return isEnd, (n_start,n_end), grad_dict - + diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index b9ecbb5c..ae25f3e3 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdi from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np -from ...util.misc import param_to_array from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) @@ -27,26 +26,26 @@ class VarDTC_minibatch(LatentFunctionInference): """ const_jitter = 1e-6 def __init__(self, batchsize=None, limit=1, mpi_comm=None): - + self.batchsize = batchsize self.mpi_comm = mpi_comm self.limit = limit - + # Cache functions from ...util.caching import Cacher self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) - + self.midRes = {} self.batch_pos = 0 # the starting position of the current mini-batch self.Y_speedup = False # Replace Y with the cholesky factor of YY.T, but the posterior inference will be wrong - + def __getstate__(self): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. return self.batchsize, self.limit, self.Y_speedup def __setstate__(self, state): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. self.batchsize, self.limit, self.Y_speedup = state self.mpi_comm = None self.midRes = {} @@ -58,9 +57,9 @@ class VarDTC_minibatch(LatentFunctionInference): def set_limit(self, limit): self.get_trYYT.limit = limit self.get_YYTfactor.limit = limit - + def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) + return np.sum(np.square(Y)) def _get_YYTfactor(self, Y): """ @@ -70,19 +69,19 @@ class VarDTC_minibatch(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y.view(np.ndarray) else: return jitchol(tdot(Y)) - + def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs): - + het_noise = beta.size > 1 trYYT = self.get_trYYT(Y) if self.Y_speedup and not het_noise: Y = self.get_YYTfactor(Y) - - num_inducing = Z.shape[0] + + num_inducing = Z.shape[0] num_data, output_dim = Y.shape if self.batchsize == None: self.batchsize = num_data @@ -91,8 +90,8 @@ class VarDTC_minibatch(LatentFunctionInference): psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi0_full = 0. YRY_full = 0. 
- - for n_start in xrange(0,num_data,self.batchsize): + + for n_start in xrange(0,num_data,self.batchsize): n_end = min(self.batchsize+n_start, num_data) if (n_end-n_start)==num_data: Y_slice = Y @@ -100,13 +99,13 @@ class VarDTC_minibatch(LatentFunctionInference): else: Y_slice = Y[n_start:n_end] X_slice = X[n_start:n_end] - + if het_noise: b = beta[n_start] YRY_full += np.inner(Y_slice, Y_slice)*b else: b = beta - + if uncertain_inputs: psi0 = kern.psi0(Z, X_slice) psi1 = kern.psi1(Z, X_slice) @@ -115,13 +114,13 @@ class VarDTC_minibatch(LatentFunctionInference): psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) psi2_full += np.dot(psi1.T,psi1)*b - + psi0_full += psi0.sum()*b - psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM + psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM if not het_noise: YRY_full = trYYT*beta - + if self.mpi_comm != None: psi0_all = np.array(psi0_full) psi1Y_all = psi1Y_full.copy() @@ -132,18 +131,18 @@ class VarDTC_minibatch(LatentFunctionInference): self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE]) self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE]) return psi0_all, psi1Y_all, psi2_all, YRY_all - + return psi0_full, psi1Y_full, psi2_full, YRY_full - + def inference_likelihood(self, kern, X, Z, likelihood, Y): """ The first phase of inference: Compute: log-likelihood, dL_dKmm - + Cached intermediate results: Kmm, KmmInv, """ - - num_data, output_dim = Y.shape + + num_data, output_dim = Y.shape input_dim = Z.shape[0] if self.mpi_comm != None: num_data_all = np.array(num_data,dtype=np.int32) @@ -154,7 +153,7 @@ class VarDTC_minibatch(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 @@ -162,28 +161,28 @@ class VarDTC_minibatch(LatentFunctionInference): self.batchsize = 1 psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs) - + #====================================================================== # Compute Common Components #====================================================================== - + Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) KmmInv,Lm,LmInv,_ = pdinv(Kmm) - + LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T) Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT LInv,LL,LLInv,logdet_L = pdinv(Lambda) b = LLInv.dot(LmInv.dot(psi1Y_full.T)) bbt = np.square(b).sum() v = LmInv.T.dot(LLInv.T.dot(b)) - + dL_dpsi2R = LmInv.T.dot(-LLInv.T.dot(tdot(b)+output_dim*np.eye(input_dim)).dot(LLInv)+output_dim*np.eye(input_dim)).dot(LmInv)/2. - + # Cache intermediate results self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['v'] = v - + #====================================================================== # Compute log-likelihood #====================================================================== @@ -196,22 +195,22 @@ class VarDTC_minibatch(LatentFunctionInference): #====================================================================== # Compute dL_dKmm #====================================================================== - + dL_dKmm = dL_dpsi2R - output_dim*KmmInv.dot(psi2_full).dot(KmmInv)/2. 
#====================================================================== # Compute the Posterior distribution of inducing points p(u|Y) #====================================================================== - + if not self.Y_speedup or het_noise: post = Posterior(woodbury_inv=LmInv.T.dot(np.eye(input_dim)-LInv).dot(LmInv), woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) else: post = None - + #====================================================================== # Compute dL_dthetaL for uncertian input and non-heter noise - #====================================================================== - + #====================================================================== + if not het_noise: dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum() self.midRes['dL_dthetaL'] = dL_dthetaL @@ -220,7 +219,7 @@ class VarDTC_minibatch(LatentFunctionInference): def inference_minibatch(self, kern, X, Z, likelihood, Y): """ - The second phase of inference: Computing the derivatives over a minibatch of Y + The second phase of inference: Computing the derivatives over a minibatch of Y Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL return a flag showing whether it reached the end of Y (isEnd) """ @@ -231,7 +230,7 @@ class VarDTC_minibatch(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 @@ -241,7 +240,7 @@ class VarDTC_minibatch(LatentFunctionInference): YYT_factor = self.get_YYTfactor(Y) else: YYT_factor = Y - + n_start = self.batch_pos n_end = min(self.batchsize+n_start, num_data) if n_end==num_data: @@ -250,10 +249,10 @@ class VarDTC_minibatch(LatentFunctionInference): else: isEnd = False self.batch_pos = n_end - + Y_slice = YYT_factor[n_start:n_end] X_slice = X[n_start:n_end] - + if not uncertain_inputs: psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) @@ -264,33 +263,33 @@ class VarDTC_minibatch(LatentFunctionInference): psi1 = kern.psi1(Z, X_slice) psi2 = kern.psi2(Z, X_slice) betapsi1 = np.einsum('n,nm->nm',beta,psi1) - + if het_noise: beta = beta[n_start] # assuming batchsize==1 betaY = beta*Y_slice - + #====================================================================== # Load Intermediate Results #====================================================================== - + dL_dpsi2R = self.midRes['dL_dpsi2R'] v = self.midRes['v'] - + #====================================================================== # Compute dL_dpsi #====================================================================== - + dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2. - + dL_dpsi1 = np.dot(betaY,v.T) - + if uncertain_inputs: dL_dpsi2 = beta* dL_dpsi2R else: dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi2 = None - + #====================================================================== # Compute dL_dthetaL #====================================================================== @@ -300,14 +299,14 @@ class VarDTC_minibatch(LatentFunctionInference): psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) else: psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) - + dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) else: if isEnd: dL_dthetaL = self.midRes['dL_dthetaL'] else: dL_dthetaL = 0. 
- + if uncertain_inputs: grad_dict = {'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, @@ -317,7 +316,7 @@ class VarDTC_minibatch(LatentFunctionInference): grad_dict = {'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1, 'dL_dthetaL':dL_dthetaL} - + return isEnd, (n_start,n_end), grad_dict @@ -330,18 +329,18 @@ def update_gradients(model, mpi_comm=None): X = model.X[model.N_range[0]:model.N_range[1]] model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y) - + het_noise = model.likelihood.variance.size > 1 - + if het_noise: dL_dthetaL = np.empty((model.Y.shape[0],)) else: dL_dthetaL = np.float64(0.) - + kern_grad = model.kern.gradient.copy() kern_grad[:] = 0. model.Z.gradient = 0. - + isEnd = False while not isEnd: isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y) @@ -352,24 +351,24 @@ def update_gradients(model, mpi_comm=None): X_slice = model.X[n_range[0]:n_range[1]] else: X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]] - + #gradients w.r.t. kernel model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) kern_grad += model.kern.gradient - + #gradients w.r.t. Z model.Z.gradient += model.kern.gradients_Z_expectations( dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) - + #gradients w.r.t. posterior parameters of X X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) model.set_X_gradients(X_slice, X_grad) - + if het_noise: dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] else: dL_dthetaL += grad_dict['dL_dthetaL'] - + # Gather the gradients from multiple MPI nodes if mpi_comm != None: if het_noise: @@ -380,14 +379,14 @@ def update_gradients(model, mpi_comm=None): mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE]) kern_grad = kern_grad_all model.Z.gradient = Z_grad_all - + #gradients w.r.t. kernel model.kern.update_gradients_full(dL_dKmm, model.Z, None) model.kern.gradient += kern_grad #gradients w.r.t. Z model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z) - + # Update Log-likelihood KL_div = model.variational_prior.KL_divergence(X) # update for the KL divergence diff --git a/GPy/kern/_src/poly.py b/GPy/kern/_src/poly.py index 4c5f0e93..b90e8f8f 100644 --- a/GPy/kern/_src/poly.py +++ b/GPy/kern/_src/poly.py @@ -3,7 +3,6 @@ import numpy as np from kern import Kern -from ...util.misc import param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp class Poly(Kern): diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 744de6e7..3be2aed2 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -7,7 +7,6 @@ from ..core import SparseGP from .. import likelihoods from .. 
import kern from ..inference.latent_function_inference import VarDTC -from ..util.misc import param_to_array from ..core.parameterization.variational import NormalPosterior class SparseGPRegression(SparseGP): @@ -40,7 +39,7 @@ class SparseGPRegression(SparseGP): # Z defaults to a subset of the data if Z is None: i = np.random.permutation(num_data)[:min(num_inducing, num_data)] - Z = param_to_array(X)[i].copy() + Z = X.view(np.ndarray)[i].copy() else: assert Z.shape[1] == input_dim diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 20e8e962..b4fed8fd 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -1,7 +1,6 @@ import numpy as np from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController -from ...util.misc import param_to_array from ...core.parameterization.variational import VariationalPosterior from .base_plots import x_frame2D import itertools @@ -55,9 +54,9 @@ def plot_latent(model, labels=None, which_indices=None, #fethch the data points X that we'd like to plot X = model.X if isinstance(X, VariationalPosterior): - X = param_to_array(X.mean) + X = X.mean else: - X = param_to_array(X) + X = X if X.shape[0] > 1000: @@ -175,7 +174,7 @@ def plot_latent(model, labels=None, which_indices=None, ax.set_aspect('auto') # set a nice aspect ratio if plot_inducing: - Z = param_to_array(model.Z) + Z = model.Z ax.plot(Z[:, input_1], Z[:, input_2], '^w') ax.set_xlim((xmin, xmax)) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index c0bd1599..c2bd7d38 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -35,8 +35,7 @@ def add_bar_labels(fig, ax, bars, bottom=0): def plot_bars(fig, ax, x, ard_params, color, name, bottom=0): - from ...util.misc import param_to_array - return ax.bar(left=x, height=param_to_array(ard_params), width=.8, + return ax.bar(left=x, height=ard_params.view(np.ndarray), width=.8, bottom=bottom, align='center', color=color, edgecolor='k', linewidth=1.2, label=name.replace("_"," ")) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 509c9485..ed024c0a 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -8,7 +8,6 @@ except: pass import numpy as np from base_plots import gpplot, x_frame1D, x_frame2D -from ...util.misc import param_to_array from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from scipy import sparse @@ -67,7 +66,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', X_variance = model.X.variance else: X = model.X - #X, Y = param_to_array(X, model.Y) Y = model.Y if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) diff --git a/GPy/plotting/matplot_dep/variational_plots.py b/GPy/plotting/matplot_dep/variational_plots.py index e97f001b..5cced10d 100644 --- a/GPy/plotting/matplot_dep/variational_plots.py +++ b/GPy/plotting/matplot_dep/variational_plots.py @@ -1,5 +1,4 @@ import pylab as pb, numpy as np -from ...util.misc import param_to_array def plot(parameterized, fignum=None, ax=None, colors=None): """ @@ -21,7 +20,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None): else: colors = iter(colors) plots = [] - means, variances = 
param_to_array(parameterized.mean, parameterized.variance) + means, variances = parameterized.mean, parameterized.variance x = np.arange(means.shape[0]) for i in range(means.shape[1]): if ax is None: @@ -68,7 +67,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid else: colors = iter(colors) plots = [] - means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob) + means, variances, gamma = parameterized.mean, parameterized.variance, parameterized.binary_prob x = np.arange(means.shape[0]) for i in range(means.shape[1]): if side_by_side: @@ -77,7 +76,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid else: sub1 = (means.shape[1]*2,1,2*i+1) sub2 = (means.shape[1]*2,1,2*i+2) - + # mean and variance plot a = fig.add_subplot(*sub1) a.plot(means, c='k', alpha=.3) diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index 957d5a78..9ff41730 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -4,7 +4,6 @@ import GPy import numpy as np import matplotlib as mpl import time -from ...util.misc import param_to_array from GPy.core.parameterization.variational import VariationalPosterior try: import visual @@ -127,7 +126,7 @@ class lvm(matplotlib_show): self.latent_index = latent_index self.latent_dim = model.input_dim self.disable_drag = disable_drag - + # The red cross which shows current latent point. self.latent_values = vals self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0] @@ -474,7 +473,7 @@ class mocap_data_show(matplotlib_show): self.axes.set_ylim(self.y_lim) self.axes.set_zlim(self.z_lim) self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.]) - + # self.axes.set_aspect('equal') # self.axes.autoscale(enable=False) @@ -500,7 +499,7 @@ class skeleton_show(mocap_data_show): :param vals: set of modeled angles to use for printing in the axis when it's first created. :type vals: np.array :param skel: skeleton object that has the parameters of the motion capture skeleton associated with it. - :type skel: mocap.skeleton object + :type skel: mocap.skeleton object :param padding: :type int """ @@ -512,7 +511,7 @@ class skeleton_show(mocap_data_show): """Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting. :param vals: the values that are being modelled.""" - + if self.padding>0: channels = np.zeros((self.vals.shape[0], self.vals.shape[1]+self.padding)) channels[:, 0:self.vals.shape[0]] = self.vals @@ -524,7 +523,7 @@ class skeleton_show(mocap_data_show): self.vals[:, 0] = vals_mat[:, 0].copy() self.vals[:, 1] = vals_mat[:, 2].copy() self.vals[:, 2] = vals_mat[:, 1].copy() - + def wrap_around(self, lim, connect): quot = lim[1] - lim[0] self.vals = rem(self.vals, quot)+lim[0] @@ -546,7 +545,7 @@ def data_play(Y, visualizer, frame_rate=30): Example usage: This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object. - + .. 
code-block:: python data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01']) @@ -556,7 +555,7 @@ def data_play(Y, visualizer, frame_rate=30): GPy.util.visualize.data_play(Y, visualize) """ - + for y in Y: visualizer.modify(y[None, :]) diff --git a/GPy/util/data_resources.json b/GPy/util/data_resources.json index f8b00ce8..1ed735c3 100644 --- a/GPy/util/data_resources.json +++ b/GPy/util/data_resources.json @@ -228,7 +228,7 @@ "fruitfly_tomancak_cel_files": { "citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin", "details": "Gene expression results from blastoderm development in Drosophila Melanogaster.", - "files": [ + "files": [ [ "embryo_tc_4_1.CEL", "embryo_tc_4_2.CEL", @@ -284,7 +284,7 @@ "details": "Google trends results.", "files": [ [ - + ] ], "license": null, @@ -293,7 +293,7 @@ "http://www.google.com/trends/" ] }, - + "hapmap3": { "citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.", "details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosemes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Positon (to Chromosome) [base pairs]\n\n ", @@ -523,6 +523,23 @@ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/" ] }, + "singlecell_islam": { + "citation": "Single-Cell RNA-Seq Reveals Dynamic, Random Monoallelic Gene Expression in Mammalian Cells Qiaolin Deng, Daniel Ramskoeld, Bjoern Reinius, and Rickard Sandberg Science 10 January 2014: 343 (6167), 193-196. [DOI:10.1126/science.1245316]", + "details" : "92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)", + "files" : [["GSE29087_L139_expression_tab.txt.gz"], ["GSE29087_family.soft.gz"]], + "license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html", + "size" : 1159449, + "urls" : ["ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/suppl/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/soft/"] + }, + "singlecell_deng": { + "citation": "Deng Q, Ramsköld D, Reinius B, Sandberg R. Single-cell RNA-seq reveals dynamic, random monoallelic gene expression in mammalian cells. 
Science 2014 Jan 10;343(6167):193-6. PMID: 24408435", + "details" : "First generation mouse strain crosses were used to study monoallelic expression on the single cell level", + "files" : [["?acc=GSE45719&format=file"], ["GSE45719_series_matrix.txt.gz"]], + "license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html", + "size" : 1159449, + "save_names": [["GSE45719_Raw.tar"], [null]], + "urls" : ["http://www.ncbi.nlm.nih.gov/geo/download/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE45nnn/GSE45719/matrix/"] + }, "sod1_mouse": { "citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.", "details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.", diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 93a5dceb..d1250ae4 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -82,20 +82,32 @@ def prompt_user(prompt): def data_available(dataset_name=None): """Check if the data set is available on the local machine already.""" - for file_list in data_resources[dataset_name]['files']: - for file in file_list: - if not os.path.exists(os.path.join(data_path, dataset_name, file)): + from itertools import izip_longest + dr = data_resources[dataset_name] + zip_urls = (dr['files'], ) + if dr.has_key('save_names'): zip_urls += (dr['save_names'], ) + else: zip_urls += ([],) + + for file_list, save_list in izip_longest(*zip_urls, fillvalue=[]): + for f, s in izip_longest(file_list, save_list, fillvalue=None): + if s is not None: f=s # If there is a save_name given, use that one + if not os.path.exists(os.path.join(data_path, dataset_name, f)): return False return True -def download_url(url, store_directory, save_name = None, messages = True, suffix=''): +def download_url(url, store_directory, save_name=None, messages=True, suffix=''): """Download a file from a url and save it to disk.""" i = url.rfind('/') file = url[i+1:] print file dir_name = os.path.join(data_path, store_directory) - save_name = os.path.join(dir_name, file) - print "Downloading ", url, "->", os.path.join(store_directory, file) + + if save_name is None: save_name = os.path.join(dir_name, file) + else: save_name = os.path.join(dir_name, save_name) + + if suffix is None: suffix='' + + print "Downloading ", url, "->", save_name if not os.path.exists(dir_name): os.makedirs(dir_name) try: @@ -178,19 +190,24 @@ def authorize_download(dataset_name=None): def download_data(dataset_name=None): """Check with the user that the are happy with terms and conditions for the data set, then download it.""" + import itertools dr = data_resources[dataset_name] if not authorize_download(dataset_name): raise Exception("Permission to download data set denied.") - if dr.has_key('suffices'): - for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']): - for file, suffix in zip(files, suffices): - download_url(os.path.join(url,file), dataset_name, dataset_name, suffix=suffix) - else: - for url, files in zip(dr['urls'], dr['files']): - for file in files: - download_url(os.path.join(url,file), dataset_name, dataset_name) + zip_urls = (dr['urls'], dr['files']) + + if dr.has_key('save_names'): zip_urls += (dr['save_names'], ) + else: zip_urls += ([],) + + if dr.has_key('suffices'): zip_urls 
+= (dr['suffices'], ) + else: zip_urls += ([],) + + for url, files, save_names, suffices in itertools.izip_longest(*zip_urls, fillvalue=[]): + for f, save_name, suffix in itertools.izip_longest(files, save_names, suffices, fillvalue=None): + download_url(os.path.join(url,f), dataset_name, save_name, suffix=suffix) + return True def data_details_return(data, data_set): @@ -895,6 +912,128 @@ def singlecell(data_set='singlecell'): 'genes': genes, 'labels':labels, }, data_set) +def singlecell_rna_seq_islam(dataset='singlecell_islam'): + if not data_available(dataset): + download_data(dataset) + + from pandas import read_csv, DataFrame, concat + dir_path = os.path.join(data_path, dataset) + filename = os.path.join(dir_path, 'GSE29087_L139_expression_tab.txt.gz') + data = read_csv(filename, sep='\t', skiprows=6, compression='gzip', header=None) + header1 = read_csv(filename, sep='\t', header=None, skiprows=5, nrows=1, compression='gzip') + header2 = read_csv(filename, sep='\t', header=None, skiprows=3, nrows=1, compression='gzip') + data.columns = np.concatenate((header1.ix[0, :], header2.ix[0, 7:])) + Y = data.set_index("Feature").ix[8:, 6:-4].T.astype(float) + + # read the info .soft + filename = os.path.join(dir_path, 'GSE29087_family.soft.gz') + info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None) + # split at ' = ' + info = DataFrame(info.ix[:,0].str.split(' = ').tolist()) + # only take samples: + info = info[info[0].str.contains("!Sample")] + info[0] = info[0].apply(lambda row: row[len("!Sample_"):]) + + groups = info.groupby(0).groups + # remove 'GGG' from barcodes + barcode = info[1][groups['barcode']].apply(lambda row: row[:-3]) + + title = info[1][groups['title']] + title.index = barcode + title.name = 'title' + geo_accession = info[1][groups['geo_accession']] + geo_accession.index = barcode + geo_accession.name = 'geo_accession' + case_id = info[1][groups['source_name_ch1']] + case_id.index = barcode + case_id.name = 'source_name_ch1' + + info = concat([title, geo_accession, case_id], axis=1) + labels = info.join(Y).source_name_ch1[:-4] + labels[labels=='Embryonic stem cell'] = "ES" + labels[labels=='Embryonic fibroblast'] = "MEF" + + return data_details_return({'Y': Y, + 'info': '92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)', + 'genes': Y.columns, + 'labels': labels, + 'datadf': data, + 'infodf': info}, dataset) + +def singlecell_rna_seq_deng(dataset='singlecell_deng'): + if not data_available(dataset): + download_data(dataset) + + from pandas import read_csv + dir_path = os.path.join(data_path, dataset) + + # read the info .soft + filename = os.path.join(dir_path, 'GSE45719_series_matrix.txt.gz') + info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None, nrows=29, index_col=0) + summary = info.loc['!Series_summary'][1] + design = info.loc['!Series_overall_design'] + + # only take samples: + sample_info = read_csv(filename, sep='\t', skiprows=30, compression='gzip', header=0, index_col=0).T + sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):]) + sample_info.columns.name = sample_info.columns.name[len("!Sample_"):] + sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']] + sample_info = sample_info.ix[:, np.r_[0:3, 5:sample_info.shape[1]]] + c = sample_info.columns.to_series() + c[1:4] = ['strain', 'cross', 'developmental_stage'] + sample_info.columns = 
c + + # Extract the tar file + filename = os.path.join(dir_path, 'GSE45719_Raw.tar') + with tarfile.open(filename, 'r') as files: + data = None + gene_info = None + message = '' + members = files.getmembers() + overall = len(members) + for i, file_info in enumerate(members): + f = files.extractfile(file_info) + inner = read_csv(f, sep='\t', header=0, compression='gzip', index_col=0) + sys.stdout.write(' '*(len(message)+1) + '\r') + sys.stdout.flush() + message = "{: >7.2%}: Extracting: {}".format(float(i+1)/overall, file_info.name[:20]+"...txt.gz") + sys.stdout.write(message) + if data is None: + data = inner.RPKM.to_frame() + data.columns = [file_info.name[:-18]] + gene_info = inner.Refseq_IDs.to_frame() + gene_info.columns = [file_info.name[:-18]] + else: + data[file_info.name[:-18]] = inner.RPKM + gene_info[file_info.name[:-18]] = inner.Refseq_IDs + + # Strip GSM number off data index + rep = re.compile('GSM\d+_') + data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():]) + data = data.T + + # make sure the same index gets used + sample_info.index = data.index + + # get the labels from the description + rep = re.compile('fibroblast|\d+-cell|embryo|liver|blastocyst|blastomere|zygote', re.IGNORECASE) + labels = sample_info.developmental_stage.apply(lambda row: " ".join(rep.findall(row))) + + sys.stdout.write(' '*len(message) + '\r') + sys.stdout.flush() + print "Read Archive {}".format(files.name) + + return data_details_return({'Y': data, + 'series_info': info, + 'sample_info': sample_info, + 'gene_info': gene_info, + 'summary': summary, + 'design': design, + 'genes': data.columns, + 'labels': labels, + }, dataset) + + def swiss_roll_1000(): return swiss_roll(num_samples=1000) diff --git a/GPy/util/misc.py b/GPy/util/misc.py index ac5a2e38..bf37159d 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -90,6 +90,8 @@ Convert an arbitrary number of parameters to :class:ndarray class objects. This converting parameter objects to numpy arrays, when using scipy.weave.inline routine. 
In scipy.weave.blitz there is no automatic array detection (even when the array inherits from :class:ndarray)""" + import warnings + warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning) assert len(param) > 0, "At least one parameter needed" if len(param) == 1: return param[0].view(np.ndarray) From 919be3ceba70fda1874761a1cfbf68879f4d9980 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 6 Oct 2014 11:00:34 +0100 Subject: [PATCH 10/19] [vardtc missing data] updated to new psi2 stuff --- GPy/inference/latent_function_inference/var_dtc.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 64ee30c4..70637e3b 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -246,12 +246,10 @@ class VarDTCMissingData(LatentFunctionInference): uncertain_inputs = True psi0_all = kern.psi0(Z, X) psi1_all = kern.psi1(Z, X) - psi2_all = kern.psi2(Z, X) else: uncertain_inputs = False psi0_all = kern.Kdiag(X) psi1_all = kern.K(X, Z) - psi2_all = None Ys, traces = self._Y(Y) beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) @@ -262,7 +260,7 @@ class VarDTCMissingData(LatentFunctionInference): dL_dpsi0_all = np.zeros(Y.shape[0]) dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing)) if uncertain_inputs: - dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing)) + dL_dpsi2_all = np.zeros((num_inducing, num_inducing)) dL_dR = 0 woodbury_vector = np.zeros((num_inducing, Y.shape[1])) @@ -278,6 +276,7 @@ class VarDTCMissingData(LatentFunctionInference): size = Y.shape[1] next_ten = 0 + for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)): if ((i+1.)/size) >= next_ten: logger.info('inference {:> 6.1%}'.format((i+1.)/size)) @@ -290,13 +289,13 @@ class VarDTCMissingData(LatentFunctionInference): psi0 = psi0_all[v] psi1 = psi1_all[v, :] - if uncertain_inputs: psi2 = psi2_all[v, :] + if uncertain_inputs: psi2 = kern.psi2(Z, X[v, :]) else: psi2 = None num_data = psi1.shape[0] if uncertain_inputs: if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) - else: psi2_beta = psi2.sum(0) * beta + else: psi2_beta = psi2 * beta A = LmInv.dot(psi2_beta.dot(LmInv.T)) else: if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) @@ -331,7 +330,7 @@ class VarDTCMissingData(LatentFunctionInference): dL_dpsi0_all[v] += dL_dpsi0 dL_dpsi1_all[v, :] += dL_dpsi1 if uncertain_inputs: - dL_dpsi2_all[v, :] += dL_dpsi2 + dL_dpsi2_all += dL_dpsi2 # log marginal likelihood log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, From d93fac8c13282b2d197ffdd564bfe63d7c0b5e78 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 6 Oct 2014 11:00:57 +0100 Subject: [PATCH 11/19] [datasets] deng et al. 
single cell experiment prints for ipython notebook --- GPy/util/datasets.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index d1250ae4..0f63a353 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -986,6 +986,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): # Extract the tar file filename = os.path.join(dir_path, 'GSE45719_Raw.tar') with tarfile.open(filename, 'r') as files: + print "Extracting Archive {}...".format(files.name) data = None gene_info = None message = '' @@ -994,10 +995,9 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): for i, file_info in enumerate(members): f = files.extractfile(file_info) inner = read_csv(f, sep='\t', header=0, compression='gzip', index_col=0) - sys.stdout.write(' '*(len(message)+1) + '\r') - sys.stdout.flush() + print ' '*(len(message)+1) + '\r', message = "{: >7.2%}: Extracting: {}".format(float(i+1)/overall, file_info.name[:20]+"...txt.gz") - sys.stdout.write(message) + print message, if data is None: data = inner.RPKM.to_frame() data.columns = [file_info.name[:-18]] @@ -1021,6 +1021,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): sys.stdout.write(' '*len(message) + '\r') sys.stdout.flush() + print print "Read Archive {}".format(files.name) return data_details_return({'Y': data, From 3e07b207a9d7a02759ac8a72c3bca5099bc97ba7 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 6 Oct 2014 11:49:02 +0100 Subject: [PATCH 12/19] whitespace --- GPy/core/gp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 6aa1a4b9..0e93cd99 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -92,7 +92,7 @@ class GP(Model): logger.info("adding kernel and likelihood as parameters") self.link_parameter(self.kern) self.link_parameter(self.likelihood) - + def set_X(self,X): # TODO: it does not work with BGPLVM if isinstance(X, ObsAr): @@ -107,7 +107,7 @@ class GP(Model): self.Y = Y else: self.Y = ObsAr(Y) - self.Y_normalized = self.Y + self.Y_normalized = self.Y def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) From fd4404a11a598d009b07118b1d8a528326393b31 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 7 Oct 2014 16:20:16 +0100 Subject: [PATCH 13/19] minor bugfix on windows (thanks NC) --- GPy/util/linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index ed8e7ba2..cef699cf 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -528,7 +528,7 @@ def symmetrify(A, upper=False): symmetrify_weave(A, upper) except: print "\n Weave compilation failed. 
Falling back to (slower) numpy implementation\n" - config.set('weave', 'working', False) + config.set('weave', 'working', 'False') symmetrify_numpy(A, upper) else: symmetrify_numpy(A, upper) From fa7807ee6fb0359f5558e08cb7bffc31fd1da8f7 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 8 Oct 2014 12:03:51 +0100 Subject: [PATCH 14/19] [missing data] general implementation for subsetting data --- GPy/core/sparse_gp.py | 223 +++++++++++++++--- GPy/core/sparse_gp_mpi.py | 14 +- GPy/examples/dimensionality_reduction.py | 6 +- .../latent_function_inference/var_dtc.py | 42 ++-- GPy/kern/_src/psi_comp/rbf_psi_comp.py | 29 ++- GPy/models/bayesian_gplvm.py | 64 +++-- GPy/testing/model_tests.py | 59 +++-- 7 files changed, 329 insertions(+), 108 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 6b923609..ecc1e4ba 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -9,6 +9,7 @@ from .. import likelihoods from parameterization.variational import VariationalPosterior import logging +from GPy.inference.latent_function_inference.posterior import Posterior logger = logging.getLogger("sparse gp") class SparseGP(GP): @@ -34,12 +35,18 @@ class SparseGP(GP): """ - def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False): + def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, + name='sparse gp', Y_metadata=None, normalizer=False, + missing_data=False): + + self.missing_data = missing_data + if self.missing_data: + self.ninan = ~np.isnan(Y) #pick a sensible inference method if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian): - inference_method = var_dtc.VarDTC() + inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1]) else: #inference_method = ?? raise NotImplementedError, "what to do what to do?" @@ -51,44 +58,200 @@ class SparseGP(GP): GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) logger.info("Adding Z as parameter") self.link_parameter(self.Z, index=0) + if self.missing_data: + self.Ylist = [] + overall = self.Y_normalized.shape[1] + m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall) + message = m_f(-1) + print message, + for d in xrange(overall): + self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None]) + print ' '*(len(message)+1) + '\r', + message = m_f(d) + print message, + print '' + def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) - def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) - self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) - if isinstance(self.X, VariationalPosterior): + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None): + """ + This is the standard part, which usually belongs in parameters_changed. + + For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner + loop, in order to ensure a different handling of gradients etc of different + subsets of data. + + The dict in current_values will be passed aroung as current_values for + the rest of the algorithm, so this is the place to store current values, + such as subsets etc, if necessary. 
+ + If Lm and dL_dKmm can be precomputed (or only need to be computed once) + pass them in here, so they will be passed to the inference_method. + + subset_indices is a dictionary of indices. you can put the indices however you + like them into this dictionary for inner use of the indices inside the + algorithm. + """ + posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None) + current_values = {} + likelihood.update_gradients(grad_dict['dL_dthetaL']) + current_values['likgrad'] = likelihood.gradient.copy() + if subset_indices is None: + subset_indices = {} + if isinstance(X, VariationalPosterior): #gradients wrt kernel - dL_dKmm = self.grad_dict['dL_dKmm'] - self.kern.update_gradients_full(dL_dKmm, self.Z, None) - target = self.kern.gradient.copy() - self.kern.update_gradients_expectations(variational_posterior=self.X, - Z=self.Z, - dL_dpsi0=self.grad_dict['dL_dpsi0'], - dL_dpsi1=self.grad_dict['dL_dpsi1'], - dL_dpsi2=self.grad_dict['dL_dpsi2']) - self.kern.gradient += target + dL_dKmm = grad_dict['dL_dKmm'] + kern.update_gradients_full(dL_dKmm, Z, None) + current_values['kerngrad'] = kern.gradient.copy() + kern.update_gradients_expectations(variational_posterior=X, + Z=Z, + dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=grad_dict['dL_dpsi1'], + dL_dpsi2=grad_dict['dL_dpsi2']) + current_values['kerngrad'] += kern.gradient #gradients wrt Z - self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) - self.Z.gradient += self.kern.gradients_Z_expectations( - self.grad_dict['dL_dpsi0'], - self.grad_dict['dL_dpsi1'], - self.grad_dict['dL_dpsi2'], - Z=self.Z, - variational_posterior=self.X) + current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z) + current_values['Zgrad'] += kern.gradients_Z_expectations( + grad_dict['dL_dpsi0'], + grad_dict['dL_dpsi1'], + grad_dict['dL_dpsi2'], + Z=Z, + variational_posterior=X) else: #gradients wrt kernel - self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) - target = self.kern.gradient.copy() - self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z) - target += self.kern.gradient - self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) - self.kern.gradient += target + kern.update_gradients_diag(grad_dict['dL_dKdiag'], X) + current_values['kerngrad'] = kern.gradient.copy() + kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z) + current_values['kerngrad'] += kern.gradient + kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None) + current_values['kerngrad'] += kern.gradient #gradients wrt Z - self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) - self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) + current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z) + current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X) + return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices + + def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): + """ + This is for automatic updates of values in the inner loop of missing + data handling. Both arguments are dictionaries and the values in + full_values will be updated by the current_gradients. + + If a key from current_values does not exist in full_values, it will be + initialized to the value in current_values. + + If there is indices needed for the update, value_indices can be used for + that. 
If value_indices has the same key as current_values, the update
+        in full_values will be indexed by the indices in value_indices.
+
+        full_values:
+            dictionary of standing gradients (you will have to carefully make sure that
+            the ordering is right!). The values in here will be updated such that
+            full_values[key] += current_values[key] for all keys in full_values.keys()
+
+        current_values:
+            dictionary of gradients in the current set of parameters.
+
+        value_indices:
+            dictionary holding indices for the update in full_values.
+            if the key exists, the update rule is:
+            full_values[key][value_indices[key]] += current_values[key]
+        """
+        for key in current_values.keys():
+            if value_indices is not None and value_indices.has_key(key):
+                index = value_indices[key]
+            else:
+                index = slice(None)
+            if full_values.has_key(key):
+                full_values[key][index] += current_values[key]
+            else:
+                full_values[key] = current_values[key]
+
+    def _inner_values_update(self, current_values):
+        """
+        This exists in case there is more to do with the current values.
+        It will always be called in the inner loop, so that
+        you can do additional inner updates inside the missing data
+        loop etc. This can also be used for stochastic updates, when only working on
+        one dimension of the output.
+        """
+        pass
+
+    def _outer_values_update(self, full_values):
+        """
+        Here you put the values which were collected before into the right places.
+        E.g. set the gradients of parameters, etc.
+        """
+        self.likelihood.gradient = full_values['likgrad']
+        self.kern.gradient = full_values['kerngrad']
+        self.Z.gradient = full_values['Zgrad']
+
+    def _outer_init_full_values(self):
+        """
+        If full_values has indices in value_indices, we might want to initialize
+        the full_values differently, so that subsetting is possible.
+
+        Here you can initialize the full_values for the values needed.
+
+        Keep in mind that if a key does not exist in full_values when updating
+        values, it will be set (so e.g. for Z there is no need to initialize Zgrad,
+        as there is no subsetting needed.
For X in BGPLVM on the other hand we probably need + to initialize the gradients for the mean and the variance in order to + have the full gradient for indexing) + """ + return {} + + def _outer_loop_for_missing_data(self): + Lm = None + dL_dKmm = None + Kmm = None + + self._log_marginal_likelihood = 0 + full_values = self._outer_init_full_values() + + woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim)) + woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) + m_f = lambda i: "Inference with missing data: {: >7.2%}".format(float(i+1)/self.output_dim) + message = m_f(-1) + print message, + for d in xrange(self.output_dim): + ninan = self.ninan[:, d] + print ' '*(len(message)) + '\r', + message = m_f(d) + print message, + + posterior, log_marginal_likelihood, \ + grad_dict, current_values, value_indices = self._inner_parameters_changed( + self.kern, self.X[ninan], + self.Z, self.likelihood, + self.Ylist[d], self.Y_metadata, + Lm, dL_dKmm, + subset_indices=dict(outputs=d, samples=ninan)) + + self._inner_take_over_or_update(full_values, current_values, value_indices) + self._inner_values_update(current_values) + + Lm = posterior.K_chol + dL_dKmm = grad_dict['dL_dKmm'] + Kmm = posterior._K + woodbury_inv[:, :, d] = posterior.woodbury_inv + woodbury_vector[:, d:d+1] = posterior.woodbury_vector + self._log_marginal_likelihood += log_marginal_likelihood + print '' + + self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, + K=Kmm, mean=None, cov=None, K_chol=Lm) + self._outer_values_update(full_values) + + def parameters_changed(self): + if self.missing_data: + self._outer_loop_for_missing_data() + else: + self.posterior, self._log_marginal_likelihood, self.grad_dict, gradients, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) + self.kern.gradient = gradients['kerngrad'] + self.Z.gradient = gradients['Zgrad'] def _raw_predict(self, Xnew, full_cov=False, kern=None): """ diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py index e7faf7a8..d0008e58 100644 --- a/GPy/core/sparse_gp_mpi.py +++ b/GPy/core/sparse_gp_mpi.py @@ -34,15 +34,15 @@ class SparseGP_MPI(SparseGP): """ - def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False): + def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False, missing_data=False): self._IN_OPTIMIZATION_ = False if mpi_comm != None: if inference_method is None: inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) else: assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!' 
- - super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + + super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer, missing_data=missing_data) self.update_model(False) self.link_parameter(self.X, index=0) if variational_prior is not None: @@ -75,18 +75,18 @@ class SparseGP_MPI(SparseGP): return dc #===================================================== - # The MPI parallelization + # The MPI parallelization # - can move to model at some point #===================================================== - + @SparseGP.optimizer_array.setter def optimizer_array(self, p): if self.mpi_comm != None: if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0: self.mpi_comm.Bcast(np.int32(1),root=0) - self.mpi_comm.Bcast(p, root=0) + self.mpi_comm.Bcast(p, root=0) SparseGP.optimizer_array.fset(self,p) - + def optimize(self, optimizer=None, start=None, **kwargs): self._IN_OPTIMIZATION_ = True if self.mpi_comm==None: diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index f79f4e6f..bf99dd66 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -367,14 +367,14 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, Y = Ylist[0] k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data + inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 80% missing data Ymissing = Y.copy() Ymissing[inan] = _np.nan m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, - inference_method=VarDTCMissingData(inan=inan), kernel=k) + kernel=k, missing_data=True) - m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.X.variance[:] = _np.random.uniform(0,.1,m.X.shape) m.likelihood.variance = .01 m.parameters_changed() m.Yreal = Y diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 70637e3b..b10ec832 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -62,7 +62,9 @@ class VarDTC(LatentFunctionInference): def get_VVTfactor(self, Y, prec): return Y * prec # TODO chache this, and make it effective - def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): + + + def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None): _, output_dim = Y.shape uncertain_inputs = isinstance(X, VariationalPosterior) @@ -84,8 +86,8 @@ class VarDTC(LatentFunctionInference): Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) - Lm = jitchol(Kmm) - + if Lm is None: + Lm = jitchol(Kmm) # The rather complex computations of A, and the psi stats if uncertain_inputs: @@ -100,7 +102,6 @@ class VarDTC(LatentFunctionInference): else: psi0 = kern.Kdiag(X) psi1 = kern.K(X, Z) - psi2 = None if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) else: @@ -122,11 +123,12 @@ class VarDTC(LatentFunctionInference): delit = tdot(_LBi_Lmi_psi1Vf) data_fit = np.trace(delit) DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) - delit = -0.5 * DBi_plus_BiPBi - delit += -0.5 * B * output_dim - delit += output_dim * np.eye(num_inducing) - # Compute dL_dKmm - dL_dKmm = backsub_both_sides(Lm, delit) + if dL_dKmm is None: + delit = -0.5 
* DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + # Compute dL_dKmm + dL_dKmm = backsub_both_sides(Lm, delit) # derivatives of L w.r.t. psi dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, @@ -208,6 +210,7 @@ class VarDTCMissingData(LatentFunctionInference): inan = np.isnan(Y) has_none = inan.any() self._inan = ~inan + inan = self._inan else: inan = self._inan has_none = True @@ -241,7 +244,7 @@ class VarDTCMissingData(LatentFunctionInference): self._subarray_indices = [[slice(None),slice(None)]] return [Y], [(Y**2).sum()] - def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): + def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None): if isinstance(X, VariationalPosterior): uncertain_inputs = True psi0_all = kern.psi0(Z, X) @@ -260,7 +263,7 @@ class VarDTCMissingData(LatentFunctionInference): dL_dpsi0_all = np.zeros(Y.shape[0]) dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing)) if uncertain_inputs: - dL_dpsi2_all = np.zeros((num_inducing, num_inducing)) + dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing)) dL_dR = 0 woodbury_vector = np.zeros((num_inducing, Y.shape[1])) @@ -271,31 +274,31 @@ class VarDTCMissingData(LatentFunctionInference): Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) #factor Kmm - Lm = jitchol(Kmm) + if Lm is None: + Lm = jitchol(Kmm) if uncertain_inputs: LmInv = dtrtri(Lm) - size = Y.shape[1] + size = len(Ys) next_ten = 0 - for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)): if ((i+1.)/size) >= next_ten: logger.info('inference {:> 6.1%}'.format((i+1.)/size)) next_ten += .1 if het_noise: beta = beta_all[i] else: beta = beta_all - VVT_factor = (y*beta) output_dim = 1#len(ind) psi0 = psi0_all[v] psi1 = psi1_all[v, :] - if uncertain_inputs: psi2 = kern.psi2(Z, X[v, :]) + if uncertain_inputs: + psi2 = kern.psi2(Z, X[v, :]) else: psi2 = None num_data = psi1.shape[0] if uncertain_inputs: if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) - else: psi2_beta = psi2 * beta + else: psi2_beta = psi2 * (beta) A = LmInv.dot(psi2_beta.dot(LmInv.T)) else: if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) @@ -330,11 +333,11 @@ class VarDTCMissingData(LatentFunctionInference): dL_dpsi0_all[v] += dL_dpsi0 dL_dpsi1_all[v, :] += dL_dpsi1 if uncertain_inputs: - dL_dpsi2_all += dL_dpsi2 + dL_dpsi2_all[v, :, :] += dL_dpsi2 # log marginal likelihood log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, - psi0, A, LB, trYYT, data_fit,VVT_factor) + psi0, A, LB, trYYT, data_fit, VVT_factor) #put the gradients in the right places dL_dR += _compute_dL_dR(likelihood, @@ -393,7 +396,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C # subsume back into psi1 (==Kmn) dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) dL_dpsi2 = None - return dL_dpsi0, dL_dpsi1, dL_dpsi2 diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py index 93399ea7..d2d3312d 100644 --- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py @@ -17,7 +17,7 @@ def psicomputations(variance, lengthscale, Z, variational_posterior): # _psi1 NxM mu = variational_posterior.mean S = variational_posterior.variance - + psi0 = np.empty(mu.shape[0]) psi0[:] = variance psi1 = _psi1computations(variance, lengthscale, Z, mu, S) @@ -41,7 +41,7 @@ def __psi1computations(variance, lengthscale, Z, mu, S): 
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.) _psi1 = variance*np.exp(_psi1_log) - + return _psi1 def __psi2computations(variance, lengthscale, Z, mu, S): @@ -54,27 +54,27 @@ def __psi2computations(variance, lengthscale, Z, mu, S): # here are the "statistics" for psi2 # Produced intermediate results: # _psi2 MxM - + lengthscale2 = np.square(lengthscale) - + _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ denom = 1./(2.*S+lengthscale2) _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) - + return _psi2 def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): ARD = (len(lengthscale)!=1) - + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 - + dL_dlengscale = dl_psi1 + dl_psi2 if not ARD: dL_dlengscale = dL_dlengscale.sum() @@ -82,7 +82,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal dL_dmu = dmu_psi1 + dmu_psi2 dL_dS = dS_psi1 + dS_psi2 dL_dZ = dZ_psi1 + dZ_psi2 - + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): @@ -101,9 +101,9 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): # _dL_dgamma NxQ # _dL_dmu NxQ # _dL_dS NxQ - + lengthscale2 = np.square(lengthscale) - + _psi1 = _psi1computations(variance, lengthscale, Z, mu, S) Lpsi1 = dL_dpsi1*_psi1 Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ @@ -114,7 +114,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. 
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) - + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): @@ -133,13 +133,12 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): # _dL_dgamma NxQ # _dL_dmu NxQ # _dL_dS NxQ - + lengthscale2 = np.square(lengthscale) denom = 1./(2*S+lengthscale2) denom2 = np.square(denom) _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM - Lpsi2 = dL_dpsi2[None,:,:]*_psi2 Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ @@ -147,7 +146,7 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ Lpsi2Zhat = Lpsi2Z Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 - + _dL_dvar = Lpsi2sum.sum()*2/variance _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] @@ -157,6 +156,6 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS - + _psi1computations = Cacher(__psi1computations, limit=1) _psi2computations = Cacher(__psi2computations, limit=1) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 5c5e7b5c..54c11fea 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -25,7 +25,9 @@ class BayesianGPLVM(SparseGP_MPI): """ def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None): + Z=None, kernel=None, inference_method=None, likelihood=None, + name='bayesian gplvm', mpi_comm=None, normalizer=None, + missing_data=False): self.mpi_comm = mpi_comm self.__IN_OPTIMIZATION__ = False @@ -59,24 +61,23 @@ class BayesianGPLVM(SparseGP_MPI): X = NormalPosterior(X, X_variance) if inference_method is None: - inan = np.isnan(Y) - if np.any(inan): - from ..inference.latent_function_inference.var_dtc import VarDTCMissingData - self.logger.debug("creating inference_method with var_dtc missing data") - inference_method = VarDTCMissingData(inan=inan) - elif mpi_comm is not None: + if mpi_comm is not None: inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) else: from ..inference.latent_function_inference.var_dtc import VarDTC self.logger.debug("creating inference_method var_dtc") - inference_method = VarDTC() + inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1]) if isinstance(inference_method,VarDTC_minibatch): inference_method.mpi_comm = mpi_comm if kernel.useGPU and isinstance(inference_method, VarDTC_GPU): kernel.psicomp.GPU_direct = True - super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood, name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior) + super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood, + name=name, inference_method=inference_method, + normalizer=normalizer, mpi_comm=mpi_comm, + variational_prior=self.variational_prior, + missing_data=missing_data) def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" @@ -86,15 +87,48 @@ class BayesianGPLVM(SparseGP_MPI): """Get the gradients of the 
posterior distribution of X in its specific form.""" return X.mean.gradient, X.variance.gradient + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None): + posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices) + + log_marginal_likelihood -= self.variational_prior.KL_divergence(X) + + current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( + variational_posterior=X, + Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=grad_dict['dL_dpsi1'], + dL_dpsi2=grad_dict['dL_dpsi2']) + X.mean.gradient[:] = 0 + X.variance.gradient[:] = 0 + self.variational_prior.update_gradients_KL(X) + current_values['meangrad'] += X.mean.gradient + current_values['vargrad'] += X.variance.gradient + + value_indices['meangrad'] = subset_indices['samples'] + value_indices['vargrad'] = subset_indices['samples'] + return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices + + def _outer_values_update(self, full_values): + """ + Here you put the values, which were collected before in the right places. + E.g. set the gradients of parameters, etc. + """ + super(BayesianGPLVM, self)._outer_values_update(full_values) + self.X.mean.gradient = full_values['meangrad'] + self.X.variance.gradient = full_values['vargrad'] + + def _outer_init_full_values(self): + return dict(meangrad=np.zeros(self.X.mean.shape), + vargrad=np.zeros(self.X.variance.shape)) + def parameters_changed(self): super(BayesianGPLVM,self).parameters_changed() if isinstance(self.inference_method, VarDTC_minibatch): return - super(BayesianGPLVM, self).parameters_changed() - self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) + #super(BayesianGPLVM, self).parameters_changed() + #self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) - self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) + #self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) # This is testing code ------------------------- # i = np.random.randint(self.X.shape[0]) @@ -113,7 +147,7 @@ class BayesianGPLVM(SparseGP_MPI): # ----------------------------------------------- # update for the KL divergence - self.variational_prior.update_gradients_KL(self.X) + #self.variational_prior.update_gradients_KL(self.X) def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, @@ -178,7 +212,7 @@ class BayesianGPLVM(SparseGP_MPI): """ dmu_dX = np.zeros_like(Xnew) for i in range(self.Z.shape[0]): - dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :]) + dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :]) return dmu_dX def dmu_dXnew(self, Xnew): @@ -189,7 +223,7 @@ class BayesianGPLVM(SparseGP_MPI): ones = np.ones((1, 1)) for i in range(self.Z.shape[0]): gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1) - return np.dot(gradients_X, self.Cpsi1Vf) + return np.dot(gradients_X, 
self.grad_dict['dL_dpsi1']) def plot_steepest_gradient_map(self, *args, ** kwargs): """ diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 42f82121..c87a44b1 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -20,7 +20,7 @@ class MiscTests(unittest.TestCase): m = GPy.models.GPRegression(self.X, self.Y, kernel=k) m.randomize() m.likelihood.variance = .5 - Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.likelihood.variance) + Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N) * m.likelihood.variance) K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new)) mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized) @@ -43,7 +43,7 @@ class MiscTests(unittest.TestCase): Z = m.Z[:] X = self.X[:] - #Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression + # Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression Kinv = m.posterior.woodbury_inv K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new)) @@ -51,13 +51,13 @@ class MiscTests(unittest.TestCase): self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(covar.shape, (self.N_new, self.N_new)) np.testing.assert_almost_equal(K_hat, covar) - #np.testing.assert_almost_equal(mu_hat, mu) + # np.testing.assert_almost_equal(mu_hat, mu) mu, var = m._raw_predict(self.X_new) self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(var.shape, (self.N_new, 1)) np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) - #np.testing.assert_almost_equal(mu_hat, mu) + # np.testing.assert_almost_equal(mu_hat, mu) def test_likelihood_replicate(self): m = GPy.models.GPRegression(self.X, self.Y) @@ -110,6 +110,29 @@ class MiscTests(unittest.TestCase): m2.kern.lengthscale = m.kern['.*lengthscale'] np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + def test_missing_data(self): + from GPy import kern + from GPy.models import BayesianGPLVM + from GPy.examples.dimensionality_reduction import _simulate_matern + + D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4 + _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False) + Y = Ylist[0] + + inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # 80% missing data + Ymissing = Y.copy() + Ymissing[inan] = np.nan + + k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q) + m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, + kernel=k, missing_data=True) + assert(m.checkgrad()) + + k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q) + m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, + kernel=k, missing_data=True) + assert(m.checkgrad()) + def test_likelihood_replicate_kern(self): m = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y) @@ -158,7 +181,7 @@ class MiscTests(unittest.TestCase): def test_model_optimize(self): X = np.random.uniform(-3., 3., (20, 1)) Y = np.sin(X) + np.random.randn(20, 1) * 0.05 - m = GPy.models.GPRegression(X,Y) + m = GPy.models.GPRegression(X, Y) m.optimize() print m @@ -219,8 +242,8 @@ class GradientTests(np.testing.TestCase): mlp = GPy.kern.MLP(1) self.check_model(mlp, model_type='GPRegression', dimension=1) - #TODO: - #def test_GPRegression_poly_1d(self): + # TODO: + # def test_GPRegression_poly_1d(self): # ''' Testing the GP regression with polynomial kernel with white kernel on 1d data ''' # mlp = 
GPy.kern.Poly(1, degree=5) # self.check_model(mlp, model_type='GPRegression', dimension=1) @@ -417,11 +440,11 @@ class GradientTests(np.testing.TestCase): def test_gp_heteroscedastic_regression(self): num_obs = 25 - X = np.random.randint(0,140,num_obs) - X = X[:,None] - Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None] + X = np.random.randint(0, 140, num_obs) + X = X[:, None] + Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None] kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) - m = GPy.models.GPHeteroscedasticRegression(X,Y,kern) + m = GPy.models.GPHeteroscedasticRegression(X, Y, kern) self.assertTrue(m.checkgrad()) def test_gp_kronecker_gaussian(self): @@ -432,12 +455,12 @@ class GradientTests(np.testing.TestCase): k1 = GPy.kern.RBF(1) # + GPy.kern.White(1) k2 = GPy.kern.RBF(1) # + GPy.kern.White(1) Y = np.random.randn(N1, N2) - Y = Y-Y.mean(0) - Y = Y/Y.std(0) + Y = Y - Y.mean(0) + Y = Y / Y.std(0) m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2) # build the model the dumb way - assert (N1*N2<1000), "too much data for standard GPs!" + assert (N1 * N2 < 1000), "too much data for standard GPs!" yy, xx = np.meshgrid(X2, X1) Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1]) @@ -458,11 +481,11 @@ class GradientTests(np.testing.TestCase): def test_gp_VGPC(self): num_obs = 25 - X = np.random.randint(0,140,num_obs) - X = X[:,None] - Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None] + X = np.random.randint(0, 140, num_obs) + X = X[:, None] + Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None] kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) - m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern) + m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern) self.assertTrue(m.checkgrad()) From d954829a40748b108653c96b8283fb5432ce001a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 9 Oct 2014 08:40:15 +0100 Subject: [PATCH 15/19] [pca] missing data is now handled as mean --- GPy/util/pca.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/GPy/util/pca.py b/GPy/util/pca.py index 046f47d7..996e6f48 100644 --- a/GPy/util/pca.py +++ b/GPy/util/pca.py @@ -11,14 +11,16 @@ try: except: pass from numpy.linalg.linalg import LinAlgError +from operator import setitem +import itertools class pca(object): """ pca module with automatic primal/dual determination. """ def __init__(self, X): - self.mu = X.mean(0) - self.sigma = X.std(0) + self.mu = None + self.sigma = None X = self.center(X) @@ -39,6 +41,13 @@ class pca(object): """ Center `X` in pca space. """ + X = X.copy() + inan = numpy.isnan(X) + if self.mu is None: + X_ = numpy.ma.masked_array(X, inan) + self.mu = X_.mean(0).base + self.sigma = X_.std(0).base + reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None) X = X - self.mu X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) return X @@ -94,7 +103,7 @@ class pca(object): fignum=None, cmap=None, # @UndefinedVariable ** kwargs): """ - Plot dimensions `dimensions` with given labels against each other in + Plot dimensions `dimensions` with given labels against each other in PC space. Labels can be any sequence of labels of dimensions X.shape[0]. 
Labels can be drawn with a subsequent call to legend() """ From de801c9d291df158a5dc77ec4336563a534c1a7d Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 9 Oct 2014 10:32:32 +0100 Subject: [PATCH 16/19] [datasets] updated deng loading pandas bugs... --- GPy/util/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 0f63a353..c14ce66f 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -978,7 +978,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):]) sample_info.columns.name = sample_info.columns.name[len("!Sample_"):] sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']] - sample_info = sample_info.ix[:, np.r_[0:3, 5:sample_info.shape[1]]] + sample_info = sample_info.iloc[:, np.r_[0:4, 5:sample_info.shape[1]]] c = sample_info.columns.to_series() c[1:4] = ['strain', 'cross', 'developmental_stage'] sample_info.columns = c From 829e40b25c6735ad3673eb1c29da265b5ea058b0 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 9 Oct 2014 10:34:01 +0100 Subject: [PATCH 17/19] [missing_data in sparse gp] can be extended towards missing_data handling in gp itself. Setting up gpy issue --- GPy/core/parameterization/param.py | 2 ++ GPy/core/parameterization/parameter_core.py | 4 ++-- GPy/core/sparse_gp.py | 6 +++--- GPy/examples/dimensionality_reduction.py | 3 --- GPy/models/bayesian_gplvm.py | 11 ++++++++--- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index cb9b0cfe..c0836355 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -53,7 +53,9 @@ class Param(Parameterizable, ObsAr): return obj def __init__(self, name, input_array, default_constraint=None, *a, **kw): + self._in_init_ = True super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw) + self._in_init_ = False def build_pydot(self,G): import pydot diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 8d1efb1c..67d74626 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -18,7 +18,7 @@ import numpy as np import re import logging -__updated__ = '2014-09-22' +__updated__ = '2014-10-09' class HierarchyError(Exception): """ @@ -99,7 +99,7 @@ class Observable(object): :param bool trigger_parent: Whether to trigger the parent, after self has updated """ - if not self.update_model(): + if not self.update_model() or self._in_init_: #print "Warning: updates are off, updating the model will do nothing" return self._trigger_params_changed(trigger_parent) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index ecc1e4ba..8ea0d4c6 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -218,6 +218,7 @@ class SparseGP(GP): print message, for d in xrange(self.output_dim): ninan = self.ninan[:, d] + print ' '*(len(message)) + '\r', message = m_f(d) print message, @@ -249,9 +250,8 @@ class SparseGP(GP): if self.missing_data: self._outer_loop_for_missing_data() else: - self.posterior, self._log_marginal_likelihood, self.grad_dict, gradients, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) - self.kern.gradient = gradients['kerngrad'] - self.Z.gradient = gradients['Zgrad'] + self.posterior, 
+            self._outer_values_update(full_values)

     def _raw_predict(self, Xnew, full_cov=False, kern=None):
         """
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index bf99dd66..94e83ff1 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -374,9 +374,6 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     m = BayesianGPLVM(Ymissing, Q, init="random",
                       num_inducing=num_inducing, kernel=k,
                       missing_data=True)
-    m.X.variance[:] = _np.random.uniform(0,.1,m.X.shape)
-    m.likelihood.variance = .01
-    m.parameters_changed()
     m.Yreal = Y

     if optimize:
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 54c11fea..67cb6e62 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -32,7 +32,7 @@ class BayesianGPLVM(SparseGP_MPI):
         self.__IN_OPTIMIZATION__ = False
         self.logger = logging.getLogger(self.__class__.__name__)
-        if X == None:
+        if X is None:
             from ..util.initialization import initialize_latent
             self.logger.info("initializing latent space X with method {}".format(init))
             X, fracs = initialize_latent(init, input_dim, Y)
@@ -97,14 +97,19 @@ class BayesianGPLVM(SparseGP_MPI):
                                        Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
+
+            # Subsetting Variational Posterior objects, makes the gradients
+            # empty. We need them to be 0 though:
             X.mean.gradient[:] = 0
             X.variance.gradient[:] = 0
+
             self.variational_prior.update_gradients_KL(X)
             current_values['meangrad'] += X.mean.gradient
             current_values['vargrad'] += X.variance.gradient
-            value_indices['meangrad'] = subset_indices['samples']
-            value_indices['vargrad'] = subset_indices['samples']
+            if subset_indices is not None:
+                value_indices['meangrad'] = subset_indices['samples']
+                value_indices['vargrad'] = subset_indices['samples']
         return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices

     def _outer_values_update(self, full_values):

From 7916c5f9ead987c514e4055efc74a6d45bebce59 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 10 Oct 2014 17:19:37 +0100
Subject: [PATCH 18/19] Stopped rounding to int in priors printing

---
 GPy/core/parameterization/priors.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index 453bdfb0..1805f2ec 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -52,7 +52,7 @@ class Gaussian(Prior):
         self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)

     def __str__(self):
-        return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
+        return "N({:.2g}, {:.2g})".format(self.mu, self.sigma)

     def lnpdf(self, x):
         return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
@@ -82,7 +82,7 @@ class Uniform(Prior):
         self.upper = float(upper)

     def __str__(self):
-        return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
+        return "[{:.2g}, {:.2g}]".format(self.lower, self.upper)

     def lnpdf(self, x):
         region = (x>=self.lower) * (x<=self.upper)
@@ -122,7 +122,7 @@ class LogGaussian(Prior):
         self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)

     def __str__(self):
-        return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
+        return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma)

     def lnpdf(self, x):
         return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
@@ -220,7 +220,7 @@ class Gamma(Prior):
         self.constant = -gammaln(self.a) + a * np.log(b)

     def __str__(self):
-        return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
+        return "Ga({:.2g}, {:.2g})".format(self.a, self.b)

     def summary(self):
         ret = {"E[x]": self.a / self.b, \
@@ -281,7 +281,7 @@ class InverseGamma(Prior):
         self.constant = -gammaln(self.a) + a * np.log(b)

     def __str__(self):
-        return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
+        return "iGa({:.2g}, {:.2g})".format(self.a, self.b)

     def lnpdf(self, x):
         return self.constant - (self.a + 1) * np.log(x) - self.b / x
@@ -317,7 +317,7 @@ class HalfT(Prior):
         self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu)

     def __str__(self):
-        return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')'
+        return "hT({:.2g}, {:.2g})".format(self.A, self.nu)

     def lnpdf(self,theta):
         return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
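As a small aside, not part of the patch above: the old __str__ methods pushed the hyperparameters through np.round(), which rounds them to whole numbers in the printout, while the new '{:.2g}' format keeps two significant figures. A quick illustrative comparison (Python 2 print statements, matching the codebase):

    import numpy as np

    mu, sigma2 = 0.5, 0.03
    # old style: rounds to whole numbers, so small values print as 0.0
    print "N(" + str(np.round(mu)) + ', ' + str(np.round(sigma2)) + ')'   # -> N(0.0, 0.0)
    # new style: two significant figures are preserved
    print "N({:.2g}, {:.2g})".format(mu, sigma2)                          # -> N(0.5, 0.03)
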
{:.2g})".format(self.mu, self.sigma) def lnpdf(self, x): return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x) @@ -220,7 +220,7 @@ class Gamma(Prior): self.constant = -gammaln(self.a) + a * np.log(b) def __str__(self): - return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')' + return "Ga({:.2g}, {:.2g})".format(self.a, self.b) def summary(self): ret = {"E[x]": self.a / self.b, \ @@ -281,7 +281,7 @@ class InverseGamma(Prior): self.constant = -gammaln(self.a) + a * np.log(b) def __str__(self): - return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')' + return "iGa({:.2g}, {:.2g})".format(self.a, self.b) def lnpdf(self, x): return self.constant - (self.a + 1) * np.log(x) - self.b / x @@ -317,7 +317,7 @@ class HalfT(Prior): self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu) def __str__(self): - return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')' + return "hT({:.2g}, {:.2g})".format(self.A, self.nu) def lnpdf(self,theta): return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) ) From f9a16059e481e2da79a4e9242f6040f9d87dc34b Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 16 Oct 2014 01:28:41 +0100 Subject: [PATCH 19/19] weaved some slow functions in the stationary class. We now fall back (and latch) to numpy if weave fails --- GPy/kern/_src/stationary.py | 74 ++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index cc5634e9..cd209d9d 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -10,6 +10,7 @@ from ... import util import numpy as np from scipy import integrate from ...util.caching import Cache_this +from ...util.config import config # for assesing whether to use weave class Stationary(Kern): """ @@ -139,7 +140,17 @@ class Stationary(Kern): #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3 tmp = dL_dr*self._inv_dist(X, X2) if X2 is None: X2 = X - self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) + + + if config.getboolean('weave', 'working'): + try: + self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2) + except: + print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + config.set('weave', 'working', 'False') + self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) + else: + self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) else: r = self._scaled_dist(X, X2) self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale @@ -154,10 +165,43 @@ class Stationary(Kern): dist = self._scaled_dist(X, X2).copy() return 1./np.where(dist != 0., dist, np.inf) + def weave_lengthscale_grads(self, tmp, X, X2): + N,M = tmp.shape + Q = X.shape[1] + if hasattr(X, 'values'):X = X.values + if hasattr(X2, 'values'):X2 = X2.values + grads = np.zeros(self.input_dim) + code = """ + double gradq; + for(int q=0; q