Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-07-20 16:51:05 +02:00 · 2013-09-24 17:30:54 +01:00 · 2013-09-24 17:30:54 +01:00 · 0dc987de15
commit 0dc987de15
parent f209477885 527f4ca469
10 changed files with 333 additions and 181 deletions
--- a/GPy/FAQ.txt
+++ b/GPy/FAQ.txt
@ -0,0 +1,8 @@
+Frequently Asked Questions
+--------------------------
+
+Unit tests are run through Travis-Ci. They can be run locally through entering the GPy route diretory and writing
+
+nosetests testing/
+
+Documentation is handled by Sphinx. To build the documentation:
--- a/GPy/coding_style_guide.txt
+++ b/GPy/coding_style_guide.txt
@ -0,0 +1,10 @@
+In this text document we will describe coding conventions to be used in GPy to keep things consistent.
+
+All arrays containing data are two dimensional. The first dimension is the number of data, the second dimension is number of features. This keeps things consistent with the idea of a design matrix.
+
+Input matrices are either X or t, output matrices are Y.
+
+Input dimensionality is input_dim, output dimensionality is output_dim, number of data is num_data.
+
+Data sets are preprocessed in the datasets.py file. This file also records where the data set was obtained from in the dictionary stored in the file. Long term we should move this dictionary to sqlite or similar.
+
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@ -128,10 +128,9 @@ class GPBase(Model):
            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
        else:
-            assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+            assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs

            if self.X.shape[1] == 2:
-                assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
                Xu = self.X[self.X[:,-1]==output ,0:1]
                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)

@ -263,7 +262,7 @@ class GPBase(Model):
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

        else:
-            assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+            assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
            if self.X.shape[1] == 2:
                resolution = resolution or 200
                Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
@ -287,3 +286,20 @@ class GPBase(Model):

            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    """
+    def samples_f(self,X,samples=10, which_data='all', which_parts='all',output=None):
+        if which_data == 'all':
+            which_data = slice(None)
+
+        if hasattr(self,'multioutput'):
+            np.hstack([X,np.ones((X.shape[0],1))*output])
+
+        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=True)
+        v = v.reshape(m.size,-1) if len(v.shape)==3 else v
+        Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+        #gpplot(X, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+        for i in range(samples):
+            ax.plot(X, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+
+    """
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -367,9 +367,8 @@ class SparseGP(GPBase):
                ax.plot(Zu[:, 0], Zu[:, 1], 'wo')

        else:
-            pass
-            """
            if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+                """
                Xu = self.X[self.X[:,-1]==output,:]
                if self.has_uncertain_inputs:
                    Xu = self.X * self._Xscale + self._Xoffset  # NOTE self.X are the normalized values now
@ -380,6 +379,7 @@ class SparseGP(GPBase):
                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)

+                """
                Zu = self.Z[self.Z[:,-1]==output,:]
                Zu = self.Z * self._Xscale + self._Xoffset
                Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
@ -388,7 +388,6 @@ class SparseGP(GPBase):

            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
-            """

    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
        """
--- a/GPy/kern/parts/eq_ode1.py
+++ b/GPy/kern/parts/eq_ode1.py
@ -39,11 +39,12 @@ class Eq_ode1(Kernpart):
        self.name = 'eq_ode1'
        self.output_dim = output_dim
        self.lengthscale = lengthscale
-        self.num_params = self.output_dim*(1. + self.rank) + 1
+        self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
        if kappa is not None:
            self.num_params+=self.output_dim
        if delay is not None:
-            self.num_params+=self.output_dim
+            assert delay.shape==(self.output_dim-1,)
+            self.num_params+=self.output_dim-1
        self.rank = rank
        if W is None:
            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
@ -51,18 +52,17 @@ class Eq_ode1(Kernpart):
            assert W.shape==(self.output_dim,self.rank)
            self.W = W
        if decay is None:
-            self.decay = np.ones(self.output_dim)
+            self.decay = np.ones(self.output_dim-1)
        if kappa is not None:
            assert kappa.shape==(self.output_dim,)
        self.kappa = kappa

-        if delay is not None:
-            assert delay.shape==(self.output_dim,)
        self.delay = delay
        self.is_normalized = True
        self.is_stationary = False
+        self.gaussian_initial = False
        self._set_params(self._get_params())
-
+        
    def _get_params(self):
        param_list = [self.W.flatten()]
        if self.kappa is not None:
@ -84,11 +84,11 @@ class Eq_ode1(Kernpart):
            self.kappa = x[start:end]
            self.B += np.diag(self.kappa)
            start=end
-        end+=self.output_dim
+        end+=self.output_dim-1
        self.decay = x[start:end]
        start=end
        if self.delay is not None:
-            end+=self.output_dim
+            end+=self.output_dim-1
            self.delay = x[start:end]
            start=end
        end+=1
@ -100,9 +100,9 @@ class Eq_ode1(Kernpart):
        param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
        if self.kappa is not None:
            param_names += ['kappa_%i'%i for i in range(self.output_dim)]
-        param_names += ['decay_%i'%i for i in range(self.output_dim)]
+        param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
        if self.delay is not None:
-            param_names += ['delay_%i'%i for i in range(self.output_dim)]
+            param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
        param_names+= ['lengthscale'] 
        return param_names

@ -112,7 +112,7 @@ class Eq_ode1(Kernpart):
            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')

        self._K_computations(X, X2)
-        target += self._scales*self._dK_dvar
+        target += self._scale*self._K_dvar

        if self.gaussian_initial:
            # Add covariance associated with initial condition.
@ -140,9 +140,8 @@ class Eq_ode1(Kernpart):

        # TODO: some fast checking here to see if this needs recomputing?
        self._t = X[:, 0]
-        if X.shape[1]==1:
-            # No index passed, assume single output of ode model.
-            self._index = np.ones_like(X[:, 1],dtype=np.int)
+        if not X.shape[1] == 2:
+            raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
        self._index = np.asarray(X[:, 1],dtype=np.int)
        # Sort indices so that outputs are in blocks for computational
        # convenience.
@ -156,15 +155,12 @@ class Eq_ode1(Kernpart):
            self._index2 = None
            self._rorder2 = self._rorder
        else:
-            if X2.shape[1] > 2:
-                raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
+            if not X2.shape[1] == 2:
+                raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
            self._t2 = X2[:, 0]
-            if X.shape[1]==1:
-                # No index passed, assume single output of ode model.
-                self._index2 = np.ones_like(X2[:, 1],dtype=np.int)
            self._index2 = np.asarray(X2[:, 1],dtype=np.int)
            self._order2 = self._index2.argsort()
-            slef._index2 = self._index2[self._order2]
+            self._index2 = self._index2[self._order2]
            self._t2 = self._t2[self._order2]
            self._rorder2 = self._order2.argsort() # rorder2 is for reversing order

@ -180,25 +176,65 @@ class Eq_ode1(Kernpart):
        else:
            self._K_compute_ode_eq(transpose=True)
        self._K_compute_ode()
-        
+
+        if X2 is None:
+            self._K_dvar = np.zeros((self._t.shape[0], self._t.shape[0]))
+        else:
+            self._K_dvar = np.zeros((self._t.shape[0], self._t2.shape[0]))
+
        # Reorder values of blocks for placing back into _K_dvar.
-        self._K_dvar[self._rorder, :] = np.vstack((np.hstack((self._K_eq, self._Keq_ode)),
-                                                   np.hstack((self._K_ode_eq, self.K_ode))))[:, self._rorder2]
+        self._K_dvar = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
+                                                   np.hstack((self._K_ode_eq, self._K_ode))))
+        self._K_dvar = self._K_dvar[self._rorder, :]
+        self._K_dvar = self._K_dvar[:, self._rorder2]
+        
+        
+        if X2 is None:
+            # Matrix giving scales of each output
+            self._scale = np.zeros((self._t.size, self._t.size))
+            code="""
+            for(int i=0;i<N; i++){
+              scale_mat[i+i*N] = B[index[i]+output_dim*(index[i])];
+              for(int j=0; j<i; j++){
+                  scale_mat[j+i*N] = B[index[i]+output_dim*index[j]];
+                  scale_mat[i+j*N] = scale_mat[j+i*N];
+                }
+              }
+            """
+            scale_mat, B, index = self._scale, self.B, self._index
+            N, output_dim = self._t.size, self.output_dim
+            weave.inline(code,['index',
+                               'scale_mat', 'B',
+                               'N', 'output_dim'])
+        else:
+            self._scale = np.zeros((self._t.size, self._t2.size))
+            code = """
+            for(int i=0; i<N; i++){
+              for(int j=0; j<N2; j++){
+                scale_mat[i+j*N] = B[index[i]+output_dim*index2[j]];
+              }
+            }
+            """
+            scale_mat, B, index, index2 = self._scale, self.B, self._index, self._index2
+            N, N2, output_dim = self._t.size, self._t2.size, self.output_dim
+            weave.inline(code, ['index', 'index2',
+                                'scale_mat', 'B',
+                                'N', 'N2', 'output_dim'])
+


    def _K_compute_eq(self):
        """Compute covariance for latent covariance."""
        t_eq = self._t[self._index==0]
-        if t_eq.shape[0]==0:
-            self._K_eq = np.zeros((0, 0))
-            return
-        
        if self._t2 is None:
+            if t_eq.size==0:
+                self._K_eq = np.zeros((0, 0))
+                return
            self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
        else:
            t2_eq = self._t2[self._index2==0]
-            if t2_eq.shape[0]==0:
-                self._K_eq_eq = np.zeros((0, 0))
+            if t_eq.size==0 or t2_eq.size==0:
+                self._K_eq = np.zeros((t_eq.size, t2_eq.size))
                return
            self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
        
@ -212,63 +248,27 @@ class Eq_ode1(Kernpart):
        :param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
        :type transpose: bool"""

-        if transpose:
-            if self._t2 is not None:
+        if self._t2 is not None:
+            if transpose:
+                t_eq = self._t[self._index==0]
                t_ode = self._t2[self._index2>0]
                index_ode = self._index2[self._index2>0]-1
-                if t_ode.shape[0]==0:
-                    self._K_eq_ode = np.zeros((0, 0))
-                    return
            else:
-                self._K_eq_ode = np.zeros((0, 0))
-                return
-        
-            t_eq = self._t[self._index==0]
-            if t_eq.shape[0]==0:
-                self._K_eq_ode = np.zeros((0, 0))
-                return
+                t_eq = self._t2[self._index2==0]
+                t_ode = self._t[self._index>0]
+                index_ode = self._index[self._index>0]-1
        else:
+            t_eq = self._t[self._index==0]
            t_ode = self._t[self._index>0]
            index_ode = self._index[self._index>0]-1
-            if t_ode.shape[0]==0:
-                self._K_ode_eq = np.zeros((0, 0))
-                return
-            if self._t2 is not None:            
-                t_eq = self._t2[self._index2==0]
-                if t_eq.shape[0]==0:
-                    self._K_ode_eq = np.zeros((0, 0))
-                    return
+
+        if t_ode.size==0 or t_eq.size==0:
+            if transpose:
+                self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
            else:
-                self._K_ode_eq = np.zeros((0, 0))
-                return
-        # Matrix giving scales of each output
-        # self._scale = np.zeros((t_ode.shape[0], t_eq.shape[0]))
-        # code="""
-        #     for(int i=0;i<N; i++){
-        #       for(int j=0; j<N2; j++){
-        #           scale_mat[i+j*N] = W[index_ode[i]+index_eq[j]*output_dim];
-        #         }
-        #       }
-        #     """
-        #     scale_mat, B = self._scale, self._B
-        #     N, N2, output_dim = index_ode.size, index_eq.size, self.output_dim
-        #     weave.inline(code,['index_ode', 'index_eq',
-        #                        'scale_mat', 'B',
-        #                        'N', 'N2', 'output_dim'])
-        # else:
-        #     self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
-        #     code = """
-        #     for(int i=0; i<N; i++){
-        #       for(int j=0; j<N2; j++){
-        #         scale_mat[i+j*N] = B[index_ode[i]+output_dim*index2_ode[j]]
-        #       }
-        #     }
-        #     """
-        #     scale_mat, B = self._scale, self._B
-        #     N, N2, output_dim = index_ode.size, index2.size, self.output_dim
-        #     weave.inline(code, ['index_ode', 'index2_ode',
-        #                         'scale_mat', 'B',
-        #                         'N', 'N2', 'output_dim'])
+                self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
+            return
+
        t_ode_mat = t_ode[:, None]
        t_eq_mat = t_eq[None, :]
        if self.delay is not None:
@ -276,13 +276,14 @@ class Eq_ode1(Kernpart):
        diff_t = (t_ode_mat - t_eq_mat)

        inv_sigma_diff_t = 1./self.sigma*diff_t
-        half_sigma_d_i = 0.5*self.sigma*self.decay
+        decay_vals = self.decay[index_ode][:, None]
+        half_sigma_d_i = 0.5*self.sigma*decay_vals

        if self.is_stationary == False:
-            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
        else:
            ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
-        sK = signs*exp(half_sigma_d_i*half_sigma_d_i - self.decay*diff_t + ln_part)
+        sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)

        sK *= 0.5

@ -294,58 +295,24 @@ class Eq_ode1(Kernpart):
            self._K_eq_ode = sK.T
        else:
            self._K_ode_eq = sK
-        return K
        
    def _K_compute_ode(self):
        # Compute covariances between outputs of the ODE models.

        t_ode = self._t[self._index>0]
        index_ode = self._index[self._index>0]-1
-        if t_ode.shape[0]==0:
-            self._K_ode = np.zeros((0, 0))
-            return
-        
        if self._t2 is None:
+            if t_ode.size==0:
+                self._K_ode = np.zeros((0, 0))
+                return        
            t2_ode = t_ode
            index2_ode = index_ode
        else:
            t2_ode = self._t2[self._index2>0]
-            index2_ode = self._index2[self._index2>0]-1
-            if t2_eq.shape[0]==0:
-                self._K_ode = np.zeros((0, 0))
+            if t_ode.size==0 or t2_ode.size==0:
+                self._K_ode = np.zeros((t_ode.size, t2_ode.size))
                return
-        
-        if self._index2 is None:
-            # Matrix giving scales of each output
-            self._scale = np.zeros((t_ode.shape[0], t_ode.shape[0]))
-            code="""
-            for(int i=0;i<N; i++){
-              scale_mat[i+i*N] = B[index_ode[i]+output_dim*(index_ode[i])];
-              for(int j=0; j<i; j++){
-                  scale_mat[j+i*N] = B[index_ode[i]+output_dim*index_ode[j]];
-                  scale_mat[i+j*N] = scale_mat[j+i*N];
-                }
-              }
-            """
-            scale_mat, B = self._scale, self.B
-            N, output_dim = index_ode.size, self.output_dim
-            weave.inline(code,['index_ode',
-                               'scale_mat', 'B',
-                               'N', 'output_dim'])
-        else:
-            self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
-            code = """
-            for(int i=0; i<N; i++){
-              for(int j=0; j<N2; j++){
-                scale_mat[i+j*N] = B[index_ode[i]+output_dim*index2_ode[j]]
-              }
-            }
-            """
-            scale_mat, B = self._scale, self.B
-            N, N2, output_dim = index_ode.size, index2.size, self.output_dim
-            weave.inline(code, ['index_ode', 'index2_ode',
-                                'scale_mat', 'B',
-                                'N', 'N2', 'output_dim'])
+            index2_ode = self._index2[self._index2>0]-1
        
        # When index is identical
        if self.is_stationary:
@ -360,10 +327,10 @@ class Eq_ode1(Kernpart):
                h2 = self._compute_H_stat(t2_ode, index2_ode, t_ode, index_ode)
            else:
                h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode)                
-            self._K_ode += 0.5 * (h + h2.T)
+            self._K_ode = 0.5 * (h + h2.T)

        if not self.is_normalized:
-            self._K_ode *= np.sqrt(np.pi)*sigma
+            self._K_ode *= np.sqrt(np.pi)*self.sigma
    
    def _compute_H(self, t, index, t2, index2, update_derivatives=False):
        """Helper function for computing part of the ode1 covariance function.
@ -441,7 +408,7 @@ class Eq_ode1(Kernpart):
                          -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
                          -np.log(Decay[:, None] + Decay2[None, :]))

-
+        return h
        # if update_derivatives:
        #     sigma2 = self.sigma*self.sigma
        #     # Update ith decay gradient
--- a/GPy/models/gp_multioutput_regression.py
+++ b/GPy/models/gp_multioutput_regression.py
@ -25,14 +25,14 @@ class GPMultioutputRegression(GP):
    :type normalize_X: False|True
    :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
-    :param W_columns: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
-    :type W_columns: integer
+    :param rank: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
+    :type rank: integer
    """

-    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,W_columns=1):
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,rank=1):

-        self.num_outputs = len(Y_list)
-        assert len(X_list) == self.num_outputs, 'Number of outputs do not match length of inputs list.'
+        self.output_dim = len(Y_list)
+        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'

        #Inputs indexing
        i = 0
@ -51,7 +51,7 @@ class GPMultioutputRegression(GP):
        #Coregionalization kernel definition
        if kernel_list is None:
            kernel_list = [kern.rbf(original_dim)]
-        mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
+        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list = kernel_list, rank=rank)

        self.multioutput = True
        GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
--- a/GPy/models/sparse_gp_multioutput_regression.py
+++ b/GPy/models/sparse_gp_multioutput_regression.py
@ -30,23 +30,23 @@ class SparseGPMultioutputRegression(SparseGP):
    :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list
    :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
    :type num_inducing: integer
-    :param W_columns: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
-    :type W_columns: integer
+    :param rank: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
+    :type rank: integer
    """
    #NOTE not tested with uncertain inputs
-    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,W_columns=1):
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,rank=1):

-        self.num_outputs = len(Y_list)
-        assert len(X_list) == self.num_outputs, 'Number of outputs do not match length of inputs list.'
+        self.output_dim = len(Y_list)
+        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'

        #Inducing inputs list
        if len(Z_list):
-            assert len(Z_list) == self.num_outputs, 'Number of outputs do not match length of inducing inputs list.'
+            assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
        else:
            if isinstance(num_inducing,np.int):
-                num_inducing = [num_inducing] * self.num_outputs
+                num_inducing = [num_inducing] * self.output_dim
            num_inducing = np.asarray(num_inducing)
-            assert num_inducing.size == self.num_outputs, 'Number of outputs do not match length of inducing inputs list.'
+            assert num_inducing.size == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
            for ni,X in zip(num_inducing,X_list):
                i = np.random.permutation(X.shape[0])[:ni]
                Z_list.append(X[i].copy())
@ -72,7 +72,7 @@ class SparseGPMultioutputRegression(SparseGP):
        #Coregionalization kernel definition
        if kernel_list is None:
            kernel_list = [kern.rbf(original_dim)]
-        mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
+        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list = kernel_list, rank=rank)

        self.multioutput = True
        SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)
--- a/GPy/testing/bcgplvm_tests.py
+++ b/GPy/testing/bcgplvm_tests.py
@ -0,0 +1,50 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+class BCGPLVMTests(unittest.TestCase):
+    def test_kernel_backconstraint(self):
+        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
+        X = np.random.rand(num_data, input_dim)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
+        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
+        bk = GPy.kern.rbf(output_dim)
+        mapping = GPy.mappings.Kernel(output_dim=input_dim, X=Y, kernel=bk)
+        m = GPy.models.BCGPLVM(Y, input_dim, kernel = k, mapping=mapping)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+        
+    def test_linear_backconstraint(self):
+        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
+        X = np.random.rand(num_data, input_dim)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
+        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
+        bk = GPy.kern.rbf(output_dim)
+        mapping = GPy.mappings.Linear(output_dim=input_dim, input_dim=output_dim)
+        m = GPy.models.BCGPLVM(Y, input_dim, kernel = k, mapping=mapping)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+        
+    def test_mlp_backconstraint(self):
+        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
+        X = np.random.rand(num_data, input_dim)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
+        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
+        bk = GPy.kern.rbf(output_dim)
+        mapping = GPy.mappings.MLP(output_dim=input_dim, input_dim=output_dim, hidden_dim=[5, 4, 7])
+        m = GPy.models.BCGPLVM(Y, input_dim, kernel = k, mapping=mapping)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()
--- a/GPy/util/erfcx.py
+++ b/GPy/util/erfcx.py
@ -0,0 +1,63 @@
+## Copyright (C) 2010 Soren Hauberg
+##
+## Copyright James Hensman 2011
+## 
+## This program is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+import numpy as np
+
+def erfcx (arg):
+	arg = np.atleast_1d(arg)
+	assert(np.all(np.isreal(arg)),"erfcx: input must be real")
+
+	## Get precision dependent thresholds -- or not :p
+	xneg = -26.628;
+	xmax = 2.53e+307;
+
+	## Allocate output
+	result = np.zeros (arg.shape)
+
+	## Find values where erfcx can be evaluated
+	idx_neg = (arg < xneg);
+	idx_max = (arg > xmax);
+	idx = ~(idx_neg | idx_max);
+
+	arg = arg [idx];
+
+	## Perform the actual computation
+	t = 3.97886080735226 / (np.abs (arg) + 3.97886080735226);
+	u = t - 0.5;
+	y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u \
+	    - 0.003963850973605135)   * u - 8.70779635317295828e-4) * u +     \
+	      0.00773672528313526668) * u + 0.00383335126264887303) * u -     \
+	      0.0127223813782122755)  * u - 0.0133823644533460069)  * u +     \
+	      0.0161315329733252248)  * u + 0.0390976845588484035)  * u +     \
+	      0.00249367200053503304;
+	y = ((((((((((((y * u - 0.0838864557023001992) * u -           \
+	      0.119463959964325415) * u + 0.0166207924969367356) * u + \
+	      0.357524274449531043) * u + 0.805276408752910567)  * u + \
+	      1.18902982909273333)  * u + 1.37040217682338167)   * u + \
+	      1.31314653831023098)  * u + 1.07925515155856677)   * u + \
+	      0.774368199119538609) * u + 0.490165080585318424)  * u + \
+	      0.275374741597376782) * t;
+
+	y [arg < 0] = 2 * np.exp (arg [arg < 0]**2) - y [arg < 0];
+
+	## Put the results back into something with the same size is the original input
+	result [idx] = y;
+	result [idx_neg] = np.inf;
+	## result (idx_max) = 0; # not needed as we initialise with zeros
+	return(result)
+
--- a/GPy/util/ln_diff_erfs.py
+++ b/GPy/util/ln_diff_erfs.py
@ -18,54 +18,93 @@ def ln_diff_erfs(x1, x2, return_sign=False):
    :type x2: ndarray
    :return: tuple containing (log(abs(erf(x1) - erf(x2))), sign(erf(x1) - erf(x2)))
    
-    Based on MATLAB code that was written by Antti Honkela and modified by David Luengo, originally derived from code by Neil Lawrence.
+    Based on MATLAB code that was written by Antti Honkela and modified by David Luengo and originally derived from code by Neil Lawrence.
    """
    x1 = np.require(x1).real
    x2 = np.require(x2).real
-
-    v = np.zeros(np.max((x1.size, x2.size)))
+    if x1.size==1:
+        x1 = np.reshape(x1, (1, 1))
+    if x2.size==1:
+        x2 = np.reshape(x2, (1, 1))
+        
+    if x1.shape==x2.shape:
+        v = np.zeros_like(x1)
+    else:
+        if x1.size==1:
+            v = np.zeros(x2.shape)
+        elif x2.size==1:
+            v = np.zeros(x1.shape)
+        else:
+            raise ValueError, "This function does not broadcast unless provided with a scalar."
    
-    # if numel(x1) == 1:
-    #     x1 = x1 * ones(size(x2))
+    if x1.size == 1:
+        x1 = np.tile(x1, x2.shape)

-    # if numel(x2) == 1:
-    #     x2 = x2 * ones(size(x1))
+    if x2.size == 1:
+        x2 = np.tile(x2, x1.shape)

    sign = np.sign(x1 - x2)
-    I = sign == -1
-    swap = x1[I]
-    x1[I] = x2[I]
-    x2[I] = swap
+    if x1.size == 1:
+        if sign== -1:
+            swap = x1
+            x1 = x2
+            x2 = swap
+    else:
+        I = sign == -1
+        swap = x1[I]
+        x1[I] = x2[I]
+        x2[I] = swap

-    # TODO: switch off log of zero warnings.
-    # Case 1: arguments of different sign, no problems with loss of accuracy
-    I1 = np.logical_or(np.logical_and(x1>0, x2<0), np.logical_and(x2>0, x1<0)) # I1=(x1*x2)<0
-    v[I1] = np.log( erf(x1[I1]) - erf(x2[I1]) )
+    with np.errstate(divide='ignore'):
+        # switch off log of zero warnings.

-    # Case 2: x1 = x2 so we have log of zero.
-    I2 = (x1 == x2)
-    v[I2] = -np.inf
+        # Case 0: arguments of different sign, no problems with loss of accuracy
+        I0 = np.logical_or(np.logical_and(x1>0, x2<0), np.logical_and(x2>0, x1<0)) # I1=(x1*x2)<0

-    # Case 3: Both arguments are non-negative
-    I3 = np.logical_and(x1 > 0, np.logical_and(np.logical_not(I1),
-                                               np.logical_not(I2)))
-    v[I3] = np.log(erfcx(x2[I3])
-                   -erfcx(x1[I3])*np.exp(x2[I3]**2
-                                         -x1[I3]**2)) - x2[I3]**2
-    
-    # Case 4: Both arguments are non-positive
-    I4 = np.logical_and(np.logical_and(np.logical_not(I1),
-                                       np.logical_not(I2)),
-                        np.logical_not(I3))
-    v[I4] = np.log(erfcx(-x1[I4])
-                   -erfcx(-x2[I4])*np.exp(x1[I4]**2
-                                          -x2[I4]**2))-x1[I4]**2
-    
+        # Case 1: x1 = x2 so we have log of zero.
+        I1 = (x1 == x2)
+
+        # Case 2: Both arguments are non-negative
+        I2 = np.logical_and(x1 > 0, np.logical_and(np.logical_not(I0),
+                                                   np.logical_not(I1)))
+        # Case 3: Both arguments are non-positive
+        I3 = np.logical_and(np.logical_and(np.logical_not(I0),
+                                           np.logical_not(I1)),
+                            np.logical_not(I2))
+        _x2 = x2.flatten()
+        _x1 = x1.flatten()
+        for group, flags in zip((0, 1, 2, 3), (I0, I1, I2, I3)):
+
+            if np.any(flags):
+                if not x1.size==1:
+                    _x1 = x1[flags]
+                if not x2.size==1:
+                    _x2 = x2[flags]
+                if group==0:
+                    v[flags] = np.log( erf(_x1) - erf(_x2) )
+                elif group==1:
+                    v[flags] = -np.inf
+                elif group==2:
+                    v[flags] = np.log(erfcx(_x2)
+                                   -erfcx(_x1)*np.exp(_x2**2
+                                                      -_x1**2)) - _x2**2
+                elif group==3:
+                    v[flags] = np.log(erfcx(-_x1)
+                                   -erfcx(-_x2)*np.exp(_x1**2
+                                                          -_x2**2))-_x1**2
+        
    # TODO: switch back on log of zero warnings.

    if return_sign:
        return v, sign
    else:
-        # Need to add in a complex part because argument is negative.
-        v[I] += np.pi*1j
+        if v.size==1:
+            if sign==-1:
+                v = v.view('complex64')
+                v += np.pi*1j
+        else:
+            # Need to add in a complex part because argument is negative.
+            v = v.view('complex64')
+            v[I] += np.pi*1j

+    return v