diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index c01ba815..d12febbb 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -3,10 +3,11 @@ import numpy as np +import pylab as pb from ..core.parameterised import parameterised from kernpart import kernpart import itertools -from product_orthogonal import product_orthogonal +from product_orthogonal import product_orthogonal class kern(parameterised): def __init__(self,D,parts=[], input_slices=None): @@ -386,3 +387,59 @@ class kern(parameterised): #TODO: there are some extra terms to compute here! return target_mu, target_S + + def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None,*args,**kwargs): + if which_functions=='all': + which_functions = [True]*self.Nparts + if self.D == 1: + if x is None: + x = np.zeros((1,1)) + else: + x = np.asarray(x) + assert x.size == 1, "The size of the fixed variable x is not 1" + x = x.reshape((1,1)) + + if plot_limits == None: + xmin, xmax = (x-5).flatten(), (x+5).flatten() + elif len(plot_limits) == 2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + + Xnew = np.linspace(xmin,xmax,resolution or 201)[:,None] + Kx = self.K(Xnew,x,slices2=which_functions) + pb.plot(Xnew,Kx,*args,**kwargs) + pb.xlim(xmin,xmax) + pb.xlabel("x") + pb.ylabel("k(x,%0.1f)" %x) + + elif self.D == 2: + if x is None: + x = np.zeros((1,2)) + else: + x = np.asarray(x) + assert x.size == 2, "The size of the fixed variable x is not 2" + x = x.reshape((1,2)) + + if plot_limits == None: + xmin, xmax = (x-5).flatten(), (x+5).flatten() + elif len(plot_limits) == 2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + + resolution = resolution or 51 + xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] + xg = np.linspace(xmin[0],xmax[0],resolution) + yg = np.linspace(xmin[1],xmax[1],resolution) + Xnew = np.vstack((xx.flatten(),yy.flatten())).T + Kx = self.K(Xnew,x,slices2=which_functions) + Kx = Kx.reshape(resolution,resolution).T + pb.contour(xg,yg,Kx,vmin=Kx.min(),vmax=Kx.max(),cmap=pb.cm.jet) + pb.xlim(xmin[0],xmax[0]) + pb.ylim(xmin[1],xmax[1]) + pb.xlabel("x1") + pb.ylabel("x2") + pb.title("k(x1,x2 ; %0.1f,%0.1f)" %(x[0,0],x[0,1]) ) + else: + raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions" diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 8e2c5d84..c099d0d5 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -9,5 +9,5 @@ from sparse_GP_regression import sparse_GP_regression from GPLVM import GPLVM from warped_GP import warpedGP from sparse_GPLVM import sparse_GPLVM -#from uncollapsed_sparse_GP import uncollapsed_sparse_GP +from uncollapsed_sparse_GP import uncollapsed_sparse_GP from BGPLVM import Bayesian_GPLVM diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index 95460d87..9e83af3a 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -176,7 +176,11 @@ class sparse_GP(GP): dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) else: #re-cast computations in psi2 back to psi1: - dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) + #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) + if not self.likelihood.is_heteroscedastic: + dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1) + else: + raise NotImplementedError, "TODO" dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) @@ -192,7 +196,10 @@ class sparse_GP(GP): dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes' else: #re-cast computations in psi2 back to psi1: - dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) + if not self.likelihood.is_heteroscedastic: + dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1) + else: + raise NotImplementedError, "TODO" dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) return dL_dZ diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py index 0fccfc71..21d5e5ff 100644 --- a/GPy/models/uncollapsed_sparse_GP.py +++ b/GPy/models/uncollapsed_sparse_GP.py @@ -4,19 +4,17 @@ import numpy as np import pylab as pb from ..util.linalg import mdot, jitchol, chol_inv, pdinv -from ..util.plot import gpplot from .. import kern from ..likelihoods import likelihood -from sparse_GP_regression import sparse_GP_regression +from sparse_GP import sparse_GP -class uncollapsed_sparse_GP(sparse_GP_regression): +class uncollapsed_sparse_GP(sparse_GP): """ Variational sparse GP model (Regression), where the approximating distribution q(u) is represented explicitly :param X: inputs :type X: np.ndarray (N x Q) - :param Y: observed data - :type Y: np.ndarray of observations (N x D) + :param likelihood: GPy likelihood class, containing observed data :param q_u: canonical parameters of the distribution squasehd into a 1D array :type q_u: np.ndarray :param kernel : the kernel/covariance function. See link kernels @@ -24,29 +22,35 @@ class uncollapsed_sparse_GP(sparse_GP_regression): :param Z: inducing inputs (optional, see note) :type Z: np.ndarray (M x Q) | None :param Zslices: slices for the inducing inputs (see slicing TODO: link) - :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) - :type M: int - :param beta: noise precision. TODO> ignore beta if doing EP - :type beta: float - :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) - :type normalize_(X|Y): bool + :param normalize_X : whether to normalize the data before computing (predictions will be in original scales) + :type normalize_X: bool """ - def __init__(self, X, Y, q_u=None, M=10, *args, **kwargs): + def __init__(self, X, likelihood, kernel, Z, q_u=None, **kwargs): self.D = Y.shape[1] + self.M = kwargs['Z'].shape[0] if q_u is None: - if 'Z' in kwargs.keys(): - print kwargs['Z'] - self.M = kwargs['Z'].shape[0] - print self.M - else: - self.M = M q_u = np.hstack((np.random.randn(self.M*self.D),-0.5*np.eye(self.M).flatten())) self.set_vb_param(q_u) - sparse_GP_regression.__init__(self, X, Y, M=self.M,*args, **kwargs) + sparse_GP.__init__(self, X, likelihood, kernel, Z, **kwargs) def _computations(self): - self.V = self.beta*self.Y + # kernel computations, using BGPLVM notation + self.Kmm = self.kern.K(self.Z) + if self.has_uncertain_inputs: + raise NotImplementedError + else: + self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices) + self.psi1 = self.kern.K(self.Z,self.X) + if self.likelihood.is_heteroscedastic: + raise NotImplementedError + else: + tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) + self.psi2_beta_scaled = np.dot(tmp,tmp.T) + self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] + + + self.V = self.likelihood.precision*self.Y self.VmT = np.dot(self.V,self.q_u_expectation[0].T) self.psi1V = np.dot(self.psi1, self.V) self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) @@ -68,6 +72,15 @@ class uncollapsed_sparse_GP(sparse_GP_regression): tmp += self.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1]) self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi) + #Compute the gradient of the log likelihood wrt noise variance + #TODO: suport heteroscedatic noise + dbeta = 0.5 * self.N*self.D/self.beta + dbeta += - 0.5 * self.D * self.trace_K + dbeta += - 0.5 * self.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi)) + dbeta += - 0.5 * self.trYYT + dbeta += np.sum(np.dot(self.Y.T,self.projected_mean)) + self.partial_for_likelihood = -dbeta*self.likelihood.precision**2 + def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood @@ -79,19 +92,6 @@ class uncollapsed_sparse_GP(sparse_GP_regression): E = np.sum(np.dot(self.V.T,self.projected_mean)) return A+B+C+D+E - def dL_dbeta(self): - """ - Compute the gradient of the log likelihood wrt beta. - TODO: suport heteroscedatic noise - """ - dA_dbeta = 0.5 * self.N*self.D/self.beta - dB_dbeta = - 0.5 * self.D * self.trace_K - dC_dbeta = - 0.5 * self.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi)) - dD_dbeta = - 0.5 * self.trYYT - dE_dbeta = np.sum(np.dot(self.Y.T,self.projected_mean)) - - return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta) - def _raw_predict(self, Xnew, slices,full_cov=False): """Internal helper function for making predictions, does not account for normalisation""" Kx = self.kern.K(Xnew,self.Z)