From 6da0feb7fcc50e2527e2453f7478115c42055401 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 13 Mar 2013 14:24:10 +0000 Subject: [PATCH 01/21] added fixed effect kernel --- GPy/kern/__init__.py | 2 +- GPy/kern/constructors.py | 14 +++++++++++++ GPy/kern/fixed.py | 42 +++++++++++++++++++++++++++++++++++++ GPy/testing/kernel_tests.py | 11 ++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 GPy/kern/fixed.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 6852384c..5d8a7d15 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 983674b0..ec34242b 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -12,6 +12,7 @@ from exponential import exponential as exponentialpart from Matern32 import Matern32 as Matern32part from Matern52 import Matern52 as Matern52part from bias import bias as biaspart +from fixed import fixed as fixedpart from finite_dimensional import finite_dimensional as finite_dimensionalpart from spline import spline as splinepart from Brownian import Brownian as Brownianpart @@ -296,3 +297,16 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.): """ part = rational_quadraticpart(D,variance, lengthscale, power) return kern(D, [part]) + +def fixed(D, K, variance=1.): + """ + Construct a fixed effect kernel. + + Arguments + --------- + D (int), obligatory + K (np.array), obligatory + variance (float) + """ + part = fixedpart(D, K, variance) + return kern(D, [part]) diff --git a/GPy/kern/fixed.py b/GPy/kern/fixed.py new file mode 100644 index 00000000..8732ec8f --- /dev/null +++ b/GPy/kern/fixed.py @@ -0,0 +1,42 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from kernpart import kernpart +import numpy as np +import hashlib + +class fixed(kernpart): + def __init__(self,D,K,variance=1.): + """ + :param D: the number of input dimensions + :type D: int + :param variance: the variance of the kernel + :type variance: float + """ + self.D = D + self.fixed_K = K + self.Nparam = 1 + self.name = 'fixed' + self._set_params(np.array([variance]).flatten()) + + def _get_params(self): + return self.variance + + def _set_params(self,x): + assert x.shape==(1,) + self.variance = x + + def _get_param_names(self): + return ['variance'] + + def K(self,X,X2,target): + target += self.variance * self.fixed_K + + def dK_dtheta(self,partial,X,X2,target): + target += (partial * self.fixed_K).sum() + + def dK_dX(self, partial,X, X2, target): + pass + + def dKdiag_dX(self,partial,X,target): + pass diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index f1762db8..0f6d8772 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -15,6 +15,17 @@ class KernelTests(unittest.TestCase): m = GPy.models.GP_regression(X,Y,K) self.assertTrue(m.checkgrad()) + def test_fixedkernel(self): + """ + Fixed effect kernel test + """ + X = np.random.rand(30, 4) + K = np.dot(X, X.T) + kernel = GPy.kern.fixed(4, K) + Y = np.ones((30,1)) + m = GPy.models.GP_regression(X,Y,kernel=kernel) + self.assertTrue(m.checkgrad()) + def test_coregionalisation(self): X1 = np.random.rand(50,1)*8 X2 = np.random.rand(30,1)*5 From 0c0c7b000c3eae21a48d91fd70ab8a1f697fc22e Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Tue, 19 Mar 2013 19:08:25 +0000 Subject: [PATCH 02/21] Generalized FITC is back --- GPy/models/__init__.py | 1 + GPy/models/generalized_FITC.py | 203 +++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 GPy/models/generalized_FITC.py diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 22aa803c..f442dc67 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -11,3 +11,4 @@ from warped_GP import warpedGP from sparse_GPLVM import sparse_GPLVM from uncollapsed_sparse_GP import uncollapsed_sparse_GP from Bayesian_GPLVM import Bayesian_GPLVM +from generalized_FITC import generalized_FITC diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py new file mode 100644 index 00000000..739059a0 --- /dev/null +++ b/GPy/models/generalized_FITC.py @@ -0,0 +1,203 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import pylab as pb +from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot +from ..util.plot import gpplot +from .. import kern +from scipy import stats, linalg +from sparse_GP import sparse_GP + +class generalized_FITC(sparse_GP): + """ + Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. + + :param X: inputs + :type X: np.ndarray (N x Q) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP) + :param kernel : the kernel/covariance function. See link kernels + :type kernel: a GPy kernel + :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) + :type X_uncertainty: np.ndarray (N x Q) | None + :param Z: inducing inputs (optional, see note) + :type Z: np.ndarray (M x Q) | None + :param Zslices: slices for the inducing inputs (see slicing TODO: link) + :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) + :type M: int + :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) + :type normalize_(X|Y): bool + """ + + def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False): + + if type(Z) == int: + #FIXME + self.M = Z + self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1) + elif type(Z) == np.ndarray: + self.Z = Z + self.M = self.Z.shape[0] + + self._precision = likelihood.precision + + sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False) + + def _set_params(self, p): + self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) + self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) + self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) + self._compute_kernel_matrices() + self._computations() + self._FITC_computations() + + def update_likelihood_approximation(self): + """ + Approximates a non-gaussian likelihood using Expectation Propagation + + For a Gaussian (or direct: TODO) likelihood, no iteration is required: + this function does nothing + """ + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) + self._precision = self.likelihood.precision # Save the true precision + self.likelihood.precision = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation + self._set_params(self._get_params()) # update the GP + + + def _FITC_computations(self): + """ + FITC approximation doesn't have the correction term in the log-likelihood bound, + but adds a diagonal term to the covariance matrix. + This function: + - computes the diagonal term + - eliminates the extra terms computed in the sparse_GP approximation + - computes the likelihood gradients wrt the true precision. + """ + if self.likelihood.is_heteroscedastic: + # Compute generalized FITC's diagonal term of the covariance + self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) + self.Diag0 = self.psi0 - np.diag(self.Qnn) + + self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten()) + self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T + self.RPT0 = np.dot(self.Lmi,self.psi1) + self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) + self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) + self.RPT = np.dot(self.R,self.P.T) + self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) + self.w = self.Diag * self.likelihood.v_tilde + self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde)) + self.mu = self.w + np.dot(self.P,self.gamma) + self.mu_tilde = (self.likelihood.v_tilde/self.likelihood.tau_tilde)[:,None] + + # Remove extra term from dL_dpsi1 + self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB + else: + # Remove extra term from dL_dpsi1 + self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB + + + sf = self.scale_factor + sf2 = sf**2 + + # Remove extra term from dL_dKmm + self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB + self.dL_dpsi0 = None + + #the partial derivative vector for the likelihood + if self.likelihood.Nparams == 0: + #save computation here. + self.partial_for_likelihood = None + elif self.likelihood.is_heteroscedastic: + raise NotImplementedError, "heteroscedatic derivates not implemented" + else: + #likelihood is not heterscedatic + self.partial_for_likelihood = - 0.5 * self.N*self.D*self._precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self._precision**2 + self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self._precision + self.partial_for_likelihood += self._precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) + + #TODO partial derivative vector for the likelihood not implemented + #NOTE the true precison is now '_precison' not 'precision' + + def dL_dtheta(self): + """ + Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel + """ + dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) + if self.has_uncertain_inputs: + raise NotImplementedError, "heteroscedatic derivates not implemented" + else: + #NOTE in sparse_GP this would include the gradient wrt psi0 + dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) + return dL_dtheta + + + def log_likelihood(self): + """ Compute the (lower bound on the) log marginal likelihood """ + sf2 = self.scale_factor**2 + if self.likelihood.is_heteroscedastic: + A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) + else: + A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT + C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) + D = 0.5*np.trace(self.Cpsi1VVpsi1) + return A+C+D + + def _raw_predict(self, Xnew, slices, full_cov=True): + if self.likelihood.is_heteroscedastic: + """ + Make a prediction for the vsGP model + + Arguments + --------- + X : Input prediction data - Nx1 numpy array (floats) + """ + Kx = self.kern.K(self.Z, Xnew) + if full_cov: + Kxx = self.kern.K(Xnew) + else: + Kxx = self.kern.K(Xnew)#FIXME + #raise NotImplementedError + #Kxx = self.kern.Kdiag(Xnew) + + # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) + + # Ci = I + (RPT0)Di(RPT0).T + # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T + # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T + # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T + # = I - V.T * V + U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) + V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) + C = np.eye(self.M) - np.dot(V.T,V) + mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) + #self.C = C + #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T + #self.mu_u = mu_u + #self.U = U + # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) + mu_H = np.dot(mu_u,self.mu) + self.mu_H = mu_H + Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) + # q(f_star|y) = N(f_star|mu_star,sigma2_star) + KR0T = np.dot(Kx.T,self.Lmi.T) + mu_star = np.dot(KR0T,mu_H) + sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) + vdiag = np.diag(sigma2_star) + return mu_star[:,None],vdiag[:,None] + else: + """Internal helper function for making predictions, does not account for normalization""" + Kx = self.kern.K(self.Z, Xnew) + mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) + if full_cov: + Kxx = self.kern.K(Xnew) + var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting + else: + Kxx = self.kern.Kdiag(Xnew) + var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + + return mu,var[:,None] From a375cb9a1b72b9ce8b7a58a52ffc51373db6eb52 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Fri, 22 Mar 2013 16:04:51 +0000 Subject: [PATCH 03/21] generalized fitc + examples --- GPy/examples/fitc_class_01.py | 53 +++++++++++++++++++++ GPy/examples/fitc_class_02.py | 66 ++++++++++++++++++++++++++ GPy/examples/fitc_regr_01.py | 53 +++++++++++++++++++++ GPy/models/generalized_FITC.py | 84 ++++++++++++++++++---------------- 4 files changed, 216 insertions(+), 40 deletions(-) create mode 100644 GPy/examples/fitc_class_01.py create mode 100644 GPy/examples/fitc_class_02.py create mode 100644 GPy/examples/fitc_regr_01.py diff --git a/GPy/examples/fitc_class_01.py b/GPy/examples/fitc_class_01.py new file mode 100644 index 00000000..413d0c14 --- /dev/null +++ b/GPy/examples/fitc_class_01.py @@ -0,0 +1,53 @@ +import pylab as pb +import numpy as np +import GPy +pb.ion() +pb.close('all') + +""" +Simple 1D classification example +:param seed : seed value for data generation (default is 4). +:type seed: int +""" +seed=10000 + +data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) +X = data['X'] +Y = data['Y'][:, 0:1] +Y[Y == -1] = 0 + + + +#X = np.vstack((np.random.uniform(0,10,(10,1)),np.random.uniform(7,17,(10,1)),np.random.uniform(15,25,(10,1)))) +#Y = np.vstack((np.zeros((10,1)),np.ones((10,1)),np.zeros((10,1)))) + +# Kernel object +kernel = GPy.kern.rbf(1) + GPy.kern.white(1) + +# Likelihood object +distribution = GPy.likelihoods.likelihood_functions.probit() +likelihood = GPy.likelihoods.EP(Y,distribution) + +Z = np.random.uniform(X.min(),X.max(),(10,1)) +#Z = np.array([0,20])[:,None] +print Z + +# Model definition +m = GPy.models.generalized_FITC(X,likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=False) +m.set('len',2.) + +m.ensure_default_constraints() +# Optimize +#m.constrain_fixed('iip') +m.update_likelihood_approximation() +print m.checkgrad(verbose=1) +# Parameters optimization: +m.optimize() +#m.EPEM() #FIXME + +# Plot +pb.subplot(211) +m.plot_f() +pb.subplot(212) +m.plot() +print(m) diff --git a/GPy/examples/fitc_class_02.py b/GPy/examples/fitc_class_02.py new file mode 100644 index 00000000..cd8d6700 --- /dev/null +++ b/GPy/examples/fitc_class_02.py @@ -0,0 +1,66 @@ +import pylab as pb +import numpy as np +import GPy +pb.close('all') + +seed=10000 +"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. + +:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. +:param seed : seed value for data generation. +:type seed: int +:param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). +:type inducing: int +""" + +data = GPy.util.datasets.crescent_data(seed=seed) + +# Kernel object +kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) + +# Likelihood object +distribution = GPy.likelihoods.likelihood_functions.probit() +likelihood = GPy.likelihoods.EP(data['Y'],distribution) + +sample = np.random.randint(0,data['X'].shape[0],10) +Z = data['X'][sample,:] +# create sparse GP EP model +#m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) +m = GPy.models.generalized_FITC(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) +m.ensure_default_constraints() +m.set('len',10.) + +m.update_likelihood_approximation() + +# optimize +m.optimize() +print(m) + +# plot +m.plot() +fitc = m + +pb.figure() +# Kernel object +kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) + +# Likelihood object +distribution = GPy.likelihoods.likelihood_functions.probit() +likelihood = GPy.likelihoods.EP(data['Y'],distribution) + +sample = np.random.randint(0,data['X'].shape[0],10) +Z = data['X'][sample,:] +# create sparse GP EP model +m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) +m.ensure_default_constraints() +m.set('len',10.) + +m.update_likelihood_approximation() + +# optimize +m.optimize() +print(m) + +# plot +m.plot() +variational = m diff --git a/GPy/examples/fitc_regr_01.py b/GPy/examples/fitc_regr_01.py new file mode 100644 index 00000000..815e31a7 --- /dev/null +++ b/GPy/examples/fitc_regr_01.py @@ -0,0 +1,53 @@ +import pylab as pb +import numpy as np +import GPy +pb.ion() +pb.close('all') + +N = 400 +M = 10 +# sample inputs and outputs +X = np.random.uniform(-3.,3.,(N,1)) +Y = np.sin(X)+np.random.randn(N,1)*0.05 + +"""Run a 1D example of a sparse GP regression.""" +""" +rbf = GPy.kern.rbf(1) +noise = GPy.kern.white(1) +kernel = rbf + noise +Z = np.random.uniform(-3,3,(M,1)) +likelihood = GPy.likelihoods.Gaussian(Y) +m = GPy.models.sparse_GP(X, likelihood, kernel, Z) +m.scale_factor=10000 +m.constrain_positive('(variance|lengthscale|precision)') +m.checkgrad(verbose=1) +m.optimize('tnc', messages = 1) +pb.figure() +m.plot() + +variational = m +""" + +# construct kernel +rbf = GPy.kern.rbf(1) +noise = GPy.kern.white(1) +kernel = rbf + noise +#Z = np.random.uniform(-3,3,(M,1)) +Z = variational.Z +likelihood = GPy.likelihoods.Gaussian(Y) +# create simple GP model +m = GPy.models.generalized_FITC(X, likelihood, kernel, Z=Z) +m.constrain_positive('(variance|lengthscale|precision)') +#m.constrain_fixed('iip') +m.checkgrad(verbose=1) +m.optimize('tnc', messages = 1) +#pb.figure() +#m.plot() +""" +Xnew = X.copy().flatten() +Xnew.sort() +Xnew = Xnew[:,None] +mean,var,low,up = m.predict(Xnew) +GPy.util.plot.gpplot(Xnew,mean,low,up) +fitc = m +""" diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index 739059a0..505f442a 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -32,14 +32,8 @@ class generalized_FITC(sparse_GP): def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False): - if type(Z) == int: - #FIXME - self.M = Z - self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1) - elif type(Z) == np.ndarray: - self.Z = Z - self.M = self.Z.shape[0] - + self.Z = Z + self.M = self.Z.shape[0] self._precision = likelihood.precision sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False) @@ -64,25 +58,26 @@ class generalized_FITC(sparse_GP): else: self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self._precision = self.likelihood.precision # Save the true precision - self.likelihood.precision = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation + self.likelihood.precision = self._precision/(1. + self._precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation self._set_params(self._get_params()) # update the GP - def _FITC_computations(self): """ FITC approximation doesn't have the correction term in the log-likelihood bound, - but adds a diagonal term to the covariance matrix. + but adds a diagonal term to the covariance matrix: diag(Knn - Qnn). This function: - - computes the diagonal term - - eliminates the extra terms computed in the sparse_GP approximation + - computes the FITC diagonal term + - removes the extra terms computed in the sparse_GP approximation - computes the likelihood gradients wrt the true precision. """ + #NOTE the true precison is now '_precison' not 'precision' if self.likelihood.is_heteroscedastic: + # Compute generalized FITC's diagonal term of the covariance self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) self.Diag0 = self.psi0 - np.diag(self.Qnn) - self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten()) + self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T self.RPT0 = np.dot(self.Lmi,self.psi1) self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) @@ -92,14 +87,13 @@ class generalized_FITC(sparse_GP): self.w = self.Diag * self.likelihood.v_tilde self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde)) self.mu = self.w + np.dot(self.P,self.gamma) - self.mu_tilde = (self.likelihood.v_tilde/self.likelihood.tau_tilde)[:,None] # Remove extra term from dL_dpsi1 self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB else: + raise NotImplementedError, "homoscedastic fitc not implemented" # Remove extra term from dL_dpsi1 - self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB - + #self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB sf = self.scale_factor sf2 = sf**2 @@ -110,18 +104,16 @@ class generalized_FITC(sparse_GP): #the partial derivative vector for the likelihood if self.likelihood.Nparams == 0: - #save computation here. self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: - raise NotImplementedError, "heteroscedatic derivates not implemented" + raise NotImplementedError, "heteroscedastic derivates not implemented" else: + raise NotImplementedError, "homoscedastic derivatives not implemented" #likelihood is not heterscedatic - self.partial_for_likelihood = - 0.5 * self.N*self.D*self._precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self._precision**2 - self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self._precision - self.partial_for_likelihood += self._precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) - + #self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 + #self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision + #self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) #TODO partial derivative vector for the likelihood not implemented - #NOTE the true precison is now '_precison' not 'precision' def dL_dtheta(self): """ @@ -147,23 +139,15 @@ class generalized_FITC(sparse_GP): D = 0.5*np.trace(self.Cpsi1VVpsi1) return A+C+D - def _raw_predict(self, Xnew, slices, full_cov=True): + def _raw_predict(self, Xnew, slices, full_cov=False): if self.likelihood.is_heteroscedastic: """ - Make a prediction for the vsGP model + Make a prediction for the generalized FITC model Arguments --------- X : Input prediction data - Nx1 numpy array (floats) """ - Kx = self.kern.K(self.Z, Xnew) - if full_cov: - Kxx = self.kern.K(Xnew) - else: - Kxx = self.kern.K(Xnew)#FIXME - #raise NotImplementedError - #Kxx = self.kern.Kdiag(Xnew) - # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # Ci = I + (RPT0)Di(RPT0).T @@ -184,13 +168,19 @@ class generalized_FITC(sparse_GP): self.mu_H = mu_H Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) # q(f_star|y) = N(f_star|mu_star,sigma2_star) + Kx = self.kern.K(self.Z, Xnew) KR0T = np.dot(Kx.T,self.Lmi.T) mu_star = np.dot(KR0T,mu_H) - sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) - vdiag = np.diag(sigma2_star) - return mu_star[:,None],vdiag[:,None] - else: - """Internal helper function for making predictions, does not account for normalization""" + if full_cov: + Kxx = self.kern.K(Xnew) + var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) + else: + Kxx = self.kern.Kdiag(Xnew) + Kxx_ = self.kern.K(Xnew) + var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) + var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] + return mu_star[:,None],var + """ Kx = self.kern.K(self.Z, Xnew) mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) if full_cov: @@ -199,5 +189,19 @@ class generalized_FITC(sparse_GP): else: Kxx = self.kern.Kdiag(Xnew) var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) - + a = kjk return mu,var[:,None] + """ + else: + raise NotImplementedError, "homoscedastic fitc not implemented" + """ + Kx = self.kern.K(self.Z, Xnew) + mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) + if full_cov: + Kxx = self.kern.K(Xnew) + var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting + else: + Kxx = self.kern.Kdiag(Xnew) + var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + return mu,var[:,None] + """ From 35a099f4edfa1605e2b02d943438dba1b6454413 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Fri, 22 Mar 2013 19:03:46 +0000 Subject: [PATCH 04/21] minor changes --- GPy/examples/fitc_class_01.py | 4 ++-- GPy/examples/fitc_class_02.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/GPy/examples/fitc_class_01.py b/GPy/examples/fitc_class_01.py index 413d0c14..06c948b5 100644 --- a/GPy/examples/fitc_class_01.py +++ b/GPy/examples/fitc_class_01.py @@ -42,8 +42,8 @@ m.ensure_default_constraints() m.update_likelihood_approximation() print m.checkgrad(verbose=1) # Parameters optimization: -m.optimize() -#m.EPEM() #FIXME +#m.optimize() +m.pseudo_EM() #FIXME # Plot pb.subplot(211) diff --git a/GPy/examples/fitc_class_02.py b/GPy/examples/fitc_class_02.py index cd8d6700..71e10081 100644 --- a/GPy/examples/fitc_class_02.py +++ b/GPy/examples/fitc_class_02.py @@ -25,15 +25,15 @@ likelihood = GPy.likelihoods.EP(data['Y'],distribution) sample = np.random.randint(0,data['X'].shape[0],10) Z = data['X'][sample,:] # create sparse GP EP model -#m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) m = GPy.models.generalized_FITC(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) m.ensure_default_constraints() m.set('len',10.) -m.update_likelihood_approximation() +#m.update_likelihood_approximation() # optimize -m.optimize() +#m.optimize() +m.pseudo_EM() print(m) # plot @@ -48,17 +48,18 @@ kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) distribution = GPy.likelihoods.likelihood_functions.probit() likelihood = GPy.likelihoods.EP(data['Y'],distribution) -sample = np.random.randint(0,data['X'].shape[0],10) -Z = data['X'][sample,:] +#sample = np.random.randint(0,data['X'].shape[0],10) +#Z = data['X'][sample,:] # create sparse GP EP model m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) m.ensure_default_constraints() m.set('len',10.) -m.update_likelihood_approximation() +#m.update_likelihood_approximation() # optimize -m.optimize() +#m.optimize() +m.pseudo_EM() print(m) # plot From c72d2b99d99e8d97d827def1d3e180fc6d022165 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 27 Mar 2013 13:52:51 +0000 Subject: [PATCH 05/21] print iteration number --- GPy/core/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/core/model.py b/GPy/core/model.py index cc8ae2c4..0804f277 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -467,3 +467,5 @@ class model(parameterised): if ll_change < epsilon: stop = True iteration += 1 + if stop: + print "%s iterations." %iteration From adfa6de1d87baced85215985da9ed2ce671c2a93 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 27 Mar 2013 15:03:46 +0000 Subject: [PATCH 06/21] rbf now works in a more memory friendly fashion --- GPy/examples/regression.py | 2 +- GPy/kern/rbf.py | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index f5d0d3b1..7de95d20 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -66,7 +66,7 @@ def silhouette(): # optimize m.ensure_default_constraints() - m.optimize() + m.optimize(messages=True) print(m) return m diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index ae587202..25e18a41 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -85,12 +85,10 @@ class rbf(kernpart): def dK_dtheta(self,dL_dK,X,X2,target): self._K_computations(X,X2) target[0] += np.sum(self._K_dvar*dL_dK) - if self.ARD == True: - dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale - target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) + if self.ARD: + [np.add(target[1+q:2+q],self.variance/self.lengthscale[q]**3*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)] else: - target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*dL_dK) - #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK) + target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK) def dKdiag_dtheta(self,dL_dKdiag,X,target): #NB: derivative of diagonal elements wrt lengthscale is 0 @@ -98,7 +96,7 @@ class rbf(kernpart): def dK_dX(self,dL_dK,X,X2,target): self._K_computations(X,X2) - _K_dist = X[:,None,:]-X2[None,:,:] + _K_dist = X[:,None,:]-X2[None,:,:] #don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) target += np.sum(dK_dX*dL_dK.T[:,:,None],0) @@ -183,16 +181,18 @@ class rbf(kernpart): #---------------------------------------# def _K_computations(self,X,X2): - if not (np.all(X==self._X) and np.all(X2==self._X2)): - self._X = X - self._X2 = X2 + if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())): + self._X = X.copy() + self._X2 = X2.copy() + self._params == self._get_params().copy() if X2 is None: X2 = X - self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy - self._params = np.empty(shape=(1,0)) #ensure the next section gets called - if not np.all(self._params == self._get_params()): - self._params == self._get_params() - self._K_dist2 = np.square(self._K_dist/self.lengthscale) - self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) + #never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy + #_K_dist = X[:,None,:]-X2[None,:,:] + #_K_dist2 = np.square(_K_dist/self.lengthscale) + X = X/self.lengthscale + X2 = X2/self.lengthscale + self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:]) + self._K_dvar = np.exp(-0.5*self._K_dist2) def _psi_computations(self,Z,mu,S): #here are the "statistics" for psi1 and psi2 From 67d3ba6700983f4adf557d7120ab73ee86d0c62f Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 27 Mar 2013 15:05:50 +0000 Subject: [PATCH 07/21] small changes --- GPy/examples/fitc_class_01.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/GPy/examples/fitc_class_01.py b/GPy/examples/fitc_class_01.py index 06c948b5..38c9da63 100644 --- a/GPy/examples/fitc_class_01.py +++ b/GPy/examples/fitc_class_01.py @@ -11,15 +11,15 @@ Simple 1D classification example """ seed=10000 -data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) -X = data['X'] -Y = data['Y'][:, 0:1] -Y[Y == -1] = 0 +#data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) +#X = data['X'] +#Y = data['Y'][:, 0:1] +#Y[Y == -1] = 0 -#X = np.vstack((np.random.uniform(0,10,(10,1)),np.random.uniform(7,17,(10,1)),np.random.uniform(15,25,(10,1)))) -#Y = np.vstack((np.zeros((10,1)),np.ones((10,1)),np.zeros((10,1)))) +X = np.vstack((np.random.uniform(0,10,(10,1)),np.random.uniform(7,17,(10,1)),np.random.uniform(15,25,(10,1)))) +Y = np.vstack((np.zeros((10,1)),np.ones((10,1)),np.zeros((10,1)))) # Kernel object kernel = GPy.kern.rbf(1) + GPy.kern.white(1) From bf9e6cbd5f4680152727e908118418ca37a244db Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 27 Mar 2013 15:06:21 +0000 Subject: [PATCH 08/21] Not needed --- GPy/examples/fitc_class_01.py | 53 --------------------------- GPy/examples/fitc_class_02.py | 67 ----------------------------------- GPy/examples/fitc_regr_01.py | 53 --------------------------- 3 files changed, 173 deletions(-) delete mode 100644 GPy/examples/fitc_class_01.py delete mode 100644 GPy/examples/fitc_class_02.py delete mode 100644 GPy/examples/fitc_regr_01.py diff --git a/GPy/examples/fitc_class_01.py b/GPy/examples/fitc_class_01.py deleted file mode 100644 index 38c9da63..00000000 --- a/GPy/examples/fitc_class_01.py +++ /dev/null @@ -1,53 +0,0 @@ -import pylab as pb -import numpy as np -import GPy -pb.ion() -pb.close('all') - -""" -Simple 1D classification example -:param seed : seed value for data generation (default is 4). -:type seed: int -""" -seed=10000 - -#data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) -#X = data['X'] -#Y = data['Y'][:, 0:1] -#Y[Y == -1] = 0 - - - -X = np.vstack((np.random.uniform(0,10,(10,1)),np.random.uniform(7,17,(10,1)),np.random.uniform(15,25,(10,1)))) -Y = np.vstack((np.zeros((10,1)),np.ones((10,1)),np.zeros((10,1)))) - -# Kernel object -kernel = GPy.kern.rbf(1) + GPy.kern.white(1) - -# Likelihood object -distribution = GPy.likelihoods.likelihood_functions.probit() -likelihood = GPy.likelihoods.EP(Y,distribution) - -Z = np.random.uniform(X.min(),X.max(),(10,1)) -#Z = np.array([0,20])[:,None] -print Z - -# Model definition -m = GPy.models.generalized_FITC(X,likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=False) -m.set('len',2.) - -m.ensure_default_constraints() -# Optimize -#m.constrain_fixed('iip') -m.update_likelihood_approximation() -print m.checkgrad(verbose=1) -# Parameters optimization: -#m.optimize() -m.pseudo_EM() #FIXME - -# Plot -pb.subplot(211) -m.plot_f() -pb.subplot(212) -m.plot() -print(m) diff --git a/GPy/examples/fitc_class_02.py b/GPy/examples/fitc_class_02.py deleted file mode 100644 index 71e10081..00000000 --- a/GPy/examples/fitc_class_02.py +++ /dev/null @@ -1,67 +0,0 @@ -import pylab as pb -import numpy as np -import GPy -pb.close('all') - -seed=10000 -"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. - -:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. -:param seed : seed value for data generation. -:type seed: int -:param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). -:type inducing: int -""" - -data = GPy.util.datasets.crescent_data(seed=seed) - -# Kernel object -kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) - -# Likelihood object -distribution = GPy.likelihoods.likelihood_functions.probit() -likelihood = GPy.likelihoods.EP(data['Y'],distribution) - -sample = np.random.randint(0,data['X'].shape[0],10) -Z = data['X'][sample,:] -# create sparse GP EP model -m = GPy.models.generalized_FITC(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) -m.ensure_default_constraints() -m.set('len',10.) - -#m.update_likelihood_approximation() - -# optimize -#m.optimize() -m.pseudo_EM() -print(m) - -# plot -m.plot() -fitc = m - -pb.figure() -# Kernel object -kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) - -# Likelihood object -distribution = GPy.likelihoods.likelihood_functions.probit() -likelihood = GPy.likelihoods.EP(data['Y'],distribution) - -#sample = np.random.randint(0,data['X'].shape[0],10) -#Z = data['X'][sample,:] -# create sparse GP EP model -m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z) -m.ensure_default_constraints() -m.set('len',10.) - -#m.update_likelihood_approximation() - -# optimize -#m.optimize() -m.pseudo_EM() -print(m) - -# plot -m.plot() -variational = m diff --git a/GPy/examples/fitc_regr_01.py b/GPy/examples/fitc_regr_01.py deleted file mode 100644 index 815e31a7..00000000 --- a/GPy/examples/fitc_regr_01.py +++ /dev/null @@ -1,53 +0,0 @@ -import pylab as pb -import numpy as np -import GPy -pb.ion() -pb.close('all') - -N = 400 -M = 10 -# sample inputs and outputs -X = np.random.uniform(-3.,3.,(N,1)) -Y = np.sin(X)+np.random.randn(N,1)*0.05 - -"""Run a 1D example of a sparse GP regression.""" -""" -rbf = GPy.kern.rbf(1) -noise = GPy.kern.white(1) -kernel = rbf + noise -Z = np.random.uniform(-3,3,(M,1)) -likelihood = GPy.likelihoods.Gaussian(Y) -m = GPy.models.sparse_GP(X, likelihood, kernel, Z) -m.scale_factor=10000 -m.constrain_positive('(variance|lengthscale|precision)') -m.checkgrad(verbose=1) -m.optimize('tnc', messages = 1) -pb.figure() -m.plot() - -variational = m -""" - -# construct kernel -rbf = GPy.kern.rbf(1) -noise = GPy.kern.white(1) -kernel = rbf + noise -#Z = np.random.uniform(-3,3,(M,1)) -Z = variational.Z -likelihood = GPy.likelihoods.Gaussian(Y) -# create simple GP model -m = GPy.models.generalized_FITC(X, likelihood, kernel, Z=Z) -m.constrain_positive('(variance|lengthscale|precision)') -#m.constrain_fixed('iip') -m.checkgrad(verbose=1) -m.optimize('tnc', messages = 1) -#pb.figure() -#m.plot() -""" -Xnew = X.copy().flatten() -Xnew.sort() -Xnew = Xnew[:,None] -mean,var,low,up = m.predict(Xnew) -GPy.util.plot.gpplot(Xnew,mean,low,up) -fitc = m -""" From 85810030c544cd06c97ef9712e34c01071759c5a Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 27 Mar 2013 15:08:25 +0000 Subject: [PATCH 09/21] small efficiency changes in rbf --- GPy/kern/rbf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 25e18a41..84f7d68d 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -86,9 +86,9 @@ class rbf(kernpart): self._K_computations(X,X2) target[0] += np.sum(self._K_dvar*dL_dK) if self.ARD: - [np.add(target[1+q:2+q],self.variance/self.lengthscale[q]**3*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)] + [np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)] else: - target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK) + target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK) def dKdiag_dtheta(self,dL_dKdiag,X,target): #NB: derivative of diagonal elements wrt lengthscale is 0 @@ -97,7 +97,7 @@ class rbf(kernpart): def dK_dX(self,dL_dK,X,X2,target): self._K_computations(X,X2) _K_dist = X[:,None,:]-X2[None,:,:] #don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. - dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) + dK_dX = (-self.variance/self.lengthscale2)*np.transpose(self._K_dvar[:,:,np.newaxis]*_K_dist,(1,0,2)) target += np.sum(dK_dX*dL_dK.T[:,:,None],0) def dKdiag_dX(self,dL_dKdiag,X,target): From 0f168a16670430af60d9885f9498aec9f9da99c8 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 1 Apr 2013 11:01:16 +0100 Subject: [PATCH 10/21] Added mocap.py for loading in motion capture data. --- GPy/util/mocap.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 GPy/util/mocap.py diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py new file mode 100644 index 00000000..7e4e9e11 --- /dev/null +++ b/GPy/util/mocap.py @@ -0,0 +1,78 @@ +import os +import numpy as np + +def load_text_data(dataset, directory, centre=True): + """Load in a data set of marker points from the Ohio State University C3D motion capture files (http://accad.osu.edu/research/mocap/mocap_data.htm).""" + + points, point_names = parse_text(os.path.join(directory, dataset + '.txt'))[0:2] + # Remove markers where there is a NaN + present_index = [i for i in range(points[0].shape[1]) if not (np.any(np.isnan(points[0][:, i])) or np.any(np.isnan(points[0][:, i])) or np.any(np.isnan(points[0][:, i])))] + + # Concatanate the X, Y and Z markers together + Y = np.concatenate((points[0][:, present_index], points[1][:, present_index], points[2][:, present_index]), axis=1) + if centre: + Y = Y - Y.mean(axis=0) + Y = Y/400. + return Y + +def parse_text(file_name): + """Parse data from Ohio State University text mocap files (http://accad.osu.edu/research/mocap/mocap_data.htm).""" + + # Read the header + fid = open(file_name, 'r') + point_names = np.array(fid.readline().split())[2:-1:3] + fid.close() + for i in range(len(point_names)): + point_names[i] = point_names[i][0:-3] + + # Read the matrix data + S = np.loadtxt(file_name, skiprows=1) + field = np.uint(S[:, 0]) + times = S[:, 1] + S = S[:, 2:] + + # Set the -9999.99 markers to be not present + S[S==-9999.99] = np.NaN + + # Store x, y and z in different arrays + points = [] + points.append(S[:, 0:-1:3]) + points.append(S[:, 1:-1:3]) + points.append(S[:, 2:-1:3]) + + return points, point_names, times + +#def read_connections(): + + +# fid = fopen(fileName); +# i = 1; +# rem = fgets(fid); +# while(rem ~= -1) +# [token, rem] = strtok(rem, ','); +# connections{i, 1} = fliplr(deblank(fliplr(deblank(token)))); +# [token, rem] = strtok(rem, ','); +# connections{i, 2} = fliplr(deblank(fliplr(deblank(token)))); +# i = i + 1; +# rem = fgets(fid); +# end + +# connect = zeros(length(pointNames)); +# fclose(fid); +# for i = 1:size(connections, 1); +# for j = 1:length(pointNames) +# if strcmp(pointNames{j}, connections{i, 1}) | ... +# strcmp(pointNames{j}, connections{i, 2}) +# for k = 1:length(pointNames) +# if k == j +# break +# end +# if strcmp(pointNames{k}, connections{i, 1}) | ... +# strcmp(pointNames{k}, connections{i, 2}) +# connect(j, k) = 1; +# end +# end +# end +# end +# end +# connect = sparse(connect); From 3fd0672092f58551ce48fb739709868d45deb49c Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 1 Apr 2013 20:20:01 +0100 Subject: [PATCH 11/21] Added base implementation of data visualization framework for use with GP-LVM. --- GPy/util/visualize.py | 159 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 GPy/util/visualize.py diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py new file mode 100644 index 00000000..eba89b8a --- /dev/null +++ b/GPy/util/visualize.py @@ -0,0 +1,159 @@ +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import GPy +import numpy as np + +class lvm: + def __init__(self, model, data_visualize, ax): + self.cid = ax.figure.canvas.mpl_connect('button_press_event', self.on_click) + self.cid = ax.figure.canvas.mpl_connect('motion_notify_event', self.on_move) + self.data_visualize = data_visualize + self.model = model + self.ax = ax + self.called = False + self.move_on = False + + def on_click(self, event): + print 'click', event.xdata, event.ydata + if event.inaxes!=self.ax: return + self.move_on = not self.move_on + print + if self.called: + self.xs.append(event.xdata) + self.ys.append(event.ydata) + self.line.set_data(self.xs, self.ys) + self.line.figure.canvas.draw() + else: + self.xs = [event.xdata] + self.ys = [event.ydata] + self.line, = ax.plot(event.xdata, event.ydata) + self.called = True + def on_move(self, event): + if event.inaxes!=self.ax: return + if self.called and self.move_on: + # Call modify code on move + #print 'move', event.xdata, event.ydata + latent_values = np.array((event.xdata, event.ydata)) + y = self.model.predict(latent_values)[0] + self.data_visualize.modify(y) + #print 'y', y + +class data_show: + """The data show class is a base class which describes how to visualize a particular data set. For example, motion capture data can be plotted as a stick figure, or images are shown using imshow. This class enables latent to data visualizations for the GP-LVM.""" + + def __init__(self, vals, axis=None): + self.vals = vals + # If no axes are defined, create some. + if axis==None: + fig = plt.figure() + self.axis = fig.add_subplot(111) + else: + self.axis = axis + + def modify(self, vals): + raise NotImplementedError, "this needs to be implemented to use the data_show class" + +class vector_show(data_show): + """A base visualization class that just shows a data vector as a plot of vector elements alongside their indices.""" + def __init__(self, vals, axis=None): + data_show.__init__(self, vals, axis) + self.vals = vals.T + self.handle = plt.plot(np.arange(0, len(vals))[:, None], self.vals)[0] + + def modify(self, vals): + xdata, ydata = self.handle.get_data() + self.vals = vals.T + self.handle.set_data(xdata, self.vals) + self.axis.figure.canvas.draw() + +class image_show(data_show): + """Show a data vector as an image.""" + def __init__(self, vals, axis=None, dimensions=(16,16), transpose=False, invert=False): + data_show.__init__(self, vals, axis) + self.dimensions = dimensions + self.fig_display = plt.figure() + self.set_image(vals) + self.handle = plt.imshow(self.vals) + self.transpose = transpose + self.invert = invert + + def modify(self, vals): + self.set_image(vals) + self.handle.set_array(self.vals) + self.axis.figure.canvas.draw() + + def set_image(self, vals): + self.vals = np.reshape(vals, self.dimensions) + if self.transpose: + self.vals = self.vals.T + if self.invert: + self.vals = -self.vals + +class stick_show(data_show): + """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect.""" + + def __init__(self, vals, axis=None, connect=None): + if axis==None: + fig = plt.figure() + axis = fig.add_subplot(111, projection='3d') + data_show.__init__(self, vals, axis) + self.vals = vals.reshape((3, vals.shape[1]/3)).T + self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()]) + self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()]) + self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()]) + self.points_handle = self.axis.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2]) + self.axis.set_xlim(self.x_lim) + self.axis.set_ylim(self.y_lim) + self.axis.set_zlim(self.z_lim) + self.axis.set_aspect(1) + self.axis.autoscale(enable=False) + + self.connect = connect + if not self.connect==None: + x = [] + y = [] + z = [] + self.I, self.J = np.nonzero(self.connect) + for i in range(len(self.I)): + x.append(self.vals[self.I[i], 0]) + x.append(self.vals[self.J[i], 0]) + x.append(np.NaN) + y.append(self.vals[self.I[i], 1]) + y.append(self.vals[self.J[i], 1]) + y.append(np.NaN) + z.append(self.vals[self.I[i], 2]) + z.append(self.vals[self.J[i], 2]) + z.append(np.NaN) + self.line_handle = self.axis.plot(np.array(x), np.array(y), np.array(z), 'b-') + self.axis.figure.canvas.draw() + + def modify(self, vals): + self.points_handle.remove() + self.line_handle[0].remove() + self.vals = vals.reshape((3, vals.shape[1]/3)).T + self.points_handle = self.axis.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2]) + self.axis.set_xlim(self.x_lim) + self.axis.set_ylim(self.y_lim) + self.axis.set_zlim(self.z_lim) + self.line_handle = [] + if not self.connect==None: + x = [] + y = [] + z = [] + self.I, self.J = np.nonzero(self.connect) + for i in range(len(self.I)): + x.append(self.vals[self.I[i], 0]) + x.append(self.vals[self.J[i], 0]) + x.append(np.NaN) + y.append(self.vals[self.I[i], 1]) + y.append(self.vals[self.J[i], 1]) + y.append(np.NaN) + z.append(self.vals[self.I[i], 2]) + z.append(self.vals[self.J[i], 2]) + z.append(np.NaN) + self.line_handle = self.axis.plot(np.array(x), np.array(y), np.array(z), 'b-') + + self.axis.figure.canvas.draw() + + + From fce4dd7fdea42d24abb14396d02bf1bf475e15d7 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 2 Apr 2013 02:20:53 +0200 Subject: [PATCH 12/21] Further edits on visualization code for faces example. --- GPy/core/model.py | 8 +-- GPy/examples/dimensionality_reduction.py | 50 +++++++++++++++++ GPy/examples/regression.py | 6 +- GPy/models/GPLVM.py | 5 +- GPy/util/__init__.py | 2 + GPy/util/datasets.py | 45 ++++++++++----- GPy/util/linalg.py | 3 +- GPy/util/mocap.py | 70 +++++++++++------------- GPy/util/visualize.py | 42 ++++++++------ 9 files changed, 151 insertions(+), 80 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 0804f277..9216aea6 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -23,13 +23,13 @@ class model(parameterised): self._set_params(self._get_params()) self.preferred_optimizer = 'tnc' def _get_params(self): - raise NotImplementedError, "this needs to be implemented to utilise the model class" + raise NotImplementedError, "this needs to be implemented to use the model class" def _set_params(self,x): - raise NotImplementedError, "this needs to be implemented to utilise the model class" + raise NotImplementedError, "this needs to be implemented to use the model class" def log_likelihood(self): - raise NotImplementedError, "this needs to be implemented to utilise the model class" + raise NotImplementedError, "this needs to be implemented to use the model class" def _log_likelihood_gradients(self): - raise NotImplementedError, "this needs to be implemented to utilise the model class" + raise NotImplementedError, "this needs to be implemented to use the model class" def set_prior(self,which,what): """ diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index d7610acb..164d04ae 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -3,6 +3,8 @@ import numpy as np import pylab as pb +from matplotlib import pyplot as plt + import GPy default_seed = np.random.seed(123344) @@ -55,3 +57,51 @@ def GPLVM_oil_100(): print(m) m.plot_latent(labels=data['Y'].argmax(axis=1)) return m + +def oil_100(): + data = GPy.util.datasets.oil_100() + m = GPy.models.GPLVM(data['X'], 2) + + # optimize + m.ensure_default_constraints() + m.optimize(messages=1, max_iters=2) + + # plot + print(m) + #m.plot_latent(labels=data['Y'].argmax(axis=1)) + return m + +def brendan_faces(): + data = GPy.util.datasets.brendan_faces() + Y = data['Y'][0:500, :] + m = GPy.models.GPLVM(Y, 2, init='rand') + + # optimize + m.ensure_default_constraints() + m.optimize(messages=1, max_f_eval=40) + + ax = m.plot_latent() + y = m.likelihood.Y[0,:] + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) + lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax) + raw_input('Press enter to finish') + plt.close('all') + + return m + +def stick(): + data = GPy.util.datasets.stick() + m = GPy.models.GPLVM(data['Y'], 2, init='rand') + + # optimize + m.ensure_default_constraints() + m.optimize(messages=1, max_f_eval=10000) + + ax = m.plot_latent() + y = m.likelihood.Y[0,:] + data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) + lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax) + raw_input('Press enter to finish') + plt.close('all') + + return m diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 7de95d20..1a35df2f 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -73,7 +73,7 @@ def silhouette(): def coregionalisation_toy2(): """ - A simple demonstration of coregionalisation on two sinusoidal functions + A simple demonstration of coregionalisation on two sinusoidal functions. """ X1 = np.random.rand(50,1)*8 X2 = np.random.rand(30,1)*5 @@ -106,7 +106,7 @@ def coregionalisation_toy2(): def coregionalisation_toy(): """ - A simple demonstration of coregionalisation on two sinusoidal functions + A simple demonstration of coregionalisation on two sinusoidal functions. """ X1 = np.random.rand(50,1)*8 X2 = np.random.rand(30,1)*5 @@ -139,7 +139,7 @@ def coregionalisation_toy(): def coregionalisation_sparse(): """ - A simple demonstration of coregionalisation on two sinusoidal functions + A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations. """ X1 = np.random.rand(500,1)*8 X2 = np.random.rand(300,1)*5 diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index 32594594..03e9b715 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -81,6 +81,7 @@ class GPLVM(GP): k = [p for p in self.kern.parts if p.name in ['rbf','linear']] if (not len(k)==1) or (not k[0].ARD): raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" + input_1, input_2 = self.lengthscale_order() k = k[0] if k.name=='rbf': input_1, input_2 = np.argsort(k.lengthscale)[:2] @@ -92,7 +93,7 @@ class GPLVM(GP): Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) Xtest_full[:, :2] = Xtest mu, var, low, up = self.predict(Xtest_full) - var = var[:, :2] + var = var.mean(axis=1) # this was var[:, :2] edit by Neil pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') @@ -122,4 +123,4 @@ class GPLVM(GP): pb.xlim(xmin[0],xmax[0]) pb.ylim(xmin[1],xmax[1]) - return input_1, input_2 + return pb.gca() #input_1, input_2 temporary removal, to return axes. diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index c91557d0..56dbd5b9 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -10,4 +10,6 @@ import Tango import misc import warping_functions import datasets +import mocap +import visualize import decorators diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index ed808f1b..932690ec 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -15,12 +15,12 @@ def sample_class(f): return c def della_gatta_TRP63_gene_expression(gene_number=None): - matData = scipy.io.loadmat(os.path.join(data_path, 'DellaGattadata.mat')) - X = np.double(matData['timepoints']) + mat_data = scipy.io.loadmat(os.path.join(data_path, 'DellaGattadata.mat')) + X = np.double(mat_data['timepoints']) if gene_number == None: - Y = matData['exprs_tp53_RMA'] + Y = mat_data['exprs_tp53_RMA'] else: - Y = matData['exprs_tp53_RMA'][:, gene_number] + Y = mat_data['exprs_tp53_RMA'][:, gene_number] if len(Y.shape) == 1: Y = Y[:, None] return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."} @@ -60,28 +60,42 @@ def pumadyn(seed=default_seed): return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "The puma robot arm data with 32 inputs. This data is the non linear case with medium noise (pumadyn-32nm). For training 7,168 examples are sampled without replacement."} +def brendan_faces(): + mat_data = scipy.io.loadmat(os.path.join(data_path, 'frey_rawface.mat')) + Y = mat_data['ff'].T + return {'Y': Y, 'info': "Face data made available by Brendan Frey"} + + + def silhouette(): # Ankur Agarwal and Bill Trigg's silhoutte data. - matData = scipy.io.loadmat(os.path.join(data_path, 'mocap', 'ankur', 'ankurDataPoseSilhouette.mat')) - inMean = np.mean(matData['Y']) - inScales = np.sqrt(np.var(matData['Y'])) - X = matData['Y'] - inMean + mat_data = scipy.io.loadmat(os.path.join(data_path, 'mocap', 'ankur', 'ankurDataPoseSilhouette.mat')) + inMean = np.mean(mat_data['Y']) + inScales = np.sqrt(np.var(mat_data['Y'])) + X = mat_data['Y'] - inMean X = X/inScales - Xtest = matData['Y_test'] - inMean + Xtest = mat_data['Y_test'] - inMean Xtest = Xtest/inScales - Y = matData['Z'] - Ytest = matData['Z_test'] + Y = mat_data['Z'] + Ytest = mat_data['Z_test'] return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."} +def stick(): + Y, connect = GPy.util.mocap.load_text_data('run1', data_path) + Y = Y[0:-1:4, :] + lbls = 'connect' + return {'Y': Y, 'connect' : connect, 'info': "Stick man data from Ohio."} + + def swiss_roll_1000(): - matData = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data')) - Y = matData['X_data'][:, 0:1000].transpose() + mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data')) + Y = mat_data['X_data'][:, 0:1000].transpose() return {'Y': Y, 'info': "Subsample of the swiss roll data extracting only the first 1000 values."} def swiss_roll(): - matData = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat')) - Y = matData['X_data'][:, 0:3000].transpose() + mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat')) + Y = mat_data['X_data'][:, 0:3000].transpose() return {'Y': Y, 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."} def toy_rbf_1d(seed=default_seed): @@ -202,3 +216,4 @@ def creep_data(): features.extend(range(2, 31)) X = all_data[:,features].copy() return {'X': X, 'y' : y} + diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index f21502a5..45644b41 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -145,7 +145,8 @@ def PCA(Y, Q): """ if not np.allclose(Y.mean(axis=0), 0.0): print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" - Y -= Y.mean(axis=0) + + Y -= Y.mean(axis=0) Z = linalg.svd(Y, full_matrices = False) [X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]] diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py index 7e4e9e11..e66a36b9 100644 --- a/GPy/util/mocap.py +++ b/GPy/util/mocap.py @@ -8,12 +8,17 @@ def load_text_data(dataset, directory, centre=True): # Remove markers where there is a NaN present_index = [i for i in range(points[0].shape[1]) if not (np.any(np.isnan(points[0][:, i])) or np.any(np.isnan(points[0][:, i])) or np.any(np.isnan(points[0][:, i])))] + point_names = point_names[present_index] + for i in range(3): + points[i] = points[i][:, present_index] + if centre: + points[i] = (points[i].T - points[i].mean(axis=1)).T + # Concatanate the X, Y and Z markers together - Y = np.concatenate((points[0][:, present_index], points[1][:, present_index], points[2][:, present_index]), axis=1) - if centre: - Y = Y - Y.mean(axis=0) + Y = np.concatenate((points[0], points[1], points[2]), axis=1) Y = Y/400. - return Y + connect = read_connections(os.path.join(directory, 'connections.txt'), point_names) + return Y, connect def parse_text(file_name): """Parse data from Ohio State University text mocap files (http://accad.osu.edu/research/mocap/mocap_data.htm).""" @@ -23,7 +28,7 @@ def parse_text(file_name): point_names = np.array(fid.readline().split())[2:-1:3] fid.close() for i in range(len(point_names)): - point_names[i] = point_names[i][0:-3] + point_names[i] = point_names[i][0:-2] # Read the matrix data S = np.loadtxt(file_name, skiprows=1) @@ -42,37 +47,28 @@ def parse_text(file_name): return points, point_names, times -#def read_connections(): +def read_connections(file_name, point_names): + """Read a file detailing which markers should be connected to which for motion capture data.""" + connections = [] + fid = open(file_name, 'r') + line=fid.readline() + while(line): + connections.append(np.array(line.split(','))) + connections[-1][0] = connections[-1][0].strip() + connections[-1][1] = connections[-1][1].strip() + line = fid.readline() + connect = np.zeros((len(point_names), len(point_names)),dtype=bool) + for i in range(len(point_names)): + for j in range(len(point_names)): + for k in range(len(connections)): + if connections[k][0] == point_names[i] and connections[k][1] == point_names[j]: + + connect[i,j]=True + connect[j,i]=True + break + + return connect -# fid = fopen(fileName); -# i = 1; -# rem = fgets(fid); -# while(rem ~= -1) -# [token, rem] = strtok(rem, ','); -# connections{i, 1} = fliplr(deblank(fliplr(deblank(token)))); -# [token, rem] = strtok(rem, ','); -# connections{i, 2} = fliplr(deblank(fliplr(deblank(token)))); -# i = i + 1; -# rem = fgets(fid); -# end - -# connect = zeros(length(pointNames)); -# fclose(fid); -# for i = 1:size(connections, 1); -# for j = 1:length(pointNames) -# if strcmp(pointNames{j}, connections{i, 1}) | ... -# strcmp(pointNames{j}, connections{i, 2}) -# for k = 1:length(pointNames) -# if k == j -# break -# end -# if strcmp(pointNames{k}, connections{i, 1}) | ... -# strcmp(pointNames{k}, connections{i, 2}) -# connect(j, k) = 1; -# end -# end -# end -# end -# end -# connect = sparse(connect); + + diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index eba89b8a..8a685ebb 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -4,18 +4,18 @@ import GPy import numpy as np class lvm: - def __init__(self, model, data_visualize, ax): - self.cid = ax.figure.canvas.mpl_connect('button_press_event', self.on_click) - self.cid = ax.figure.canvas.mpl_connect('motion_notify_event', self.on_move) + def __init__(self, model, data_visualize, latent_axis): + self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click) + self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move) self.data_visualize = data_visualize self.model = model - self.ax = ax + self.latent_axis = latent_axis self.called = False self.move_on = False def on_click(self, event): print 'click', event.xdata, event.ydata - if event.inaxes!=self.ax: return + if event.inaxes!=self.latent_axis: return self.move_on = not self.move_on print if self.called: @@ -26,10 +26,10 @@ class lvm: else: self.xs = [event.xdata] self.ys = [event.ydata] - self.line, = ax.plot(event.xdata, event.ydata) + self.line, = self.latent_axis.plot(event.xdata, event.ydata) self.called = True def on_move(self, event): - if event.inaxes!=self.ax: return + if event.inaxes!=self.latent_axis: return if self.called and self.move_on: # Call modify code on move #print 'move', event.xdata, event.ydata @@ -68,28 +68,34 @@ class vector_show(data_show): class image_show(data_show): """Show a data vector as an image.""" - def __init__(self, vals, axis=None, dimensions=(16,16), transpose=False, invert=False): + def __init__(self, vals, axis=None, dimensions=(16,16), transpose=False, invert=False, scale=False): data_show.__init__(self, vals, axis) self.dimensions = dimensions - self.fig_display = plt.figure() - self.set_image(vals) - self.handle = plt.imshow(self.vals) self.transpose = transpose self.invert = invert + self.scale = scale + self.set_image(vals/255.) + self.handle = self.axis.imshow(self.vals, interpolation='nearest') + plt.show() def modify(self, vals): - self.set_image(vals) + self.set_image(vals/255.) + #self.handle.remove() + #self.handle = self.axis.imshow(self.vals) self.handle.set_array(self.vals) - self.axis.figure.canvas.draw() - + #self.axis.figure.canvas.draw() + plt.show() + def set_image(self, vals): - self.vals = np.reshape(vals, self.dimensions) + self.vals = np.reshape(vals, self.dimensions, order='F') if self.transpose: self.vals = self.vals.T - if self.invert: - self.vals = -self.vals + if not self.scale: + self.vals = self.vals + #if self.invert: + # self.vals = -self.vals -class stick_show(data_show): +class stick_show(data_show): """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect.""" def __init__(self, vals, axis=None, connect=None): From 9422c603b1ca1cf854da58145ffbea982c243fee Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 2 Apr 2013 10:49:09 +0200 Subject: [PATCH 13/21] Minor modifications to visualization routines and examples. --- GPy/examples/dimensionality_reduction.py | 8 ++++---- GPy/util/visualize.py | 23 +++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 164d04ae..e4308465 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -73,12 +73,12 @@ def oil_100(): def brendan_faces(): data = GPy.util.datasets.brendan_faces() - Y = data['Y'][0:500, :] - m = GPy.models.GPLVM(Y, 2, init='rand') + Y = data['Y'][0:-1:10, :] + m = GPy.models.GPLVM(data['Y'], 2) # optimize m.ensure_default_constraints() - m.optimize(messages=1, max_f_eval=40) + m.optimize(messages=1, max_f_eval=10000) ax = m.plot_latent() y = m.likelihood.Y[0,:] @@ -91,7 +91,7 @@ def brendan_faces(): def stick(): data = GPy.util.datasets.stick() - m = GPy.models.GPLVM(data['Y'], 2, init='rand') + m = GPy.models.GPLVM(data['Y'], 2) # optimize m.ensure_default_constraints() diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 8a685ebb..dde9cd32 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -14,19 +14,18 @@ class lvm: self.move_on = False def on_click(self, event): - print 'click', event.xdata, event.ydata + #print 'click', event.xdata, event.ydata if event.inaxes!=self.latent_axis: return self.move_on = not self.move_on - print - if self.called: - self.xs.append(event.xdata) - self.ys.append(event.ydata) - self.line.set_data(self.xs, self.ys) - self.line.figure.canvas.draw() - else: - self.xs = [event.xdata] - self.ys = [event.ydata] - self.line, = self.latent_axis.plot(event.xdata, event.ydata) + # if self.called: + # self.xs.append(event.xdata) + # self.ys.append(event.ydata) + # self.line.set_data(self.xs, self.ys) + # self.line.figure.canvas.draw() + # else: + # self.xs = [event.xdata] + # self.ys = [event.ydata] + # self.line, = self.latent_axis.plot(event.xdata, event.ydata) self.called = True def on_move(self, event): if event.inaxes!=self.latent_axis: return @@ -75,7 +74,7 @@ class image_show(data_show): self.invert = invert self.scale = scale self.set_image(vals/255.) - self.handle = self.axis.imshow(self.vals, interpolation='nearest') + self.handle = self.axis.imshow(self.vals, cmap=plt.cm.gray, interpolation='nearest') plt.show() def modify(self, vals): From 97db2a5bd7e5c5339319e366cc34980c99b537e6 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 2 Apr 2013 16:53:42 +0200 Subject: [PATCH 14/21] Edit to linalg.py PCA function to stop it changing data matrix. --- GPy/util/linalg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 45644b41..59f598f9 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -146,9 +146,9 @@ def PCA(Y, Q): if not np.allclose(Y.mean(axis=0), 0.0): print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" - Y -= Y.mean(axis=0) + #Y -= Y.mean(axis=0) - Z = linalg.svd(Y, full_matrices = False) + Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False) [X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]] v = X.std(axis=0) X /= v; From 97704d992803677befeb5cbebeb6b7918bf7cb33 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 5 Apr 2013 11:09:14 +0100 Subject: [PATCH 15/21] added the rbfcos kernel ARD seems to work dK_dX still todo --- GPy/kern/__init__.py | 2 +- GPy/kern/constructors.py | 8 +++ GPy/kern/rbfcos.py | 117 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 GPy/kern/rbfcos.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 5d8a7d15..f062ee56 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 90b13600..6a968da4 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -24,6 +24,7 @@ from prod_orthogonal import prod_orthogonal as prod_orthogonalpart from symmetric import symmetric as symmetric_part from coregionalise import coregionalise as coregionalise_part from rational_quadratic import rational_quadratic as rational_quadraticpart +from rbfcos import rbfcos as rbfcospart #TODO these s=constructors are not as clean as we'd like. Tidy the code up #using meta-classes to make the objects construct properly wthout them. @@ -310,3 +311,10 @@ def fixed(D, K, variance=1.): """ part = fixedpart(D, K, variance) return kern(D, [part]) + +def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False): + """ + construct a rbfcos kernel + """ + part = rbfcospart(D,variance,frequencies,bandwidths,ARD) + return kern(D,[part]) diff --git a/GPy/kern/rbfcos.py b/GPy/kern/rbfcos.py new file mode 100644 index 00000000..0a9182cc --- /dev/null +++ b/GPy/kern/rbfcos.py @@ -0,0 +1,117 @@ + +# Copyright (c) 2012, James Hensman and Andrew Gordon Wilson +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +from kernpart import kernpart +import numpy as np +import hashlib + +class rbfcos(kernpart): + def __init__(self,D,variance=1.,frequencies=None,bandwidths=None,ARD=False): + self.D = D + self.name = 'rbfcos' + if self.D>10: + print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs" + self.ARD = ARD + + #set the default frequencies and bandwidths, appropriate Nparam + if ARD: + self.Nparam = 2*self.D + 1 + if frequencies is not None: + frequencies = np.asarray(frequencies) + assert frequencies.size == self.D, "bad number of frequencies" + else: + frequencies = np.ones(self.D) + if bandwidths is not None: + bandwidths = np.asarray(bandwidths) + assert bandwidths.size == self.D, "bad number of bandwidths" + else: + bandwidths = np.ones(self.D) + else: + self.Nparam = 3 + if frequencies is not None: + frequencies = np.asarray(frequencies) + assert frequencies.size == 1, "Only one frequency needed for non-ARD kernel" + else: + frequencies = np.ones(1) + + if bandwidths is not None: + bandwidths = np.asarray(bandwidths) + assert bandwidths.size == 1, "Only one bandwidth needed for non-ARD kernel" + else: + bandwidths = np.ones(1) + + #initialise cache + self._X, self._X2, self._params = np.empty(shape=(3,1)) + + self._set_params(np.hstack((variance,frequencies.flatten(),bandwidths.flatten()))) + + + def _get_params(self): + return np.hstack((self.variance,self.frequencies, self.bandwidths)) + + def _set_params(self,x): + assert x.size==(self.Nparam) + if self.ARD: + self.variance = x[0] + self.frequencies = x[1:1+self.D] + self.bandwidths = x[1+self.D:] + else: + self.variance, self.frequencies, self.bandwidths = x + + def _get_param_names(self): + if self.Nparam == 3: + return ['variance','frequency','bandwidth'] + else: + return ['variance']+['frequency_%i'%i for i in range(self.D)]+['bandwidth_%i'%i for i in range(self.D)] + + def K(self,X,X2,target): + self._K_computations(X,X2) + target += self.variance*self._dvar + + def Kdiag(self,X,target): + np.add(target,self.variance,target) + + def dK_dtheta(self,dL_dK,X,X2,target): + target[0] += np.sum(dL_dK*self._dvar) + if self.ARD: + for q in xrange(self.D): + target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self.dist[:,:,q]*self.frequencies[q])*self.dist[:,:,q]) + target[q+1+self.D] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self.dist2[:,:,q]) + else: + target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self.dist*self.frequencies)*self.dist,-1)) + target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self.dist2.sum(-1)) + + + def dKdiag_dtheta(self,dL_dKdiag,X,target): + target[0] += np.sum(dL_dKdiag) + + def dK_dX(self,dL_dK,X,X2,target): + #TODO!!! + raise NotImplementedError + + def dKdiag_dX(self,dL_dKdiag,X,target): + pass + + def _K_computations(self,X,X2): + if not (np.all(X==self._X) and np.all(X2==self._X2)): + if X2 is None: X2 = X + self._X = X.copy() + self._X2 = X2.copy() + + #do the distances: this will be high memory for large D + #NB: we don't take the abs of the dist because cos is symmetric + self.dist = X[:,None,:] - X2[None,:,:] + self.dist2 = np.square(self.dist) + + #ensure the next section is computed: + self._params = np.empty(self.Nparam) + + if not np.all(self._params == self._get_params()): + self._params == self._get_params().copy() + + self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self.dist2*self.bandwidths,-1)) + self._cos_part = np.prod(np.cos(2.*np.pi*self.dist*self.frequencies),-1) + self._dvar = self._rbf_part*self._cos_part + From 8e4d839b5d6f4211b1e766edbc1d052fbdd77ac9 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 5 Apr 2013 12:08:21 +0100 Subject: [PATCH 16/21] yak shaving --- GPy/kern/rbfcos.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/GPy/kern/rbfcos.py b/GPy/kern/rbfcos.py index 0a9182cc..094b806b 100644 --- a/GPy/kern/rbfcos.py +++ b/GPy/kern/rbfcos.py @@ -5,7 +5,6 @@ from kernpart import kernpart import numpy as np -import hashlib class rbfcos(kernpart): def __init__(self,D,variance=1.,frequencies=None,bandwidths=None,ARD=False): @@ -32,13 +31,13 @@ class rbfcos(kernpart): self.Nparam = 3 if frequencies is not None: frequencies = np.asarray(frequencies) - assert frequencies.size == 1, "Only one frequency needed for non-ARD kernel" + assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel" else: frequencies = np.ones(1) if bandwidths is not None: bandwidths = np.asarray(bandwidths) - assert bandwidths.size == 1, "Only one bandwidth needed for non-ARD kernel" + assert bandwidths.size == 1, "Exactly one bandwidth needed for non-ARD kernel" else: bandwidths = np.ones(1) @@ -74,14 +73,15 @@ class rbfcos(kernpart): np.add(target,self.variance,target) def dK_dtheta(self,dL_dK,X,X2,target): + self._K_computations(X,X2) target[0] += np.sum(dL_dK*self._dvar) if self.ARD: for q in xrange(self.D): - target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self.dist[:,:,q]*self.frequencies[q])*self.dist[:,:,q]) - target[q+1+self.D] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self.dist2[:,:,q]) + target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self._dist[:,:,q]*self.frequencies[q])*self._dist[:,:,q]) + target[q+1+self.D] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q]) else: - target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self.dist*self.frequencies)*self.dist,-1)) - target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self.dist2.sum(-1)) + target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self._dist*self.frequencies)*self._dist,-1)) + target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2.sum(-1)) def dKdiag_dtheta(self,dL_dKdiag,X,target): @@ -102,8 +102,8 @@ class rbfcos(kernpart): #do the distances: this will be high memory for large D #NB: we don't take the abs of the dist because cos is symmetric - self.dist = X[:,None,:] - X2[None,:,:] - self.dist2 = np.square(self.dist) + self._dist = X[:,None,:] - X2[None,:,:] + self._dist2 = np.square(self._dist) #ensure the next section is computed: self._params = np.empty(self.Nparam) @@ -111,7 +111,7 @@ class rbfcos(kernpart): if not np.all(self._params == self._get_params()): self._params == self._get_params().copy() - self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self.dist2*self.bandwidths,-1)) - self._cos_part = np.prod(np.cos(2.*np.pi*self.dist*self.frequencies),-1) + self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self._dist2*self.bandwidths,-1)) + self._cos_part = np.prod(np.cos(2.*np.pi*self._dist*self.frequencies),-1) self._dvar = self._rbf_part*self._cos_part From 22ad5d94e95fe5e464e7c3ffc206f8dc650f9e62 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Fri, 5 Apr 2013 17:27:44 +0100 Subject: [PATCH 17/21] Changes in FITC approximation computation --- GPy/likelihoods/EP.py | 12 +++++++++--- GPy/models/generalized_FITC.py | 26 ++++++++++---------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py index b23eda9f..118b226a 100644 --- a/GPy/likelihoods/EP.py +++ b/GPy/likelihoods/EP.py @@ -302,10 +302,16 @@ class EP(likelihood): mu = self.w + np.dot(P,self.gamma) self.iterations += 1 #Sigma recomptutation with Cholesky decompositon - Diag = Diag0/(1.+ Diag0 * self.tau_tilde) - P = (Diag / Diag0)[:,None] * P0 + Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) + Diag = Diag0 * Iplus_Dprod_i + P = Iplus_Dprod_i[:,None] * P0 + + #Diag = Diag0/(1.+ Diag0 * self.tau_tilde) + #P = (Diag / Diag0)[:,None] * P0 RPT0 = np.dot(R0,P0.T) - L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T)) + L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T)) + #L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Iplus_Dprod_i/Diag0)[:,None]*RPT0.T)) + #L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T)) R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1) RPT = np.dot(R,P.T) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index 505f442a..8c983b05 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -76,11 +76,17 @@ class generalized_FITC(sparse_GP): # Compute generalized FITC's diagonal term of the covariance self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) self.Diag0 = self.psi0 - np.diag(self.Qnn) - self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten()) + Iplus_Dprod_i = 1./(1.+ self.Diag0 * self._precision.flatten()) + self.Diag = self.Diag0 * Iplus_Dprod_i + #self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten()) - self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T + + self.P = Iplus_Dprod_i[:,None] * self.psi1.T + #self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T self.RPT0 = np.dot(self.Lmi,self.psi1) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) + self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) + #self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - Iplus_Dprod_i/self.Diag0)[:,None]*self.RPT0.T)) + #self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.RPT = np.dot(self.R,self.P.T) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) @@ -89,7 +95,7 @@ class generalized_FITC(sparse_GP): self.mu = self.w + np.dot(self.P,self.gamma) # Remove extra term from dL_dpsi1 - self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB + self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB else: raise NotImplementedError, "homoscedastic fitc not implemented" # Remove extra term from dL_dpsi1 @@ -180,18 +186,6 @@ class generalized_FITC(sparse_GP): var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] return mu_star[:,None],var - """ - Kx = self.kern.K(self.Z, Xnew) - mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) - if full_cov: - Kxx = self.kern.K(Xnew) - var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting - else: - Kxx = self.kern.Kdiag(Xnew) - var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) - a = kjk - return mu,var[:,None] - """ else: raise NotImplementedError, "homoscedastic fitc not implemented" """ From bb734a6dd79ee8c4150b800fe61cf7249fa60460 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 9 Apr 2013 15:43:06 +0100 Subject: [PATCH 18/21] added simple BGPLVM_oil demo --- GPy/examples/dimensionality_reduction.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index e4308465..fed79417 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -42,16 +42,36 @@ def BGPLVM(seed = default_seed): return m -def GPLVM_oil_100(): +def GPLVM_oil_100(optimize=True,M=15): + data = GPy.util.datasets.oil_100() + + # create simple GP model + kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6) + m = GPy.models.Bayesian_GPLVM(data['X'], 6, kernel=kernel, M=M) + m.data_labels = data['Y'].argmax(axis=1) + + # optimize + m.ensure_default_constraints() + if optimize: + m.optimize('scg',messages=1) + + # plot + print(m) + m.plot_latent(labels=m.data_labels) + return m + +def BGPLVM_oil_100(optimize=True): data = GPy.util.datasets.oil_100() # create simple GP model kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6) m = GPy.models.GPLVM(data['X'], 6, kernel = kernel) + m.data_labels = data['Y'].argmax(axis=1) # optimize m.ensure_default_constraints() - m.optimize(messages=1) + if optimize: + m.optimize(messages=1) # plot print(m) From fd0b172e81ceb3c63dc3ca7f5469b4f2cc8bc63f Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 10 Apr 2013 09:28:58 +0100 Subject: [PATCH 19/21] various work on BGPLVM oil demo --- GPy/core/model.py | 23 +++++++++++++++++++ GPy/examples/dimensionality_reduction.py | 28 ++++++++++++++++-------- GPy/models/Bayesian_GPLVM.py | 20 ++++++++++++----- GPy/models/GPLVM.py | 24 +++++++++----------- 4 files changed, 66 insertions(+), 29 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 9216aea6..645f7228 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -423,6 +423,28 @@ class model(parameterised): grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4]) print grad_string + def input_sensitivity(self): + """ + return an array describing the sesitivity of the model to each input + + NB. Right now, we're basing this on the lengthscales (or variances) of the kernel. + TODO: proper sensitivity analysis + """ + + if not hasattr(self,'kern'): + raise ValueError, "this model has no kernel" + + k = [p for p in self.kern.parts if p.name in ['rbf','linear']] + if (not len(k)==1) or (not k[0].ARD): + raise ValueError, "cannot determine sensitivity for this kernel" + k = k[0] + + if k.name=='rbf': + return k.lengthscale + elif k.name=='linear': + return 1./k.variances + + def pseudo_EM(self,epsilon=.1,**kwargs): """ TODO: Should this not bein the GP class? @@ -469,3 +491,4 @@ class model(parameterised): iteration += 1 if stop: print "%s iterations." %iteration + diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index fed79417..b3509ec5 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -47,7 +47,7 @@ def GPLVM_oil_100(optimize=True,M=15): # create simple GP model kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6) - m = GPy.models.Bayesian_GPLVM(data['X'], 6, kernel=kernel, M=M) + m = GPy.models.GPLVM(data['X'], 6, kernel=kernel, M=M) m.data_labels = data['Y'].argmax(axis=1) # optimize @@ -60,22 +60,32 @@ def GPLVM_oil_100(optimize=True,M=15): m.plot_latent(labels=m.data_labels) return m -def BGPLVM_oil_100(optimize=True): - data = GPy.util.datasets.oil_100() +def BGPLVM_oil(optimize=True,N=100,Q=10,M=15): + data = GPy.util.datasets.oil() # create simple GP model - kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6) - m = GPy.models.GPLVM(data['X'], 6, kernel = kernel) - m.data_labels = data['Y'].argmax(axis=1) + kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q,0.001) + m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel = kernel,M=M) + m.data_labels = data['Y'][:N].argmax(axis=1) + + #initial conditions # optimize - m.ensure_default_constraints() if optimize: - m.optimize(messages=1) + m.constrain_fixed('noise',0.05) + m.ensure_default_constraints() + m.optimize('scg',messages=1) + m.unconstrain('noise') + m.constrain_positive('noise') + m.optimize('scg',messages=1) + else: + m.ensure_default_constraints() # plot print(m) - m.plot_latent(labels=data['Y'].argmax(axis=1)) + m.plot_latent(labels=m.data_labels) + pb.figure() + pb.bar(np.arange(m.kern.D),1./m.input_sensitivity()) return m def oil_100(): diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index 8f9759c3..0f5758ba 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -22,12 +22,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, X = None, S = None, init='PCA', M=10, Z=None, kernel=None, **kwargs): + def __init__(self, Y, Q, X = None, X_uncertainty = None, init='PCA', M=10, Z=None, kernel=None, **kwargs): if X == None: X = self.initialise_latent(init, Q, Y) - if S is None: - S = np.ones_like(X) * 1e-2# + if X_uncertainty is None: + X_uncertainty = np.ones_like(X) * 0.5 if Z is None: Z = np.random.permutation(X.copy())[:M] @@ -37,7 +37,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): kernel = kern.rbf(Q) + kern.white(Q) - sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=S, **kwargs) + sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=X_uncertainty, **kwargs) def _get_param_names(self): X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) @@ -84,6 +84,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def _log_likelihood_gradients(self): return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self))) - def plot_latent(self, *args, **kwargs): - input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs) + def plot_latent(self, which_indices=None,*args, **kwargs): + + if which_indices is None: + try: + input_1, input_2 = np.argsort(self.input_sensitivity())[:2] + except: + raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" + else: + input_1, input_2 = which_indices + GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs) pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index 03e9b715..cc4be70e 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -67,7 +67,7 @@ class GPLVM(GP): """ util.plot.Tango.reset() - + if labels is None: labels = np.ones(self.N) if which_indices is None: @@ -77,23 +77,19 @@ class GPLVM(GP): if self.Q==2: input_1, input_2 = 0,1 else: - #try to find a linear of RBF kern in the kernel - k = [p for p in self.kern.parts if p.name in ['rbf','linear']] - if (not len(k)==1) or (not k[0].ARD): + try: + input_1, input_2 = np.argsort(self.input_sensitivity())[:2] + except: raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" - input_1, input_2 = self.lengthscale_order() - k = k[0] - if k.name=='rbf': - input_1, input_2 = np.argsort(k.lengthscale)[:2] - elif k.name=='linear': - input_1, input_2 = np.argsort(k.variances)[::-1][:2] + else: + input_1, input_2 = which_indices #first, plot the output variance as a function of the latent space Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution) - Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) - Xtest_full[:, :2] = Xtest - mu, var, low, up = self.predict(Xtest_full) - var = var.mean(axis=1) # this was var[:, :2] edit by Neil + Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) + Xtest_full[:, :2] = Xtest + mu, var, low, up = self.predict(Xtest_full) + var = var.mean(axis=1) # this was var[:, :2] edit by Neil pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') From 45403105f9793a88f9e9003f1f953efff9eb10d0 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 10 Apr 2013 09:34:15 +0100 Subject: [PATCH 20/21] changed X_uncertainty for X_variance (in the code) for consistency with actual naming (in the printing) --- GPy/models/Bayesian_GPLVM.py | 22 +++++++++++----------- GPy/models/GP.py | 2 +- GPy/models/generalized_FITC.py | 8 ++++---- GPy/models/sparse_GP.py | 30 +++++++++++++++--------------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index 0f5758ba..f66fabde 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -22,12 +22,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, X = None, X_uncertainty = None, init='PCA', M=10, Z=None, kernel=None, **kwargs): + def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs): if X == None: X = self.initialise_latent(init, Q, Y) - if X_uncertainty is None: - X_uncertainty = np.ones_like(X) * 0.5 + if X_variance is None: + X_variance = np.ones_like(X) * 0.5 if Z is None: Z = np.random.permutation(X.copy())[:M] @@ -37,7 +37,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): kernel = kern.rbf(Q) + kern.white(Q) - sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=X_uncertainty, **kwargs) + sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs) def _get_param_names(self): X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) @@ -54,28 +54,28 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): =============================================================== """ - return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP._get_params(self))) + return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) def _set_params(self,x): N, Q = self.N, self.Q self.X = x[:self.X.size].reshape(N,Q).copy() - self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() + self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() sparse_GP._set_params(self, x[(2*N*Q):]) def dL_dmuS(self): - dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty) - dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty) - dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty) + dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance) + dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance) + dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance) dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - dKL_dS = (1. - (1./self.X_uncertainty))*0.5 + dKL_dS = (1. - (1./self.X_variance))*0.5 dKL_dmu = self.X return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten())) def KL_divergence(self): var_mean = np.square(self.X).sum() - var_S = np.sum(self.X_uncertainty - np.log(self.X_uncertainty)) + var_S = np.sum(self.X_variance - np.log(self.X_variance)) return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N def log_likelihood(self): diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 53ba1183..cfda0cfe 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -269,7 +269,7 @@ class GP(model): Zu = self.Z*self._Xstd + self._Xmean pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12) if self.has_uncertain_inputs: - pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten())) + pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten())) elif self.X.shape[1]==2: #FIXME resolution = resolution or 50 diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index 8c983b05..26875f64 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -19,8 +19,8 @@ class generalized_FITC(sparse_GP): :type likelihood: GPy.likelihood.(Gaussian | EP) :param kernel : the kernel/covariance function. See link kernels :type kernel: a GPy kernel - :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) - :type X_uncertainty: np.ndarray (N x Q) | None + :param X_variance: The variance in the measurements of X (Gaussian variance) + :type X_variance: np.ndarray (N x Q) | None :param Z: inducing inputs (optional, see note) :type Z: np.ndarray (M x Q) | None :param Zslices: slices for the inducing inputs (see slicing TODO: link) @@ -30,13 +30,13 @@ class generalized_FITC(sparse_GP): :type normalize_(X|Y): bool """ - def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False): + def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False): self.Z = Z self.M = self.Z.shape[0] self._precision = likelihood.precision - sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False) + sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False) def _set_params(self, p): self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index 3d44ad6b..e19945e2 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -22,8 +22,8 @@ class sparse_GP(GP): :type likelihood: GPy.likelihood.(Gaussian | EP) :param kernel : the kernel/covariance function. See link kernels :type kernel: a GPy kernel - :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) - :type X_uncertainty: np.ndarray (N x Q) | None + :param X_variance: The uncertainty in the measurements of X (Gaussian variance) + :type X_variance: np.ndarray (N x Q) | None :param Z: inducing inputs (optional, see note) :type Z: np.ndarray (M x Q) | None :param Zslices: slices for the inducing inputs (see slicing TODO: link) @@ -33,7 +33,7 @@ class sparse_GP(GP): :type normalize_(X|Y): bool """ - def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False): + def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False): self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable self.Z = Z @@ -42,12 +42,12 @@ class sparse_GP(GP): self.M = Z.shape[0] self.likelihood = likelihood - if X_uncertainty is None: + if X_variance is None: self.has_uncertain_inputs=False else: - assert X_uncertainty.shape==X.shape + assert X_variance.shape==X.shape self.has_uncertain_inputs=True - self.X_uncertainty = X_uncertainty + self.X_variance = X_variance if not self.likelihood.is_heteroscedastic: self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here? @@ -56,16 +56,16 @@ class sparse_GP(GP): #normalize X uncertainty also if self.has_uncertain_inputs: - self.X_uncertainty /= np.square(self._Xstd) + self.X_variance /= np.square(self._Xstd) def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) if self.has_uncertain_inputs: - self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty) - self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T - self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) + self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance) + self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T + self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance) else: self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices) self.psi1 = self.kern.K(self.Z,self.X) @@ -210,9 +210,9 @@ class sparse_GP(GP): """ dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) if self.has_uncertain_inputs: - dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty) - dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) - dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_uncertainty) + dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance) + dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance) + dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance) else: dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) @@ -225,8 +225,8 @@ class sparse_GP(GP): """ dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ if self.has_uncertain_inputs: - dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty) - dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes' + dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance) + dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_variance) # 'stripes' else: dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X) return dL_dZ From eb1d8f211fdebe5ef44d9720dd023aaecd72c645 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 10 Apr 2013 11:04:49 +0100 Subject: [PATCH 21/21] stability improvements in sparse_GP --- GPy/examples/dimensionality_reduction.py | 2 -- GPy/inference/SCG.py | 7 +++++-- GPy/models/sparse_GP.py | 16 +++++++++++----- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index b3509ec5..61a4abd8 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -68,8 +68,6 @@ def BGPLVM_oil(optimize=True,N=100,Q=10,M=15): m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel = kernel,M=M) m.data_labels = data['Y'][:N].argmax(axis=1) - #initial conditions - # optimize if optimize: m.constrain_fixed('noise',0.05) diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py index f5e7ab22..0e85f243 100644 --- a/GPy/inference/SCG.py +++ b/GPy/inference/SCG.py @@ -23,6 +23,7 @@ import numpy as np +import sys def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6): """ @@ -103,11 +104,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto iteration += 1 if display: - print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta + print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + sys.stdout.flush() if success: # Test for termination - if np.max(np.abs(alpha*d)) < xtol or np.max(np.abs(fnew-fold)) < ftol: + if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol): + status='converged' return x, flog, function_eval, status else: diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index e19945e2..88abf77d 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -7,6 +7,7 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot from ..util.plot import gpplot from .. import kern from GP import GP +from scipy import linalg #Still TODO: # make use of slices properly (kernel can now do this) @@ -96,21 +97,26 @@ class sparse_GP(GP): self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y - self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T) + + #Compute A = L^-1 psi2 beta L^-T + tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0] + self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asarray(tmp.T,order='F'),lower=1)[0] + self.B = np.eye(self.M)/sf2 + self.A self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.psi1V = np.dot(self.psi1, self.V) - tmp = np.dot(self.Lmi.T, self.LBi.T) - self.C = np.dot(tmp,tmp.T) + #tmp = np.dot(self.Lmi.T, self.LBi.T) + tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0] + self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available self.Cpsi1V = np.dot(self.C,self.psi1V) self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) - self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2 + #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2 + self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten() - #self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: