diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index ce2b59de..847cd99d 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -96,8 +96,7 @@ class GP(GPBase):
 
         model for a new variable Y* = v_tilde/tau_tilde, with a covariance
         matrix K* = K + diag(1./tau_tilde) plus a normalization term.
         """
-        return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
-                0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
+        return - 0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) - 0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z
 
     def _log_likelihood_gradients(self):
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index a6478850..93ba5d7d 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -108,7 +108,7 @@ class SparseGP(GPBase):
         self.B = np.eye(self.num_inducing) + self.A
         self.LB = jitchol(self.B)
 
-        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
+        #VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
         self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
 
         # back substutue C into psi1Vf
@@ -163,7 +163,7 @@ class SparseGP(GPBase):
     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
         if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
+            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V*self.likelihood.Y)
             B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
         else:
             A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
@@ -246,7 +246,7 @@ class SparseGP(GPBase):
             Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
 
         if self.Cpsi1V is None:
-            psi1V = np.dot(self.psi1.T, self.likelihood.V)
+            psi1V = np.dot(self.psi1.T,self.likelihood.V)
             tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0)
             tmp, _ = dpotrs(self.LB, tmp, lower=1)
             self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1)
diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py
index 5d6bcd8b..97f74045 100644
--- a/GPy/core/svigp.py
+++ b/GPy/core/svigp.py
@@ -4,7 +4,7 @@ import numpy as np
 import pylab as pb
 
 from .. import kern
-from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
+from ..util.linalg import linalg, pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
 from ..likelihoods import EP
 from gp_base import GPBase
 from model import Model
@@ -269,7 +269,6 @@ class SVIGP(GPBase):
     def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
 
         param_step = 0.
 
-        #Iterate!
         for i in range(iterations):
 
@@ -288,6 +287,7 @@ class SVIGP(GPBase):
             #compute the steps in all parameters
             vb_step = self.vb_steplength*natgrads[0]
             if (self.epochs>=1):#only move the parameters after the first epoch
+                # print "it {} ep {} par {}".format(self.iterations, self.epochs, param_step)
                 param_step = self.momentum*param_step + self.param_steplength*grads
             else:
                 param_step = 0.
@@ -295,8 +295,8 @@ class SVIGP(GPBase):
 
             self.set_vb_param(self.get_vb_param() + vb_step)
             #Note: don't recompute everything here, wait until the next iteration when we have a new batch
             self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)
-            #print messages if desired
+
             if i and (not i%print_interval):
                 print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood()
                 print np.round(np.mean(self._grad_trace[-print_interval:],0),3)
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 08fdfa8d..4cf2a05b 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -24,7 +24,7 @@ def BGPLVM(seed=default_seed):
     Y = np.random.multivariate_normal(np.zeros(N), K, Q).T
     lik = Gaussian(Y, normalize=True)
 
-    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+    k = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
     # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
     # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
 
@@ -145,6 +145,7 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_iters=150, plot=
 
     # create simple GP model
     kernel = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+
     Y = data['X'][:N]
     Yn = Y - Y.mean(0)
     Yn /= Yn.std(0)
@@ -375,11 +376,12 @@ def stick():
 def stick_bgplvm(model=None):
     data = GPy.util.datasets.stick()
     Q = 6
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
-    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
+    kernel = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=35,kernel=kernel)
     # optimize
     m.ensure_default_constraints()
-    m.optimize(messages=1, max_f_eval=3000, xtol=1e-300, ftol=1e-300)
+    m.constrain_bounded('.*rbf_inv',1e-5, 100)
+    m.optimize(messages=1, max_iters=3000,xtol=1e-300,ftol=1e-300)
     m._set_params(m._get_params())
     plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
     plt.sca(latent_axes)
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index cc92c543..66b0bbc6 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -67,8 +67,8 @@ def toy_ARD(optim_iters=1000, kernel_type='linear', N=300, D=4):
     X4 = np.log(np.sort(np.random.rand(N,1),0))
     X = np.hstack((X1, X2, X3, X4))
 
-    Y1 = np.asarray(2*X[:,0]+3).T
-    Y2 = np.asarray(4*(X[:,2]-1.5*X[:,0])).T
+    Y1 = np.asmatrix(2*X[:,0]+3).T
+    Y2 = np.asmatrix(4*(X[:,2]-1.5*X[:,0])).T
     Y = np.hstack((Y1, Y2))
     Y = np.dot(Y, np.random.rand(2,D));
 
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 3f78e5e5..5cd90749 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -66,26 +66,12 @@ class kern(Parameterized):
 
         Parameterized.setstate(self, state)
 
-    def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
-        """If an ARD kernel is present, it bar-plots the ARD parameters,
-        :param fignum: figure number of the plot
-        :param ax: matplotlib axis to plot on
-        :param title:
-            title of the plot,
-            pass '' to not print a title
-            pass None for a generic title
-        """
+    def plot_ARD(self, fignum=None, ax=None, title=None):
+        """If an ARD kernel is present, it bar-plots the ARD parameters"""
         if ax is None:
             fig = pb.figure(fignum)
             ax = fig.add_subplot(111)
-        from GPy.util import Tango
-        from matplotlib.textpath import TextPath
-        Tango.reset()
-        xticklabels = []
-        bars = []
-        x0 = 0
         for p in self.parts:
-            c = Tango.nextMedium()
             if hasattr(p, 'ARD') and p.ARD:
                 if title is None:
                     ax.set_title('ARD parameters, %s kernel' % p.name)
@@ -96,32 +82,10 @@ class kern(Parameterized):
                 else:
                     ard_params = 1. / p.lengthscale
 
-                x = np.arange(x0, x0 + len(ard_params))
-                bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name))
-                xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
-                x0 += len(ard_params)
-        x = np.arange(x0)
-        for bar in bars:
-            for patch, num in zip(bar.patches, np.arange(len(bar.patches))):
-                height = patch.get_height()
-                xi = patch.get_x() + patch.get_width() / 2.
-                va = 'top'
-                c = 'w'
-                t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center')
-                if patch.get_extents().height <= t.get_extents().height + 2:
-                    va = 'bottom'
-                    c = 'k'
-                ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va)
-        # for xi, t in zip(x, xticklabels):
-        #     ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center')
-        # ax.set_xticklabels(xticklabels, rotation=17)
-        ax.set_xticks([])
-        ax.set_xlim(-.5, x0 - .5)
-        if title is '':
-            ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
-                      ncol=len(bars), mode="expand", borderaxespad=0.)
-        else:
-            ax.legend()
+                x = np.arange(len(ard_params))
+                ax.bar(x - 0.4, ard_params)
+                ax.set_xticks(x)
+                ax.set_xticklabels([r"${}$".format(i) for i in x])
         return ax
 
     def _transform_gradients(self, g):
@@ -397,7 +361,6 @@ class kern(Parameterized):
 
         # compute the "cross" terms
         # TODO: better looping, input_slices
-
        for i1, i2 in itertools.permutations(range(len(self.parts)), 2):
             p1, p2 = self.parts[i1], self.parts[i2]
             # ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index ac6c7c69..68b0a133 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -115,8 +115,8 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
         return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
 
-    def plot_latent(self, plot_inducing=True, *args, **kwargs):
-        return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
+    def plot_latent(self, *args, **kwargs):
+        return plot_latent.plot_latent(self, *args, **kwargs)
 
     def do_test_latents(self, Y):
         """
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index 86c0ac28..2e81b370 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -36,10 +36,10 @@ class GPLVM(GP):
         self.ensure_default_constraints()
 
     def initialise_latent(self, init, input_dim, Y):
-        Xr = np.random.randn(Y.shape[0], input_dim)
         if init == 'PCA':
-            Xr[:, :Y.shape[1]] = PCA(Y, input_dim)[0]
-        return Xr
+            return PCA(Y, input_dim)[0]
+        else:
+            return np.random.randn(Y.shape[0], input_dim)
 
     def getstate(self):
         return GP.getstate(self)
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index b3029d7f..6e504a69 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -23,7 +23,7 @@ class GradientTests(unittest.TestCase):
         self.X2D = np.random.uniform(-3., 3., (40, 2))
         self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(40, 1) * 0.05
 
-    def check_model_with_white(self, kern, model_type='GPRegression', dimension=1, uncertain_inputs=False):
+    def check_model_with_white(self, kern, model_type='GPRegression', dimension=1):
         # Get the correct gradients
         if dimension == 1:
             X = self.X1D
@@ -36,10 +36,7 @@ class GradientTests(unittest.TestCase):
         noise = GPy.kern.white(dimension)
         kern = kern + noise
 
-        if uncertain_inputs:
-            m = model_fit(X, Y, kernel=kern, X_variance=np.random.rand(X.shape[0], X.shape[1]))
-        else:
-            m = model_fit(X, Y, kernel=kern)
+        m = model_fit(X, Y, kernel=kern)
         m.randomize() # contrain all parameters to be positive
         self.assertTrue(m.checkgrad())
 
@@ -144,26 +141,6 @@ class GradientTests(unittest.TestCase):
         rbf = GPy.kern.rbf(2)
         self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=2)
 
-    def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
-        ''' Testing the sparse GP regression with rbf and white kernel on 2d data '''
-        rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=1)
-
-    def test_SparseGPRegression_rbf_linear_white_kern_2D(self):
-        ''' Testing the sparse GP regression with rbf and white kernel on 2d data '''
-        rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=2)
-
-    def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
-        ''' Testing the sparse GP regression with rbf, linear and white kernel on 2d data with uncertain inputs'''
-        rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
-
-    def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
-        ''' Testing the sparse GP regression with rbf, linear and white kernel on 1d data with uncertain inputs'''
-        rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1)
-
     def test_GPLVM_rbf_bias_white_kern_2D(self):
         """ Testing GPLVM with rbf + bias and white kernel """
         N, input_dim, D = 50, 1, 2