diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index c4e064a5..93ce374d 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -107,32 +107,41 @@ def mrd_simulation():
     ard1[ard1 == 0] = 1E-10
     ard2[ard2 == 0] = 1E-10
-    ard1i = 1. / ard1
-    ard2i = 1. / ard2
+#     ard1i = 1. / ard1
+#     ard2i = 1. / ard2
 
-    # make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
+#     k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
+#     Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
+#     Y1 -= Y1.mean(0)
+#
+#     k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
+#     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
+#     Y2 -= Y2.mean(0)
+#     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
 
     D1, D2, N, M, Q = 50, 100, 150, 15, 4
-    X = np.random.randn(N, Q)
+    x = np.linspace(0, 2 * np.pi, N)[:, None]
 
-    k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
-    Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
-    Y1 -= Y1.mean(0)
+    s1 = np.vectorize(lambda x: np.sin(x))
+    s2 = np.vectorize(lambda x: np.cos(x))
+    sS = np.vectorize(lambda x: np.sin(2 * x))
 
-    k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
-    Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
-    Y2 -= Y2.mean(0)
+    S1 = np.hstack([s1(x), sS(x)])
+    S2 = np.hstack([s2(x), sS(x)])
 
-    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 1.0)
+    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
+    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
 
-    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, _debug=False)
+    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+
+    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False)
     m.ensure_default_constraints()
 
-    fig = pyplot.figure("expected", figsize=(8, 3))
-    ax = fig.add_subplot(121)
-    ax.bar(np.arange(ard1.size) + .1, ard1)
-    ax = fig.add_subplot(122)
-    ax.bar(np.arange(ard2.size) + .1, ard2)
+#     fig = pyplot.figure("expected", figsize=(8, 3))
+#     ax = fig.add_subplot(121)
+#     ax.bar(np.arange(ard1.size) + .1, ard1)
+#     ax = fig.add_subplot(122)
+#     ax.bar(np.arange(ard2.size) + .1, ard2)
 
     return m
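Note on the rewritten mrd_simulation: the views are no longer drawn from a GP prior (that code is kept above as comments); each Y is now a random linear mixing of one private signal and one shared signal, so a successful MRD fit should recover sin(2x) as the shared latent dimension. The np.vectorize wrappers are redundant, since np.sin and np.cos are already ufuncs; a minimal standalone sketch of the same construction:

    import numpy as np

    D1, D2, N = 50, 100, 150
    x = np.linspace(0, 2 * np.pi, N)[:, None]      # common input, N x 1

    S1 = np.hstack([np.sin(x), np.sin(2 * x)])     # view 1: private sin(x), shared sin(2x)
    S2 = np.hstack([np.cos(x), np.sin(2 * x)])     # view 2: private cos(x), shared sin(2x)

    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))  # N x D1 observations
    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))  # N x D2 observations

    assert Y1.shape == (N, D1) and Y2.shape == (N, D2)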
diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
index 0e85f243..912641f6 100644
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         iteration += 1
 
         if display:
-            print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
+            print 'Iteration: {0:<5g}  Objective:{1:< 12g}  Scale:{2:< 12g}\r'.format(iteration, fnow, beta),
+            # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
             sys.stdout.flush()
 
         if success:
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 6f9c97f6..447819c1 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -70,7 +70,7 @@ class kern(parameterised):
             ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
             ax.set_xticks(np.arange(len(ard_params)), ["${}$".format(i + 1) for i in range(len(ard_params))])
 
-
+        return ax
 
     def _transform_gradients(self,g):
         x = self._get_params()
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index ee485e76..dd55b38f 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -22,7 +22,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)
@@ -31,7 +31,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 
         if Z is None:
             Z = np.random.permutation(X.copy())[:M]
-        assert Z.shape[1]==X.shape[1]
+        assert Z.shape[1] == X.shape[1]
 
         if kernel is None:
             kernel = kern.rbf(Q) + kern.white(Q)
@@ -40,8 +40,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
 
     def _get_param_names(self):
-        X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
-        S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
+        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
+        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
         return (X_names + S_names + sparse_GP._get_param_names(self))
 
     def _get_params(self):
@@ -56,35 +56,43 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         """
         return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
 
-    def _set_params(self,x):
+    def _set_params(self, x):
         N, Q = self.N, self.Q
-        self.X = x[:self.X.size].reshape(N,Q).copy()
-        self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP._set_params(self, x[(2*N*Q):])
+        self.X = x[:self.X.size].reshape(N, Q).copy()
+        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
+        sparse_GP._set_params(self, x[(2 * N * Q):])
+
+    def dKL_dmuS(self):
+        dKL_dS = (1. - (1. / self.X_variance)) * 0.5
+        dKL_dmu = self.X
+        return dKL_dmu, dKL_dS
 
     def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance)
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
-        dKL_dS = (1. - (1./self.X_variance))*0.5
-        dKL_dmu = self.X
-        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+        return dL_dmu, dL_dS
 
     def KL_divergence(self):
         var_mean = np.square(self.X).sum()
         var_S = np.sum(self.X_variance - np.log(self.X_variance))
-        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
+        return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
 
     def log_likelihood(self):
        return sparse_GP.log_likelihood(self) - self.KL_divergence()
 
     def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
+        dKL_dmu, dKL_dS = self.dKL_dmuS()
+        dL_dmu, dL_dS = self.dL_dmuS()
+        # TODO: find way to make faster
+        dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
 
-    def plot_latent(self, which_indices=None,*args, **kwargs):
+    def plot_latent(self, which_indices=None, *args, **kwargs):
         if which_indices is None:
             try:
@@ -93,6 +101,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                 raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
         else:
             input_1, input_2 = which_indices
-        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
+        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
         return ax
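The factored-out dKL_dmuS is consistent with KL_divergence above: for KL = 0.5 * (sum(mu**2) + sum(S - log S)) - 0.5 * N * Q, the gradients are dKL/dmu = mu and dKL/dS = 0.5 * (1 - 1/S). A standalone finite-difference check of that algebra (plain NumPy, not GPy code):

    import numpy as np

    def kl(mu, S):
        # same expression as Bayesian_GPLVM.KL_divergence
        return 0.5 * (np.square(mu).sum() + np.sum(S - np.log(S))) - 0.5 * mu.size

    mu, S = np.random.randn(4, 3), np.random.rand(4, 3) + 0.5
    dKL_dmu, dKL_dS = mu, 0.5 * (1. - 1. / S)    # analytic, as in dKL_dmuS

    eps, (i, j) = 1e-6, (1, 2)                   # perturb a single entry
    mu_p, mu_m = mu.copy(), mu.copy()
    mu_p[i, j] += eps; mu_m[i, j] -= eps
    assert abs((kl(mu_p, S) - kl(mu_m, S)) / (2 * eps) - dKL_dmu[i, j]) < 1e-6
    S_p, S_m = S.copy(), S.copy()
    S_p[i, j] += eps; S_m[i, j] -= eps
    assert abs((kl(mu, S_p) - kl(mu, S_m)) / (2 * eps) - dKL_dS[i, j]) < 1e-6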
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 2ce55dda..0c81a9c1 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -60,7 +60,7 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
 
-    def plot_latent(self,labels=None, which_indices=None, resolution=50):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
@@ -90,7 +90,7 @@ class GPLVM(GP):
         Xtest_full[:, :2] = Xtest
         mu, var, low, up = self.predict(Xtest_full)
         var = var[:, :1]
-        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],
+        ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
                   extent=[xmin[0], xmax[0], xmin[1], xmax[1]],
                   cmap=pb.cm.binary,interpolation='bilinear')
         for i,ul in enumerate(np.unique(labels)):
@@ -110,15 +110,15 @@ class GPLVM(GP):
             y = self.X[index,input_2]
             pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
 
-        pb.xlabel('latent dimension %i'%input_1)
-        pb.ylabel('latent dimension %i'%input_2)
+        ax.set_xlabel('latent dimension %i' % input_1)
+        ax.set_ylabel('latent dimension %i' % input_2)
 
         if not np.all(labels==1.):
-            pb.legend(loc=0,numpoints=1)
+            ax.legend(loc=0, numpoints=1)
 
-        pb.xlim(xmin[0],xmax[0])
-        pb.ylim(xmin[1],xmax[1])
-        pb.grid(b=False) # remove the grid if present, it doesn't look good
-        ax = pb.gca()
+        ax.set_xlim(xmin[0], xmax[0])
+        ax.set_ylim(xmin[1], xmax[1])
+        ax.grid(b=False) # remove the grid if present, it doesn't look good
+        # ax = pb.gca()
         ax.set_aspect('auto') # set a nice aspect ratio
         return ax
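One caveat with the new plot_latent signature: the default ax=pb.gca() is evaluated once, when GPLVM.py is imported, so every later call without an explicit ax draws into (and may first create) that single set of axes. If call-time resolution is wanted instead, the usual idiom is a None default; a sketch of that alternative, not what the patch does:

    import pylab as pb

    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None):
        if ax is None:        # resolve the target axes at call time,
            ax = pb.gca()     # not once at import time
        # ... plotting code unchanged ...
        return ax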
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 842ef42f..bd1c3528 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -121,23 +121,60 @@ class MRD(model):
 
     def log_likelihood(self):
-        ll = self.gref.KL_divergence()
+        ll = +self.gref.KL_divergence()
         for g in self.bgplvms:
-            ll += sparse_GP.log_likelihood(g)
-        return ll
+            ll -= sparse_GP.log_likelihood(g)
+        return -ll
 
     def _log_likelihood_gradients(self):
-        dldmus = self.gref.dL_dmuS().flatten()
-        dldzt1 = sparse_GP._log_likelihood_gradients(self.gref)
-        return numpy.hstack((dldmus,
+        dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
+        dKLmu, dKLdS = self.gref.dKL_dmuS()
+        dLdmu -= dKLmu
+        dLdS -= dKLdS
+        dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
+        dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
+
+        return numpy.hstack((dLdmuS,
                              dldzt1,
                              numpy.hstack([numpy.hstack([g.dL_dtheta(),
                                                          g.likelihood._gradients(\
                                                              partial=g.partial_for_likelihood)]) \
-                                           for g in self.bgplvms[1:]])))
+                                           for g in self.bgplvms])))
 
-    def plot_scales(self):
+    def plot_X(self):
+        fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
+        fig.clf()
+        for i, g in enumerate(self.bgplvms):
+            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            ax.imshow(g.X)
+        pylab.draw()
+        fig.tight_layout()
+        return fig
+
+    def plot_predict(self):
+        fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
+        fig.clf()
+        for i, g in enumerate(self.bgplvms):
+            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            ax.imshow(g.predict(g.X)[0])
+        pylab.draw()
+        fig.tight_layout()
+        return fig
+
+    def plot_scales(self, *args, **kwargs):
         fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
         for i, g in enumerate(self.bgplvms):
             ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
-            g.kern.plot_ARD(ax=ax)
+            g.kern.plot_ARD(ax=ax, *args, **kwargs)
+        pylab.draw()
+        fig.tight_layout()
+        return fig
+
+    def plot_latent(self, *args, **kwargs):
+        fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
+        for i, g in enumerate(self.bgplvms):
+            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            g.plot_latent(ax=ax, *args, **kwargs)
+        pylab.draw()
+        fig.tight_layout()
+        return fig
diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py
index e25a1040..d90a7430 100644
--- a/GPy/testing/mrd_tests.py
+++ b/GPy/testing/mrd_tests.py
@@ -10,23 +10,23 @@ import unittest
 import numpy as np
 import GPy
 
-# class MRDTests(unittest.TestCase):
-#
-#     # @unittest.skip('')
-#     def test_gradients(self):
-#         num_m = 2
-#         N, M, Q, D = 10, 3, 2, 4
-#         X = np.random.rand(N, Q)
-#         k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-#         K = k.K(X)
-#         Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
-#
-#         m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
-#         m._debug = True
-#         m.ensure_default_constraints()
-#         m.randomize()
-#         self.assertTrue(m.checkgrad())
-#
-# if __name__ == "__main__":
-#     print "Running unit tests, please be (very) patient..."
-#     # unittest.main()
+class MRDTests(unittest.TestCase):
+
+    def test_gradients(self):
+        num_m = 3
+        N, M, Q, D = 20, 8, 5, 50
+        X = np.random.rand(N, Q)
+
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+        K = k.K(X)
+        Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
+
+        m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
+        m.ensure_default_constraints()
+        m.randomize()
+
+        self.assertTrue(m.checkgrad())
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()
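For reference, the sign fix in MRD.log_likelihood makes the returned value the sum of the per-view sparse_GP bounds minus the shared KL term from gref, matching Bayesian_GPLVM.log_likelihood, and the KL gradient is correspondingly subtracted only once in _log_likelihood_gradients. The reduce call there element-wise sums the per-view (dL_dmu, dL_dS) pairs; in isolation it behaves as in this standalone sketch (illustrative shapes only):

    import numpy as np
    from functools import reduce   # builtin in Python 2, imported here for Python 3

    # three views, each contributing a (dL_dmu, dL_dS) pair of N x Q arrays
    N, Q = 4, 2
    pairs = [(i * np.ones((N, Q)), 10 * i * np.ones((N, Q))) for i in (1, 2, 3)]

    # element-wise sum of the pairs, as in MRD._log_likelihood_gradients
    dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], pairs)

    assert np.all(dLdmu == 6) and np.all(dLdS == 60)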