From cbf58d492065f11acd725805d50f1cb31ee1cde5 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 24 Mar 2014 10:13:50 +0000 Subject: [PATCH 01/13] Fixed bug in product kernel gradients diag wrt to X --- GPy/kern/_src/linear.py | 12 ++++++------ GPy/kern/_src/prod.py | 6 ++---- GPy/testing/kernel_tests.py | 12 ++++++++++++ GPy/testing/model_tests.py | 3 ++- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 7d9eeac2..609903aa 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -121,7 +121,7 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu) -# return (self.variances*gamma*mu).sum(axis=1) +# return (self.variances*gamma*mu).sum(axis=1) else: return self.K(variational_posterior.mean, Z) #the variance, it does nothing @@ -177,7 +177,7 @@ class Linear(Kern): grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\ np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ) - + return grad else: #psi1 @@ -191,15 +191,15 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean S = variational_posterior.variance - mu2S = np.square(mu)+S + mu2S = np.square(mu)+S _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) - + grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\ np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma) grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\ np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu) grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS) - + return grad_mu, grad_S, grad_gamma else: grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) @@ -210,7 +210,7 @@ class Linear(Kern): grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) # psi2 self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S) - + return grad_mu, grad_S #--------------------------------------------------# diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index e00f38c3..98b60366 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -58,8 +58,6 @@ class Prod(CombinationKernel): def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) - target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) + target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X) + target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X) return target - - diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 9ed218d8..36d55645 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -8,6 +8,7 @@ import sys verbose = 0 +np.random.seed(50) class Kern_check_model(GPy.core.Model): @@ -243,6 +244,17 @@ class KernelGradientTestsContinuous(unittest.TestCase): k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Prod2(self): + k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + + def test_Prod3(self): + k = GPy.kern.Matern32(2, active_dims=[2,3]) * (GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)) + k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Add(self): k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3c39c5e0..b14385d0 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -59,9 +59,10 @@ class MiscTests(unittest.TestCase): #np.testing.assert_almost_equal(mu_hat, mu) def test_likelihood_replicate(self): + tol = 1e-5 m = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y) - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood(), tol=tol) m.randomize() m2[:] = m[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) From 9c6abfc27001a26e49bb091f01c758fc6a26cad4 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 24 Mar 2014 10:15:46 +0000 Subject: [PATCH 02/13] Whoops! --- GPy/testing/model_tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index b14385d0..3c39c5e0 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -59,10 +59,9 @@ class MiscTests(unittest.TestCase): #np.testing.assert_almost_equal(mu_hat, mu) def test_likelihood_replicate(self): - tol = 1e-5 m = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y) - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood(), tol=tol) + np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) m.randomize() m2[:] = m[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) From 321a75100c495db36dfcc3ad724d3d81a6f7d330 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:16:57 +0000 Subject: [PATCH 03/13] finally added pca package again --- GPy/models/gplvm.py | 7 ++- GPy/models/mrd.py | 8 +-- GPy/util/initialization.py | 7 ++- GPy/util/linalg.py | 23 ------- GPy/util/pca.py | 122 +++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 33 deletions(-) create mode 100644 GPy/util/pca.py diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index b85540ce..fb7d93e7 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -5,7 +5,6 @@ import numpy as np import pylab as pb from .. import kern -from ..util.linalg import PCA from ..core import GP, Param from ..likelihoods import Gaussian from .. import util @@ -29,9 +28,11 @@ class GPLVM(GP): """ if X is None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) if kernel is None: - kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) likelihood = Gaussian() diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index ac2ef9cd..177ddc19 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -6,12 +6,12 @@ import itertools import pylab from ..core import Model -from ..util.linalg import PCA from ..kern import Kern from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..likelihoods import Gaussian +from GPy.util.initialization import initialize_latent class MRD(Model): """ @@ -71,7 +71,7 @@ class MRD(Model): self.num_inducing = self.Z.shape[0] # ensure M==N if M>N if X_variance is None: - X_variance = np.random.uniform(0, .2, X.shape) + X_variance = np.random.uniform(0, .1, X.shape) self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) @@ -147,11 +147,11 @@ class MRD(Model): if Ylist is None: Ylist = self.Ylist if init in "PCA_concat": - X = PCA(np.hstack(Ylist), self.input_dim)[0] + X = initialize_latent('PCA', np.hstack(Ylist), self.input_dim) elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): - X[:, qs] = PCA(Y, len(qs))[0] + X[:, qs] = initialize_latent('PCA', Y, len(qs)) else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) return X diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py index 24194b41..86efa3f0 100644 --- a/GPy/util/initialization.py +++ b/GPy/util/initialization.py @@ -5,13 +5,14 @@ Created on 24 Feb 2014 ''' import numpy as np -from linalg import PCA +from GPy.util.pca import pca def initialize_latent(init, input_dim, Y): Xr = np.random.randn(Y.shape[0], input_dim) if init == 'PCA': - PC = PCA(Y, input_dim)[0] + p = pca(Y) + PC = p.project(Y, min(input_dim, Y.shape[1])) Xr[:PC.shape[0], :PC.shape[1]] = PC else: pass - return Xr \ No newline at end of file + return Xr, p.fracs[:input_dim] \ No newline at end of file diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 4745c4aa..b204f813 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'): tmp, _ = dtrtrs(L, X, lower=1, trans=0) return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T -def PCA(Y, input_dim): - """ - Principal component analysis: maximum likelihood solution by SVD - - :param Y: NxD np.array of data - :param input_dim: int, dimension of projection - - - :rval X: - Nxinput_dim np.array of dimensionality reduced data - :rval W: - input_dimxD mapping from X to Y - - """ - if not np.allclose(Y.mean(axis=0), 0.0): - print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" - - # Y -= Y.mean(axis=0) - - Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False) - [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]] - v = X.std(axis=0) - X /= v; - W *= v; - return X, W.T diff --git a/GPy/util/pca.py b/GPy/util/pca.py new file mode 100644 index 00000000..6c548b3d --- /dev/null +++ b/GPy/util/pca.py @@ -0,0 +1,122 @@ +''' +Created on 10 Sep 2012 + +@author: Max Zwiessele +@copyright: Max Zwiessele 2012 +''' +import numpy +import pylab +import matplotlib +from numpy.linalg.linalg import LinAlgError + +class pca(object): + """ + pca module with automatic primal/dual determination. + """ + def __init__(self, X): + self.mu = X.mean(0) + self.sigma = X.std(0) + + X = self.center(X) + + # self.X = input + if X.shape[0] >= X.shape[1]: + # print "N >= D: using primal" + self.eigvals, self.eigvectors = self._primal_eig(X) + else: + # print "N < D: using dual" + self.eigvals, self.eigvectors = self._dual_eig(X) + self.sort = numpy.argsort(self.eigvals)[::-1] + self.eigvals = self.eigvals[self.sort] + self.eigvectors = self.eigvectors[:, self.sort] + self.fracs = self.eigvals / self.eigvals.sum() + self.Q = self.eigvals.shape[0] + + def center(self, X): + """ + Center `X` in pca space. + """ + X = X - self.mu + X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) + return X + + def _primal_eig(self, X): + return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X)) + + def _dual_eig(self, X): + dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X)) + relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:] + eigvals = dual_eigvals[relevant_dimensions] + eigvects = dual_eigvects[:, relevant_dimensions] + eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects) + eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects))) + return eigvals, eigvects + + def project(self, X, Q=None): + """ + Project X into pca space, defined by the Q highest eigenvalues. + Y = X dot V + """ + if Q is None: + Q = self.Q + if Q > X.shape[1]: + raise IndexError("requested dimension larger then input dimension") + X = self.center(X) + return X.dot(self.eigvectors[:, :Q]) + + def plot_fracs(self, Q=None, ax=None, fignum=None): + """ + Plot fractions of Eigenvalues sorted in descending order. + """ + if ax is None: + fig = pylab.figure(fignum) + ax = fig.add_subplot(111) + if Q is None: + Q = self.Q + ticks = numpy.arange(Q) + bar = ax.bar(ticks - .4, self.fracs[:Q]) + ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1)) + ax.set_ylabel("Eigenvalue fraction") + ax.set_xlabel("PC") + ax.set_ylim(0, ax.get_ylim()[1]) + ax.set_xlim(ticks.min() - .5, ticks.max() + .5) + try: + pylab.tight_layout() + except: + pass + return bar + + def plot_2d(self, X, labels=None, s=20, marker='o', + dimensions=(0, 1), ax=None, colors=None, + fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable + ** kwargs): + """ + Plot dimensions `dimensions` with given labels against each other in + PC space. Labels can be any sequence of labels of dimensions X.shape[0]. + Labels can be drawn with a subsequent call to legend() + """ + if ax is None: + fig = pylab.figure(fignum) + ax = fig.add_subplot(111) + if labels is None: + labels = numpy.zeros(X.shape[0]) + ulabels = [] + for lab in labels: + if not lab in ulabels: + ulabels.append(lab) + nlabels = len(ulabels) + if colors is None: + colors = [cmap(float(i) / nlabels) for i in range(nlabels)] + X_ = self.project(X, self.Q)[:,dimensions] + kwargs.update(dict(s=s)) + plots = list() + for i, l in enumerate(ulabels): + kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)])) + plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs)) + ax.set_xlabel(r"PC$_1$") + ax.set_ylabel(r"PC$_2$") + try: + pylab.tight_layout() + except: + pass + return plots \ No newline at end of file From d3054956939b9c4ad807b68ca2f00c7dd2384f9a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:22:02 +0000 Subject: [PATCH 04/13] testing --- GPy/testing/index_operations_tests.py | 6 ++++++ GPy/testing/model_tests.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index 12602879..37cec10b 100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -33,6 +33,8 @@ class Test(unittest.TestCase): self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), []) + self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), []) + def test_shift_left(self): self.view.shift_left(0, 2) self.assertListEqual(self.param_index[three].tolist(), [2,5]) @@ -82,6 +84,10 @@ class Test(unittest.TestCase): self.assertEqual(self.param_index.size, 6) self.assertEqual(self.view.size, 5) + def test_print(self): + print self.param_index + print self.view + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_index_view'] unittest.main() \ No newline at end of file diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3c39c5e0..3a2ef955 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -130,7 +130,7 @@ class MiscTests(unittest.TestCase): m2.kern[:] = m.kern[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) -class GradientTests(unittest.TestCase): +class GradientTests(np.testing.TestCase): def setUp(self): ###################################### # # 1 dimensional example From 401540cbf516b0f08182019792e60f4573030225 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:22:31 +0000 Subject: [PATCH 05/13] core updates --- GPy/core/parameterization/array_core.py | 21 +++---------------- GPy/core/parameterization/index_operations.py | 2 +- GPy/core/parameterization/parameterized.py | 2 +- GPy/models/bayesian_gplvm.py | 7 +++++-- 4 files changed, 10 insertions(+), 22 deletions(-) diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 780367c8..ab8214f2 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -__updated__ = '2014-03-21' +__updated__ = '2014-03-24' import numpy as np from parameter_core import Observable @@ -38,24 +38,9 @@ class ObsAr(np.ndarray, Observable): np.ndarray.__setstate__(self, state[0]) Observable._setstate(self, state[1]) - def _s_not_empty(self, s): - # this checks whether there is something picked by this slice. - return True - # TODO: disarmed, for performance increase, - if not isinstance(s, (list,tuple,np.ndarray)): - return True - if isinstance(s, (list,tuple)): - return len(s)!=0 - if isinstance(s, np.ndarray): - if s.dtype is bool: - return np.all(s) - else: - return s.size != 0 - def __setitem__(self, s, val): - if self._s_not_empty(s): - super(ObsAr, self).__setitem__(s, val) - self.notify_observers() + super(ObsAr, self).__setitem__(s, val) + self.notify_observers() def __getslice__(self, start, stop): return self.__getitem__(slice(start, stop)) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index c22d8b6b..e2a041f7 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -183,7 +183,7 @@ class ParameterIndexOperationsView(object): def remove(self, prop, indices): - removed = self._param_index_ops.remove(prop, indices+self._offset) + removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset) if removed.size > 0: return removed - self._size + 1 return removed diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 6460c988..bc83d8c8 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -63,7 +63,7 @@ class Parameterized(Parameterizable, Pickleable): # Metaclass for parameters changed after init. # This makes sure, that parameters changed will always be called after __init__ # **Never** call parameters_changed() yourself - __metaclass__ = ParametersChangedMeta + __metaclass__ = ParametersChangedMeta #=========================================================================== def __init__(self, name=None, parameters=[], *a, **kw): super(Parameterized, self).__init__(name=name, *a, **kw) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index fb821d64..a3ebdb7d 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -26,7 +26,10 @@ class BayesianGPLVM(SparseGP): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): if X == None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) + self.init = init if X_variance is None: @@ -38,7 +41,7 @@ class BayesianGPLVM(SparseGP): assert Z.shape[1] == X.shape[1] if kernel is None: - kernel = kern.RBF(input_dim) # + kern.white(input_dim) + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) if likelihood is None: likelihood = Gaussian() From f666d207f2874e75383dd2da7556d5089f2d4ddf Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:27:24 +0000 Subject: [PATCH 06/13] GPclassification has to default inference method to EP --- GPy/models/gp_classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 9d918cda..339dd2dd 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -6,6 +6,7 @@ import numpy as np from ..core import GP from .. import likelihoods from .. import kern +from ..inference.latent_function_inference.expectation_propagation import EP class GPClassification(GP): """ @@ -27,4 +28,4 @@ class GPClassification(GP): likelihood = likelihoods.Bernoulli() - GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='gp_classification') + GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification') From 5c28fd4d5ede012e282a497231fe3ed8a1d04202 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:32:41 +0000 Subject: [PATCH 07/13] deleted unused imports --- GPy/models/gp_classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 339dd2dd..2a4193ab 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -2,7 +2,6 @@ # Copyright (c) 2013, the GPy Authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) -import numpy as np from ..core import GP from .. import likelihoods from .. import kern From 29ff406c08e91784c062c492be2887dc6662d052 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 12:41:10 +0000 Subject: [PATCH 08/13] objective_function now standalone and only internal robust optimization loop --- GPy/core/model.py | 117 ++++++++++++++++++++++--------------- GPy/testing/model_tests.py | 7 +++ 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 1f53885c..f6cb101f 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -24,7 +24,6 @@ class Model(Parameterized): def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the model class" - def _log_likelihood_gradients(self): return self.gradient @@ -148,7 +147,60 @@ class Model(Parameterized): """ return self.kern.input_sensitivity() - def objective_function(self, x): + def objective_function(self): + """ + The objective function for the given algorithm. + + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need + to return the objective function here. + + For probabilistic models this is the negative log_likelihood + (including the MAP prior), so we return it here. If your model is not + probabilistic, just return your objective here! + """ + return -float(self.log_likelihood()) - self.log_prior() + + def objective_function_gradients(self): + """ + The gradients for the objective function for the given algorithm. + + You can find the gradient for the parameters in self.gradient at all times. + This is the place, where gradients get stored for parameters. + + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need + to return the gradient here. + + For probabilistic models this is the gradient of the negative log_likelihood + (including the MAP prior), so we return it here. If your model is not + probabilistic, just return your gradient here! + """ + return self._log_likelihood_gradients() + self._log_prior_gradients() + + def _grads(self, x): + """ + Gets the gradients from the likelihood and the priors. + + Failures are handled robustly. The algorithm will try several times to + return the gradients, and will raise the original exception if + the objective cannot be computed. + + :param x: the parameters of the model. + :type x: np.array + """ + try: + self._set_params_transformed(x) + obj_grads = -self._transform_gradients(self.objective_function_gradients()) + self._fail_count = 0 + except (LinAlgError, ZeroDivisionError, ValueError): + if self._fail_count >= self._allowed_failures: + raise + self._fail_count += 1 + obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + return obj_grads + + def _objective(self, x): """ The objective function passed to the optimizer. It combines the likelihood and the priors. @@ -162,55 +214,26 @@ class Model(Parameterized): """ try: self._set_params_transformed(x) + obj = self.objective_function() self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: + except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: - raise e + raise self._fail_count += 1 return np.inf - return -float(self.log_likelihood()) - self.log_prior() + return obj - def objective_function_gradients(self, x): - """ - Gets the gradients from the likelihood and the priors. - - Failures are handled robustly. The algorithm will try several times to - return the gradients, and will raise the original exception if - the objective cannot be computed. - - :param x: the parameters of the model. - :type x: np.array - """ + def _objective_grads(self, x): try: self._set_params_transformed(x) - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_f, obj_grads = self.objective_function(), self.objective_function_gradients() self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: + except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: - raise e - self._fail_count += 1 - obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) - return obj_grads - - def objective_and_gradients(self, x): - """ - Compute the objective function of the model and the gradient of the model at the point given by x. - - :param x: the point at which gradients are to be computed. - :type x: np.array - """ - - try: - self._set_params_transformed(x) - obj_f = -float(self.log_likelihood()) - self.log_prior() - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) - self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: - if self._fail_count >= self._allowed_failures: - raise e + raise self._fail_count += 1 obj_f = np.inf - obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) + obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): @@ -241,7 +264,7 @@ class Model(Parameterized): optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, **kwargs) - opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients) + opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) self.optimization_runs.append(opt) @@ -292,9 +315,9 @@ class Model(Parameterized): dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size)) # evaulate around the point x - f1 = self.objective_function(x + dx) - f2 = self.objective_function(x - dx) - gradient = self.objective_function_gradients(x) + f1 = self._objective(x + dx) + f2 = self._objective(x - dx) + gradient = self._grads(x) dx = dx[transformed_index] gradient = gradient[transformed_index] @@ -337,15 +360,15 @@ class Model(Parameterized): print "No free parameters to check" return - gradient = self.objective_function_gradients(x).copy() + gradient = self._grads(x).copy() np.where(gradient == 0, 1e-312, gradient) ret = True for nind, xind in itertools.izip(param_index, transformed_index): xx = x.copy() xx[xind] += step - f1 = self.objective_function(xx) + f1 = self._objective(xx) xx[xind] -= 2.*step - f2 = self.objective_function(xx) + f2 = self._objective(xx) numerical_gradient = (f1 - f2) / (2 * step) if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind] else: ratio = (f1 - f2) / (2 * step * gradient[xind]) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3a2ef955..4d20035d 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -130,6 +130,13 @@ class MiscTests(unittest.TestCase): m2.kern[:] = m.kern[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + def test_model_optimize(self): + X = np.random.uniform(-3., 3., (20, 1)) + Y = np.sin(X) + np.random.randn(20, 1) * 0.05 + m = GPy.models.GPRegression(X,Y) + m.optimize() + print m + class GradientTests(np.testing.TestCase): def setUp(self): ###################################### From f675c6b081416f80484c50a0f5fc047860ef108a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 12:41:39 +0000 Subject: [PATCH 09/13] bugfix for 3d and more dimensional _indices --- GPy/core/parameterization/param.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 984fc950..de16a1a0 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -282,8 +282,8 @@ class Param(OptimizationHandlable, ObsAr): if isinstance(slice_index, (tuple, list)): clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): - i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + i+=1 + clean_curr_slice += [range(self._realshape_[i])] if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), From 8d1cae645978d89302ae7dbc0a79259194e61b72 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:32:18 +0000 Subject: [PATCH 10/13] pca module for initialization --- GPy/util/initialization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py index 86efa3f0..22e63b6b 100644 --- a/GPy/util/initialization.py +++ b/GPy/util/initialization.py @@ -14,5 +14,6 @@ def initialize_latent(init, input_dim, Y): PC = p.project(Y, min(input_dim, Y.shape[1])) Xr[:PC.shape[0], :PC.shape[1]] = PC else: - pass + var = Xr.var(0) + return Xr, var/var.max() return Xr, p.fracs[:input_dim] \ No newline at end of file From 6b8e4185979c28fec48d4654383e7326dc882c17 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:32:28 +0000 Subject: [PATCH 11/13] vardtc updates --- GPy/inference/latent_function_inference/var_dtc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index ee2d6250..53f12722 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -179,6 +179,7 @@ class VarDTC(object): return post, log_marginal, grad_dict class VarDTCMissingData(object): + const_jitter = 1e-6 def __init__(self, limit=1): from ...util.caching import Cacher self._Y = Cacher(self._subarray_computations, limit) @@ -250,7 +251,7 @@ class VarDTCMissingData(object): for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): if het_noise: beta = beta_all[ind] - else: beta = beta_all[0] + else: beta = beta_all VVT_factor = (beta*y) VVT_factor_all[v, ind].flat = VVT_factor.flat @@ -311,7 +312,7 @@ class VarDTCMissingData(object): het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, - data_fit, num_data, output_dim, trYYT) + data_fit, num_data, output_dim, trYYT, Y) if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf else: From 3db095338db5124bf5b5fba261493f7be286fba5 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:32:56 +0000 Subject: [PATCH 12/13] objective function seperate from calls for optimizer --- GPy/core/model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index f6cb101f..47243b79 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -176,7 +176,7 @@ class Model(Parameterized): (including the MAP prior), so we return it here. If your model is not probabilistic, just return your gradient here! """ - return self._log_likelihood_gradients() + self._log_prior_gradients() + return -(self._log_likelihood_gradients() + self._log_prior_gradients()) def _grads(self, x): """ @@ -191,13 +191,13 @@ class Model(Parameterized): """ try: self._set_params_transformed(x) - obj_grads = -self._transform_gradients(self.objective_function_gradients()) + obj_grads = self._transform_gradients(self.objective_function_gradients()) self._fail_count = 0 except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: raise self._fail_count += 1 - obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_grads def _objective(self, x): @@ -226,14 +226,14 @@ class Model(Parameterized): def _objective_grads(self, x): try: self._set_params_transformed(x) - obj_f, obj_grads = self.objective_function(), self.objective_function_gradients() + obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients()) self._fail_count = 0 except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: raise self._fail_count += 1 obj_f = np.inf - obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): From 1294c24a28bc46b6d8e47b4a820589f454290093 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:33:16 +0000 Subject: [PATCH 13/13] mrd and bgplvm updates to conform new vardtc --- GPy/examples/dimensionality_reduction.py | 17 +++++++----- GPy/models/bayesian_gplvm.py | 8 ++++++ GPy/models/mrd.py | 34 ++++++++++++++---------- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index ea997d63..8171a032 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -277,7 +277,9 @@ def bgplvm_simulation(optimize=True, verbose=1, k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) #k = kern.RBF(Q, ARD=True, lengthscale=10.) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) - + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.likelihood.variance = .1 + if optimize: print "Optimizing model:" m.optimize('bfgs', messages=verbose, max_iters=max_iters, @@ -299,15 +301,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m.inference_method = VarDTCMissingData() m.Y[inan] = _np.nan - m.X.variance *= .1 + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.likelihood.variance = .01 m.parameters_changed() m.Yreal = Y - + if optimize: print "Optimizing model:" m.optimize('bfgs', messages=verbose, max_iters=max_iters, @@ -325,11 +328,11 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) - + #Ylist = [Ylist[0]] - k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))] + k = [kern.Linear(Q, ARD=True) for _ in range(len(Ylist))] m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw) - + m['.*noise'] = [Y.var()/500. for Y in Ylist] #for i, Y in enumerate(Ylist): # m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500. diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index a3ebdb7d..ef3462f6 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -50,6 +50,14 @@ class BayesianGPLVM(SparseGP): self.variational_prior = NormalPrior() X = NormalPosterior(X, X_variance) + if inference_method is None: + if np.any(np.isnan(Y)): + from ..inference.latent_function_inference.var_dtc import VarDTCMissingData + inference_method = VarDTCMissingData() + else: + from ..inference.latent_function_inference.var_dtc import VarDTC + inference_method = VarDTC() + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 177ddc19..36088e35 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -51,24 +51,25 @@ class MRD(Model): inference_method=None, likelihood=None, name='mrd', Ynames=None): super(MRD, self).__init__(name) + self.input_dim = input_dim + self.num_inducing = num_inducing + + self.Ylist = Ylist + self._in_init_ = True + X, fracs = self._init_X(initx, Ylist) + self.Z = Param('inducing inputs', self._init_Z(initz, X)) + self.num_inducing = self.Z.shape[0] # ensure M==N if M>N + # sort out the kernels if kernel is None: from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))] + self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] elif isinstance(kernel, Kern): self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))] else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" self.kern = kernel - self.input_dim = input_dim - self.num_inducing = num_inducing - - self.Ylist = Ylist - self._in_init_ = True - X = self._init_X(initx, Ylist) - self.Z = Param('inducing inputs', self._init_Z(initz, X)) - self.num_inducing = self.Z.shape[0] # ensure M==N if M>N if X_variance is None: X_variance = np.random.uniform(0, .1, X.shape) @@ -108,8 +109,7 @@ class MRD(Model): self._log_marginal_likelihood = 0 self.posteriors = [] self.Z.gradient = 0. - self.X.mean.gradient = 0. - self.X.variance.gradient = 0. + self.X.gradient = 0. for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) @@ -147,14 +147,20 @@ class MRD(Model): if Ylist is None: Ylist = self.Ylist if init in "PCA_concat": - X = initialize_latent('PCA', np.hstack(Ylist), self.input_dim) + X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist)) + fracs = [fracs]*self.input_dim elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) + fracs = [] for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): - X[:, qs] = initialize_latent('PCA', Y, len(qs)) + x,frcs = initialize_latent('PCA', len(qs), Y) + X[:, qs] = x + fracs.append(frcs) else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) - return X + fracs = X.var(0) + fracs = [fracs]*self.input_dim + return X, fracs def _init_Z(self, init="permute", X=None): if X is None: