From cbf58d492065f11acd725805d50f1cb31ee1cde5 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 24 Mar 2014 10:13:50 +0000 Subject: [PATCH 01/51] Fixed bug in product kernel gradients diag wrt to X --- GPy/kern/_src/linear.py | 12 ++++++------ GPy/kern/_src/prod.py | 6 ++---- GPy/testing/kernel_tests.py | 12 ++++++++++++ GPy/testing/model_tests.py | 3 ++- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 7d9eeac2..609903aa 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -121,7 +121,7 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu) -# return (self.variances*gamma*mu).sum(axis=1) +# return (self.variances*gamma*mu).sum(axis=1) else: return self.K(variational_posterior.mean, Z) #the variance, it does nothing @@ -177,7 +177,7 @@ class Linear(Kern): grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\ np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ) - + return grad else: #psi1 @@ -191,15 +191,15 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean S = variational_posterior.variance - mu2S = np.square(mu)+S + mu2S = np.square(mu)+S _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) - + grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\ np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma) grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\ np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu) grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS) - + return grad_mu, grad_S, grad_gamma else: grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), 
np.zeros(variational_posterior.mean.shape) @@ -210,7 +210,7 @@ class Linear(Kern): grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) # psi2 self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S) - + return grad_mu, grad_S #--------------------------------------------------# diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index e00f38c3..98b60366 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -58,8 +58,6 @@ class Prod(CombinationKernel): def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) - target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) + target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X) + target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X) return target - - diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 9ed218d8..36d55645 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -8,6 +8,7 @@ import sys verbose = 0 +np.random.seed(50) class Kern_check_model(GPy.core.Model): @@ -243,6 +244,17 @@ class KernelGradientTestsContinuous(unittest.TestCase): k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Prod2(self): + k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + + def test_Prod3(self): + k = GPy.kern.Matern32(2, active_dims=[2,3]) * (GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)) + k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Add(self): k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k += GPy.kern.Matern32(2, 
active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3c39c5e0..b14385d0 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -59,9 +59,10 @@ class MiscTests(unittest.TestCase): #np.testing.assert_almost_equal(mu_hat, mu) def test_likelihood_replicate(self): + tol = 1e-5 m = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y) - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood(), tol=tol) m.randomize() m2[:] = m[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) From 9c6abfc27001a26e49bb091f01c758fc6a26cad4 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 24 Mar 2014 10:15:46 +0000 Subject: [PATCH 02/51] Whoops! --- GPy/testing/model_tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index b14385d0..3c39c5e0 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -59,10 +59,9 @@ class MiscTests(unittest.TestCase): #np.testing.assert_almost_equal(mu_hat, mu) def test_likelihood_replicate(self): - tol = 1e-5 m = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y) - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood(), tol=tol) + np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) m.randomize() m2[:] = m[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) From 321a75100c495db36dfcc3ad724d3d81a6f7d330 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:16:57 +0000 Subject: [PATCH 03/51] finally added pca package again --- GPy/models/gplvm.py | 7 ++- GPy/models/mrd.py | 8 +-- GPy/util/initialization.py | 7 ++- GPy/util/linalg.py | 23 ------- GPy/util/pca.py | 122 +++++++++++++++++++++++++++++++++++++ 5 
files changed, 134 insertions(+), 33 deletions(-) create mode 100644 GPy/util/pca.py diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index b85540ce..fb7d93e7 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -5,7 +5,6 @@ import numpy as np import pylab as pb from .. import kern -from ..util.linalg import PCA from ..core import GP, Param from ..likelihoods import Gaussian from .. import util @@ -29,9 +28,11 @@ class GPLVM(GP): """ if X is None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) if kernel is None: - kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) likelihood = Gaussian() diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index ac2ef9cd..177ddc19 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -6,12 +6,12 @@ import itertools import pylab from ..core import Model -from ..util.linalg import PCA from ..kern import Kern from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..likelihoods import Gaussian +from GPy.util.initialization import initialize_latent class MRD(Model): """ @@ -71,7 +71,7 @@ class MRD(Model): self.num_inducing = self.Z.shape[0] # ensure M==N if M>N if X_variance is None: - X_variance = np.random.uniform(0, .2, X.shape) + X_variance = np.random.uniform(0, .1, X.shape) self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) @@ -147,11 +147,11 @@ class MRD(Model): if Ylist is None: Ylist = self.Ylist if init in "PCA_concat": - X = PCA(np.hstack(Ylist), self.input_dim)[0] + X = initialize_latent('PCA', np.hstack(Ylist), self.input_dim) elif init in 
"PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): - X[:, qs] = PCA(Y, len(qs))[0] + X[:, qs] = initialize_latent('PCA', Y, len(qs)) else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) return X diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py index 24194b41..86efa3f0 100644 --- a/GPy/util/initialization.py +++ b/GPy/util/initialization.py @@ -5,13 +5,14 @@ Created on 24 Feb 2014 ''' import numpy as np -from linalg import PCA +from GPy.util.pca import pca def initialize_latent(init, input_dim, Y): Xr = np.random.randn(Y.shape[0], input_dim) if init == 'PCA': - PC = PCA(Y, input_dim)[0] + p = pca(Y) + PC = p.project(Y, min(input_dim, Y.shape[1])) Xr[:PC.shape[0], :PC.shape[1]] = PC else: pass - return Xr \ No newline at end of file + return Xr, p.fracs[:input_dim] \ No newline at end of file diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 4745c4aa..b204f813 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'): tmp, _ = dtrtrs(L, X, lower=1, trans=0) return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T -def PCA(Y, input_dim): - """ - Principal component analysis: maximum likelihood solution by SVD - - :param Y: NxD np.array of data - :param input_dim: int, dimension of projection - - - :rval X: - Nxinput_dim np.array of dimensionality reduced data - :rval W: - input_dimxD mapping from X to Y - - """ - if not np.allclose(Y.mean(axis=0), 0.0): - print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" - - # Y -= Y.mean(axis=0) - - Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False) - [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]] - v = X.std(axis=0) - X /= v; - W *= v; - return X, W.T diff --git a/GPy/util/pca.py b/GPy/util/pca.py new file mode 100644 index 00000000..6c548b3d --- /dev/null +++ 
b/GPy/util/pca.py @@ -0,0 +1,122 @@ +''' +Created on 10 Sep 2012 + +@author: Max Zwiessele +@copyright: Max Zwiessele 2012 +''' +import numpy +import pylab +import matplotlib +from numpy.linalg.linalg import LinAlgError + +class pca(object): + """ + pca module with automatic primal/dual determination. + """ + def __init__(self, X): + self.mu = X.mean(0) + self.sigma = X.std(0) + + X = self.center(X) + + # self.X = input + if X.shape[0] >= X.shape[1]: + # print "N >= D: using primal" + self.eigvals, self.eigvectors = self._primal_eig(X) + else: + # print "N < D: using dual" + self.eigvals, self.eigvectors = self._dual_eig(X) + self.sort = numpy.argsort(self.eigvals)[::-1] + self.eigvals = self.eigvals[self.sort] + self.eigvectors = self.eigvectors[:, self.sort] + self.fracs = self.eigvals / self.eigvals.sum() + self.Q = self.eigvals.shape[0] + + def center(self, X): + """ + Center `X` in pca space. + """ + X = X - self.mu + X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) + return X + + def _primal_eig(self, X): + return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X)) + + def _dual_eig(self, X): + dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X)) + relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:] + eigvals = dual_eigvals[relevant_dimensions] + eigvects = dual_eigvects[:, relevant_dimensions] + eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects) + eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects))) + return eigvals, eigvects + + def project(self, X, Q=None): + """ + Project X into pca space, defined by the Q highest eigenvalues. + Y = X dot V + """ + if Q is None: + Q = self.Q + if Q > X.shape[1]: + raise IndexError("requested dimension larger then input dimension") + X = self.center(X) + return X.dot(self.eigvectors[:, :Q]) + + def plot_fracs(self, Q=None, ax=None, fignum=None): + """ + Plot fractions of Eigenvalues sorted in descending order. 
+ """ + if ax is None: + fig = pylab.figure(fignum) + ax = fig.add_subplot(111) + if Q is None: + Q = self.Q + ticks = numpy.arange(Q) + bar = ax.bar(ticks - .4, self.fracs[:Q]) + ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1)) + ax.set_ylabel("Eigenvalue fraction") + ax.set_xlabel("PC") + ax.set_ylim(0, ax.get_ylim()[1]) + ax.set_xlim(ticks.min() - .5, ticks.max() + .5) + try: + pylab.tight_layout() + except: + pass + return bar + + def plot_2d(self, X, labels=None, s=20, marker='o', + dimensions=(0, 1), ax=None, colors=None, + fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable + ** kwargs): + """ + Plot dimensions `dimensions` with given labels against each other in + PC space. Labels can be any sequence of labels of dimensions X.shape[0]. + Labels can be drawn with a subsequent call to legend() + """ + if ax is None: + fig = pylab.figure(fignum) + ax = fig.add_subplot(111) + if labels is None: + labels = numpy.zeros(X.shape[0]) + ulabels = [] + for lab in labels: + if not lab in ulabels: + ulabels.append(lab) + nlabels = len(ulabels) + if colors is None: + colors = [cmap(float(i) / nlabels) for i in range(nlabels)] + X_ = self.project(X, self.Q)[:,dimensions] + kwargs.update(dict(s=s)) + plots = list() + for i, l in enumerate(ulabels): + kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)])) + plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs)) + ax.set_xlabel(r"PC$_1$") + ax.set_ylabel(r"PC$_2$") + try: + pylab.tight_layout() + except: + pass + return plots \ No newline at end of file From d3054956939b9c4ad807b68ca2f00c7dd2384f9a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:22:02 +0000 Subject: [PATCH 04/51] testing --- GPy/testing/index_operations_tests.py | 6 ++++++ GPy/testing/model_tests.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index 12602879..37cec10b 100644 --- 
a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -33,6 +33,8 @@ class Test(unittest.TestCase): self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), []) + self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), []) + def test_shift_left(self): self.view.shift_left(0, 2) self.assertListEqual(self.param_index[three].tolist(), [2,5]) @@ -82,6 +84,10 @@ class Test(unittest.TestCase): self.assertEqual(self.param_index.size, 6) self.assertEqual(self.view.size, 5) + def test_print(self): + print self.param_index + print self.view + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_index_view'] unittest.main() \ No newline at end of file diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3c39c5e0..3a2ef955 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -130,7 +130,7 @@ class MiscTests(unittest.TestCase): m2.kern[:] = m.kern[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) -class GradientTests(unittest.TestCase): +class GradientTests(np.testing.TestCase): def setUp(self): ###################################### # # 1 dimensional example From 401540cbf516b0f08182019792e60f4573030225 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:22:31 +0000 Subject: [PATCH 05/51] core updates --- GPy/core/parameterization/array_core.py | 21 +++---------------- GPy/core/parameterization/index_operations.py | 2 +- GPy/core/parameterization/parameterized.py | 2 +- GPy/models/bayesian_gplvm.py | 7 +++++-- 4 files changed, 10 insertions(+), 22 deletions(-) diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 780367c8..ab8214f2 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -__updated__ = '2014-03-21' +__updated__ = '2014-03-24' import numpy as np from parameter_core import Observable @@ -38,24 +38,9 @@ class ObsAr(np.ndarray, Observable): np.ndarray.__setstate__(self, state[0]) Observable._setstate(self, state[1]) - def _s_not_empty(self, s): - # this checks whether there is something picked by this slice. - return True - # TODO: disarmed, for performance increase, - if not isinstance(s, (list,tuple,np.ndarray)): - return True - if isinstance(s, (list,tuple)): - return len(s)!=0 - if isinstance(s, np.ndarray): - if s.dtype is bool: - return np.all(s) - else: - return s.size != 0 - def __setitem__(self, s, val): - if self._s_not_empty(s): - super(ObsAr, self).__setitem__(s, val) - self.notify_observers() + super(ObsAr, self).__setitem__(s, val) + self.notify_observers() def __getslice__(self, start, stop): return self.__getitem__(slice(start, stop)) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index c22d8b6b..e2a041f7 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -183,7 +183,7 @@ class ParameterIndexOperationsView(object): def remove(self, prop, indices): - removed = self._param_index_ops.remove(prop, indices+self._offset) + removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset) if removed.size > 0: return removed - self._size + 1 return removed diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 6460c988..bc83d8c8 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -63,7 +63,7 @@ class Parameterized(Parameterizable, Pickleable): # Metaclass for parameters changed after init. 
# This makes sure, that parameters changed will always be called after __init__ # **Never** call parameters_changed() yourself - __metaclass__ = ParametersChangedMeta + __metaclass__ = ParametersChangedMeta #=========================================================================== def __init__(self, name=None, parameters=[], *a, **kw): super(Parameterized, self).__init__(name=name, *a, **kw) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index fb821d64..a3ebdb7d 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -26,7 +26,10 @@ class BayesianGPLVM(SparseGP): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): if X == None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) + self.init = init if X_variance is None: @@ -38,7 +41,7 @@ class BayesianGPLVM(SparseGP): assert Z.shape[1] == X.shape[1] if kernel is None: - kernel = kern.RBF(input_dim) # + kern.white(input_dim) + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) if likelihood is None: likelihood = Gaussian() From f666d207f2874e75383dd2da7556d5089f2d4ddf Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:27:24 +0000 Subject: [PATCH 06/51] GPclassification has to default inference method to EP --- GPy/models/gp_classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 9d918cda..339dd2dd 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -6,6 +6,7 @@ import numpy as np from ..core import GP from .. import likelihoods from .. 
import kern +from ..inference.latent_function_inference.expectation_propagation import EP class GPClassification(GP): """ @@ -27,4 +28,4 @@ class GPClassification(GP): likelihood = likelihoods.Bernoulli() - GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='gp_classification') + GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification') From 5c28fd4d5ede012e282a497231fe3ed8a1d04202 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 11:32:41 +0000 Subject: [PATCH 07/51] deleted unused imports --- GPy/models/gp_classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 339dd2dd..2a4193ab 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -2,7 +2,6 @@ # Copyright (c) 2013, the GPy Authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) -import numpy as np from ..core import GP from .. import likelihoods from .. 
import kern From 29ff406c08e91784c062c492be2887dc6662d052 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 12:41:10 +0000 Subject: [PATCH 08/51] objective_function now standalone and only internal robust optimization loop --- GPy/core/model.py | 117 ++++++++++++++++++++++--------------- GPy/testing/model_tests.py | 7 +++ 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 1f53885c..f6cb101f 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -24,7 +24,6 @@ class Model(Parameterized): def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the model class" - def _log_likelihood_gradients(self): return self.gradient @@ -148,7 +147,60 @@ class Model(Parameterized): """ return self.kern.input_sensitivity() - def objective_function(self, x): + def objective_function(self): + """ + The objective function for the given algorithm. + + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need + to return the objective function here. + + For probabilistic models this is the negative log_likelihood + (including the MAP prior), so we return it here. If your model is not + probabilistic, just return your objective here! + """ + return -float(self.log_likelihood()) - self.log_prior() + + def objective_function_gradients(self): + """ + The gradients for the objective function for the given algorithm. + + You can find the gradient for the parameters in self.gradient at all times. + This is the place, where gradients get stored for parameters. + + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need + to return the gradient here. + + For probabilistic models this is the gradient of the negative log_likelihood + (including the MAP prior), so we return it here. 
If your model is not + probabilistic, just return your gradient here! + """ + return self._log_likelihood_gradients() + self._log_prior_gradients() + + def _grads(self, x): + """ + Gets the gradients from the likelihood and the priors. + + Failures are handled robustly. The algorithm will try several times to + return the gradients, and will raise the original exception if + the objective cannot be computed. + + :param x: the parameters of the model. + :type x: np.array + """ + try: + self._set_params_transformed(x) + obj_grads = -self._transform_gradients(self.objective_function_gradients()) + self._fail_count = 0 + except (LinAlgError, ZeroDivisionError, ValueError): + if self._fail_count >= self._allowed_failures: + raise + self._fail_count += 1 + obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + return obj_grads + + def _objective(self, x): """ The objective function passed to the optimizer. It combines the likelihood and the priors. @@ -162,55 +214,26 @@ class Model(Parameterized): """ try: self._set_params_transformed(x) + obj = self.objective_function() self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: + except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: - raise e + raise self._fail_count += 1 return np.inf - return -float(self.log_likelihood()) - self.log_prior() + return obj - def objective_function_gradients(self, x): - """ - Gets the gradients from the likelihood and the priors. - - Failures are handled robustly. The algorithm will try several times to - return the gradients, and will raise the original exception if - the objective cannot be computed. - - :param x: the parameters of the model. 
- :type x: np.array - """ + def _objective_grads(self, x): try: self._set_params_transformed(x) - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_f, obj_grads = self.objective_function(), self.objective_function_gradients() self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: + except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: - raise e - self._fail_count += 1 - obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) - return obj_grads - - def objective_and_gradients(self, x): - """ - Compute the objective function of the model and the gradient of the model at the point given by x. - - :param x: the point at which gradients are to be computed. - :type x: np.array - """ - - try: - self._set_params_transformed(x) - obj_f = -float(self.log_likelihood()) - self.log_prior() - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) - self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: - if self._fail_count >= self._allowed_failures: - raise e + raise self._fail_count += 1 obj_f = np.inf - obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) + obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): @@ -241,7 +264,7 @@ class Model(Parameterized): optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, **kwargs) - opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients) + opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) self.optimization_runs.append(opt) @@ -292,9 +315,9 @@ class 
Model(Parameterized): dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size)) # evaulate around the point x - f1 = self.objective_function(x + dx) - f2 = self.objective_function(x - dx) - gradient = self.objective_function_gradients(x) + f1 = self._objective(x + dx) + f2 = self._objective(x - dx) + gradient = self._grads(x) dx = dx[transformed_index] gradient = gradient[transformed_index] @@ -337,15 +360,15 @@ class Model(Parameterized): print "No free parameters to check" return - gradient = self.objective_function_gradients(x).copy() + gradient = self._grads(x).copy() np.where(gradient == 0, 1e-312, gradient) ret = True for nind, xind in itertools.izip(param_index, transformed_index): xx = x.copy() xx[xind] += step - f1 = self.objective_function(xx) + f1 = self._objective(xx) xx[xind] -= 2.*step - f2 = self.objective_function(xx) + f2 = self._objective(xx) numerical_gradient = (f1 - f2) / (2 * step) if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind] else: ratio = (f1 - f2) / (2 * step * gradient[xind]) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3a2ef955..4d20035d 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -130,6 +130,13 @@ class MiscTests(unittest.TestCase): m2.kern[:] = m.kern[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + def test_model_optimize(self): + X = np.random.uniform(-3., 3., (20, 1)) + Y = np.sin(X) + np.random.randn(20, 1) * 0.05 + m = GPy.models.GPRegression(X,Y) + m.optimize() + print m + class GradientTests(np.testing.TestCase): def setUp(self): ###################################### From f675c6b081416f80484c50a0f5fc047860ef108a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 12:41:39 +0000 Subject: [PATCH 09/51] bugfix for 3d and more dimensional _indices --- GPy/core/parameterization/param.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 984fc950..de16a1a0 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -282,8 +282,8 @@ class Param(OptimizationHandlable, ObsAr): if isinstance(slice_index, (tuple, list)): clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): - i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + i+=1 + clean_curr_slice += [range(self._realshape_[i])] if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), From 8d1cae645978d89302ae7dbc0a79259194e61b72 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:32:18 +0000 Subject: [PATCH 10/51] pca module for initialization --- GPy/util/initialization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py index 86efa3f0..22e63b6b 100644 --- a/GPy/util/initialization.py +++ b/GPy/util/initialization.py @@ -14,5 +14,6 @@ def initialize_latent(init, input_dim, Y): PC = p.project(Y, min(input_dim, Y.shape[1])) Xr[:PC.shape[0], :PC.shape[1]] = PC else: - pass + var = Xr.var(0) + return Xr, var/var.max() return Xr, p.fracs[:input_dim] \ No newline at end of file From 6b8e4185979c28fec48d4654383e7326dc882c17 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:32:28 +0000 Subject: [PATCH 11/51] vardtc updates --- GPy/inference/latent_function_inference/var_dtc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index ee2d6250..53f12722 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -179,6 +179,7 @@ 
class VarDTC(object): return post, log_marginal, grad_dict class VarDTCMissingData(object): + const_jitter = 1e-6 def __init__(self, limit=1): from ...util.caching import Cacher self._Y = Cacher(self._subarray_computations, limit) @@ -250,7 +251,7 @@ class VarDTCMissingData(object): for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): if het_noise: beta = beta_all[ind] - else: beta = beta_all[0] + else: beta = beta_all VVT_factor = (beta*y) VVT_factor_all[v, ind].flat = VVT_factor.flat @@ -311,7 +312,7 @@ class VarDTCMissingData(object): het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, - data_fit, num_data, output_dim, trYYT) + data_fit, num_data, output_dim, trYYT, Y) if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf else: From 3db095338db5124bf5b5fba261493f7be286fba5 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:32:56 +0000 Subject: [PATCH 12/51] objective function seperate from calls for optimizer --- GPy/core/model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index f6cb101f..47243b79 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -176,7 +176,7 @@ class Model(Parameterized): (including the MAP prior), so we return it here. If your model is not probabilistic, just return your gradient here! 
""" - return self._log_likelihood_gradients() + self._log_prior_gradients() + return -(self._log_likelihood_gradients() + self._log_prior_gradients()) def _grads(self, x): """ @@ -191,13 +191,13 @@ class Model(Parameterized): """ try: self._set_params_transformed(x) - obj_grads = -self._transform_gradients(self.objective_function_gradients()) + obj_grads = self._transform_gradients(self.objective_function_gradients()) self._fail_count = 0 except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: raise self._fail_count += 1 - obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_grads def _objective(self, x): @@ -226,14 +226,14 @@ class Model(Parameterized): def _objective_grads(self, x): try: self._set_params_transformed(x) - obj_f, obj_grads = self.objective_function(), self.objective_function_gradients() + obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients()) self._fail_count = 0 except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: raise self._fail_count += 1 obj_f = np.inf - obj_grads = np.clip(-self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): From 1294c24a28bc46b6d8e47b4a820589f454290093 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Mar 2014 13:33:16 +0000 Subject: [PATCH 13/51] mrd and bgplvm updates to conform new vardtc --- GPy/examples/dimensionality_reduction.py | 17 +++++++----- GPy/models/bayesian_gplvm.py | 8 ++++++ GPy/models/mrd.py | 34 ++++++++++++++---------- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git 
a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index ea997d63..8171a032 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -277,7 +277,9 @@ def bgplvm_simulation(optimize=True, verbose=1, k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) #k = kern.RBF(Q, ARD=True, lengthscale=10.) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) - + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.likelihood.variance = .1 + if optimize: print "Optimizing model:" m.optimize('bfgs', messages=verbose, max_iters=max_iters, @@ -299,15 +301,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m.inference_method = VarDTCMissingData() m.Y[inan] = _np.nan - m.X.variance *= .1 + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.likelihood.variance = .01 m.parameters_changed() m.Yreal = Y - + if optimize: print "Optimizing model:" m.optimize('bfgs', messages=verbose, max_iters=max_iters, @@ -325,11 +328,11 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) - + #Ylist = [Ylist[0]] - k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))] + k = [kern.Linear(Q, ARD=True) for _ in range(len(Ylist))] m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw) - + m['.*noise'] = [Y.var()/500. for Y in Ylist] #for i, Y in enumerate(Ylist): # m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500. 
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index a3ebdb7d..ef3462f6 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -50,6 +50,14 @@ class BayesianGPLVM(SparseGP): self.variational_prior = NormalPrior() X = NormalPosterior(X, X_variance) + if inference_method is None: + if np.any(np.isnan(Y)): + from ..inference.latent_function_inference.var_dtc import VarDTCMissingData + inference_method = VarDTCMissingData() + else: + from ..inference.latent_function_inference.var_dtc import VarDTC + inference_method = VarDTC() + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 177ddc19..36088e35 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -51,24 +51,25 @@ class MRD(Model): inference_method=None, likelihood=None, name='mrd', Ynames=None): super(MRD, self).__init__(name) + self.input_dim = input_dim + self.num_inducing = num_inducing + + self.Ylist = Ylist + self._in_init_ = True + X, fracs = self._init_X(initx, Ylist) + self.Z = Param('inducing inputs', self._init_Z(initz, X)) + self.num_inducing = self.Z.shape[0] # ensure M==N if M>N + # sort out the kernels if kernel is None: from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))] + self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] elif isinstance(kernel, Kern): self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))] else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" 
self.kern = kernel - self.input_dim = input_dim - self.num_inducing = num_inducing - - self.Ylist = Ylist - self._in_init_ = True - X = self._init_X(initx, Ylist) - self.Z = Param('inducing inputs', self._init_Z(initz, X)) - self.num_inducing = self.Z.shape[0] # ensure M==N if M>N if X_variance is None: X_variance = np.random.uniform(0, .1, X.shape) @@ -108,8 +109,7 @@ class MRD(Model): self._log_marginal_likelihood = 0 self.posteriors = [] self.Z.gradient = 0. - self.X.mean.gradient = 0. - self.X.variance.gradient = 0. + self.X.gradient = 0. for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) @@ -147,14 +147,20 @@ class MRD(Model): if Ylist is None: Ylist = self.Ylist if init in "PCA_concat": - X = initialize_latent('PCA', np.hstack(Ylist), self.input_dim) + X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist)) + fracs = [fracs]*self.input_dim elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) + fracs = [] for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): - X[:, qs] = initialize_latent('PCA', Y, len(qs)) + x,frcs = initialize_latent('PCA', len(qs), Y) + X[:, qs] = x + fracs.append(frcs) else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) - return X + fracs = X.var(0) + fracs = [fracs]*self.input_dim + return X, fracs def _init_Z(self, init="permute", X=None): if X is None: From 17c2799b1e8377f45cec52be156862b4b416fe41 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 25 Mar 2014 16:59:52 +0000 Subject: [PATCH 14/51] Full Linear kernel added, inc testing --- GPy/kern/__init__.py | 2 +- GPy/kern/_src/kern.py | 2 +- GPy/kern/_src/linear.py | 44 +++++++++++++++++++++++++++++++++++++ GPy/kern/_src/rbf.py | 2 +- GPy/testing/kernel_tests.py | 5 +++++ 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/GPy/kern/__init__.py 
b/GPy/kern/__init__.py index 0e265a64..55b69bd7 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,6 +1,6 @@ from _src.kern import Kern from _src.rbf import RBF -from _src.linear import Linear +from _src.linear import Linear, LinearFull from _src.static import Bias, White from _src.brownian import Brownian from _src.sympykern import Sympykern diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 31fa8690..9d8d3f7b 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -11,7 +11,7 @@ from ...util.caching import Cache_this class Kern(Parameterized): #=========================================================================== - # This adds input slice support. The rather ugly code for slicing can be + # This adds input slice support. The rather ugly code for slicing can be # found in kernel_slice_operations __metaclass__ = KernCallsViaSlicerMeta #=========================================================================== diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 7d9eeac2..b6b1ec1b 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -313,3 +313,47 @@ class Linear(Kern): def input_sensitivity(self): return np.ones(self.input_dim) * self.variances + +class LinearFull(Kern): + def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'): + super(LinearFull, self).__init__(input_dim, active_dims, name) + if W is None: + W = np.ones((input_dim, rank)) + if kappa is None: + kappa = np.ones(input_dim) + assert W.shape == (input_dim, rank) + assert kappa.shape == (input_dim,) + + self.W = Param('W', W) + self.kappa = Param('kappa', kappa, Logexp()) + self.add_parameters(self.W, self.kappa) + + def K(self, X, X2=None): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2) + + def update_gradients_full(self, dL_dK, X, X2=None): + self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None 
else X2) + self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W) + self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W) + + def Kdiag(self, X): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + return np.einsum('ij,jk,ik->i', X, P, X) + + def update_gradients_diag(self, dL_dKdiag, X): + self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag) + self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag) + + def gradients_X(self, dL_dK, X, X2=None): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + if X2 is None: + return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P) + else: + return np.einsum('ij,jk,kl->il', dL_dK, X2, P) + + def gradients_X_diag(self, dL_dKdiag, X): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X) + + diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index c2877d06..0f19dbd1 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -64,7 +64,7 @@ class RBF(Stationary): if self.ARD: self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) else: - self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() + self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() #from psi2 self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 9ed218d8..0a74143c 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -276,6 +276,11 @@ class KernelGradientTestsContinuous(unittest.TestCase): k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_LinearFull(self): + k = GPy.kern.LinearFull(self.D, self.D-1) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + #TODO: turn off grad checkingwrt 
X for indexed kernels like coregionalize # class KernelGradientTestsContinuous1D(unittest.TestCase): # def setUp(self): From ebb919bb8b99f708d76b7a0d0bd5a53eb9627add Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 26 Mar 2014 14:59:08 +0000 Subject: [PATCH 15/51] array list now working with index --- GPy/core/parameterization/lists_and_dicts.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index ca0589c9..31235952 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -28,4 +28,11 @@ class ArrayList(list): return True return False + def index(self, item): + index = 0 + for el in self: + if el is item: + return index + index += 1 + raise ValueError, "{} is not in list".format(item) pass From a126f288d2c111b5a22e9b634e498d4f74786652 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 26 Mar 2014 14:59:38 +0000 Subject: [PATCH 16/51] slice operations now bound functions, not added after the fact --- GPy/kern/_src/kernel_slice_operations.py | 72 ++++++++++++------------ 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 9beb40ab..b3a1c2a7 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -6,24 +6,26 @@ Created on 11 Mar 2014 from ...core.parameterization.parameterized import ParametersChangedMeta import numpy as np +def put_clean(dct, name, *args, **kw): + if name in dct: + dct['_clean_{}'.format(name)] = dct[name] + dct[name] = _slice_wrapper(None, dct[name], *args, **kw) + class KernCallsViaSlicerMeta(ParametersChangedMeta): - def __call__(self, *args, **kw): - instance = super(ParametersChangedMeta, self).__call__(*args, **kw) - instance.K = _slice_wrapper(instance, instance.K) - instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) - 
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) - instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) - instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True) - instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True) - instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) - instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) - instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) - instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) - instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True) - instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True) - instance.parameters_changed() - return instance - + def __new__(cls, name, bases, dct): + put_clean(dct, 'K') + put_clean(dct, 'Kdiag', diag=True) + put_clean(dct, 'update_gradients_full', diag=False, derivative=True) + put_clean(dct, 'gradients_X', diag=False, derivative=True, ret_X=True) + put_clean(dct, 'gradients_X_diag', diag=True, derivative=True, ret_X=True) + put_clean(dct, 'psi0', diag=False, derivative=False) + put_clean(dct, 'psi1', diag=False, derivative=False) + put_clean(dct, 'psi2', diag=False, derivative=False) + put_clean(dct, 'update_gradients_expectations', derivative=True, psi_stat=True) + put_clean(dct, 'gradients_Z_expectations', derivative=True, psi_stat_Z=True, ret_X=True) + put_clean(dct, 'gradients_qX_expectations', derivative=True, psi_stat=True, ret_X=True) + return 
super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct) + def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False): """ This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. @@ -35,7 +37,7 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False """ if derivative: if diag: - def x_slice_wrapper(dL_dKdiag, X): + def x_slice_wrapper(kern, dL_dKdiag, X): ret_X_not_sliced = ret_X and kern._sliced_X == 0 if ret_X_not_sliced: ret = np.zeros(X.shape) @@ -43,15 +45,15 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False # if the return value is of shape X.shape, we need to make sure to return the right shape kern._sliced_X += 1 try: - if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X) - else: ret = operation(dL_dKdiag, X) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(kern, dL_dKdiag, X) + else: ret = operation(kern, dL_dKdiag, X) except: raise finally: kern._sliced_X -= 1 return ret elif psi_stat: - def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def x_slice_wrapper(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): ret_X_not_sliced = ret_X and kern._sliced_X == 0 if ret_X_not_sliced: ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) @@ -60,44 +62,44 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False # if the return value is of shape X.shape, we need to make sure to return the right shape try: if ret_X_not_sliced: - ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) + ret = list(operation(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) r2 = ret[:2] ret[0] = ret1 ret[1] = ret2 ret[0][:, kern.active_dims] = r2[0] ret[1][:, kern.active_dims] = r2[1] del r2 - else: ret = operation(dL_dpsi0, dL_dpsi1, 
dL_dpsi2, Z, variational_posterior) + else: ret = operation(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: kern._sliced_X -= 1 return ret elif psi_stat_Z: - def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def x_slice_wrapper(kern, dL_dpsi1, dL_dpsi2, Z, variational_posterior): ret_X_not_sliced = ret_X and kern._sliced_X == 0 if ret_X_not_sliced: ret = np.zeros(Z.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 try: if ret_X_not_sliced: - ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) - else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + ret[:, kern.active_dims] = operation(kern, dL_dpsi1, dL_dpsi2, Z, variational_posterior) + else: ret = operation(kern, dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: kern._sliced_X -= 1 return ret else: - def x_slice_wrapper(dL_dK, X, X2=None): + def x_slice_wrapper(kern, dL_dK, X, X2=None): ret_X_not_sliced = ret_X and kern._sliced_X == 0 if ret_X_not_sliced: ret = np.zeros(X.shape) X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 kern._sliced_X += 1 try: - if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) - else: ret = operation(dL_dK, X, X2) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(kern, dL_dK, X, X2) + else: ret = operation(kern, dL_dK, X, X2) except: raise finally: @@ -105,30 +107,30 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret else: if diag: - def x_slice_wrapper(X, *args, **kw): + def x_slice_wrapper(kern, X, *args, **kw): X = kern._slice_X(X) if not kern._sliced_X else X kern._sliced_X += 1 try: - ret = operation(X, *args, **kw) + ret = operation(kern, X, *args, **kw) except: raise finally: 
kern._sliced_X -= 1 return ret else: - def x_slice_wrapper(X, X2=None, *args, **kw): + def x_slice_wrapper(kern, X, X2=None, *args, **kw): X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 kern._sliced_X += 1 try: - ret = operation(X, X2, *args, **kw) + ret = operation(kern, X, X2, *args, **kw) except: raise finally: kern._sliced_X -= 1 return ret x_slice_wrapper._operation = operation x_slice_wrapper.__name__ = ("slicer("+str(operation) - +(","+str(bool(diag)) if diag else'') - +(','+str(bool(derivative)) if derivative else '') + +(","+str('diag') if diag else'') + +(','+str('derivative') if derivative else '') +')') x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "") return x_slice_wrapper \ No newline at end of file From 9cf37ff10441f04f4d7fea6a2267b926fb695ad3 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 26 Mar 2014 15:03:06 +0000 Subject: [PATCH 17/51] started copy implementation, have to get rid of _getstate_ and _setstate_ --- GPy/core/gp.py | 15 ++++++++------- GPy/core/parameterization/parameter_core.py | 13 +++++++++---- GPy/core/parameterization/parameterized.py | 18 ++---------------- GPy/util/caching.py | 2 +- 4 files changed, 20 insertions(+), 28 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5be3e944..5b41f6d0 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -216,15 +216,16 @@ class GP(Model): """ - return Model._getstate(self) + [self.X, - self.num_data, - self.input_dim, - self.kern, - self.likelihood, - self.output_dim, - ] + return []#Model._getstate(self) + [self.X, +# self.num_data, +# self.input_dim, +# self.kern, +# self.likelihood, +# self.output_dim, +# ] def _setstate(self, state): + return self.output_dim = state.pop() self.likelihood = state.pop() self.kern = state.pop() diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 1cdeee0b..b804a61a 100644 --- 
a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -902,15 +902,19 @@ class Parameterizable(OptimizationHandlable): #=========================================================================== def copy(self): """Returns a (deep) copy of the current model""" - raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" + #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" import copy from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView from .lists_and_dicts import ArrayList + + param_mapping = [[] for _ in range(self.num_params)] dc = dict() for k, v in self.__dict__.iteritems(): if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False): - if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): + if v in self._parameters_: + param_mapping[self._parameters_.index(v)] += [k] + elif isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): dc[k] = v.copy() else: dc[k] = copy.deepcopy(v) @@ -928,9 +932,10 @@ class Parameterizable(OptimizationHandlable): s = self.__new__(self.__class__) s.__dict__ = dc - for p in params: + for p, mlist in zip(params, param_mapping): s.add_parameter(p, _ignore_added_names=True) - + for m in mlist: + setattr(s, m, p) return s #=========================================================================== diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index bc83d8c8..529d3733 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -110,29 +110,15 @@ class Parameterized(Parameterizable, Pickleable): Allways append the state of the inherited object and call down to the inherited object in _setstate!! 
""" - return [ - self._fixes_, - self.priors, - self.constraints, - self._parameters_, - self._name, - self._added_names_, - ] + return [] def _setstate(self, state): - self._added_names_ = state.pop() - self._name = state.pop() - self._parameters_ = state.pop() - self.constraints = state.pop() - self.priors = state.pop() - self._fixes_ = state.pop() - self._connect_parameters() self.parameters_changed() #=========================================================================== # Override copy to handle programmatically added observers #=========================================================================== def copy(self): - c = super(Pickleable, self).copy() + c = super(Parameterized, self).copy() c.add_observer(c, c._parameters_changed_notification, -100) return c diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 282c9f8c..fcb0b726 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -48,7 +48,7 @@ class Cacher(object): if k in kw and kw[k] is not None: return self.operation(*args, **kw) # TODO: WARNING !!! Cache OFFSWITCH !!! 
WARNING - #return self.operation(*args) + # return self.operation(*args, **kw) #if the result is cached, return the cached computation state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs] From f8ff2c7df247e62c9451f523172f9691335fbbb0 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Mar 2014 08:05:22 +0000 Subject: [PATCH 18/51] kern merge commencing --- GPy/kern/_src/kernel_slice_operations.py | 231 +++++++++++------------ 1 file changed, 111 insertions(+), 120 deletions(-) diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 9beb40ab..7fa98763 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -5,130 +5,121 @@ Created on 11 Mar 2014 ''' from ...core.parameterization.parameterized import ParametersChangedMeta import numpy as np +import functools class KernCallsViaSlicerMeta(ParametersChangedMeta): def __call__(self, *args, **kw): instance = super(ParametersChangedMeta, self).__call__(*args, **kw) - instance.K = _slice_wrapper(instance, instance.K) - instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) - instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) - instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) - instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True) - instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True) - instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) - instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) - instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) - instance.update_gradients_expectations = 
_slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) - instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True) - instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True) + instance.K = _Slice_wrapper(instance, instance.K) + instance.Kdiag = _Slice_wrapper_diag(instance, instance.Kdiag) + + instance.update_gradients_full = _Slice_wrapper_derivative(instance, instance.update_gradients_full) + instance.update_gradients_diag = _Slice_wrapper_diag_derivative(instance, instance.update_gradients_diag) + + instance.gradients_X = _Slice_wrapper_grad_X(instance, instance.gradients_X) + instance.gradients_X_diag = _Slice_wrapper_grad_X_diag(instance, instance.gradients_X_diag) + + instance.psi0 = _Slice_wrapper(instance, instance.psi0) + instance.psi1 = _Slice_wrapper(instance, instance.psi1) + instance.psi2 = _Slice_wrapper(instance, instance.psi2) + + instance.update_gradients_expectations = _Slice_wrapper_psi_stat_derivative_no_ret(instance, instance.update_gradients_expectations) + instance.gradients_Z_expectations = _Slice_wrapper_psi_stat_derivative_Z(instance, instance.gradients_Z_expectations) + instance.gradients_qX_expectations = _Slice_wrapper_psi_stat_derivative(instance, instance.gradients_qX_expectations) instance.parameters_changed() return instance -def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False): - """ - This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. 
- The different switches are: - diag: if X2 exists - derivative: if first arg is dL_dK - psi_stat: if first 3 args are dL_dpsi0..2 - psi_stat_Z: if first 2 args are dL_dpsi1..2 - """ - if derivative: - if diag: - def x_slice_wrapper(dL_dKdiag, X): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: - ret = np.zeros(X.shape) - X = kern._slice_X(X) if not kern._sliced_X else X - # if the return value is of shape X.shape, we need to make sure to return the right shape - kern._sliced_X += 1 - try: - if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X) - else: ret = operation(dL_dKdiag, X) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - elif psi_stat: - def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: - ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) - Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior - kern._sliced_X += 1 - # if the return value is of shape X.shape, we need to make sure to return the right shape - try: - if ret_X_not_sliced: - ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) - r2 = ret[:2] - ret[0] = ret1 - ret[1] = ret2 - ret[0][:, kern.active_dims] = r2[0] - ret[1][:, kern.active_dims] = r2[1] - del r2 - else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - elif psi_stat_Z: - def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: ret = np.zeros(Z.shape) - Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior - kern._sliced_X += 1 - try: - 
if ret_X_not_sliced: - ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) - else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - else: - def x_slice_wrapper(dL_dK, X, X2=None): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: - ret = np.zeros(X.shape) - X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 - kern._sliced_X += 1 - try: - if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) - else: ret = operation(dL_dK, X, X2) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - else: - if diag: - def x_slice_wrapper(X, *args, **kw): - X = kern._slice_X(X) if not kern._sliced_X else X - kern._sliced_X += 1 - try: - ret = operation(X, *args, **kw) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - else: - def x_slice_wrapper(X, X2=None, *args, **kw): - X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 - kern._sliced_X += 1 - try: - ret = operation(X, X2, *args, **kw) - except: raise - finally: - kern._sliced_X -= 1 - return ret - x_slice_wrapper._operation = operation - x_slice_wrapper.__name__ = ("slicer("+str(operation) - +(","+str(bool(diag)) if diag else'') - +(','+str(bool(derivative)) if derivative else '') - +')') - x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "") - return x_slice_wrapper \ No newline at end of file +class _Slice_wrap(object): + def __init__(self, instance, f): + self.k = instance + self.f = f + def copy_to(self, new_instance): + return self.__class__(new_instance, self.f) + def _slice_X(self, X): + return self.k._slice_X(X) if not self.k._sliced_X else X + def _slice_X_X2(self, X, X2): + return self.k._slice_X(X) if not self.k._sliced_X else X, self.k._slice_X(X2) if X2 is not None and not 
self.k._sliced_X else X2 + def __enter__(self): + self.k._sliced_X += 1 + return self + def __exit__(self, *a): + self.k._sliced_X -= 1 + +class _Slice_wrapper(_Slice_wrap): + def __call__(self, X, X2 = None, *a, **kw): + X, X2 = self._slice_X_X2(X, X2) + with self: + ret = self.f(X, X2, *a, **kw) + return ret + +class _Slice_wrapper_diag(_Slice_wrap): + def __call__(self, X, *a, **kw): + X = self._slice_X(X) + with self: + ret = self.f(X, *a, **kw) + return ret + +class _Slice_wrapper_derivative(_Slice_wrap): + def __call__(self, dL_dK, X, X2=None): + self._slice_X(X) + with self: + ret = self.f(dL_dK, X, X2) + return ret + +class _Slice_wrapper_diag_derivative(_Slice_wrap): + def __call__(self, dL_dKdiag, X): + X = self._slice_X(X) + with self: + ret = self.f(dL_dKdiag, X) + return ret + +class _Slice_wrapper_grad_X(_Slice_wrap): + def __call__(self, dL_dK, X, X2=None): + ret = np.zeros(X.shape) + X, X2 = self._slice_X_X2(X, X2) + with self: + ret[:, self.k.active_dims] = self.f(dL_dK, X, X2) + return ret + +class _Slice_wrapper_grad_X_diag(_Slice_wrap): + def __call__(self, dL_dKdiag, X): + ret = np.zeros(X.shape) + X = self._slice_X(X) + with self: + ret[:, self.k.active_dims] = self.f(dL_dKdiag, X) + return ret + +class _Slice_wrapper_psi_stat_derivative_no_ret(_Slice_wrap): + def __call__(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + Z, variational_posterior = self._slice_X_X2(Z, variational_posterior) + with self: + ret = self.f(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) + return ret + +class _Slice_wrapper_psi_stat_derivative(_Slice_wrap): + def __call__(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) + Z, variational_posterior = self._slice_X_X2(Z, variational_posterior) + with self: + ret = list(self.f(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) + r2 = ret[:2] + ret[0] = ret1 + ret[1] = ret2 + ret[0][:, 
self.k.active_dims] = r2[0] + ret[1][:, self.k.active_dims] = r2[1] + del r2 + return ret + +class _Slice_wrapper_psi_stat_derivative_Z(_Slice_wrap): + def __call__(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) + Z, variational_posterior = self._slice_X_X2(Z, variational_posterior) + with self: + ret = list(self.f(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) + r2 = ret[:2] + ret[0] = ret1 + ret[1] = ret2 + ret[0][:, self.k.active_dims] = r2[0] + ret[1][:, self.k.active_dims] = r2[1] + del r2 + return ret From e26e7370141113c5ac5ecbac34824819a6c941ab Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Mar 2014 09:28:44 +0000 Subject: [PATCH 19/51] new slicing done and first attempts at copy and pickling full models --- GPy/kern/_src/kernel_slice_operations.py | 186 ++++++++++++----------- GPy/util/caching.py | 3 +- 2 files changed, 103 insertions(+), 86 deletions(-) diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 6620f28c..21421cc0 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -5,118 +5,134 @@ Created on 11 Mar 2014 ''' from ...core.parameterization.parameterized import ParametersChangedMeta import numpy as np +from functools import wraps -def put_clean(dct, name, *args, **kw): +def put_clean(dct, name, func): if name in dct: dct['_clean_{}'.format(name)] = dct[name] - dct[name] = _slice_wrapper(None, dct[name], *args, **kw) - + dct[name] = func(dct[name]) + class KernCallsViaSlicerMeta(ParametersChangedMeta): def __new__(cls, name, bases, dct): - put_clean(dct, 'K') - put_clean(dct, 'Kdiag', diag=True) - put_clean(dct, 'update_gradients_full', diag=False, derivative=True) - put_clean(dct, 'gradients_X', diag=False, derivative=True, ret_X=True) - put_clean(dct, 'gradients_X_diag', diag=True, derivative=True, ret_X=True) - 
put_clean(dct, 'psi0', diag=False, derivative=False) - put_clean(dct, 'psi1', diag=False, derivative=False) - put_clean(dct, 'psi2', diag=False, derivative=False) - put_clean(dct, 'update_gradients_expectations', derivative=True, psi_stat=True) - put_clean(dct, 'gradients_Z_expectations', derivative=True, psi_stat_Z=True, ret_X=True) - put_clean(dct, 'gradients_qX_expectations', derivative=True, psi_stat=True, ret_X=True) + put_clean(dct, 'K', _slice_K) + put_clean(dct, 'Kdiag', _slice_Kdiag) + put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) + put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) + put_clean(dct, 'gradients_X', _slice_gradients_X) + put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) + + put_clean(dct, 'psi0', _slice_psi) + put_clean(dct, 'psi1', _slice_psi) + put_clean(dct, 'psi2', _slice_psi) + put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations) + put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations) + put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations) return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct) - + class _Slice_wrap(object): - def __init__(self, instance, f): - self.k = instance - self.f = f - def copy_to(self, new_instance): - return self.__class__(new_instance, self.f) - def _slice_X(self, X): - return self.k._slice_X(X) if not self.k._sliced_X else X - def _slice_X_X2(self, X, X2): - return self.k._slice_X(X) if not self.k._sliced_X else X, self.k._slice_X(X2) if X2 is not None and not self.k._sliced_X else X2 + def __init__(self, k, X, X2=None): + self.k = k + self.shape = X.shape + if self.k._sliced_X == 0: + self.X = self.k._slice_X(X) + self.X2 = self.k._slice_X(X2) if X2 is not None else None + self.ret = True + else: + self.X = X + self.X2 = X2 + self.ret = False def __enter__(self): self.k._sliced_X += 1 return self def __exit__(self, *a): self.k._sliced_X -= 1 + def 
handle_return_array(self, return_val): + if self.ret: + ret = np.zeros(self.shape) + ret[:, self.k.active_dims] = return_val + return ret + return return_val -class _Slice_wrapper(_Slice_wrap): - def __call__(self, X, X2 = None, *a, **kw): - X, X2 = self._slice_X_X2(X, X2) - with self: - ret = self.f(X, X2, *a, **kw) +def _slice_K(f): + @wraps(f) + def wrap(self, X, X2 = None, *a, **kw): + with _Slice_wrap(self, X, X2) as s: + ret = f(self, s.X, s.X2, *a, **kw) return ret + return wrap -class _Slice_wrapper_diag(_Slice_wrap): - def __call__(self, X, *a, **kw): - X = self._slice_X(X) - with self: - ret = self.f(X, *a, **kw) +def _slice_Kdiag(f): + @wraps(f) + def wrap(self, X, *a, **kw): + with _Slice_wrap(self, X, None) as s: + ret = f(self, s.X, *a, **kw) return ret + return wrap -class _Slice_wrapper_derivative(_Slice_wrap): - def __call__(self, dL_dK, X, X2=None): - self._slice_X(X) - with self: - ret = self.f(dL_dK, X, X2) +def _slice_update_gradients_full(f): + @wraps(f) + def wrap(self, dL_dK, X, X2=None): + with _Slice_wrap(self, X, X2) as s: + ret = f(self, dL_dK, s.X, s.X2) return ret + return wrap -class _Slice_wrapper_diag_derivative(_Slice_wrap): - def __call__(self, dL_dKdiag, X): - X = self._slice_X(X) - with self: - ret = self.f(dL_dKdiag, X) +def _slice_update_gradients_diag(f): + @wraps(f) + def wrap(self, dL_dKdiag, X): + with _Slice_wrap(self, X, None) as s: + ret = f(self, dL_dKdiag, s.X) return ret + return wrap -class _Slice_wrapper_grad_X(_Slice_wrap): - def __call__(self, dL_dK, X, X2=None): - ret = np.zeros(X.shape) - X, X2 = self._slice_X_X2(X, X2) - with self: - ret[:, self.k.active_dims] = self.f(dL_dK, X, X2) +def _slice_gradients_X(f): + @wraps(f) + def wrap(self, dL_dK, X, X2=None): + with _Slice_wrap(self, X, X2) as s: + ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2)) return ret + return wrap -class _Slice_wrapper_grad_X_diag(_Slice_wrap): - def __call__(self, dL_dKdiag, X): - ret = np.zeros(X.shape) - X = self._slice_X(X) - 
with self: - ret[:, self.k.active_dims] = self.f(dL_dKdiag, X) +def _slice_gradients_X_diag(f): + @wraps(f) + def wrap(self, dL_dKdiag, X): + with _Slice_wrap(self, X, None) as s: + ret = s.handle_return_array(f(self, dL_dKdiag, s.X)) return ret + return wrap -class _Slice_wrapper_psi_stat_derivative_no_ret(_Slice_wrap): - def __call__(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - Z, variational_posterior = self._slice_X_X2(Z, variational_posterior) - with self: - ret = self.f(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) +def _slice_psi(f): + @wraps(f) + def wrap(self, Z, variational_posterior): + with _Slice_wrap(self, Z, variational_posterior) as s: + ret = f(self, s.X, s.X2) return ret + return wrap -class _Slice_wrapper_psi_stat_derivative(_Slice_wrap): - def __call__(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) - Z, variational_posterior = self._slice_X_X2(Z, variational_posterior) - with self: - ret = list(self.f(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) +def _slice_update_gradients_expectations(f): + @wraps(f) + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + with _Slice_wrap(self, Z, variational_posterior) as s: + ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2) + return ret + return wrap + +def _slice_gradients_Z_expectations(f): + @wraps(f) + def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + with _Slice_wrap(self, Z, variational_posterior) as s: + ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2)) + return ret + return wrap + +def _slice_gradients_qX_expectations(f): + @wraps(f) + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + with _Slice_wrap(self, variational_posterior, Z) as s: + ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X)) r2 = ret[:2] - ret[0] = ret1 - ret[1] = ret2 - ret[0][:, 
self.k.active_dims] = r2[0] - ret[1][:, self.k.active_dims] = r2[1] - del r2 - return ret - -class _Slice_wrapper_psi_stat_derivative_Z(_Slice_wrap): - def __call__(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) - Z, variational_posterior = self._slice_X_X2(Z, variational_posterior) - with self: - ret = list(self.f(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) - r2 = ret[:2] - ret[0] = ret1 - ret[1] = ret2 - ret[0][:, self.k.active_dims] = r2[0] - ret[1][:, self.k.active_dims] = r2[1] + ret[0] = s.handle_return_array(r2[0]) + ret[1] = s.handle_return_array(r2[1]) del r2 return ret + return wrap diff --git a/GPy/util/caching.py b/GPy/util/caching.py index fcb0b726..0886d0c6 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -101,7 +101,7 @@ class Cacher(object): def __name__(self): return self.operation.__name__ -from functools import partial +from functools import partial, update_wrapper class Cacher_wrap(object): def __init__(self, f, limit, ignore_args, force_kwargs): @@ -109,6 +109,7 @@ class Cacher_wrap(object): self.ignore_args = ignore_args self.force_kwargs = force_kwargs self.f = f + update_wrapper(self, self.f) def __get__(self, obj, objtype=None): return partial(self, obj) def __call__(self, *args, **kwargs): From a74965b0384714cb53a40c0a6f3e03aae39c0512 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 27 Mar 2014 10:08:45 +0000 Subject: [PATCH 20/51] moved input_sensitivity to the gp class --- GPy/core/gp.py | 6 ++++++ GPy/core/model.py | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5be3e944..8b52699b 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -208,6 +208,12 @@ class GP(Model): from ..plotting.matplot_dep import models_plots return models_plots.plot_fit(self,*args,**kwargs) + def input_sensitivity(self): + """ + Returns the sensitivity for each 
dimension of this model + """ + return self.kern.input_sensitivity() + def _getstate(self): """ diff --git a/GPy/core/model.py b/GPy/core/model.py index 47243b79..e04993cb 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -141,12 +141,6 @@ class Model(Parameterized): """ raise DeprecationWarning, 'parameters now have default constraints' - def input_sensitivity(self): - """ - Returns the sensitivity for each dimension of this kernel. - """ - return self.kern.input_sensitivity() - def objective_function(self): """ The objective function for the given algorithm. From d5a8e5dd3aa0983c71cbd0cba31b5e4053a1e4e5 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Mar 2014 13:08:54 +0000 Subject: [PATCH 21/51] right active dims when adding kernels --- GPy/kern/_src/add.py | 4 +++- GPy/kern/_src/kern.py | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 57e611ed..ddc480de 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -176,5 +176,7 @@ class Add(CombinationKernel): for p in other_params: other.remove_parameter(p) self.add_parameters(*other_params) - else: self.add_parameter(other) + else: + self.add_parameter(other) + self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts) return self \ No newline at end of file diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 9d8d3f7b..5cd71215 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -208,9 +208,7 @@ class CombinationKernel(Kern): :param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on """ assert all([isinstance(k, Kern) for k in kernels]) - active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) - input_dim = active_dims.max()+1 + len(extra_dims) - active_dims = slice(active_dims.max()+1+len(extra_dims)) + input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims) 
# initialize the kernel with the full input_dim super(CombinationKernel, self).__init__(input_dim, active_dims, name) self.extra_dims = extra_dims @@ -220,6 +218,12 @@ class CombinationKernel(Kern): def parts(self): return self._parameters_ + def get_input_dim_active_dims(self, kernels, extra_dims = None): + active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) + input_dim = active_dims.max()+1 + (len(extra_dims) if extra_dims is not None else 0) + active_dims = slice(input_dim) + return input_dim, active_dims + def input_sensitivity(self): in_sen = np.zeros((self.num_params, self.input_dim)) for i, p in enumerate(self.parts): From 5d6e612a2a9708175029954172c55019495d249f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Mar 2014 13:10:51 +0000 Subject: [PATCH 22/51] independent output kernel now with single kernel/multiple kernels --- GPy/kern/_src/independent_outputs.py | 70 ++++++++++++++++------------ 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index cf015d02..4a9671aa 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -54,85 +54,93 @@ class IndependentOutputs(CombinationKernel): self.kern = kernels super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) self.index_dim = index_dim - self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0]) def K(self,X ,X2=None): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)] + [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in 
zip(kerns, slices)] else: slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] + [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)] return target def Kdiag(self,X): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern target = np.zeros(X.shape[0]) - [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target def update_gradients_full(self,dL_dK,X,X2=None): slices = index_to_slices(X[:,self.index_dim]) - if self.single_kern: target = np.zeros(self.kern.size) - else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + if self.single_kern: + target = np.zeros(self.kern.size) + kerns = itertools.repeat(self.kern) + else: + kerns = self.kern + target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)] def collate_grads(kern, i, dL, X, X2): kern.update_gradients_full(dL,X,X2) if self.single_kern: target[:] += kern.gradient else: target[i][:] += kern.gradient if X2 is None: - [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))] + [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))] else: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in 
enumerate(zip(self.kerns,slices,slices2))] + [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))] if self.single_kern: kern.gradient = target - else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): target = np.zeros(X.shape) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern if X2 is None: # TODO: make use of index_to_slices values = np.unique(X[:,self.index_dim]) slices = [X[:,self.index_dim]==i for i in values] [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) - for kern, s in zip(self.kerns, slices)] + for kern, s in zip(kerns, slices)] #slices = index_to_slices(X[:,self.index_dim]) #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) - # for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + # for s in slices_i] for kern, slices_i in zip(kerns, slices)] #import ipdb;ipdb.set_trace() #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), # np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss])) - # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)] + # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)] else: values = np.unique(X[:,self.index_dim]) slices = [X[:,self.index_dim]==i for i in values] slices2 = [X2[:,self.index_dim]==i for i in values] [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) - for kern, s, s2 in zip(self.kerns, slices, slices2)] + for kern, s, s2 in zip(kerns, slices, slices2)] # TODO: make work with index_to_slices #slices = index_to_slices(X[:,self.index_dim]) #slices2 = 
index_to_slices(X2[:,self.index_dim]) - #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] + #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)] return target def gradients_X_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern target = np.zeros(X.shape) - [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target def update_gradients_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern if self.single_kern: target = np.zeros(self.kern.size) - else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)] def collate_grads(kern, i, dL, X): kern.update_gradients_diag(dL,X) if self.single_kern: target[:] += kern.gradient else: target[i][:] += kern.gradient - [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))] + [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))] if self.single_kern: kern.gradient = target - else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] class Hierarchical(CombinationKernel): """ @@ 
-148,30 +156,30 @@ class Hierarchical(CombinationKernel): def __init__(self, kern, name='hierarchy'): assert all([k.input_dim==kerns[0].input_dim for k in kerns]) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) - self.kerns = kerns - self.add_parameters(self.kerns) + kerns = kerns + self.add_parameters(kerns) def K(self,X ,X2=None): - X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)] - K = self.kerns[0].K(X, X2) + X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)] + K = kerns[0].K(X, X2) if X2 is None: - [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)] + [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)] else: X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) - [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)] + [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)] return target def Kdiag(self,X): - X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)] - K = self.kerns[0].K(X, X2) - [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)] + X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)] + K = kerns[0].K(X, X2) + [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)] return target def 
update_gradients_full(self,dL_dK,X,X2=None): X,slices = X[:,:-1],index_to_slices(X[:,-1]) if X2 is None: - self.kerns[0].update_gradients_full(dL_dK, X, None) - for k, slices_k in zip(self.kerns[1:], slices): + kerns[0].update_gradients_full(dL_dK, X, None) + for k, slices_k in zip(kerns[1:], slices): target = np.zeros(k.size) def collate_grads(dL, X, X2): k.update_gradients_full(dL,X,X2) @@ -180,8 +188,8 @@ class Hierarchical(CombinationKernel): k._set_gradient(target) else: X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1]) - self.kerns[0].update_gradients_full(dL_dK, X, None) - for k, slices_k in zip(self.kerns[1:], slices): + kerns[0].update_gradients_full(dL_dK, X, None) + for k, slices_k in zip(kerns[1:], slices): target = np.zeros(k.size) def collate_grads(dL, X, X2): k.update_gradients_full(dL,X,X2) From fd4ad06126b505ca6d2515509e752bde66f04deb Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Mar 2014 13:11:19 +0000 Subject: [PATCH 23/51] slight adjustment to self.active_dims being a 0:n:1 slice --- GPy/kern/_src/kern.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 5cd71215..488745c5 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -221,7 +221,7 @@ class CombinationKernel(Kern): def get_input_dim_active_dims(self, kernels, extra_dims = None): active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) input_dim = active_dims.max()+1 + (len(extra_dims) if extra_dims is not None else 0) - active_dims = slice(input_dim) + active_dims = slice(0, input_dim, 1) return input_dim, active_dims def input_sensitivity(self): From 1dabf67c936f1534ee13c72ce3362d102864a941 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Mar 2014 12:02:34 +0000 Subject: [PATCH 24/51] assertion checks for all kernels --- GPy/kern/_src/add.py | 12 ------------ GPy/kern/_src/kernel_slice_operations.py | 2 ++ GPy/kern/_src/prod.py | 2 -- 3 files 
changed, 2 insertions(+), 14 deletions(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index ddc480de..fb0e114b 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -23,7 +23,6 @@ class Add(CombinationKernel): If a list of parts (of this kernel!) `which_parts` is given, only the parts of the list are taken to compute the covariance. """ - assert X.shape[1] > max(np.r_[self.active_dims]) if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -33,7 +32,6 @@ class Add(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def Kdiag(self, X, which_parts=None): - assert X.shape[1] > max(np.r_[self.active_dims]) if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -160,16 +158,6 @@ class Add(CombinationKernel): target_S += b return target_mu, target_S - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return super(Add, self)._getstate() - - def _setstate(self, state): - super(Add, self)._setstate(state) - def add(self, other, name='sum'): if isinstance(other, Add): other_params = other._parameters_[:] diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 21421cc0..42306504 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -56,6 +56,7 @@ class _Slice_wrap(object): def _slice_K(f): @wraps(f) def wrap(self, X, X2 = None, *a, **kw): + assert X.shape[1] > max(np.r_[self.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.active_dims])) with _Slice_wrap(self, X, X2) as s: ret = f(self, s.X, s.X2, *a, **kw) return ret @@ -64,6 +65,7 @@ def _slice_K(f): def _slice_Kdiag(f): @wraps(f) def wrap(self, X, *a, **kw): + assert X.shape[1] > max(np.r_[self.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.active_dims])) with 
_Slice_wrap(self, X, None) as s: ret = f(self, s.X, *a, **kw) return ret diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 98b60366..b8f92f27 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -23,7 +23,6 @@ class Prod(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def K(self, X, X2=None, which_parts=None): - assert X.shape[1] == self.input_dim if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -33,7 +32,6 @@ class Prod(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def Kdiag(self, X, which_parts=None): - assert X.shape[1] == self.input_dim if which_parts is None: which_parts = self.parts return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) From 305e8be3b4d30d58cf1bc1a011689d62f391f557 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Mar 2014 12:06:23 +0000 Subject: [PATCH 25/51] kernel slicer now asserts X dimension on first seeing X --- GPy/kern/_src/kernel_slice_operations.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 42306504..ea5d2b0a 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -34,6 +34,7 @@ class _Slice_wrap(object): self.k = k self.shape = X.shape if self.k._sliced_X == 0: + assert X.shape[1] > max(np.r_[self.k.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.k.active_dims])) self.X = self.k._slice_X(X) self.X2 = self.k._slice_X(X2) if X2 is not None else None self.ret = True @@ -56,7 +57,6 @@ class _Slice_wrap(object): def _slice_K(f): @wraps(f) def wrap(self, X, X2 = None, *a, **kw): - assert X.shape[1] > max(np.r_[self.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.active_dims])) with _Slice_wrap(self, X, X2) as s: ret = f(self, s.X, s.X2, *a, **kw) return ret @@ -65,7 +65,6 @@ def 
_slice_K(f): def _slice_Kdiag(f): @wraps(f) def wrap(self, X, *a, **kw): - assert X.shape[1] > max(np.r_[self.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.active_dims])) with _Slice_wrap(self, X, None) as s: ret = f(self, s.X, *a, **kw) return ret From 60a071f18faac745a8ea21228bd0d9b5c3afe5e0 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Mar 2014 12:11:14 +0000 Subject: [PATCH 26/51] exact inference for N>D of Y --- .../latent_function_inference/exact_gaussian_inference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index bd3fcefb..554d3d1a 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -29,10 +29,12 @@ class ExactGaussianInference(object): """ N, D = Y.shape if (N>D): + print "WARNING: D>N we still need caching of L, such that L*L^T = Y, although fine here" return Y else: #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. - raise NotImplementedError, 'TODO' #TODO + print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" + return Y def inference(self, kern, X, likelihood, Y, Y_metadata=None): """ From 6f86b9b0fa9bbb1d8162044b564370260902d175 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 31 Mar 2014 09:18:31 +0100 Subject: [PATCH 27/51] Update of symbolic likelihoods. 
--- GPy/kern/__init__.py | 2 +- GPy/kern/_src/{sympykern.py => symbolic.py} | 186 +++++++++++++++++++- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/bernoulli.py | 92 +++++----- GPy/likelihoods/likelihood.py | 77 ++++---- GPy/likelihoods/student_t.py | 95 +++++----- 6 files changed, 319 insertions(+), 134 deletions(-) rename GPy/kern/_src/{sympykern.py => symbolic.py} (56%) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 55b69bd7..37cd71ec 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -3,7 +3,7 @@ from _src.rbf import RBF from _src.linear import Linear, LinearFull from _src.static import Bias, White from _src.brownian import Brownian -from _src.sympykern import Sympykern +from _src.symbolic import Symbolic from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.mlp import MLP from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/symbolic.py similarity index 56% rename from GPy/kern/_src/sympykern.py rename to GPy/kern/_src/symbolic.py index 6f066e98..2d4cbc59 100644 --- a/GPy/kern/_src/sympykern.py +++ b/GPy/kern/_src/symbolic.py @@ -11,7 +11,7 @@ from kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp -class Sympykern(Kern): +class Symbolic(Kern): """ A kernel object, where all the hard work in done by sympy. @@ -26,10 +26,8 @@ class Sympykern(Kern): - to handle multiple inputs, call them x_1, z_1, etc - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. 
""" - def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None): + def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None): - if name is None: - name='sympykern' if k is None: raise ValueError, "You must provide an argument for the covariance function." super(Sympykern, self).__init__(input_dim, active_dims, name) @@ -60,7 +58,6 @@ class Sympykern(Kern): # extract parameter names from the covariance thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) - # Look for parameters with index (subscripts), they are associated with different outputs. if self.output_dim>1: self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) @@ -117,6 +114,12 @@ class Sympykern(Kern): self.arg_list += self._sp_theta_i + self._sp_theta_j self.diag_arg_list += self._sp_theta_i + # Check if there are additional linear operators on the covariance. + self._sp_operators = operators + # TODO: Deal with linear operators + #if self._sp_operators: + # for operator in self._sp_operators: + # psi_stats aren't yet implemented. if False: self.compute_psi_stats() @@ -254,3 +257,176 @@ class Sympykern(Kern): self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T +if False: + class Symcombine(CombinationKernel): + """ + Combine list of given sympy covariances together with the provided operations. + """ + def __init__(self, subkerns, operations, name='sympy_combine'): + super(Symcombine, self).__init__(subkerns, name) + for subkern, operation in zip(subkerns, operations): + self._sp_k += self._k_double_operate(subkern._sp_k, operation) + + #def _double_operate(self, k, operation): + + + @Cache_this(limit=2, force_kwargs=['which_parts']) + def K(self, X, X2=None, which_parts=None): + """ + Combine covariances with a linear operator. 
+ """ + assert X.shape[1] == self.input_dim + if which_parts is None: + which_parts = self.parts + elif not isinstance(which_parts, (list, tuple)): + # if only one part is given + which_parts = [which_parts] + return reduce(np.add, (p.K(X, X2) for p in which_parts)) + + @Cache_this(limit=2, force_kwargs=['which_parts']) + def Kdiag(self, X, which_parts=None): + assert X.shape[1] == self.input_dim + if which_parts is None: + which_parts = self.parts + elif not isinstance(which_parts, (list, tuple)): + # if only one part is given + which_parts = [which_parts] + return reduce(np.add, (p.Kdiag(X) for p in which_parts)) + + def update_gradients_full(self, dL_dK, X, X2=None): + [p.update_gradients_full(dL_dK, X, X2) for p in self.parts] + + def update_gradients_diag(self, dL_dK, X): + [p.update_gradients_diag(dL_dK, X) for p in self.parts] + + def gradients_X(self, dL_dK, X, X2=None): + """Compute the gradient of the objective function with respect to X. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
+ :type dL_dK: np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim)""" + + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts] + return target + + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] + return target + + def psi0(self, Z, variational_posterior): + return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts)) + + def psi1(self, Z, variational_posterior): + return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts)) + + def psi2(self, Z, variational_posterior): + psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts)) + #return psi2 + # compute the "cross" terms + from static import White, Bias + from rbf import RBF + #from rbf_inv import RBFInv + from linear import Linear + #ffrom fixed import Fixed + + for p1, p2 in itertools.combinations(self.parts, 2): + # i1, i2 = p1.active_dims, p2.active_dims + # white doesn;t combine with anything + if isinstance(p1, White) or isinstance(p2, White): + pass + # rbf X bias + #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): + elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): + tmp = p2.psi1(Z, variational_posterior) + psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) + #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): + elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): + tmp = p1.psi1(Z, variational_posterior) + psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) + elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)): + assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far" + tmp1 = 
p1.psi1(Z, variational_posterior) + tmp2 = p2.psi1(Z, variational_posterior) + psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :]) + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" + return psi2 + + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + from static import White, Bias + for p1 in self.parts: + #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2 in self.parts: + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. + p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + + def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + from static import White, Bias + target = np.zeros(Z.shape) + for p1 in self.parts: + #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2 in self.parts: + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. + target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + return target + + def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + from static import White, Bias + target_mu = np.zeros(variational_posterior.shape) + target_S = np.zeros(variational_posterior.shape) + for p1 in self._parameters_: + #compute the effective dL_dpsi1. 
extra terms appear becaue of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2 in self._parameters_: + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. + a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + target_mu += a + target_S += b + return target_mu, target_S + + def _getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return super(Add, self)._getstate() + + def _setstate(self, state): + super(Add, self)._setstate(state) + + def add(self, other, name='sum'): + if isinstance(other, Add): + other_params = other._parameters_.copy() + for p in other_params: + other.remove_parameter(p) + self.add_parameters(*other_params) + else: self.add_parameter(other) + return self diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 28e44541..87229081 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -6,3 +6,4 @@ from poisson import Poisson from student_t import StudentT from likelihood import Likelihood from mixed_noise import MixedNoise +from symbolic import Symbolic diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 371fbe63..7b867954 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -15,7 +15,7 @@ class Bernoulli(Likelihood): p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} .. Note:: - Y is expected to take values in {-1, 1} TODO: {0, 1}?? + Y takes values in either {-1, 1} or {0, 1}. link function should have the domain [0, 1], e.g. probit (default) or Heaviside .. See also:: @@ -54,10 +54,10 @@ class Bernoulli(Likelihood): """ if Y_i == 1: sign = 1. 
- elif Y_i == 0: + elif Y_i == 0 or Y_i == -1: sign = -1 else: - raise ValueError("bad value for Bernouilli observation (0, 1)") + raise ValueError("bad value for Bernoulli observation (0, 1)") if isinstance(self.gp_link, link_functions.Probit): z = sign*v_i/np.sqrt(tau_i**2 + tau_i) Z_hat = std_norm_cdf(z) @@ -95,15 +95,15 @@ class Bernoulli(Likelihood): else: return np.nan - def pdf_link(self, link_f, y, Y_metadata=None): + def pdf_link(self, inv_link_f, y, Y_metadata=None): """ - Likelihood function given link(f) + Likelihood function given inverse link of f. .. math:: p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli @@ -113,102 +113,106 @@ class Bernoulli(Likelihood): .. Note: Each y_i must be in {0, 1} """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #objective = (link_f**y) * ((1.-link_f)**(1.-y)) - objective = np.where(y, link_f, 1.-link_f) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y)) + objective = np.where(y, inv_link_f, 1.-inv_link_f) return np.exp(np.sum(np.log(objective))) - def logpdf_link(self, link_f, y, Y_metadata=None): + def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ - Log Likelihood function given link(f) + Log Likelihood function given inverse link of f. .. math:: \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i}) - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. 
+ :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: log likelihood evaluated at points link(f) + :returns: log likelihood evaluated at points inverse link of f. :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #objective = y*np.log(link_f) + (1.-y)*np.log(link_f) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f) state = np.seterr(divide='ignore') - objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) + # TODO check y \in {0, 1} or {-1, 1} + objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f)) np.seterr(**state) return np.sum(objective) - def dlogpdf_dlink(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): """ - Gradient of the pdf at y, given link(f) w.r.t link(f) + Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f. .. math:: \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: gradient of log likelihood evaluated at points link(f) + :returns: gradient of log likelihood evaluated at points inverse link of f. 
:rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #grad = (y/link_f) - (1.-y)/(1-link_f) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f) state = np.seterr(divide='ignore') - grad = np.where(y, 1./link_f, -1./(1-link_f)) + # TODO check y \in {0, 1} or {-1, 1} + grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f)) np.seterr(**state) return grad - def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ - Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j - i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) + Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j + i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j. .. math:: \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) + :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f. :rtype: Nx1 array .. 
Note:: Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases - (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2) state = np.seterr(divide='ignore') - d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f)) + # TODO check y \in {0, 1} or {-1, 1} + d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f)) np.seterr(**state) return d2logpdf_dlink2 - def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): """ - Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f .. math:: \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables passed through inverse link of f. 
+ :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: third derivative of log likelihood evaluated at points link(f) + :returns: third derivative of log likelihood evaluated at points inverse_link(f) :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3)) state = np.seterr(divide='ignore') - d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3)) + # TODO check y \in {0, 1} or {-1, 1} + d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3)) np.seterr(**state) return d3logpdf_dlink3 diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index aabe93ef..5761f3fb 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -16,20 +16,20 @@ class Likelihood(Parameterized): Likelihood base class, used to defing p(y|f). All instances use _inverse_ link functions, which can be swapped out. It is - expected that inherriting classes define a default inverse link function + expected that inheriting classes define a default inverse link function - To use this class, inherrit and define missing functionality. + To use this class, inherit and define missing functionality. 
- Inherriting classes *must* implement: + Inheriting classes *must* implement: pdf_link : a bound method which turns the output of the link function into the pdf logpdf_link : the logarithm of the above - To enable use with EP, inherriting classes *must* define: + To enable use with EP, inheriting classes *must* define: TODO: a suitable derivative function for any parameters of the class It is also desirable to define: moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature. - To enable use with Laplace approximation, inherriting classes *must* define: + To enable use with Laplace approximation, inheriting classes *must* define: Some derivative functions *AS TODO* For exact Gaussian inference, define *JH TODO* @@ -159,7 +159,7 @@ class Likelihood(Parameterized): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): """ - Numerical approximation to the predictive variance: V(Y_star) + Approximation to the predictive variance: V(Y_star) The following variance decomposition is used: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) @@ -208,28 +208,28 @@ class Likelihood(Parameterized): # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 return exp_var + var_exp - def pdf_link(self, link_f, y, Y_metadata=None): + def pdf_link(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def logpdf_link(self, link_f, y, Y_metadata=None): + def logpdf_link(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_dlink(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): raise 
NotImplementedError - def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None): + def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError def pdf(self, f, y, Y_metadata=None): @@ -247,8 +247,8 @@ class Likelihood(Parameterized): :returns: likelihood evaluated for this point :rtype: float """ - link_f = self.gp_link.transf(f) - return self.pdf_link(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata) def logpdf(self, f, y, Y_metadata=None): """ @@ -265,8 +265,8 @@ class Likelihood(Parameterized): :returns: log likelihood evaluated for this point :rtype: float """ - link_f = self.gp_link.transf(f) - return self.logpdf_link(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) def dlogpdf_df(self, f, y, Y_metadata=None): """ @@ -284,8 +284,8 @@ class Likelihood(Parameterized): :returns: derivative of log likelihood evaluated for this point :rtype: 1xN array """ - link_f = self.gp_link.transf(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) return chain_1(dlogpdf_dlink, dlink_df) @@ -305,10 +305,10 @@ class Likelihood(Parameterized): :returns: second derivative of log likelihood evaluated for this point (diagonal only) :rtype: 1xN array """ - link_f = self.gp_link.transf(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + 
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) d2link_df2 = self.gp_link.d2transf_df2(f) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) @@ -328,12 +328,12 @@ class Likelihood(Parameterized): :returns: third derivative of log likelihood evaluated for this point :rtype: float """ - link_f = self.gp_link.transf(f) - d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) + d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) d2link_df2 = self.gp_link.d2transf_df2(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) d3link_df3 = self.gp_link.d3transf_df3(f) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) @@ -342,10 +342,10 @@ class Likelihood(Parameterized): TODO: Doc strings """ if self.size > 0: - link_f = self.gp_link.transf(f) - return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata) else: - #Is no parameters so return an empty array for its derivatives + # There are no parameters so return an empty array for derivatives return np.zeros([1, 0]) def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): @@ -353,12 +353,12 @@ class Likelihood(Parameterized): TODO: Doc strings """ if self.size > 0: - link_f = self.gp_link.transf(f) + inv_link_f = self.gp_link.transf(f) dlink_df = self.gp_link.dtransf_df(f) - 
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) return chain_1(dlogpdf_dlink_dtheta, dlink_df) else: - #Is no parameters so return an empty array for its derivatives + # There are no parameters so return an empty array for derivatives return np.zeros([f.shape[0], 0]) def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): @@ -366,14 +366,14 @@ class Likelihood(Parameterized): TODO: Doc strings """ if self.size > 0: - link_f = self.gp_link.transf(f) + inv_link_f = self.gp_link.transf(f) dlink_df = self.gp_link.dtransf_df(f) d2link_df2 = self.gp_link.d2transf_df2(f) - d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) else: - #Is no parameters so return an empty array for its derivatives + # There are no parameters so return an empty array for derivatives return np.zeros([f.shape[0], 0]) def _laplace_gradients(self, f, y, Y_metadata=None): @@ -411,7 +411,10 @@ class Likelihood(Parameterized): #compute the quantiles by sampling!!! 
N_samp = 1000 s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu + #ss_f = s.flatten() + #ss_y = self.samples(ss_f, Y_metadata) ss_y = self.samples(s, Y_metadata) + #ss_y = ss_y.reshape(mu.shape[0], N_samp) return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index 47efd443..c057e789 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -26,8 +26,8 @@ class StudentT(Likelihood): gp_link = link_functions.Identity() super(StudentT, self).__init__(gp_link, name='Student_T') - - self.sigma2 = Param('t_noise', float(sigma2), Logexp()) + # sigma2 is not a noise parameter, it is a squared scale. + self.sigma2 = Param('t_scale2', float(sigma2), Logexp()) self.v = Param('deg_free', float(deg_free)) self.add_parameter(self.sigma2) self.add_parameter(self.v) @@ -46,23 +46,23 @@ class StudentT(Likelihood): self.sigma2.gradient = grads[0] self.v.gradient = grads[1] - def pdf_link(self, link_f, y, Y_metadata=None): + def pdf_link(self, inv_link_f, y, Y_metadata=None): """ Likelihood function given link(f) .. math:: p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: likelihood evaluated for this point :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f #Careful gamma(big_number) is infinity! 
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) / (np.sqrt(self.v * np.pi * self.sigma2))) @@ -70,15 +70,15 @@ class StudentT(Likelihood): ) return np.prod(objective) - def logpdf_link(self, link_f, y, Y_metadata=None): + def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ Log Likelihood Function given link(f) .. math:: \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) - :param link_f: latent variables (link(f)) - :type link_f: Nx1 array + :param inv_link_f: latent variables (link(f)) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution @@ -86,11 +86,11 @@ class StudentT(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f #FIXME: - #Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?! - #But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly + #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! + #But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) objective = (+ gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5) @@ -99,15 +99,15 @@ class StudentT(Likelihood): ) return np.sum(objective) - def dlogpdf_dlink(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): """ Gradient of the log likelihood function at y, given link(f) w.r.t link(f) .. 
math:: \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} - :param link_f: latent variables (f) - :type link_f: Nx1 array + :param inv_link_f: latent variables (f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution @@ -115,12 +115,12 @@ class StudentT(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad - def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) @@ -129,8 +129,8 @@ class StudentT(Likelihood): .. 
math:: \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inv_link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution @@ -141,90 +141,90 @@ class StudentT(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess - def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) .. 
math:: \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) ) return d3lik_dlink3 - def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): + def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None): """ Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) .. 
math:: \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) return np.sum(dlogpdf_dvar) - def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): """ Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) .. 
math:: \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} - :param link_f: latent variables link_f - :type link_f: Nx1 array + :param inv_link_f: latent variables inv_link_f + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlogpdf_dlink_dvar - def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None): """ Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) .. 
math:: \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) ) @@ -246,11 +246,12 @@ class StudentT(Likelihood): return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): - return self.gp_link.transf(mu) # only true in link is monotoci, which it is. + # The comment here confuses mean and median. + return self.gp_link.transf(mu) # only true if link is monotonic, which it is. def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): - if self.deg_free <2.: - return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom + if self.deg_free<=2.: + return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2. else: return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata) From 9b2bc907e4ba0a892e9d5236cc228d1370966933 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 31 Mar 2014 09:20:03 +0100 Subject: [PATCH 28/51] Adding of symbolic likelihoods (not yet fully functional). 
--- GPy/likelihoods/.DS_Store | Bin 12292 -> 0 bytes GPy/likelihoods/symbolic.py | 243 ++++++++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+) delete mode 100644 GPy/likelihoods/.DS_Store create mode 100644 GPy/likelihoods/symbolic.py diff --git a/GPy/likelihoods/.DS_Store b/GPy/likelihoods/.DS_Store deleted file mode 100644 index 8228ae90a5144c14ccc985f4a2c9f9cfc9145102..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12292 zcmeHM!EVz)5S?ucggR8T2NVe;eB^}0A4F9_;tCgTCU&4$BzDxqQn=+a_$WSvzX)$; zw{*Q;ClUvQ!fdoVjy-Q?=goSZafrxVHq%!`BO)50vwWOjh%~;=z0;1|ndguOc%qV4 zl;QJ|79DLpp$sSk%78MU3@8Kt0t0wva~t$L_w7|3l>ueozhr>-hZLRV*vXBRvUOmP z5&-f9r={T-b%0<}C&x~1tQ070+UmhLsBtBRv2a{ZSsilhn}h-er`Q4!#NbZ|L+ zq8oUtsi7G@rMxc){e`%Ma%eJY4mkmi-!kk&9a8}>t6)wX%Xcr9dzjH8YELww{b0Mr zNOb>BGmO-{!*$?mv3P%-iyE*o~4d@!a=J-r{BlPtX(QgpXQoK}ssMZXJaa4y3tQ)iTB0KNk z_GElCYhf)&eSal0xa7@vnEh6&k| zx%#L_K$Y|rK3IOiEbIPt+~WPBABlX|bOU?WkvaeLa(A}FMzGJw%zbR!e}-LTa(5>M z{w4FC|8wzQA^wa$V;`yHjw&(x3N~W<4_om0djied%w}MYbG(w+bKaNmeYY(?qNgoe zb0DXZmkhJAR@ibQb7X$wGvu}8TYdItR>PG;uzU2u<8IhvcNuUVuI*mjbXm;ux}Nbb zz*8)KOY2fXkEEirAcjJHBc$;Lm!U^+eEYWokv-rLhhrPmi0l8?{qO$|F?5Yd8Bhis z1LoxF{naJDxZ1iy4A+0^7kKxI8LF;n6Z-^D{%(xufGV; s=f6Jx@zX?~|8o23pbRJj%78MU3@8K2fHI&ACjCZU6uP diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py new file mode 100644 index 00000000..5eaafb2a --- /dev/null +++ b/GPy/likelihoods/symbolic.py @@ -0,0 +1,243 @@ +# Copyright (c) 2014 GPy Authors +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import sympy as sp +from sympy.utilities.lambdify import lambdify +import link_functions +from scipy import stats, integrate +from scipy.special import gammaln, gamma, erf +from likelihood import Likelihood +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp + +class Symbolic(Likelihood): + """ + Symbolic likelihood. 
+ + Likelihood where the form of the likelihood is provided by a sympy expression. + + """ + def __init__(self, likelihood=None, log_likelihood=None, cdf=None, logZ=None, gp_link=None, name='symbolic', log_concave=False, param=None): + if gp_link is None: + gp_link = link_functions.Identity() + + if likelihood is None and log_likelihood is None and cdf is None: + raise ValueError, "You must provide an argument for the likelihood or the log likelihood." + + super(Symbolic, self).__init__(gp_link, name=name) + + if likelihood is None and log_likelihood: + self._sp_likelihood = sp.exp(log_likelihood).simplify() + self._sp_log_likelihood = log_likelihood + + if log_likelihood is None and likelihood: + self._sp_likelihood = likelihood + self._sp_log_likelihood = sp.log(likelihood).simplify() + + # TODO: build likelihood and log likelihood from CDF or + # compute CDF given likelihood/log-likelihood. Also check log + # likelihood, likelihood and CDF are consistent. + + # pull the variable names out of the symbolic likelihood + sp_vars = [e for e in self._sp_likelihood.atoms() if e.is_Symbol] + self._sp_f = [e for e in sp_vars if e.name=='f'] + if not self._sp_f: + raise ValueError('No variable f in likelihood or log likelihood.') + self._sp_y = [e for e in sp_vars if e.name=='y'] + if not self._sp_f: + raise ValueError('No variable y in likelihood or log likelihood.') + self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + + # These are all the arguments need to compute likelihoods. + self.arg_list = self._sp_y + self._sp_f + self._sp_theta + + # these are arguments for computing derivatives. + derivative_arguments = self._sp_f + self._sp_theta + + # Do symbolic work to compute derivatives. 
+ self._log_likelihood_derivatives = {theta.name : sp.diff(self._sp_log_likelihood,theta).simplify() for theta in derivative_arguments} + self._log_likelihood_second_derivatives = {theta.name : sp.diff(self._log_likelihood_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._log_likelihood_third_derivatives = {theta.name : sp.diff(self._log_likelihood_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + + # Add parameters to the model. + for theta in self._sp_theta: + val = 1.0 + # TODO: need to decide how to handle user passing values for these parameter vectors. + if param is not None: + if param.has_key(theta): + val = param[theta] + setattr(self, theta.name, Param(theta.name, val, None)) + self.add_parameters(getattr(self, theta.name)) + + + # Is there some way to check whether the likelihood is log + # concave? For the moment, need user to specify. + self.log_concave = log_concave + + # initialise code arguments + self._arguments = {} + + # generate the code for the likelihood and derivatives + self._gen_code() + + def _gen_code(self): + """Generate the code from the symbolic parts that will be used for likelihood computation.""" + # TODO: Check here whether theano is available and set up + # functions accordingly. 
+ self._likelihood_function = lambdify(self.arg_list, self._sp_likelihood, 'numpy') + self._log_likelihood_function = lambdify(self.arg_list, self._sp_log_likelihood, 'numpy') + + # compute code for derivatives (for implicit likelihood terms + # we need up to 3rd derivatives) + setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_derivatives[key], 'numpy') for key in self._log_likelihood_derivatives.keys()}) + setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_second_derivatives[key], 'numpy') for key in self._log_likelihood_second_derivatives.keys()}) + setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_third_derivatives[key], 'numpy') for key in self._log_likelihood_third_derivatives.keys()}) + + # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta + + def parameters_changed(self): + pass + + def update_gradients(self, grads): + """ + Pull out the gradients, be careful as the order must match the order + in which the parameters are added + """ + # The way the Laplace approximation is run requires the + # covariance function to compute the true gradient (because it + # is dependent on the mode). This means we actually compute + # the gradient outside this object. This function would + # normally ask the object to update its gradients internally, + # but here it provides them externally, because they are + # computed in the inference code. TODO: Thought: How does this + # effect EP? Shouldn't this be done by a separate + # Laplace-approximation specific call? 
+ for grad, theta in zip(grads, self._sp_theta): + parameter = getattr(self, theta.name) + setattr(parameter, 'gradient', grad) + + def _arguments_update(self, f, y): + """Set up argument lists for the derivatives.""" + # If we do make use of Theano, then at this point we would + # need to do a lot of precomputation to ensure that the + # likelihoods and gradients are computed together, then check + # for parameter changes before updating. + for i, fvar in enumerate(self._sp_f): + self._arguments[fvar.name] = f + for i, yvar in enumerate(self._sp_y): + self._arguments[yvar.name] = y + for theta in self._sp_theta: + self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) + + def pdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Likelihood function given inverse link of f. + + :param inv_link_f: inverse link of latent variables. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + l = self._likelihood_function(**self._arguments) + return np.prod(l) + + def logpdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Log Likelihood Function given inverse link of latent variables. + + :param inv_inv_link_f: latent variables (inverse link of f) + :type inv_inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + ll = self._log_likelihood_function(**self._arguments) + return np.sum(ll) + + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): + """ + Gradient of log likelihood with respect to the inverse link function. 
+ + :param inv_inv_link_f: latent variables (inverse link of f) + :type inv_inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: gradient of likelihood with respect to each point. + :rtype: Nx1 array + + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return self._first_derivative_code['f'](**self._arguments) + + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): + """ + Hessian of log likelihood given inverse link of latent variables with respect to that inverse link. + i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j). + + + :param inv_link_f: inverse link of the latent variables. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata which is not used in student t distribution + :returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. 
Note:: + Returns diagonal of Hessian, since every where else it is + 0, as the likelihood factorizes over cases (the + distribution for y_i depends only on link(f_i) not on + link(f_(j!=i)) + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return self._second_derivative_code['f'](**self._arguments) + + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return self._third_derivative_code['f'](**self._arguments) + raise NotImplementedError + + def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return np.asarray([self._first_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta]) + + def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return np.asarray([self._second_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta]) + + def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return np.asarray([self._third_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta]) + + def predictive_mean(self, mu, sigma, Y_metadata=None): + raise NotImplementedError + + def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): + raise NotImplementedError + + def conditional_mean(self, gp): + raise NotImplementedError + + def conditional_variance(self, gp): + raise NotImplementedError + + def samples(self, gp, Y_metadata=None): + raise NotImplementedError From f3b74fa85ff628de673e3a652a04d30880646487 Mon Sep 17 00:00:00 2001 From: Max Zwiessele 
Date: Mon, 31 Mar 2014 12:45:09 +0100 Subject: [PATCH 29/51] pickling and caching --- GPy/core/gp.py | 25 -- GPy/core/model.py | 32 +- GPy/core/parameterization/array_core.py | 105 +---- GPy/core/parameterization/index_operations.py | 13 +- GPy/core/parameterization/lists_and_dicts.py | 73 ++++ GPy/core/parameterization/param.py | 125 ++---- GPy/core/parameterization/parameter_core.py | 389 ++++++++---------- GPy/core/parameterization/parameterized.py | 31 +- GPy/core/parameterization/variational.py | 3 +- GPy/core/sparse_gp.py | 12 - GPy/core/svigp.py | 102 ++--- GPy/examples/dimensionality_reduction.py | 9 +- .../latent_function_inference/var_dtc.py | 12 +- GPy/kern/_src/kern.py | 19 +- GPy/kern/_src/kernel_slice_operations.py | 2 +- GPy/models/bayesian_gplvm.py | 13 - GPy/models/gp_regression.py | 5 - GPy/models/gplvm.py | 6 - GPy/models/mrd.py | 13 +- GPy/models/sparse_gp_classification.py | 8 - GPy/models/sparse_gp_regression.py | 8 - GPy/models/sparse_gplvm.py | 8 - GPy/models/svigp_regression.py | 7 - GPy/models/warped_gp.py | 8 - GPy/testing/kernel_tests.py | 122 +++--- GPy/testing/observable_tests.py | 4 +- GPy/testing/parameterized_tests.py | 4 +- GPy/util/caching.py | 9 + 28 files changed, 481 insertions(+), 686 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 6fc127ea..490bcc72 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -214,28 +214,3 @@ class GP(Model): """ return self.kern.input_sensitivity() - def _getstate(self): - """ - - Get the current state of the class, here we return everything that is - needed to recompute the model. 
- - """ - - return []#Model._getstate(self) + [self.X, -# self.num_data, -# self.input_dim, -# self.kern, -# self.likelihood, -# self.output_dim, -# ] - - def _setstate(self, state): - return - self.output_dim = state.pop() - self.likelihood = state.pop() - self.kern = state.pop() - self.input_dim = state.pop() - self.num_data = state.pop() - self.X = state.pop() - Model._setstate(self, state) diff --git a/GPy/core/model.py b/GPy/core/model.py index e04993cb..a39eceda 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -27,33 +27,6 @@ class Model(Parameterized): def _log_likelihood_gradients(self): return self.gradient - def _getstate(self): - """ - Get the current state of the class. - Inherited from Parameterized, so add those parameters to the state - - :return: list of states from the model. - - """ - return Parameterized._getstate(self) + \ - [self.priors, self.optimization_runs, - self.sampling_runs, self.preferred_optimizer] - - def _setstate(self, state): - """ - set state from previous call to _getstate - call Parameterized with the rest of the state - - :param state: the state of the model. - :type state: list as returned from _getstate. - - """ - self.preferred_optimizer = state.pop() - self.sampling_runs = state.pop() - self.optimization_runs = state.pop() - self.priors = state.pop() - Parameterized._setstate(self, state) - def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): """ Perform random restarts of the model, and set the model to the best @@ -318,7 +291,10 @@ class Model(Parameterized): denominator = (2 * np.dot(dx, gradient)) global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator) - return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance + global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance) + if global_ratio is np.nan: + global_ratio = 0 + return np.abs(1. 
- global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance or global_diff else: # check the gradient of each parameter individually, and do some pretty printing try: diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index ab8214f2..fc9d6cf2 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -__updated__ = '2014-03-24' +__updated__ = '2014-03-31' import numpy as np -from parameter_core import Observable +from parameter_core import Observable, Pickleable -class ObsAr(np.ndarray, Observable): +class ObsAr(np.ndarray, Pickleable, Observable): """ An ndarray which reports changes to its observers. The observers can add themselves with a callable, which @@ -30,13 +30,25 @@ class ObsAr(np.ndarray, Observable): def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) + def copy(self): + memo = {} + memo[id(self)] = self + return self.__deepcopy__(memo) + + def __deepcopy__(self, memo): + s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy()) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + def __reduce__(self): - func, args, state = np.ndarray.__reduce__(self) - return func, args, (state, Observable._getstate(self)) + func, args, state = super(ObsAr, self).__reduce__() + return func, args, (state, Pickleable.__getstate__(self)) def __setstate__(self, state): np.ndarray.__setstate__(self, state[0]) - Observable._setstate(self, state[1]) + Pickleable.__setstate__(self, state[1]) def __setitem__(self, s, val): super(ObsAr, self).__setitem__(s, val) @@ -48,12 +60,6 @@ class ObsAr(np.ndarray, Observable): def __setslice__(self, start, stop, val): return self.__setitem__(slice(start, stop), val) - def __copy__(self, 
*args): - return ObsAr(self.view(np.ndarray).copy()) - - def copy(self, *args): - return self.__copy__(*args) - def __ilshift__(self, *args, **kwargs): r = np.ndarray.__ilshift__(self, *args, **kwargs) self.notify_observers() @@ -128,77 +134,4 @@ class ObsAr(np.ndarray, Observable): def __imul__(self, *args, **kwargs): r = np.ndarray.__imul__(self, *args, **kwargs) self.notify_observers() - return r - - -# def __rrshift__(self, *args, **kwargs): -# r = np.ndarray.__rrshift__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __ror__(self, *args, **kwargs): -# r = np.ndarray.__ror__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rxor__(self, *args, **kwargs): -# r = np.ndarray.__rxor__(self, *args, **kwargs) -# self.notify_observers() -# return r - - - -# def __rdivmod__(self, *args, **kwargs): -# r = np.ndarray.__rdivmod__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __radd__(self, *args, **kwargs): -# r = np.ndarray.__radd__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rdiv__(self, *args, **kwargs): -# r = np.ndarray.__rdiv__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rtruediv__(self, *args, **kwargs): -# r = np.ndarray.__rtruediv__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rshift__(self, *args, **kwargs): -# r = np.ndarray.__rshift__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rmul__(self, *args, **kwargs): -# r = np.ndarray.__rmul__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rpow__(self, *args, **kwargs): -# r = np.ndarray.__rpow__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rsub__(self, *args, **kwargs): -# r = np.ndarray.__rsub__(self, *args, **kwargs) -# self.notify_observers() -# return r - -# def __rfloordiv__(self, *args, **kwargs): -# r = np.ndarray.__rfloordiv__(self, *args, **kwargs) -# 
self.notify_observers() -# return r - + return r \ No newline at end of file diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index e2a041f7..ebfe2904 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -24,12 +24,6 @@ class ParameterIndexOperations(object): for t, i in constraints.iteritems(): self.add(t, i) - def __getstate__(self): - return self._properties - - def __setstate__(self, state): - self._properties = state - def iteritems(self): return self._properties.iteritems() @@ -92,8 +86,10 @@ class ParameterIndexOperations(object): for i, v in parameter_index_view.iteritems(): self.add(i, v+offset) - def copy(self): + return self.__deepcopy__(None) + + def __deepcopy__(self, memo): return ParameterIndexOperations(dict(self.iteritems())) def __getitem__(self, prop): @@ -203,6 +199,9 @@ class ParameterIndexOperationsView(object): def copy(self): + return self.__deepcopy__(None) + + def __deepcopy__(self, memo): return ParameterIndexOperations(dict(self.iteritems())) pass diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index 31235952..6902c249 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -36,3 +36,76 @@ class ArrayList(list): index += 1 raise ValueError, "{} is not in list".format(item) pass + +class ObservablesList(object): + def __init__(self): + self._poc = [] + + def remove(self, value): + return self._poc.remove(value) + + + def __delitem__(self, ind): + return self._poc.__delitem__(ind) + + + def __setitem__(self, ind, item): + return self._poc.__setitem__(ind, item) + + + def __getitem__(self, ind): + return self._poc.__getitem__(ind) + + + def __repr__(self): + return self._poc.__repr__() + + + def append(self, obj): + return self._poc.append(obj) + + + def index(self, value): + return self._poc.index(value) + + + 
def extend(self, iterable): + return self._poc.extend(iterable) + + + def __str__(self): + return self._poc.__str__() + + + def __iter__(self): + return self._poc.__iter__() + + + def insert(self, index, obj): + return self._poc.insert(index, obj) + + + def __len__(self): + return self._poc.__len__() + + def __deepcopy__(self, memo): + s = ObservablesList() + import copy + s._poc = copy.deepcopy(self._poc, memo) + return s + + def __getstate__(self): + from ...util.caching import Cacher + obs = [] + for p, o, c in self: + if (getattr(o, c.__name__, None) is not None + and not isinstance(o, Cacher)): + obs.append((p,o,c.__name__)) + return obs + + def __setstate__(self, state): + self._poc = [] + for p, o, c in state: + self._poc.append((p,o,getattr(o, c))) + + pass diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index de16a1a0..f89b09df 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -43,14 +43,13 @@ class Param(OptimizationHandlable, ObsAr): _fixes_ = None _parameters_ = [] def __new__(cls, name, input_array, default_constraint=None): - obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, default_constraint=default_constraint)) + obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) cls.__name__ = "Param" obj._current_slice_ = (slice(obj.shape[0]),) obj._realshape_ = obj.shape obj._realsize_ = obj.size obj._realndim_ = obj.ndim obj._original_ = True - obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64) return obj def __init__(self, name, input_array, default_constraint=None, *a, **kw): @@ -87,74 +86,30 @@ class Param(OptimizationHandlable, ObsAr): self.priors = getattr(obj, 'priors', None) @property - def _param_array_(self): + def param_array(self): return self + @property + def current_slice(self): + if self._current_slice_ is None: + return slice(0, self.shape[0], 1) + return self._current_slice_ + 
@property def gradient(self): + """ + Return a view on the gradient, which is in the same shape as this parameter is. + Note: this is not the real gradient array, it is just a view on it. + + To work on the real gradient array use: self.full_gradient + """ + if getattr(self, '_gradient_array_', None) is None: + self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64) return self._gradient_array_[self._current_slice_] @gradient.setter def gradient(self, val): - self.gradient[:] = val - - #=========================================================================== - # Pickling operations - #=========================================================================== - def __reduce__(self): - func, args, state = super(Param, self).__reduce__() - return func, args, (state, - (self._name, - self._parent_, - self._parent_index_, - self._default_constraint_, - self._current_slice_, - self._realshape_, - self._realsize_, - self._realndim_, - self.constraints, - self.priors - ) - ) - - def __setstate__(self, state): - super(Param, self).__setstate__(state[0]) - state = list(state[1]) - self.priors = state.pop() - self.constraints = state.pop() - self._realndim_ = state.pop() - self._realsize_ = state.pop() - self._realshape_ = state.pop() - self._current_slice_ = state.pop() - self._default_constraint_ = state.pop() - self._parent_index_ = state.pop() - self._parent_ = state.pop() - self._name = state.pop() - - def copy(self, *args): - constr = self.constraints.copy() - priors = self.priors.copy() - p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_) - p.constraints = constr - p.priors = priors - return p - #=========================================================================== - # get/set parameters - #=========================================================================== -# def _set_params(self, param, trigger_parent=True): -# self.flat = param -# if trigger_parent: min_priority = None -# else: min_priority = 
-numpy.inf -# self.notify_observers(None, min_priority) -# -# def _get_params(self): -# return self.flat -# -# def _collect_gradient(self, target): -# target += self.gradient.flat -# -# def _set_gradient(self, g): -# self.gradient = g.reshape(self._realshape_) + self._gradient_array_[self._current_slice_] = val #=========================================================================== # Array operations -> done @@ -172,24 +127,6 @@ class Param(OptimizationHandlable, ObsAr): def __setitem__(self, s, val): super(Param, self).__setitem__(s, val) - #=========================================================================== - # Index Operations: - #=========================================================================== - #def _internal_offset(self): - # internal_offset = 0 - # extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1] - # for i, si in enumerate(self._current_slice_[:self._realndim_]): - # if numpy.all(si == Ellipsis): - # continue - # if isinstance(si, slice): - # a = si.indices(self._realshape_[i])[0] - # elif isinstance(si, (list,numpy.ndarray,tuple)): - # a = si[0] - # else: a = si - # if a < 0: - # a = self._realshape_[i] + a - # internal_offset += a * extended_realshape[i] - # return internal_offset def _raveled_index(self, slice_index=None): # return an index array on the raveled array, which is formed by the current_slice @@ -235,13 +172,21 @@ class Param(OptimizationHandlable, ObsAr): def is_fixed(self): from transformations import __fixed__ return self.constraints[__fixed__].size == self.size - #def round(self, decimals=0, out=None): - # view = super(Param, self).round(decimals, out).view(Param) - # view.__array_finalize__(self) - # return view - #round.__doc__ = numpy.round.__doc__ + def _get_original(self, param): return self + + #=========================================================================== + # Pickling and copying + #=========================================================================== + def 
__deepcopy__(self, memo): + s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy()) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + + #=========================================================================== # Printing -> done #=========================================================================== @@ -250,7 +195,8 @@ class Param(OptimizationHandlable, ObsAr): if self.size <= 1: return [str(self.view(numpy.ndarray)[0])] else: return [str(self.shape)] - def parameter_names(self, add_self=False, adjust_for_printing=False): + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + # this is just overwrighting the parameterized calls to parameter names, in order to maintain OOP if adjust_for_printing: return [adjust_name_for_printing(self.name)] return [self.name] @@ -261,6 +207,9 @@ class Param(OptimizationHandlable, ObsAr): def parameter_shapes(self): return [self.shape] @property + def num_params(self): + return 0 + @property def _constraints_str(self): return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))] @property @@ -368,7 +317,7 @@ class ParamConcatenation(object): #=========================================================================== def __getitem__(self, s): ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; - params = [p._param_array_[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._param_array_[ind[ps]])] + params = [p.param_array[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p.param_array[ind[ps]])] if len(params)==1: return params[0] return ParamConcatenation(params) def __setitem__(self, s, val, update=True): @@ -381,7 +330,7 @@ class ParamConcatenation(object): if update: self.update_all_params() def values(self): - return numpy.hstack([p._param_array_ for p in 
self.params]) + return numpy.hstack([p.param_array for p in self.params]) #=========================================================================== # parameter operations: #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index b804a61a..a60b8b38 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -13,10 +13,10 @@ Observable Pattern for patameterization """ -from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED +from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np -__updated__ = '2014-03-24' +__updated__ = '2014-03-31' class HierarchyError(Exception): """ @@ -31,71 +31,8 @@ def adjust_name_for_printing(name): return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@", '_at_') return '' -class InterfacePickleFunctions(object): - def __init__(self, *a, **kw): - super(InterfacePickleFunctions, self).__init__() - def _getstate(self): - """ - Returns the state of this class in a memento pattern. - The state must be a list-like structure of all the fields - this class needs to run. - - See python doc "pickling" (`__getstate__` and `__setstate__`) for details. - """ - raise NotImplementedError, "To be able to use pickling you need to implement this method" - def _setstate(self, state): - """ - Set the state (memento pattern) of this class to the given state. - Usually this is just the counterpart to _getstate, such that - an object is a copy of another when calling - - copy = .__new__(*args,**kw)._setstate(._getstate()) - - See python doc "pickling" (`__getstate__` and `__setstate__`) for details. 
- """ - raise NotImplementedError, "To be able to use pickling you need to implement this method" - -class Pickleable(InterfacePickleFunctions): - """ - Make an object pickleable (See python doc 'pickling'). - - This class allows for pickling support by Memento pattern. - _getstate returns a memento of the class, which gets pickled. - _setstate() (re-)sets the state of the class to the memento - """ - def __init__(self, *a, **kw): - super(Pickleable, self).__init__() - #=========================================================================== - # Pickling operations - #=========================================================================== - def pickle(self, f, protocol=-1): - """ - :param f: either filename or open file object to write to. - if it is an open buffer, you have to make sure to close - it properly. - :param protocol: pickling protocol to use, python-pickle for details. - """ - import cPickle - if isinstance(f, str): - with open(f, 'w') as f: - cPickle.dump(self, f, protocol) - else: - cPickle.dump(self, f, protocol) - def __getstate__(self): - if self._has_get_set_state(): - return self._getstate() - return self.__dict__ - def __setstate__(self, state): - if self._has_get_set_state(): - self._setstate(state) - # TODO: maybe parameters_changed() here? - return - self.__dict__ = state - def _has_get_set_state(self): - return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__) - -class Observable(Pickleable): +class Observable(object): """ Observable pattern for parameterization. 
@@ -105,8 +42,9 @@ class Observable(Pickleable): """ _updated = True def __init__(self, *args, **kwargs): - super(Observable, self).__init__(*args, **kwargs) - self._observer_callables_ = [] + super(Observable, self).__init__() + from lists_and_dicts import ObservablesList + self._observer_callables_ = ObservablesList() def add_observer(self, observer, callble, priority=0): self._insert_sorted(priority, observer, callble) @@ -151,17 +89,11 @@ class Observable(Pickleable): ins += 1 self._observer_callables_.insert(ins, (p, o, c)) - def _getstate(self): - return [self._observer_callables_] - - def _setstate(self, state): - self._observer_callables_ = state.pop() - #=============================================================================== # Foundation framework for parameterized and param objects: #=============================================================================== -class Parentable(Observable): +class Parentable(object): """ Enable an Object to have a parent. @@ -171,7 +103,7 @@ class Parentable(Observable): _parent_ = None _parent_index_ = None def __init__(self, *args, **kwargs): - super(Parentable, self).__init__(*args, **kwargs) + super(Parentable, self).__init__() def has_parent(self): """ @@ -207,7 +139,84 @@ class Parentable(Observable): """ pass -class Gradcheckable(Parentable): +class Pickleable(object): + """ + Make an object pickleable (See python doc 'pickling'). + + This class allows for pickling support by Memento pattern. + _getstate returns a memento of the class, which gets pickled. + _setstate() (re-)sets the state of the class to the memento + """ + def __init__(self, *a, **kw): + super(Pickleable, self).__init__() + #=========================================================================== + # Pickling operations + #=========================================================================== + def pickle(self, f, protocol=-1): + """ + :param f: either filename or open file object to write to. 
+ if it is an open buffer, you have to make sure to close + it properly. + :param protocol: pickling protocol to use, python-pickle for details. + """ + import cPickle as pickle + import pickle #TODO: cPickle + if isinstance(f, str): + with open(f, 'w') as f: + pickle.dump(self, f, protocol) + else: + pickle.dump(self, f, protocol) + + #=========================================================================== + # copy and pickling + #=========================================================================== + def copy(self): + """Returns a (deep) copy of the current model""" + #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" + import copy + memo = {} + memo[id(self._parent_)] = None + memo[id(self._parent_index_)] = None + memo[id(self.gradient)] = None + memo[id(self.param_array)] = None + memo[id(self._fixes_)] = None + c = copy.deepcopy(self, memo) + return c + + def __deepcopy__(self, memo): + s = self.__new__(self.__class__) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + + def __getstate__(self): + ignore_list = ([#'_parent_', '_parent_index_', + #'_observer_callables_', + '_param_array_', '_gradient_array_', '_fixes_', + '_Cacher_wrap__cachers'] + #+ self.parameter_names(recursive=False) + ) + dc = dict() + for k,v in self.__dict__.iteritems(): + if k not in ignore_list: + #if hasattr(v, "__getstate__"): + #dc[k] = v.__getstate__() + #else: + dc[k] = v + return dc + + def __setstate__(self, state): + self.__dict__.update(state) + return self + + #def __getstate__(self, memo): + # raise NotImplementedError, "get state must be implemented to be able to pickle objects" + + #def __setstate__(self, memo): + # raise NotImplementedError, "set state must be implemented to be able to pickle objects" + +class Gradcheckable(Pickleable, Parentable): """ Adds the functionality for an object to be gradcheckable. It is just a thin wrapper of a call to the highest parent for now. 
@@ -312,7 +321,7 @@ class Indexable(object): raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" -class Constrainable(Nameable, Indexable): +class Constrainable(Nameable, Indexable, Observable): """ Make an object constrainable with Priors and Transformations. TODO: Mappings!! @@ -429,14 +438,14 @@ class Constrainable(Nameable, Indexable): def log_prior(self): """evaluate the prior""" if self.priors.size > 0: - x = self._param_array_ + x = self.param_array return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0) return 0. def _log_prior_gradients(self): """evaluate the gradients of the priors""" if self.priors.size > 0: - x = self._param_array_ + x = self.param_array ret = np.zeros(x.size) [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] return ret @@ -455,7 +464,7 @@ class Constrainable(Nameable, Indexable): Constrain the parameter to the given :py:class:`GPy.core.transformations.Transformation`. 
""" - self._param_array_[:] = transform.initialize(self._param_array_) + self.param_array[:] = transform.initialize(self.param_array) reconstrained = self.unconstrain() self._add_to_index_operations(self.constraints, reconstrained, transform, warning) self.notify_observers(self, None if trigger_parent else -np.inf) @@ -565,14 +574,14 @@ class OptimizationHandlable(Constrainable): super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) def transform(self): - [np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] def untransform(self): - [np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] def _get_params_transformed(self): # transformed parameters (apply transformation rules) - p = self._param_array_.copy() + p = self.param_array.copy() [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] if self.has_parent() and self.constraints[__fixed__].size != 0: fixes = np.ones(self.size).astype(bool) @@ -583,14 +592,14 @@ class OptimizationHandlable(Constrainable): return p def _set_params_transformed(self, p): - if p is self._param_array_: + if p is self.param_array: p = p.copy() if self.has_parent() and self.constraints[__fixed__].size != 0: fixes = np.ones(self.size).astype(bool) fixes[self.constraints[__fixed__]] = FIXED - self._param_array_.flat[fixes] = p - elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p - else: self._param_array_.flat = p + self.param_array.flat[fixes] = p + elif self._has_fixes(): self.param_array.flat[self._fixes_] = p + else: 
self.param_array.flat = p self.untransform() self._trigger_params_changed() @@ -600,36 +609,29 @@ class OptimizationHandlable(Constrainable): def _size_transformed(self): return self.size - self.constraints[__fixed__].size -# -# def _untransform_params(self, p): -# # inverse apply transformations for parameters -# #p = p.copy() -# if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp -# [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] -# return p -# -# def _get_params(self): -# """ -# get all parameters -# """ -# return self._param_array_ -# p = np.empty(self.size, dtype=np.float64) -# if self.size == 0: -# return p -# [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)] -# return p -# def _set_params(self, params, trigger_parent=True): -# self._param_array_.flat = params -# if trigger_parent: min_priority = None -# else: min_priority = -np.inf -# self.notify_observers(None, min_priority) - # don't overwrite this anymore! - # raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable" + @property + def num_params(self): + """ + Return the number of parameters of this parameter_handle. + Param objects will allways return 0. + """ + raise NotImplemented, "Abstract, please implement in respective classes" - #=========================================================================== - # Optimization handles: - #=========================================================================== + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + """ + Get the names of all parameters of this model. 
+ + :param bool add_self: whether to add the own name in front of names + :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names + :param bool recursive: whether to traverse through hierarchy and append leaf node names + """ + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self._parameters_] + if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) + return names def _get_param_names(self): n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) return n @@ -663,16 +665,30 @@ class OptimizationHandlable(Constrainable): # For shared memory arrays. This does nothing in Param, but sets the memory # for all parameterized objects #=========================================================================== + @property + def full_gradient(self): + """ + Note to users: + This does not return the gradient in the right shape! Use self.gradient + for the right gradient array. + + To work on the gradient array, use this as the gradient handle. + This method exists for in memory use of parameters. + When trying to access the true gradient array, use this. 
+ """ + self.gradient # <<< ensure _gradient_array_ + return self._gradient_array_ + def _propagate_param_grad(self, parray, garray): pi_old_size = 0 for pi in self._parameters_: pislice = slice(pi_old_size, pi_old_size + pi.size) - self._param_array_[pislice] = pi._param_array_.flat # , requirements=['C', 'W']).flat - self._gradient_array_[pislice] = pi._gradient_array_.flat # , requirements=['C', 'W']).flat + self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat + self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat - pi._param_array_.data = parray[pislice].data - pi._gradient_array_.data = garray[pislice].data + pi.param_array.data = parray[pislice].data + pi.full_gradient.data = garray[pislice].data pi._propagate_param_grad(parray[pislice], garray[pislice]) pi_old_size += pi.size @@ -681,26 +697,32 @@ class Parameterizable(OptimizationHandlable): def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.lists_and_dicts import ArrayList - _parameters_ = ArrayList() + self._parameters_ = ArrayList() self.size = 0 - self._param_array_ = np.empty(self.size, dtype=np.float64) - self._gradient_array_ = np.empty(self.size, dtype=np.float64) self._added_names_ = set() - def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): - """ - Get the names of all parameters of this model. 
+ @property + def param_array(self): + if not hasattr(self, '_param_array_'): + self._param_array_ = np.empty(self.size, dtype=np.float64) + return self._param_array_ - :param bool add_self: whether to add the own name in front of names - :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names - :param bool recursive: whether to traverse through hierarchy and append leaf node names - """ - if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) - else: adjust = lambda x: x - if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] - else: names = [adjust(x.name) for x in self._parameters_] - if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) - return names + @param_array.setter + def param_array(self, arr): + self._param_array_ = arr + + #========================================================================= + # Gradient handling + #========================================================================= + @property + def gradient(self): + if not hasattr(self, '_gradient_array_'): + self._gradient_array_ = np.empty(self.size, dtype=np.float64) + return self._gradient_array_ + + @gradient.setter + def gradient(self, val): + self._gradient_array_[:] = val @property def num_params(self): @@ -737,34 +759,6 @@ class Parameterizable(OptimizationHandlable): self._remove_parameter_name(None, old_name) self._add_parameter_name(param) - #========================================================================= - # Gradient handling - #========================================================================= - @property - def gradient(self): - return self._gradient_array_ - - @gradient.setter - def gradient(self, val): - self._gradient_array_[:] = val - #=========================================================================== - # def _collect_gradient(self, target): - # [p._collect_gradient(target[s]) for p, s in 
itertools.izip(self._parameters_, self._param_slices_)] - #=========================================================================== - - #=========================================================================== - # def _set_params(self, params, trigger_parent=True): - # [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - # if trigger_parent: min_priority = None - # else: min_priority = -np.inf - # self.notify_observers(None, min_priority) - #=========================================================================== - - #=========================================================================== - # def _set_gradient(self, g): - # [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - #=========================================================================== - def add_parameter(self, param, index=None, _ignore_added_names=False): """ :param parameters: the parameters to add @@ -864,7 +858,7 @@ class Parameterizable(OptimizationHandlable): # no parameters for this class return old_size = 0 - self._param_array_ = np.empty(self.size, dtype=np.float64) + self.param_array = np.empty(self.size, dtype=np.float64) self._gradient_array_ = np.empty(self.size, dtype=np.float64) self._param_slices_ = [] @@ -874,15 +868,16 @@ class Parameterizable(OptimizationHandlable): pslice = slice(old_size, old_size + p.size) # first connect all children - p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice]) + p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice]) # then connect children to self - self._param_array_[pslice] = p._param_array_.flat # , requirements=['C', 'W']).ravel(order='C') - self._gradient_array_[pslice] = p._gradient_array_.flat # , requirements=['C', 'W']).ravel(order='C') + self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C') + self.full_gradient[pslice] = 
p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C') - if not p._param_array_.flags['C_CONTIGUOUS']: + if not p.param_array.flags['C_CONTIGUOUS']: + raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" import ipdb;ipdb.set_trace() - p._param_array_.data = self._param_array_[pslice].data - p._gradient_array_.data = self._gradient_array_[pslice].data + p.param_array.data = self.param_array[pslice].data + p.full_gradient.data = self.full_gradient[pslice].data self._param_slices_.append(pslice) @@ -898,46 +893,22 @@ class Parameterizable(OptimizationHandlable): self.notify_observers(which=which) #=========================================================================== - # TODO: not working yet + # Pickling #=========================================================================== + def __setstate__(self, state): + super(Parameterizable, self).__setstate__(state) + self._connect_parameters() + self._connect_fixes() + self._notify_parent_change() + + self.parameters_changed() + def copy(self): - """Returns a (deep) copy of the current model""" - #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" - import copy - from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView - from .lists_and_dicts import ArrayList - - param_mapping = [[] for _ in range(self.num_params)] - - dc = dict() - for k, v in self.__dict__.iteritems(): - if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False): - if v in self._parameters_: - param_mapping[self._parameters_.index(v)] += [k] - elif isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): - dc[k] = v.copy() - else: - dc[k] = copy.deepcopy(v) - if k == '_parameters_': - params = [p.copy() for p in v] - - dc['_parent_'] = None - dc['_parent_index_'] = 
None - dc['_observer_callables_'] = [] - dc['_parameters_'] = ArrayList() - dc['constraints'].clear() - dc['priors'].clear() - dc['size'] = 0 - - s = self.__new__(self.__class__) - s.__dict__ = dc - - for p, mlist in zip(params, param_mapping): - s.add_parameter(p, _ignore_added_names=True) - for m in mlist: - setattr(s, m, p) - return s - + c = super(Parameterizable, self).copy() + c._connect_parameters() + c._connect_fixes() + c._notify_parent_change() + return c #=========================================================================== # From being parentable, we have to define the parent_change notification #=========================================================================== diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 529d3733..0760f8c6 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -101,34 +101,13 @@ class Parameterized(Parameterizable, Pickleable): return G return node - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - For inheriting from Parameterized: - - Allways append the state of the inherited object - and call down to the inherited object in _setstate!! 
- """ - return [] - - def _setstate(self, state): - self.parameters_changed() - #=========================================================================== - # Override copy to handle programmatically added observers - #=========================================================================== - def copy(self): - c = super(Parameterized, self).copy() - c.add_observer(c, c._parameters_changed_notification, -100) - return c - #=========================================================================== # Gradient control #=========================================================================== def _transform_gradients(self, g): if self.has_parent(): return g - [numpy.put(g, i, g[i] * c.gradfactor(self._param_array_[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + [numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g @@ -160,7 +139,7 @@ class Parameterized(Parameterizable, Pickleable): this is not in the global view of things! 
""" return numpy.r_[:self.size] - + #=========================================================================== # Convenience for fixed, tied checking of param: #=========================================================================== @@ -175,7 +154,7 @@ class Parameterized(Parameterizable, Pickleable): # you can retrieve the original param through this method, by passing # the copy here return self._parameters_[param._parent_index_] - + #=========================================================================== # Get/set parameters: #=========================================================================== @@ -192,7 +171,7 @@ class Parameterized(Parameterizable, Pickleable): def __getitem__(self, name, paramlist=None): if isinstance(name, (int, slice, tuple, np.ndarray)): - return self._param_array_[name] + return self.param_array[name] else: if paramlist is None: paramlist = self.grep_param_names(name) @@ -208,7 +187,7 @@ class Parameterized(Parameterizable, Pickleable): def __setitem__(self, name, value, paramlist=None): if isinstance(name, (slice, tuple, np.ndarray)): try: - self._param_array_[name] = value + self.param_array[name] = value except: raise ValueError, "Setting by slice or index only allowed with array-like" self._trigger_params_changed() diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index ce39e2c9..f8fd165f 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -61,7 +61,7 @@ class SpikeAndSlabPrior(VariationalPrior): self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) class VariationalPosterior(Parameterized): - def __init__(self, means=None, variances=None, name=None, *a, **kw): + def __init__(self, means=None, variances=None, name='latent space', *a, **kw): super(VariationalPosterior, self).__init__(name=name, *a, **kw) self.mean = Param("mean", means) self.variance = Param("variance", variances, Logexp()) @@ -119,6 
+119,7 @@ class NormalPosterior(VariationalPosterior): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ...plotting.matplot_dep import variational_plots + import matplotlib return variational_plots.plot(self,*args) class SpikeAndSlabPosterior(VariationalPosterior): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 7bf0ca2a..7552b8ac 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -106,15 +106,3 @@ class SparseGP(GP): return mu, var - def _getstate(self): - """ - Get the current state of the class, - """ - return GP._getstate(self) + [ - self.Z, - self.num_inducing] - - def _setstate(self, state): - self.num_inducing = state.pop() - self.Z = state.pop() - GP._setstate(self, state) diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py index a2c7acee..60e8371c 100644 --- a/GPy/core/svigp.py +++ b/GPy/core/svigp.py @@ -89,57 +89,57 @@ class SVIGP(GP): self._param_steplength_trace = [] self._vb_steplength_trace = [] - def _getstate(self): - steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength] - return GP._getstate(self) + \ - [self.get_vb_param(), - self.Z, - self.num_inducing, - self.has_uncertain_inputs, - self.X_variance, - self.X_batch, - self.X_variance_batch, - steplength_params, - self.batchcounter, - self.batchsize, - self.epochs, - self.momentum, - self.data_prop, - self._param_trace, - self._param_steplength_trace, - self._vb_steplength_trace, - self._ll_trace, - self._grad_trace, - self.Y, - self._permutation, - self.iterations - ] - - def _setstate(self, state): - self.iterations = state.pop() - self._permutation = state.pop() - self.Y = state.pop() - self._grad_trace = state.pop() - self._ll_trace = state.pop() - self._vb_steplength_trace = state.pop() - self._param_steplength_trace = state.pop() - 
self._param_trace = state.pop() - self.data_prop = state.pop() - self.momentum = state.pop() - self.epochs = state.pop() - self.batchsize = state.pop() - self.batchcounter = state.pop() - steplength_params = state.pop() - (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params - self.X_variance_batch = state.pop() - self.X_batch = state.pop() - self.X_variance = state.pop() - self.has_uncertain_inputs = state.pop() - self.num_inducing = state.pop() - self.Z = state.pop() - vb_param = state.pop() - GP._setstate(self, state) - self.set_vb_param(vb_param) +# def _getstate(self): +# steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength] +# return GP._getstate(self) + \ +# [self.get_vb_param(), +# self.Z, +# self.num_inducing, +# self.has_uncertain_inputs, +# self.X_variance, +# self.X_batch, +# self.X_variance_batch, +# steplength_params, +# self.batchcounter, +# self.batchsize, +# self.epochs, +# self.momentum, +# self.data_prop, +# self._param_trace, +# self._param_steplength_trace, +# self._vb_steplength_trace, +# self._ll_trace, +# self._grad_trace, +# self.Y, +# self._permutation, +# self.iterations +# ] +# +# def _setstate(self, state): +# self.iterations = state.pop() +# self._permutation = state.pop() +# self.Y = state.pop() +# self._grad_trace = state.pop() +# self._ll_trace = state.pop() +# self._vb_steplength_trace = state.pop() +# self._param_steplength_trace = state.pop() +# self._param_trace = state.pop() +# self.data_prop = state.pop() +# self.momentum = state.pop() +# self.epochs = state.pop() +# self.batchsize = state.pop() +# self.batchcounter = state.pop() +# steplength_params = state.pop() +# (self.hbar_t, 
self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params +# self.X_variance_batch = state.pop() +# self.X_batch = state.pop() +# self.X_variance = state.pop() +# self.has_uncertain_inputs = state.pop() +# self.num_inducing = state.pop() +# self.Z = state.pop() +# vb_param = state.pop() +# GP._setstate(self, state) +# self.set_vb_param(vb_param) def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 8171a032..07623d6b 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -324,18 +324,15 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): from GPy import kern from GPy.models import MRD - from GPy.likelihoods import Gaussian D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) #Ylist = [Ylist[0]] - k = [kern.Linear(Q, ARD=True) for _ in range(len(Ylist))] - m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw) + k = kern.Linear(Q, ARD=True) + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw) - m['.*noise'] = [Y.var()/500. for Y in Ylist] - #for i, Y in enumerate(Ylist): - # m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500. + m['.*noise'] = [Y.var()/40. 
for Y in Ylist] if optimize: print "Optimizing Model:" diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 53f12722..0e10a175 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -23,6 +23,7 @@ class VarDTC(object): def __init__(self, limit=1): #self._YYTfactor_cache = caching.cache() from ...util.caching import Cacher + self.limit = limit self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) @@ -33,6 +34,15 @@ class VarDTC(object): def _get_trYYT(self, Y): return param_to_array(np.sum(np.square(Y))) + def __getstate__(self): + return self.limit + + def __setstate__(self, state): + self.limit = state + from ...util.caching import Cacher + self.get_trYYT = Cacher(self._get_trYYT, self.limit) + self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit) + def _get_YYTfactor(self, Y): """ find a matrix L which satisfies LLT = YYT. @@ -126,7 +136,7 @@ class VarDTC(object): delit += output_dim * np.eye(num_inducing) # Compute dL_dKmm dL_dKmm = backsub_both_sides(Lm, delit) - + # derivatives of L w.r.t. 
psi dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 488745c5..de99bddb 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -15,7 +15,6 @@ class Kern(Parameterized): # found in kernel_slice_operations __metaclass__ = KernCallsViaSlicerMeta #=========================================================================== - _debug=False def __init__(self, input_dim, active_dims, name, *a, **kw): """ The base class for a kernel: a positive definite function @@ -175,22 +174,6 @@ class Kern(Parameterized): #else: kernels.append(other) return Prod([self, other], name) - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return super(Kern, self)._getstate() + [ - self.active_dims, - self.input_dim, - self._sliced_X] - - def _setstate(self, state): - self._sliced_X = state.pop() - self.input_dim = state.pop() - self.active_dims = state.pop() - super(Kern, self)._setstate(state) - class CombinationKernel(Kern): """ Abstract super class for combination kernels. 
@@ -220,7 +203,7 @@ class CombinationKernel(Kern): def get_input_dim_active_dims(self, kernels, extra_dims = None): active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) - input_dim = active_dims.max()+1 + (len(extra_dims) if extra_dims is not None else 0) + input_dim = active_dims.max()+1 + (len(np.r_[extra_dims]) if extra_dims is not None else 0) active_dims = slice(0, input_dim, 1) return input_dim, active_dims diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index ea5d2b0a..a4bb8f62 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -36,7 +36,7 @@ class _Slice_wrap(object): if self.k._sliced_X == 0: assert X.shape[1] > max(np.r_[self.k.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.k.active_dims])) self.X = self.k._slice_X(X) - self.X2 = self.k._slice_X(X2) if X2 is not None else None + self.X2 = self.k._slice_X(X2) if X2 is not None else X2 self.ret = True else: self.X = X diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index ef3462f6..d623c8f1 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -2,7 +2,6 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from gplvm import GPLVM from .. 
import kern from ..core import SparseGP from ..likelihoods import Gaussian @@ -61,18 +60,6 @@ class BayesianGPLVM(SparseGP): SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0) - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return SparseGP._getstate(self) + [self.init] - - def _setstate(self, state): - self._const_jitter = None - self.init = state.pop() - SparseGP._setstate(self, state) - def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" X.mean.gradient, X.variance.gradient = X_grad diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 86e64a54..d56e72b9 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -29,8 +29,3 @@ class GPRegression(GP): super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata) - def _getstate(self): - return GP._getstate(self) - - def _setstate(self, state): - return GP._setstate(self, state) diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index fb7d93e7..542dcd31 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -44,12 +44,6 @@ class GPLVM(GP): super(GPLVM, self).parameters_changed() self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) - def _getstate(self): - return GP._getstate(self) - - def _setstate(self, state): - GP._setstate(self, state) - def jacobian(self,X): target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) for i in range(self.output_dim): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 36088e35..458a70a1 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -65,14 +65,17 @@ class MRD(Model): from ..kern import RBF self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] elif isinstance(kernel, Kern): - 
self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))] + self.kern = [] + for i in range(len(Ylist)): + k = kernel.copy() + self.kern.append(k) else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" self.kern = kernel if X_variance is None: - X_variance = np.random.uniform(0, .1, X.shape) + X_variance = np.random.uniform(0.1, 0.2, X.shape) self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) @@ -108,8 +111,8 @@ class MRD(Model): def parameters_changed(self): self._log_marginal_likelihood = 0 self.posteriors = [] - self.Z.gradient = 0. - self.X.gradient = 0. + self.Z.gradient[:] = 0. + self.X.gradient[:] = 0. for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) @@ -160,6 +163,8 @@ class MRD(Model): X = np.random.randn(Ylist[0].shape[0], self.input_dim) fracs = X.var(0) fracs = [fracs]*self.input_dim + X -= X.mean() + X /= X.std() return X, fracs def _init_Z(self, init="permute", X=None): diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 96f7ac5a..e2c77d95 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -46,11 +46,3 @@ class SparseGPClassification(SparseGP): SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) self.ensure_default_constraints() - def _getstate(self): - return SparseGP._getstate(self) - - - def _setstate(self, state): - return SparseGP._setstate(self, state) - - pass diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 7edb93e4..f4d5513e 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -51,14 +51,6 @@ class SparseGPRegression(SparseGP): SparseGP.__init__(self, X, Y, Z, kernel, likelihood, 
inference_method=VarDTC()) - def _getstate(self): - return SparseGP._getstate(self) - - def _setstate(self, state): - return SparseGP._setstate(self, state) - - - class SparseGPRegressionUncertainInput(SparseGP): """ Gaussian Process model for regression with Gaussian variance on the inputs (X_variance) diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index 5c10d0b8..638da63e 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -28,14 +28,6 @@ class SparseGPLVM(SparseGPRegression, GPLVM): SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) self.ensure_default_constraints() - def _getstate(self): - return SparseGPRegression._getstate(self) - - - def _setstate(self, state): - return SparseGPRegression._setstate(self, state) - - def _get_param_names(self): return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + SparseGPRegression._get_param_names(self)) diff --git a/GPy/models/svigp_regression.py b/GPy/models/svigp_regression.py index 3faa1cab..3397e31e 100644 --- a/GPy/models/svigp_regression.py +++ b/GPy/models/svigp_regression.py @@ -43,10 +43,3 @@ class SVIGPRegression(SVIGP): SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) self.load_batch() - def _getstate(self): - return GPBase._getstate(self) - - - def _setstate(self, state): - return GPBase._setstate(self, state) - diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index d78f31df..4b982ed2 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -30,14 +30,6 @@ class WarpedGP(GP): GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self._set_params(self._get_params()) - def _getstate(self): - return GP._getstate(self) - - - def _setstate(self, state): - return GP._setstate(self, state) - - def _scale_data(self, Y): self._Ymax = Y.max() self._Ymin = Y.min() diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py 
index 944a054f..bda64f8a 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -5,6 +5,7 @@ import unittest import numpy as np import GPy import sys +from GPy.core.parameterization.param import Param verbose = 0 @@ -30,7 +31,7 @@ class Kern_check_model(GPy.core.Model): dL_dK = np.ones((X.shape[0], X2.shape[0])) self.kernel = kernel - self.X = GPy.core.parameterization.Param('X',X) + self.X = X self.X2 = X2 self.dL_dK = dL_dK @@ -77,10 +78,11 @@ class Kern_check_dK_dX(Kern_check_model): """This class allows gradient checks for the gradient of a kernel with respect to X. """ def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.X = Param('X',X) self.add_parameter(self.X) def parameters_changed(self): - self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2) + self.X.gradient[:] = self.kernel.gradients_X(self.dL_dK, self.X, self.X2) class Kern_check_dKdiag_dX(Kern_check_dK_dX): """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ @@ -91,7 +93,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX): return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum() def parameters_changed(self): - self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) + self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) @@ -127,6 +129,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print("Positive definite check failed for " + kern.name + " covariance function.") pass_checks = False + assert(result) return False if verbose: @@ -138,6 +141,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. 
Gradient values as follows:") Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) pass_checks = False + assert(result) return False if verbose: @@ -149,6 +153,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) pass_checks = False + assert(result) return False if verbose: @@ -165,6 +170,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) pass_checks = False + assert(result) return False if verbose: @@ -183,6 +189,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") testmodel.checkgrad(verbose=True) + import ipdb;ipdb.set_trace() + assert(result) pass_checks = False return False @@ -202,6 +210,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") testmodel.checkgrad(verbose=True) + assert(result) pass_checks = False return False @@ -219,6 +228,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) pass_checks = False + assert(result) return False return pass_checks @@ -227,7 +237,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb class KernelGradientTestsContinuous(unittest.TestCase): def setUp(self): - self.N, self.D = 100, 5 + self.N, self.D = 10, 5 self.X = np.random.randn(self.N,self.D) self.X2 = np.random.randn(self.N+10,self.D) @@ -339,59 +349,59 @@ class KernelTestsMiscellaneous(unittest.TestCase): self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X))) self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=self.sumkern.parts[0]), self.rbf.K(self.X))) -class KernelTestsNonContinuous(unittest.TestCase): - def setUp(self): - N0 = 3 - N1 = 9 - N2 = 4 - N = N0+N1+N2 - self.D = 3 - self.X = np.random.randn(N, self.D+1) - indices = np.random.random_integers(0, 2, size=N) - self.X[indices==0, -1] = 0 - self.X[indices==1, -1] = 1 - self.X[indices==2, -1] = 2 - #self.X = self.X[self.X[:, -1].argsort(), :] - self.X2 = np.random.randn((N0+N1)*2, self.D+1) - self.X2[:(N0*2), -1] = 0 - self.X2[(N0*2):, -1] = 1 - - def test_IndependentOutputs(self): - k = GPy.kern.RBF(self.D) - kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') - self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) - k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] - kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') - self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) - - def test_ODE_UY(self): - kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) - X = self.X[self.X[:,-1]!=2] - X2 = self.X2[self.X2[:,-1]!=2] - self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, 
verbose=verbose, fixed_X_dims=-1)) +# class KernelTestsNonContinuous(unittest.TestCase): +# def setUp(self): +# N0 = 3 +# N1 = 9 +# N2 = 4 +# N = N0+N1+N2 +# self.D = 3 +# self.X = np.random.randn(N, self.D+1) +# indices = np.random.random_integers(0, 2, size=N) +# self.X[indices==0, -1] = 0 +# self.X[indices==1, -1] = 1 +# self.X[indices==2, -1] = 2 +# #self.X = self.X[self.X[:, -1].argsort(), :] +# self.X2 = np.random.randn((N0+N1)*2, self.D+1) +# self.X2[:(N0*2), -1] = 0 +# self.X2[(N0*2):, -1] = 1 +# +# def test_IndependentOutputs(self): +# k = GPy.kern.RBF(self.D) +# kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') +# self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) +# k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] +# kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') +# self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) +# +# def test_ODE_UY(self): +# kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) +# X = self.X[self.X[:,-1]!=2] +# X2 = self.X2[self.X2[:,-1]!=2] +# self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) if __name__ == "__main__": print "Running unit tests, please be (very) patient..." 
- #unittest.main() - np.random.seed(0) - N0 = 3 - N1 = 9 - N2 = 4 - N = N0+N1+N2 - D = 3 - X = np.random.randn(N, D+1) - indices = np.random.random_integers(0, 2, size=N) - X[indices==0, -1] = 0 - X[indices==1, -1] = 1 - X[indices==2, -1] = 2 - #X = X[X[:, -1].argsort(), :] - X2 = np.random.randn((N0+N1)*2, D+1) - X2[:(N0*2), -1] = 0 - X2[(N0*2):, -1] = 1 - k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] - kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') - assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) - k = GPy.kern.RBF(D) - kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') - assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) + unittest.main() +# np.random.seed(0) +# N0 = 3 +# N1 = 9 +# N2 = 4 +# N = N0+N1+N2 +# D = 3 +# X = np.random.randn(N, D+1) +# indices = np.random.random_integers(0, 2, size=N) +# X[indices==0, -1] = 0 +# X[indices==1, -1] = 1 +# X[indices==2, -1] = 2 +# #X = X[X[:, -1].argsort(), :] +# X2 = np.random.randn((N0+N1)*2, D+1) +# X2[:(N0*2), -1] = 0 +# X2[(N0*2):, -1] = 1 +# k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] +# kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') +# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) +# k = GPy.kern.RBF(D) +# kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') +# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py index 90623703..05794dc3 100644 --- a/GPy/testing/observable_tests.py +++ b/GPy/testing/observable_tests.py @@ -93,12 +93,12 @@ class Test(unittest.TestCase): def test_set_params(self): self.assertEqual(self.par.params_changed_count, 0, 'no params 
changed yet') - self.par._param_array_[:] = 1 + self.par.param_array[:] = 1 self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 1, 'now params changed') self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) - self.par._param_array_[:] = 2 + self.par.param_array[:] = 2 self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 2, 'now params changed') self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index dc59449f..911cde0b 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -86,9 +86,9 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) def test_remove_parameter_param_array_grad_array(self): - val = self.test1.kern._param_array_.copy() + val = self.test1.kern.param_array.copy() self.test1.kern.remove_parameter(self.white) - self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist()) + self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist()) def test_add_parameter_already_in_hirarchy(self): self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 0886d0c6..ced56727 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -97,6 +97,15 @@ class Cacher(object): self.cached_outputs = [] self.inputs_changed = [] + def __deepcopy__(self, memo=None): + return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs) + + def __getstate__(self, memo=None): + raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation)) + + def __setstate__(self, memo=None): + raise NotImplementedError, "Trying to pickle 
Cacher object with function {}, pickling functions not possible.".format(str(self.operation)) + @property def __name__(self): return self.operation.__name__ From 89cdd1f6d9510d6387a17366a53d4d03d86bd726 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 12:48:36 +0100 Subject: [PATCH 30/51] added kernel tests again --- GPy/testing/kernel_tests.py | 60 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index bda64f8a..91683edc 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -349,36 +349,36 @@ class KernelTestsMiscellaneous(unittest.TestCase): self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X))) self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=self.sumkern.parts[0]), self.rbf.K(self.X))) -# class KernelTestsNonContinuous(unittest.TestCase): -# def setUp(self): -# N0 = 3 -# N1 = 9 -# N2 = 4 -# N = N0+N1+N2 -# self.D = 3 -# self.X = np.random.randn(N, self.D+1) -# indices = np.random.random_integers(0, 2, size=N) -# self.X[indices==0, -1] = 0 -# self.X[indices==1, -1] = 1 -# self.X[indices==2, -1] = 2 -# #self.X = self.X[self.X[:, -1].argsort(), :] -# self.X2 = np.random.randn((N0+N1)*2, self.D+1) -# self.X2[:(N0*2), -1] = 0 -# self.X2[(N0*2):, -1] = 1 -# -# def test_IndependentOutputs(self): -# k = GPy.kern.RBF(self.D) -# kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') -# self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) -# k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] -# kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') -# self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) -# -# def 
test_ODE_UY(self): -# kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) -# X = self.X[self.X[:,-1]!=2] -# X2 = self.X2[self.X2[:,-1]!=2] -# self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) +class KernelTestsNonContinuous(unittest.TestCase): + def setUp(self): + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + self.D = 3 + self.X = np.random.randn(N, self.D+1) + indices = np.random.random_integers(0, 2, size=N) + self.X[indices==0, -1] = 0 + self.X[indices==1, -1] = 1 + self.X[indices==2, -1] = 2 + #self.X = self.X[self.X[:, -1].argsort(), :] + self.X2 = np.random.randn((N0+N1)*2, self.D+1) + self.X2[:(N0*2), -1] = 0 + self.X2[(N0*2):, -1] = 1 + + def test_IndependentOutputs(self): + k = GPy.kern.RBF(self.D) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) + + def test_ODE_UY(self): + kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) + X = self.X[self.X[:,-1]!=2] + X2 = self.X2[self.X2[:,-1]!=2] + self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) if __name__ == "__main__": From dda9b3dd733db7dba8075baa907c2bd1ba8bc805 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 13:01:16 +0100 Subject: [PATCH 31/51] student t noise now called t_scale2 --- GPy/testing/likelihood_tests.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 7276e108..867851a7 100644 --- a/GPy/testing/likelihood_tests.py +++ 
b/GPy/testing/likelihood_tests.py @@ -112,7 +112,7 @@ class TestNoiseModels(object): self.f = None self.X = None - def test_noise_models(self): + def test_scale2_models(self): self.setUp() #################################################### @@ -150,64 +150,64 @@ class TestNoiseModels(object): noise_models = {"Student_t_default": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] - #"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] + #"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] }, "laplace": True }, "Student_t_1_var": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [1.0], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_small_deg_free": { "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_small_var": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [0.001], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True 
}, "Student_t_large_var": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [10.0], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_approx_gauss": { "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_log": { "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, From 46ee7374929973d1ccb34d068b4bce5d3bd58373 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 13:30:17 +0100 Subject: [PATCH 32/51] copy had slight bug in id(_parent_index_) > ids for ints are shared globally --- GPy/core/parameterization/parameter_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index a60b8b38..2dac9bf3 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -176,11 +176,11 @@ class Pickleable(object): import copy memo = {} memo[id(self._parent_)] = None - memo[id(self._parent_index_)] = None memo[id(self.gradient)] = None memo[id(self.param_array)] = None memo[id(self._fixes_)] = None c = copy.deepcopy(self, memo) + 
c._parent_index_ = None return c def __deepcopy__(self, memo): @@ -403,6 +403,7 @@ class Constrainable(Nameable, Indexable, Observable): self._fixes_[fixed_indices] = FIXED else: self._fixes_ = None + del self.constraints[__fixed__] def _has_fixes(self): return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size @@ -875,7 +876,6 @@ class Parameterizable(OptimizationHandlable): if not p.param_array.flags['C_CONTIGUOUS']: raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" - import ipdb;ipdb.set_trace() p.param_array.data = self.param_array[pslice].data p.full_gradient.data = self.full_gradient[pslice].data From 161f352838e5761ebaeb3ce3ed57048681ca7352 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 13:31:50 +0100 Subject: [PATCH 33/51] delete dangling fixed attribute in constraints --- GPy/core/parameterization/index_operations.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index ebfe2904..891ac522 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -95,6 +95,9 @@ class ParameterIndexOperations(object): def __getitem__(self, prop): return self._properties[prop] + def __delitem__(self, prop): + del self._properties[prop] + def __str__(self, *args, **kwargs): import pprint return pprint.pformat(dict(self._properties)) From 7973d7bf9f7fc60270fa897448801cdc24581bfc Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 31 Mar 2014 13:33:44 +0100 Subject: [PATCH 34/51] removed some dubuggnin --- GPy/kern/_src/rbf.py | 4 ---- GPy/plotting/matplot_dep/models_plots.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0f19dbd1..c5914d58 100644 --- a/GPy/kern/_src/rbf.py 
+++ b/GPy/kern/_src/rbf.py @@ -80,8 +80,6 @@ class RBF(Stationary): #contributions from psi0: self.variance.gradient = np.sum(dL_dpsi0) - if self._debug: - num_grad = self.lengthscale.gradient.copy() self.lengthscale.gradient = 0. #from psi1 @@ -101,8 +99,6 @@ class RBF(Stationary): else: self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2) - if self._debug: - import ipdb;ipdb.set_trace() self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance else: diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index b626758f..57b64ae5 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -53,8 +53,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_rows = slice(None) if which_data_ycols == 'all': which_data_ycols = np.arange(model.output_dim) - if len(which_data_ycols)==0: - raise ValueError('No data selected for plotting') + #if len(which_data_ycols)==0: + #raise ValueError('No data selected for plotting') if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) From 59ff2c8d00e380008471b62aad66e0425774192a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:09:06 +0100 Subject: [PATCH 35/51] gradient can be zero and two parameter cancellation is caught --- GPy/core/model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index a39eceda..1a539f9f 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -279,7 +279,7 @@ class Model(Parameterized): # just check the global ratio dx = np.zeros(x.shape) - dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size)) + dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.) 
# evaulate around the point x f1 = self._objective(x + dx) @@ -294,7 +294,8 @@ class Model(Parameterized): global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance) if global_ratio is np.nan: global_ratio = 0 - return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance or global_diff + print global_ratio, global_diff, np.allclose(gradient, 0, atol=tolerance), np.abs(1. - global_ratio), np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() + return np.abs(1. - global_ratio) < tolerance or global_diff else: # check the gradient of each parameter individually, and do some pretty printing try: From 68a8c3a691bd4dfdad268c3cedc3d3a72a5988ab Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:09:47 +0100 Subject: [PATCH 36/51] gradient can be zero and two parameter cancellation is caught --- GPy/core/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 1a539f9f..38e8d4cf 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -294,7 +294,6 @@ class Model(Parameterized): global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance) if global_ratio is np.nan: global_ratio = 0 - print global_ratio, global_diff, np.allclose(gradient, 0, atol=tolerance), np.abs(1. - global_ratio), np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() return np.abs(1. 
- global_ratio) < tolerance or global_diff else: # check the gradient of each parameter individually, and do some pretty printing From 498e757eec43d90de3a177261e62d162b7ada25c Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:10:36 +0100 Subject: [PATCH 37/51] index operations view delitem added --- GPy/core/parameterization/index_operations.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 891ac522..12b3a298 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -192,6 +192,9 @@ class ParameterIndexOperationsView(object): ind = self._filter_index(self._param_index_ops[prop]) return ind + def __delitem__(self, prop): + self.remove(prop, self[prop]) + def __str__(self, *args, **kwargs): import pprint return pprint.pformat(dict(self.iteritems())) From 86c00e3d4dd89e29f73ffbd9992c9157abe96771 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:11:11 +0100 Subject: [PATCH 38/51] current_slice is not a property --- GPy/core/parameterization/param.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f89b09df..182af902 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -89,12 +89,6 @@ class Param(OptimizationHandlable, ObsAr): def param_array(self): return self - @property - def current_slice(self): - if self._current_slice_ is None: - return slice(0, self.shape[0], 1) - return self._current_slice_ - @property def gradient(self): """ From 5ca56d907c21518fbae757284ece6362ed0ff78b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:12:03 +0100 Subject: [PATCH 39/51] parameterized tests deeper still --- GPy/testing/parameterized_tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GPy/testing/parameterized_tests.py 
b/GPy/testing/parameterized_tests.py index 911cde0b..7b918ff9 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -34,6 +34,7 @@ class ParameterizedTest(unittest.TestCase): self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) self.test1 = GPy.core.Parameterized("test model") + self.test1.param = self.param self.test1.kern = self.rbf+self.white self.test1.add_parameter(self.test1.kern) self.test1.add_parameter(self.param, 0) @@ -58,6 +59,9 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) self.test1.kern.rbf.fix() self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3) + self.test1.fix() + self.assertTrue(self.test1.is_fixed) + self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*self.test1.size) def test_remove_parameter(self): from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp From 439d66b67f55e0230d5534b477fe3e4b9bc0d56e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:12:13 +0100 Subject: [PATCH 40/51] minor changes bits and pieces --- GPy/core/parameterization/parameterized.py | 2 +- .../latent_function_inference/exact_gaussian_inference.py | 1 - GPy/inference/latent_function_inference/var_dtc.py | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 0760f8c6..75085ca2 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -17,7 +17,7 @@ class ParametersChangedMeta(type): instance.parameters_changed() return instance -class Parameterized(Parameterizable, Pickleable): +class Parameterized(Parameterizable): """ Parameterized class diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 554d3d1a..074b67a6 100644 --- 
a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -29,7 +29,6 @@ class ExactGaussianInference(object): """ N, D = Y.shape if (N>D): - print "WARNING: D>N we still need caching of L, such that L*L^T = Y, although fine here" return Y else: #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 0e10a175..7344b204 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -35,9 +35,11 @@ class VarDTC(object): return param_to_array(np.sum(np.square(Y))) def __getstate__(self): + # has to be overridden, as Cacher objects cannot be pickled. return self.limit def __setstate__(self, state): + # has to be overridden, as Cacher objects cannot be pickled. self.limit = state from ...util.caching import Cacher self.get_trYYT = Cacher(self._get_trYYT, self.limit) From 46c34d13d5b444945af332f1556ba02017c8f8cf Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 31 Mar 2014 14:30:41 +0100 Subject: [PATCH 41/51] pcikle tests added --- GPy/testing/pickle_tests.py | 206 ++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 GPy/testing/pickle_tests.py diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py new file mode 100644 index 00000000..1837f07d --- /dev/null +++ b/GPy/testing/pickle_tests.py @@ -0,0 +1,206 @@ +''' +Created on 13 Mar 2014 + +@author: maxz +''' +import unittest, itertools +import cPickle as pickle +import numpy as np +from GPy.core.parameterization.index_operations import ParameterIndexOperations,\ + ParameterIndexOperationsView +import tempfile +from GPy.core.parameterization.param import Param +from GPy.core.parameterization.array_core import ObsAr +from GPy.core.parameterization.priors import Gaussian 
+from GPy.kern._src.rbf import RBF +from GPy.kern._src.linear import Linear +from GPy.kern._src.static import Bias, White +from GPy.examples.dimensionality_reduction import mrd_simulation,\ + bgplvm_simulation +from GPy.examples.regression import toy_rbf_1d_50 +from GPy.core.parameterization.variational import NormalPosterior +from GPy.models.gp_regression import GPRegression + +class ListDictTestCase(unittest.TestCase): + def assertListDictEquals(self, d1, d2, msg=None): + for k,v in d1.iteritems(): + self.assertListEqual(list(v), list(d2[k]), msg) + def assertArrayListEquals(self, l1, l2): + for a1, a2 in itertools.izip(l1,l2): + np.testing.assert_array_equal(a1, a2) + +class Test(ListDictTestCase): + def test_parameter_index_operations(self): + pio = ParameterIndexOperations(dict(test1=np.array([4,3,1,6,4]), test2=np.r_[2:130])) + piov = ParameterIndexOperationsView(pio, 20, 250) + self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems())) + self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items())) + + self.assertArrayListEquals(pio.copy().indices(), pio.indices()) + self.assertArrayListEquals(piov.copy().indices(), piov.indices()) + + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(pio, f) + f.seek(0) + pio2 = pickle.load(f) + self.assertListDictEquals(pio._properties, pio2._properties) + + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(piov, f) + f.seek(0) + pio2 = pickle.load(f) + self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems())) + + def test_param(self): + param = Param('test', np.arange(4*2).reshape(4,2)) + param[0].constrain_positive() + param[1].fix() + param[2].set_prior(Gaussian(0,1)) + pcopy = param.copy() + self.assertListEqual(param.tolist(), pcopy.tolist()) + self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n')) + self.assertIsNot(param, pcopy) + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(param, f) + f.seek(0) + pcopy = pickle.load(f) + 
self.assertListEqual(param.tolist(), pcopy.tolist()) + self.assertSequenceEqual(str(param), str(pcopy)) + + def test_observable_array(self): + obs = ObsAr(np.arange(4*2).reshape(4,2)) + pcopy = obs.copy() + self.assertListEqual(obs.tolist(), pcopy.tolist()) + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(obs, f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(obs.tolist(), pcopy.tolist()) + self.assertSequenceEqual(str(obs), str(pcopy)) + + def test_parameterized(self): + par = RBF(1, active_dims=[1]) + Linear(2, active_dims=[0,2]) + Bias(3) + White(3) + par.gradient = 10 + par.randomize() + pcopy = par.copy() + self.assertIsInstance(pcopy.constraints, ParameterIndexOperations) + self.assertIsInstance(pcopy.rbf.constraints, ParameterIndexOperationsView) + self.assertIs(pcopy.constraints, pcopy.rbf.constraints._param_index_ops) + self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops) + self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + pcopy.gradient = 10 + np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient) + np.testing.assert_allclose(pcopy.linear.full_gradient, 10) + self.assertSequenceEqual(str(par), str(pcopy)) + + def test_model(self): + par = toy_rbf_1d_50(optimize=0, plot=0) + pcopy = par.copy() + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + 
self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertTrue(pcopy.checkgrad()) + self.assert_(np.any(pcopy.gradient!=0.0)) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assert_(pcopy.checkgrad()) + + def test_modelrecreation(self): + par = toy_rbf_1d_50(optimize=0, plot=0) + pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertTrue(pcopy.checkgrad()) + self.assert_(np.any(pcopy.gradient!=0.0)) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assert_(pcopy.checkgrad()) + + def test_posterior(self): + X = np.random.randn(3,5) + Xv = np.random.rand(*X.shape) + par = NormalPosterior(X,Xv) + par.gradient = 10 + pcopy = par.copy() + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) 
+ pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + pcopy.gradient = 10 + np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + np.testing.assert_allclose(pcopy.mean.full_gradient, 10) + self.assertSequenceEqual(str(par), str(pcopy)) + + def test_model_concat(self): + par = mrd_simulation(optimize=0, plot=0, plot_sim=0) + par.randomize() + pcopy = par.copy() + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertTrue(pcopy.checkgrad()) + self.assert_(np.any(pcopy.gradient!=0.0)) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assert_(pcopy.checkgrad()) + + def _callback(self, what, which): + what.count += 1 + + def test_add_observer(self): + par = toy_rbf_1d_50(optimize=0, plot=0) + par.name = "original" + par.count = 0 + par.add_observer(self, self._callback, 1) + pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) + self.assertNotIn(par._observer_callables_[0], pcopy._observer_callables_) + pcopy = par.copy() + pcopy.name = "copy" + self.assertTrue(par.checkgrad()) + self.assertTrue(pcopy.checkgrad()) + self.assertTrue(pcopy.kern.checkgrad()) + self.assertIn(par._observer_callables_[0], pcopy._observer_callables_) + self.assertEqual(par.count, 3) + self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad + +if __name__ == "__main__": + #import sys;sys.argv = ['', 'Test.test_parameter_index_operations'] + unittest.main() \ No newline at end of file From 
292e076a9aacd746deaf858f9d2726959fc07b52 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 31 Mar 2014 17:51:20 +0100 Subject: [PATCH 42/51] Added first draft of symbolic likelihood (working for a student-t example). --- GPy/likelihoods/likelihood.py | 9 +++------ GPy/likelihoods/symbolic.py | 34 ++++++++++++++++++---------------- GPy/util/symbolic.py | 8 +++++++- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 5761f3fb..33b51536 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -383,12 +383,9 @@ class Likelihood(Parameterized): #Parameters are stacked vertically. Must be listed in same order as 'get_param_names' # ensure we have gradients for every parameter we want to optimize - try: - assert len(dlogpdf_dtheta) == self.size #1 x num_param array - assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix - assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix - except Exception as e: - import ipdb; ipdb.set_trace() # XXX BREAKPOINT + assert len(dlogpdf_dtheta) == self.size #1 x num_param array + assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix + assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py index 5eaafb2a..aecfbb0a 100644 --- a/GPy/likelihoods/symbolic.py +++ b/GPy/likelihoods/symbolic.py @@ -2,15 +2,17 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import sympy as sp +import sympy as sym from sympy.utilities.lambdify import lambdify import link_functions from scipy import stats, integrate -from scipy.special import gammaln, gamma, erf +from scipy.special import gammaln, gamma, erf, polygamma from likelihood import Likelihood from ..core.parameterization import Param from 
..core.parameterization.transformations import Logexp +func_modules = ['numpy', {'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma}] + class Symbolic(Likelihood): """ Symbolic likelihood. @@ -28,12 +30,12 @@ class Symbolic(Likelihood): super(Symbolic, self).__init__(gp_link, name=name) if likelihood is None and log_likelihood: - self._sp_likelihood = sp.exp(log_likelihood).simplify() + self._sp_likelihood = sym.exp(log_likelihood).simplify() self._sp_log_likelihood = log_likelihood if log_likelihood is None and likelihood: self._sp_likelihood = likelihood - self._sp_log_likelihood = sp.log(likelihood).simplify() + self._sp_log_likelihood = sym.log(likelihood).simplify() # TODO: build likelihood and log likelihood from CDF or # compute CDF given likelihood/log-likelihood. Also check log @@ -47,7 +49,7 @@ class Symbolic(Likelihood): self._sp_y = [e for e in sp_vars if e.name=='y'] if not self._sp_f: raise ValueError('No variable y in likelihood or log likelihood.') - self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name, reverse=True) # These are all the arguments need to compute likelihoods. self.arg_list = self._sp_y + self._sp_f + self._sp_theta @@ -56,9 +58,9 @@ class Symbolic(Likelihood): derivative_arguments = self._sp_f + self._sp_theta # Do symbolic work to compute derivatives. 
- self._log_likelihood_derivatives = {theta.name : sp.diff(self._sp_log_likelihood,theta).simplify() for theta in derivative_arguments} - self._log_likelihood_second_derivatives = {theta.name : sp.diff(self._log_likelihood_derivatives['f'],theta).simplify() for theta in derivative_arguments} - self._log_likelihood_third_derivatives = {theta.name : sp.diff(self._log_likelihood_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._log_likelihood_derivatives = {theta.name : sym.diff(self._sp_log_likelihood,theta).simplify() for theta in derivative_arguments} + self._log_likelihood_second_derivatives = {theta.name : sym.diff(self._log_likelihood_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._log_likelihood_third_derivatives = {theta.name : sym.diff(self._log_likelihood_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} # Add parameters to the model. for theta in self._sp_theta: @@ -85,14 +87,14 @@ class Symbolic(Likelihood): """Generate the code from the symbolic parts that will be used for likleihod computation.""" # TODO: Check here whether theano is available and set up # functions accordingly. 
- self._likelihood_function = lambdify(self.arg_list, self._sp_likelihood, 'numpy') - self._log_likelihood_function = lambdify(self.arg_list, self._sp_log_likelihood, 'numpy') + self._likelihood_function = lambdify(self.arg_list, self._sp_likelihood, func_modules) + self._log_likelihood_function = lambdify(self.arg_list, self._sp_log_likelihood, func_modules) # compute code for derivatives (for implicit likelihood terms # we need up to 3rd derivatives) - setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_derivatives[key], 'numpy') for key in self._log_likelihood_derivatives.keys()}) - setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_second_derivatives[key], 'numpy') for key in self._log_likelihood_second_derivatives.keys()}) - setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_third_derivatives[key], 'numpy') for key in self._log_likelihood_third_derivatives.keys()}) + setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_derivatives[key], func_modules) for key in self._log_likelihood_derivatives.keys()}) + setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_second_derivatives[key], func_modules) for key in self._log_likelihood_second_derivatives.keys()}) + setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_third_derivatives[key], func_modules) for key in self._log_likelihood_third_derivatives.keys()}) # TODO: compute EP code parts based on logZ. 
We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta @@ -215,17 +217,17 @@ class Symbolic(Likelihood): def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return np.asarray([self._first_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta]) + return np.hstack([self._first_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]).sum(0) def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return np.asarray([self._second_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta]) + return np.hstack([self._second_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]) def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return np.asarray([self._third_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta]) + return np.hstack([self._third_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]) def predictive_mean(self, mu, sigma, Y_metadata=None): raise NotImplementedError diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py index 49c8c33a..d3988ccc 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -1,5 +1,11 @@ -from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign +from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma +class gammaln(Function): + nargs = 1 + @classmethod + def eval(cls, x): + return log(gamma(x)) + class ln_diff_erf(Function): nargs = 2 From 970e133bcae6a559e71f6535796c1bc9461b6093 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 31 Mar 2014 18:22:16 +0100 Subject: [PATCH 43/51] Check for 
sympy --- GPy/likelihoods/symbolic.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py index aecfbb0a..0c2aac89 100644 --- a/GPy/likelihoods/symbolic.py +++ b/GPy/likelihoods/symbolic.py @@ -2,8 +2,12 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import sympy as sym -from sympy.utilities.lambdify import lambdify +try: + import sympy as sym + sympy_available=True + from sympy.utilities.lambdify import lambdify +except ImportError: + sympy_available=False import link_functions from scipy import stats, integrate from scipy.special import gammaln, gamma, erf, polygamma @@ -56,7 +60,7 @@ class Symbolic(Likelihood): # these are arguments for computing derivatives. derivative_arguments = self._sp_f + self._sp_theta - + # Do symbolic work to compute derivatives. self._log_likelihood_derivatives = {theta.name : sym.diff(self._sp_log_likelihood,theta).simplify() for theta in derivative_arguments} self._log_likelihood_second_derivatives = {theta.name : sym.diff(self._log_likelihood_derivatives['f'],theta).simplify() for theta in derivative_arguments} @@ -78,7 +82,7 @@ class Symbolic(Likelihood): self.log_concave = log_concave # initialise code arguments - self._arguments = {} + self._arguments = {} # generate the code for the likelihood and derivatives self._gen_code() @@ -95,7 +99,7 @@ class Symbolic(Likelihood): setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_derivatives[key], func_modules) for key in self._log_likelihood_derivatives.keys()}) setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_second_derivatives[key], func_modules) for key in self._log_likelihood_second_derivatives.keys()}) setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_third_derivatives[key], func_modules) for key in 
self._log_likelihood_third_derivatives.keys()}) - + # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta def parameters_changed(self): @@ -157,7 +161,7 @@ class Symbolic(Likelihood): :type inv_inv_link_f: Nx1 array :param y: data :type y: Nx1 array - :param Y_metadata: Y_metadata + :param Y_metadata: Y_metadata :returns: likelihood evaluated for this point :rtype: float @@ -175,12 +179,12 @@ class Symbolic(Likelihood): :type inv_inv_link_f: Nx1 array :param y: data :type y: Nx1 array - :param Y_metadata: Y_metadata + :param Y_metadata: Y_metadata :returns: gradient of likelihood with respect to each point. :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) return self._first_derivative_code['f'](**self._arguments) @@ -204,28 +208,28 @@ class Symbolic(Likelihood): distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) return self._second_derivative_code['f'](**self._arguments) def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) return self._third_derivative_code['f'](**self._arguments) raise NotImplementedError def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) return np.hstack([self._first_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]).sum(0) - + def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): - 
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) return np.hstack([self._second_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]) def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) return np.hstack([self._third_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]) From f5b8989ef54ebe48383c02154264e7fb61b2a692 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 1 Apr 2014 07:03:01 +0100 Subject: [PATCH 44/51] Added negative binomial likelihood based on symbolic. --- GPy/kern/_src/symbolic.py | 2 +- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/link_functions.py | 1 + GPy/likelihoods/negative_binomial.py | 46 +++ GPy/likelihoods/symbolic.py | 445 ++++++++++++++------------- GPy/util/datasets.py | 2 +- GPy/util/symbolic.py | 40 ++- 7 files changed, 318 insertions(+), 219 deletions(-) create mode 100644 GPy/likelihoods/negative_binomial.py diff --git a/GPy/kern/_src/symbolic.py b/GPy/kern/_src/symbolic.py index 2d4cbc59..4f373fae 100644 --- a/GPy/kern/_src/symbolic.py +++ b/GPy/kern/_src/symbolic.py @@ -13,7 +13,7 @@ from ...core.parameterization.transformations import Logexp class Symbolic(Kern): """ - A kernel object, where all the hard work in done by sympy. + A kernel object, where all the hard work is done by sympy. :param k: the covariance function :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2... 
diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 87229081..cfdfaf72 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -7,3 +7,4 @@ from student_t import StudentT from likelihood import Likelihood from mixed_noise import MixedNoise from symbolic import Symbolic +from negative_binomial import Negative_binomial diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 942fe2f4..86384155 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -71,6 +71,7 @@ class Probit(GPTransformation): g(f) = \\Phi^{-1} (mu) + """ def transf(self,f): return std_norm_cdf(f) diff --git a/GPy/likelihoods/negative_binomial.py b/GPy/likelihoods/negative_binomial.py new file mode 100644 index 00000000..5bc5b727 --- /dev/null +++ b/GPy/likelihoods/negative_binomial.py @@ -0,0 +1,46 @@ +# Copyright (c) 2014 The GPy authors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +try: + import sympy as sym + sympy_available=True + from sympy.utilities.lambdify import lambdify + from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian +except ImportError: + sympy_available=False + +import numpy as np +from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +import link_functions +from symbolic import Symbolic +from scipy import stats + +if sympy_available: + class Negative_binomial(Symbolic): + """ + Negative binomial + + .. math:: + p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i} + + .. Note:: + Y takes non zero integer values.. + link function should have a positive domain, e.g. log (default). + + .. 
See also:: + symbolic.py, for the parent class + """ + def __init__(self, gp_link=None): + if gp_link is None: + gp_link = link_functions.Log() + + dispersion = sym.Symbol('dispersion', positive=True, real=True) + y = sym.Symbol('y', nonnegative=True, integer=True) + f = sym.Symbol('f', positive=True, real=True) + log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f) + super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial') + + # TODO: Check this. + self.log_concave = False + diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py index aecfbb0a..ddc430dc 100644 --- a/GPy/likelihoods/symbolic.py +++ b/GPy/likelihoods/symbolic.py @@ -1,245 +1,260 @@ # Copyright (c) 2014 GPy Authors # Licensed under the BSD 3-clause license (see LICENSE.txt) +try: + import sympy as sym + sympy_available=True + from sympy.utilities.lambdify import lambdify +except ImportError: + sympy_available=False + import numpy as np -import sympy as sym -from sympy.utilities.lambdify import lambdify import link_functions from scipy import stats, integrate from scipy.special import gammaln, gamma, erf, polygamma +from GPy.util.functions import cum_gaussian, ln_cum_gaussian from likelihood import Likelihood from ..core.parameterization import Param -from ..core.parameterization.transformations import Logexp -func_modules = ['numpy', {'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma}] +func_modules = ['numpy', {'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}] -class Symbolic(Likelihood): - """ - Symbolic likelihood. - - Likelihood where the form of the likelihood is provided by a sympy expression. 
- - """ - def __init__(self, likelihood=None, log_likelihood=None, cdf=None, logZ=None, gp_link=None, name='symbolic', log_concave=False, param=None): - if gp_link is None: - gp_link = link_functions.Identity() - - if likelihood is None and log_likelihood is None and cdf is None: - raise ValueError, "You must provide an argument for the likelihood or the log likelihood." - - super(Symbolic, self).__init__(gp_link, name=name) - - if likelihood is None and log_likelihood: - self._sp_likelihood = sym.exp(log_likelihood).simplify() - self._sp_log_likelihood = log_likelihood - - if log_likelihood is None and likelihood: - self._sp_likelihood = likelihood - self._sp_log_likelihood = sym.log(likelihood).simplify() - - # TODO: build likelihood and log likelihood from CDF or - # compute CDF given likelihood/log-likelihood. Also check log - # likelihood, likelihood and CDF are consistent. - - # pull the variable names out of the symbolic likelihood - sp_vars = [e for e in self._sp_likelihood.atoms() if e.is_Symbol] - self._sp_f = [e for e in sp_vars if e.name=='f'] - if not self._sp_f: - raise ValueError('No variable f in likelihood or log likelihood.') - self._sp_y = [e for e in sp_vars if e.name=='y'] - if not self._sp_f: - raise ValueError('No variable y in likelihood or log likelihood.') - self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name, reverse=True) - - # These are all the arguments need to compute likelihoods. - self.arg_list = self._sp_y + self._sp_f + self._sp_theta - - # these are arguments for computing derivatives. - derivative_arguments = self._sp_f + self._sp_theta - - # Do symbolic work to compute derivatives. 
- self._log_likelihood_derivatives = {theta.name : sym.diff(self._sp_log_likelihood,theta).simplify() for theta in derivative_arguments} - self._log_likelihood_second_derivatives = {theta.name : sym.diff(self._log_likelihood_derivatives['f'],theta).simplify() for theta in derivative_arguments} - self._log_likelihood_third_derivatives = {theta.name : sym.diff(self._log_likelihood_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} - - # Add parameters to the model. - for theta in self._sp_theta: - val = 1.0 - # TODO: need to decide how to handle user passing values for the se parameter vectors. - if param is not None: - if param.has_key(theta): - val = param[theta] - setattr(self, theta.name, Param(theta.name, val, None)) - self.add_parameters(getattr(self, theta.name)) - - - # Is there some way to check whether the likelihood is log - # concave? For the moment, need user to specify. - self.log_concave = log_concave - - # initialise code arguments - self._arguments = {} - - # generate the code for the likelihood and derivatives - self._gen_code() - - def _gen_code(self): - """Generate the code from the symbolic parts that will be used for likleihod computation.""" - # TODO: Check here whether theano is available and set up - # functions accordingly. 
- self._likelihood_function = lambdify(self.arg_list, self._sp_likelihood, func_modules) - self._log_likelihood_function = lambdify(self.arg_list, self._sp_log_likelihood, func_modules) - - # compute code for derivatives (for implicit likelihood terms - # we need up to 3rd derivatives) - setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_derivatives[key], func_modules) for key in self._log_likelihood_derivatives.keys()}) - setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_second_derivatives[key], func_modules) for key in self._log_likelihood_second_derivatives.keys()}) - setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_third_derivatives[key], func_modules) for key in self._log_likelihood_third_derivatives.keys()}) - - # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta - - def parameters_changed(self): - pass - - def update_gradients(self, grads): +if sympy_available: + class Symbolic(Likelihood): """ - Pull out the gradients, be careful as the order must match the order - in which the parameters are added - """ - # The way the Laplace approximation is run requires the - # covariance function to compute the true gradient (because it - # is dependent on the mode). This means we actually compute - # the gradient outside this object. This function would - # normally ask the object to update its gradients internally, - # but here it provides them externally, because they are - # computed in the inference code. TODO: Thought: How does this - # effect EP? Shouldn't this be done by a separate - # Laplace-approximation specific call? - for grad, theta in zip(grads, self._sp_theta): - parameter = getattr(self, theta.name) - setattr(parameter, 'gradient', grad) + Symbolic likelihood. 
- def _arguments_update(self, f, y): - """Set up argument lists for the derivatives.""" - # If we do make use of Theano, then at this point we would - # need to do a lot of precomputation to ensure that the - # likelihoods and gradients are computed together, then check - # for parameter changes before updating. - for i, fvar in enumerate(self._sp_f): - self._arguments[fvar.name] = f - for i, yvar in enumerate(self._sp_y): - self._arguments[yvar.name] = y - for theta in self._sp_theta: - self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) - - def pdf_link(self, inv_link_f, y, Y_metadata=None): - """ - Likelihood function given inverse link of f. - - :param inv_link_f: inverse link of latent variables. - :type inv_link_f: Nx1 array - :param y: data - :type y: Nx1 array - :param Y_metadata: Y_metadata which is not used in student t distribution - :returns: likelihood evaluated for this point - :rtype: float - """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - l = self._likelihood_function(**self._arguments) - return np.prod(l) - - def logpdf_link(self, inv_link_f, y, Y_metadata=None): - """ - Log Likelihood Function given inverse link of latent variables. - - :param inv_inv_link_f: latent variables (inverse link of f) - :type inv_inv_link_f: Nx1 array - :param y: data - :type y: Nx1 array - :param Y_metadata: Y_metadata - :returns: likelihood evaluated for this point - :rtype: float + Likelihood where the form of the likelihood is provided by a sympy expression. 
""" - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - ll = self._log_likelihood_function(**self._arguments) - return np.sum(ll) + def __init__(self, pdf=None, log_pdf=None, cdf=None, logZ=None, gp_link=None, name='symbolic', log_concave=False, param=None): + if gp_link is None: + gp_link = link_functions.Identity() - def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): - """ - Gradient of log likelihood with respect to the inverse link function. + if pdf is None and log_pdf is None and cdf is None: + raise ValueError, "You must provide an argument for the pdf or the log pdf." - :param inv_inv_link_f: latent variables (inverse link of f) - :type inv_inv_link_f: Nx1 array - :param y: data - :type y: Nx1 array - :param Y_metadata: Y_metadata - :returns: gradient of likelihood with respect to each point. - :rtype: Nx1 array + super(Symbolic, self).__init__(gp_link, name=name) - """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - return self._first_derivative_code['f'](**self._arguments) + if pdf is None and log_pdf: + self._sp_pdf = sym.exp(log_pdf).simplify() + self._sp_log_pdf = log_pdf - def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): - """ - Hessian of log likelihood given inverse link of latent variables with respect to that inverse link. - i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j). + if log_pdf is None and pdf: + self._sp_pdf = pdf + self._sp_log_pdf = sym.log(pdf).simplify() + + # TODO: build pdf and log pdf from CDF or + # compute CDF given pdf/log-pdf. Also check log + # pdf, pdf and CDF are consistent. 
+ + # pull the variable names out of the symbolic pdf + sp_vars = [e for e in self._sp_pdf.atoms() if e.is_Symbol] + self._sp_f = [e for e in sp_vars if e.name=='f'] + if not self._sp_f: + raise ValueError('No variable f in pdf or log pdf.') + self._sp_y = [e for e in sp_vars if e.name=='y'] + if not self._sp_f: + raise ValueError('No variable y in pdf or log pdf.') + self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + + # These are all the arguments need to compute likelihoods. + self.arg_list = self._sp_y + self._sp_f + self._sp_theta + + # these are arguments for computing derivatives. + derivative_arguments = self._sp_f + self._sp_theta + + # Do symbolic work to compute derivatives. + self._log_pdf_derivatives = {theta.name : sym.diff(self._sp_log_pdf,theta).simplify() for theta in derivative_arguments} + self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + + # Add parameters to the model. + for theta in self._sp_theta: + val = 1.0 + # TODO: need to decide how to handle user passing values for the se parameter vectors. + if param is not None: + if param.has_key(theta): + val = param[theta] + setattr(self, theta.name, Param(theta.name, val, None)) + self.add_parameters(getattr(self, theta.name)) - :param inv_link_f: inverse link of the latent variables. - :type inv_link_f: Nx1 array - :param y: data - :type y: Nx1 array - :param Y_metadata: Y_metadata which is not used in student t distribution - :returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f) - :rtype: Nx1 array + # Is there some way to check whether the pdf is log + # concave? For the moment, need user to specify. + self.log_concave = log_concave - .. 
Note:: - Returns diagonal of Hessian, since every where else it is - 0, as the likelihood factorizes over cases (the - distribution for y_i depends only on link(f_i) not on - link(f_(j!=i)) - """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - return self._second_derivative_code['f'](**self._arguments) + # initialise code arguments + self._arguments = {} - def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - return self._third_derivative_code['f'](**self._arguments) - raise NotImplementedError + # generate the code for the pdf and derivatives + self._gen_code() - def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - return np.hstack([self._first_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]).sum(0) - - def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - return np.hstack([self._second_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]) + def _gen_code(self): + """Generate the code from the symbolic parts that will be used for likleihod computation.""" + # TODO: Check here whether theano is available and set up + # functions accordingly. 
+ self._pdf_function = lambdify(self.arg_list, self._sp_pdf, func_modules) + self._log_pdf_function = lambdify(self.arg_list, self._sp_log_pdf, func_modules) - def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - return np.hstack([self._third_derivative_code[theta.name](**self._arguments) for theta in self._sp_theta]) + # compute code for derivatives (for implicit likelihood terms + # we need up to 3rd derivatives) + setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], func_modules) for key in self._log_pdf_derivatives.keys()}) + setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], func_modules) for key in self._log_pdf_second_derivatives.keys()}) + setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], func_modules) for key in self._log_pdf_third_derivatives.keys()}) - def predictive_mean(self, mu, sigma, Y_metadata=None): - raise NotImplementedError + # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta - def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): - raise NotImplementedError + def parameters_changed(self): + pass - def conditional_mean(self, gp): - raise NotImplementedError + def update_gradients(self, grads): + """ + Pull out the gradients, be careful as the order must match the order + in which the parameters are added + """ + # The way the Laplace approximation is run requires the + # covariance function to compute the true gradient (because it + # is dependent on the mode). This means we actually compute + # the gradient outside this object. 
This function would + # normally ask the object to update its gradients internally, + # but here it provides them externally, because they are + # computed in the inference code. TODO: Thought: How does this + # effect EP? Shouldn't this be done by a separate + # Laplace-approximation specific call? + for grad, theta in zip(grads, self._sp_theta): + parameter = getattr(self, theta.name) + setattr(parameter, 'gradient', grad) - def conditional_variance(self, gp): - raise NotImplementedError + def _arguments_update(self, f, y): + """Set up argument lists for the derivatives.""" + # If we do make use of Theano, then at this point we would + # need to do a lot of precomputation to ensure that the + # likelihoods and gradients are computed together, then check + # for parameter changes before updating. + for i, fvar in enumerate(self._sp_f): + self._arguments[fvar.name] = f + for i, yvar in enumerate(self._sp_y): + self._arguments[yvar.name] = y + for theta in self._sp_theta: + self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) - def samples(self, gp, Y_metadata=None): - raise NotImplementedError + def pdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Likelihood function given inverse link of f. + + :param inv_link_f: inverse link of latent variables. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + l = self._pdf_function(**self._arguments) + return np.prod(l) + + def logpdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Log Likelihood Function given inverse link of latent variables. 
+ + :param inv_inv_link_f: latent variables (inverse link of f) + :type inv_inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + ll = self._log_pdf_function(**self._arguments) + return np.sum(ll) + + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): + """ + Gradient of log likelihood with respect to the inverse link function. + + :param inv_inv_link_f: latent variables (inverse link of f) + :type inv_inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: gradient of likelihood with respect to each point. + :rtype: Nx1 array + + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return self._first_derivative_code['f'](**self._arguments) + + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): + """ + Hessian of log likelihood given inverse link of latent variables with respect to that inverse link. + i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j). + + + :param inv_link_f: inverse link of the latent variables. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata which is not used in student t distribution + :returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. 
Note:: + Returns diagonal of Hessian, since every where else it is + 0, as the likelihood factorizes over cases (the + distribution for y_i depends only on link(f_i) not on + link(f_(j!=i)) + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return self._second_derivative_code['f'](**self._arguments) + + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + return self._third_derivative_code['f'](**self._arguments) + raise NotImplementedError + + def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + g = np.zeros((y.shape[0], len(self._sp_theta))) + for i, theta in enumerate(self._sp_theta): + g[:, i:i+1] = self._first_derivative_code[theta.name](**self._arguments) + return g.sum(0) + + def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + g = np.zeros((y.shape[0], len(self._sp_theta))) + for i, theta in enumerate(self._sp_theta): + g[:, i:i+1] = self._second_derivative_code[theta.name](**self._arguments) + return g + + def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + g = np.zeros((y.shape[0], len(self._sp_theta))) + for i, theta in enumerate(self._sp_theta): + g[:, i:i+1] = self._third_derivative_code[theta.name](**self._arguments) + return g + + def predictive_mean(self, mu, sigma, Y_metadata=None): + raise NotImplementedError + + def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): + raise NotImplementedError + + def conditional_mean(self, gp): + raise NotImplementedError + + def 
conditional_variance(self, gp): + raise NotImplementedError + + def samples(self, gp, Y_metadata=None): + raise NotImplementedError diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 54e42733..f53163f4 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -331,7 +331,7 @@ def football_data(season='1314', data_set='football_data'): # This will be for downloading google trends data. def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'): - """Data downloaded from Google trends for given query terms.""" + """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations.""" # Inspired by this notebook: # http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py index d3988ccc..5074a42c 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -1,12 +1,48 @@ -from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma +from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma,polygamma class gammaln(Function): nargs = 1 + + def fdiff(self, argindex=1): + x=self.args[0] + return polygamma(0, x) + @classmethod def eval(cls, x): - return log(gamma(x)) + if x.is_Number: + return log(gamma(x)) +class ln_cum_gaussian(Function): + nargs = 1 + + def fdiff(self, argindex=1): + x = self.args[0] + return 1/cum_gaussian(x)*gaussian(x) + + @classmethod + def eval(cls, x): + if x.is_Number: + return log(cum_gaussian(x)) + +class cum_gaussian(Function): + nargs = 1 + def fdiff(self, argindex=1): + x = self.args[0] + return gaussian(x) + + @classmethod + def eval(cls, x): + if x.is_Number: + return 0.5*(1+erf(sqrt(2)/2*x)) + +class gaussian(Function): + nargs = 1 + @classmethod + def eval(cls, x): + return 1/sqrt(2*pi)*exp(-0.5*x*x) + + class 
ln_diff_erf(Function): nargs = 2 From c71ee8949c2d1278760ff53629223ca37f6faae9 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 1 Apr 2014 10:20:46 +0100 Subject: [PATCH 45/51] renamed array_core to observable_array --- GPy/core/parameterization/array_core.py | 137 ------------------------ 1 file changed, 137 deletions(-) delete mode 100644 GPy/core/parameterization/array_core.py diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py deleted file mode 100644 index fc9d6cf2..00000000 --- a/GPy/core/parameterization/array_core.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -__updated__ = '2014-03-31' - -import numpy as np -from parameter_core import Observable, Pickleable - -class ObsAr(np.ndarray, Pickleable, Observable): - """ - An ndarray which reports changes to its observers. - The observers can add themselves with a callable, which - will be called every time this array changes. The callable - takes exactly one argument, which is this array itself. 
- """ - __array_priority__ = -1 # Never give back ObsAr - def __new__(cls, input_array, *a, **kw): - if not isinstance(input_array, ObsAr): - obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) - else: obj = input_array - #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing - super(ObsAr, obj).__init__(*a, **kw) - return obj - - def __array_finalize__(self, obj): - # see InfoArray.__array_finalize__ for comments - if obj is None: return - self._observer_callables_ = getattr(obj, '_observer_callables_', None) - - def __array_wrap__(self, out_arr, context=None): - return out_arr.view(np.ndarray) - - def copy(self): - memo = {} - memo[id(self)] = self - return self.__deepcopy__(memo) - - def __deepcopy__(self, memo): - s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy()) - memo[id(self)] = s - import copy - s.__dict__.update(copy.deepcopy(self.__dict__, memo)) - return s - - def __reduce__(self): - func, args, state = super(ObsAr, self).__reduce__() - return func, args, (state, Pickleable.__getstate__(self)) - - def __setstate__(self, state): - np.ndarray.__setstate__(self, state[0]) - Pickleable.__setstate__(self, state[1]) - - def __setitem__(self, s, val): - super(ObsAr, self).__setitem__(s, val) - self.notify_observers() - - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, val): - return self.__setitem__(slice(start, stop), val) - - def __ilshift__(self, *args, **kwargs): - r = np.ndarray.__ilshift__(self, *args, **kwargs) - self.notify_observers() - return r - - def __irshift__(self, *args, **kwargs): - r = np.ndarray.__irshift__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __ixor__(self, *args, **kwargs): - r = np.ndarray.__ixor__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __ipow__(self, *args, **kwargs): - r = np.ndarray.__ipow__(self, *args, 
**kwargs) - self.notify_observers() - return r - - - def __ifloordiv__(self, *args, **kwargs): - r = np.ndarray.__ifloordiv__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __isub__(self, *args, **kwargs): - r = np.ndarray.__isub__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __ior__(self, *args, **kwargs): - r = np.ndarray.__ior__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __itruediv__(self, *args, **kwargs): - r = np.ndarray.__itruediv__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __idiv__(self, *args, **kwargs): - r = np.ndarray.__idiv__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __iand__(self, *args, **kwargs): - r = np.ndarray.__iand__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __imod__(self, *args, **kwargs): - r = np.ndarray.__imod__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __iadd__(self, *args, **kwargs): - r = np.ndarray.__iadd__(self, *args, **kwargs) - self.notify_observers() - return r - - - def __imul__(self, *args, **kwargs): - r = np.ndarray.__imul__(self, *args, **kwargs) - self.notify_observers() - return r \ No newline at end of file From ef501e452950e8d9af1cc4e9afe648e91b097288 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 1 Apr 2014 10:22:20 +0100 Subject: [PATCH 46/51] renamed array_core to observable array --- GPy/core/parameterization/observable_array.py | 137 ++++++++++++++++++ GPy/core/parameterization/param.py | 2 +- GPy/likelihoods/__init__.py | 4 +- GPy/testing/parameterized_tests.py | 2 +- GPy/testing/pickle_tests.py | 2 +- 5 files changed, 142 insertions(+), 5 deletions(-) create mode 100644 GPy/core/parameterization/observable_array.py diff --git a/GPy/core/parameterization/observable_array.py b/GPy/core/parameterization/observable_array.py new file mode 100644 index 00000000..fc9d6cf2 --- /dev/null +++ 
b/GPy/core/parameterization/observable_array.py @@ -0,0 +1,137 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +__updated__ = '2014-03-31' + +import numpy as np +from parameter_core import Observable, Pickleable + +class ObsAr(np.ndarray, Pickleable, Observable): + """ + An ndarray which reports changes to its observers. + The observers can add themselves with a callable, which + will be called every time this array changes. The callable + takes exactly one argument, which is this array itself. + """ + __array_priority__ = -1 # Never give back ObsAr + def __new__(cls, input_array, *a, **kw): + if not isinstance(input_array, ObsAr): + obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) + else: obj = input_array + #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing + super(ObsAr, obj).__init__(*a, **kw) + return obj + + def __array_finalize__(self, obj): + # see InfoArray.__array_finalize__ for comments + if obj is None: return + self._observer_callables_ = getattr(obj, '_observer_callables_', None) + + def __array_wrap__(self, out_arr, context=None): + return out_arr.view(np.ndarray) + + def copy(self): + memo = {} + memo[id(self)] = self + return self.__deepcopy__(memo) + + def __deepcopy__(self, memo): + s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy()) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + + def __reduce__(self): + func, args, state = super(ObsAr, self).__reduce__() + return func, args, (state, Pickleable.__getstate__(self)) + + def __setstate__(self, state): + np.ndarray.__setstate__(self, state[0]) + Pickleable.__setstate__(self, state[1]) + + def __setitem__(self, s, val): + super(ObsAr, self).__setitem__(s, val) + self.notify_observers() + + def __getslice__(self, start, stop): + return self.__getitem__(slice(start, stop)) + + 
def __setslice__(self, start, stop, val): + return self.__setitem__(slice(start, stop), val) + + def __ilshift__(self, *args, **kwargs): + r = np.ndarray.__ilshift__(self, *args, **kwargs) + self.notify_observers() + return r + + def __irshift__(self, *args, **kwargs): + r = np.ndarray.__irshift__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ixor__(self, *args, **kwargs): + r = np.ndarray.__ixor__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ipow__(self, *args, **kwargs): + r = np.ndarray.__ipow__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ifloordiv__(self, *args, **kwargs): + r = np.ndarray.__ifloordiv__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __isub__(self, *args, **kwargs): + r = np.ndarray.__isub__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ior__(self, *args, **kwargs): + r = np.ndarray.__ior__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __itruediv__(self, *args, **kwargs): + r = np.ndarray.__itruediv__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __idiv__(self, *args, **kwargs): + r = np.ndarray.__idiv__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __iand__(self, *args, **kwargs): + r = np.ndarray.__iand__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __imod__(self, *args, **kwargs): + r = np.ndarray.__imod__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __iadd__(self, *args, **kwargs): + r = np.ndarray.__iadd__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __imul__(self, *args, **kwargs): + r = np.ndarray.__imul__(self, *args, **kwargs) + self.notify_observers() + return r \ No newline at end of file diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 182af902..60bdfe9d 100644 --- a/GPy/core/parameterization/param.py +++ 
b/GPy/core/parameterization/param.py @@ -4,7 +4,7 @@ import itertools import numpy from parameter_core import OptimizationHandlable, adjust_name_for_printing -from array_core import ObsAr +from observable_array import ObsAr ###### printing __constraints_name__ = "Constraint" diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index cfdfaf72..1cd591a8 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -6,5 +6,5 @@ from poisson import Poisson from student_t import StudentT from likelihood import Likelihood from mixed_noise import MixedNoise -from symbolic import Symbolic -from negative_binomial import Negative_binomial +#from symbolic import Symbolic +#from negative_binomial import Negative_binomial diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 7b918ff9..8bfaab4e 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -7,7 +7,7 @@ import unittest import GPy import numpy as np from GPy.core.parameterization.parameter_core import HierarchyError -from GPy.core.parameterization.array_core import ObsAr +from GPy.core.parameterization.observable_array import ObsAr class ArrayCoreTest(unittest.TestCase): def setUp(self): diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index 1837f07d..fc52581a 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -10,7 +10,7 @@ from GPy.core.parameterization.index_operations import ParameterIndexOperations, ParameterIndexOperationsView import tempfile from GPy.core.parameterization.param import Param -from GPy.core.parameterization.array_core import ObsAr +from GPy.core.parameterization.observable_array import ObsAr from GPy.core.parameterization.priors import Gaussian from GPy.kern._src.rbf import RBF from GPy.kern._src.linear import Linear From 6551c343c906f2b7ef5162e51d2c63e49ce043a5 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Thu, 3 Apr 2014 09:10:50 +0200 
Subject: [PATCH 47/51] Adding missing functions file. --- GPy/core/mapping.py | 4 +- .../latent_function_inference/laplace.py | 26 ++- GPy/kern/_src/symbolic.py | 4 +- GPy/likelihoods/__init__.py | 4 + GPy/likelihoods/symbolic.py | 152 +++++++++++------- GPy/util/functions.py | 18 +++ GPy/util/symbolic.py | 10 +- 7 files changed, 148 insertions(+), 70 deletions(-) create mode 100644 GPy/util/functions.py diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index efd9476f..6eaaf96c 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -34,7 +34,7 @@ class Mapping(Parameterized): raise NotImplementedError def df_dtheta(self, dL_df, X): - """The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters. + """The gradient of the outputs of the mapping with respect to each of the parameters. :param dL_df: gradient of the objective with respect to the function. :type dL_df: ndarray (num_data x output_dim) @@ -50,7 +50,7 @@ class Mapping(Parameterized): """ Plots the mapping associated with the model. - In one dimension, the function is plotted. - - In two dimsensions, a contour-plot shows the function + - In two dimensions, a contour-plot shows the function - In higher dimensions, we've not implemented this yet !TODO! Can plot only part of the data and part of the posterior functions diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 12315a29..9ba3f83f 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -1,4 +1,4 @@ -# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) # #Parts of this file were influenced by the Matlab GPML framework written by @@ -91,7 +91,11 @@ class Laplace(object): iteration = 0 while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter: W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata) + if np.any(np.isnan(W)): + raise ValueError('One or more element(s) of W is NaN') grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata) + if np.any(np.isnan(grad)): + raise ValueError('One or more element(s) of grad is NaN') W_f = W*f @@ -141,25 +145,30 @@ class Laplace(object): """ #At this point get the hessian matrix (or vector as W is diagonal) W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) + if np.any(np.isnan(W)): + raise ValueError('One or more element(s) of W is NaN') K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) #compute vital matrices C = np.dot(LiW12, K) - Ki_W_i = K - C.T.dot(C) #Could this be wrong? + Ki_W_i = K - C.T.dot(C) #compute the log marginal log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) - #Compute vival matrices for derivatives + # Compute matrices for derivatives dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat - dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9. + if np.any(np.isnan(dW_df)): + raise ValueError('One or more element(s) of dW_df is NaN') + + dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9. 
#BiK, _ = dpotrs(L, K, lower=1) #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i) #################### - #compute dL_dK# + # compute dL_dK # #################### if kern.size > 0 and not kern.is_fixed: #Explicit @@ -202,12 +211,12 @@ class Laplace(object): def _compute_B_statistics(self, K, W, log_concave): """ Rasmussen suggests the use of a numerically stable positive definite matrix B - Which has a positive diagonal element and can be easyily inverted + Which has a positive diagonal elements and can be easily inverted :param K: Prior Covariance matrix evaluated at locations X :type K: NxN matrix :param W: Negative hessian at a point (diagonal matrix) - :type W: Vector of diagonal values of hessian (1xN) + :type W: Vector of diagonal values of Hessian (1xN) :returns: (W12BiW12, L_B, Li_W12) """ if not log_concave: @@ -218,7 +227,8 @@ class Laplace(object): # If the likelihood is non-log-concave. We wan't to say that there is a negative variance # To cause the posterior to become less certain than the prior and likelihood, # This is a property only held by non-log-concave likelihoods - + if np.any(np.isnan(W)): + raise ValueError('One or more element(s) of W is NaN') #W is diagonal so its sqrt is just the sqrt of the diagonal elements W_12 = np.sqrt(W) B = np.eye(K.shape[0]) + W_12*K*W_12.T diff --git a/GPy/kern/_src/symbolic.py b/GPy/kern/_src/symbolic.py index 4f373fae..c7bbae73 100644 --- a/GPy/kern/_src/symbolic.py +++ b/GPy/kern/_src/symbolic.py @@ -94,8 +94,8 @@ class Symbolic(Kern): val = 1.0 # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class. 
if param is not None: - if param.has_key(theta): - val = param[theta] + if param.has_key(theta.name): + val = param[theta.name] setattr(self, theta.name, Param(theta.name, val, None)) self.add_parameters(getattr(self, theta.name)) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index cfdfaf72..cf3f4287 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -4,7 +4,11 @@ from gaussian import Gaussian from gamma import Gamma from poisson import Poisson from student_t import StudentT +from sstudent_t import SstudentT from likelihood import Likelihood from mixed_noise import MixedNoise from symbolic import Symbolic from negative_binomial import Negative_binomial +from skew_normal import Skew_normal +from skew_exponential import Skew_exponential +from null_category import Null_category diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py index ddc430dc..5d052119 100644 --- a/GPy/likelihoods/symbolic.py +++ b/GPy/likelihoods/symbolic.py @@ -16,7 +16,6 @@ from GPy.util.functions import cum_gaussian, ln_cum_gaussian from likelihood import Likelihood from ..core.parameterization import Param -func_modules = ['numpy', {'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}] if sympy_available: class Symbolic(Likelihood): @@ -26,55 +25,73 @@ if sympy_available: Likelihood where the form of the likelihood is provided by a sympy expression. """ - def __init__(self, pdf=None, log_pdf=None, cdf=None, logZ=None, gp_link=None, name='symbolic', log_concave=False, param=None): + def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]): + if gp_link is None: gp_link = link_functions.Identity() - if pdf is None and log_pdf is None and cdf is None: - raise ValueError, "You must provide an argument for the pdf or the log pdf." 
+ if log_pdf is None: + raise ValueError, "You must provide an argument for the log pdf." + + self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy'] super(Symbolic, self).__init__(gp_link, name=name) - - if pdf is None and log_pdf: - self._sp_pdf = sym.exp(log_pdf).simplify() - self._sp_log_pdf = log_pdf - - if log_pdf is None and pdf: - self._sp_pdf = pdf - self._sp_log_pdf = sym.log(pdf).simplify() - - # TODO: build pdf and log pdf from CDF or - # compute CDF given pdf/log-pdf. Also check log - # pdf, pdf and CDF are consistent. + self.missing_data = False + self._sym_log_pdf = log_pdf + if missing_log_pdf: + self.missing_data = True + self._sym_missing_log_pdf = missing_log_pdf # pull the variable names out of the symbolic pdf - sp_vars = [e for e in self._sp_pdf.atoms() if e.is_Symbol] - self._sp_f = [e for e in sp_vars if e.name=='f'] - if not self._sp_f: - raise ValueError('No variable f in pdf or log pdf.') - self._sp_y = [e for e in sp_vars if e.name=='y'] - if not self._sp_f: - raise ValueError('No variable y in pdf or log pdf.') - self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol] + self._sym_f = [e for e in sym_vars if e.name=='f'] + if not self._sym_f: + raise ValueError('No variable f in log pdf.') + self._sym_y = [e for e in sym_vars if e.name=='y'] + if not self._sym_y: + raise ValueError('No variable y in log pdf.') + self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + + theta_names = [theta.name for theta in self._sym_theta] + if self.missing_data: + # pull the variable names out of missing data + sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol] + sym_f = [e for e in sym_vars if e.name=='f'] + if not sym_f: + raise ValueError('No 
variable f in missing log pdf.') + sym_y = [e for e in sym_vars if e.name=='y'] + if sym_y: + raise ValueError('Data is present in missing data portion of likelihood.') + # additional missing data parameters + missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name) + self._sym_theta += missing_theta + self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name) # These are all the arguments need to compute likelihoods. - self.arg_list = self._sp_y + self._sp_f + self._sp_theta + self.arg_list = self._sym_y + self._sym_f + self._sym_theta # these are arguments for computing derivatives. - derivative_arguments = self._sp_f + self._sp_theta + derivative_arguments = self._sym_f + self._sym_theta # Do symbolic work to compute derivatives. - self._log_pdf_derivatives = {theta.name : sym.diff(self._sp_log_pdf,theta).simplify() for theta in derivative_arguments} + self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments} self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + if self.missing_data: + # Do symbolic work to compute derivatives. + self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + + # Add parameters to the model. 
- for theta in self._sp_theta: + for theta in self._sym_theta: val = 1.0 # TODO: need to decide how to handle user passing values for the se parameter vectors. if param is not None: - if param.has_key(theta): - val = param[theta] + if param.has_key(theta.name): + val = param[theta.name] setattr(self, theta.name, Param(theta.name, val, None)) self.add_parameters(getattr(self, theta.name)) @@ -93,14 +110,18 @@ if sympy_available: """Generate the code from the symbolic parts that will be used for likleihod computation.""" # TODO: Check here whether theano is available and set up # functions accordingly. - self._pdf_function = lambdify(self.arg_list, self._sp_pdf, func_modules) - self._log_pdf_function = lambdify(self.arg_list, self._sp_log_pdf, func_modules) + self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules) # compute code for derivatives (for implicit likelihood terms # we need up to 3rd derivatives) - setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], func_modules) for key in self._log_pdf_derivatives.keys()}) - setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], func_modules) for key in self._log_pdf_second_derivatives.keys()}) - setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], func_modules) for key in self._log_pdf_third_derivatives.keys()}) + setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}) + setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}) + setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in 
self._log_pdf_third_derivatives.keys()}) + + if self.missing_data: + setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}) + setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}) + setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}) # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta @@ -121,7 +142,7 @@ if sympy_available: # computed in the inference code. TODO: Thought: How does this # effect EP? Shouldn't this be done by a separate # Laplace-approximation specific call? - for grad, theta in zip(grads, self._sp_theta): + for grad, theta in zip(grads, self._sym_theta): parameter = getattr(self, theta.name) setattr(parameter, 'gradient', grad) @@ -131,11 +152,11 @@ if sympy_available: # need to do a lot of precomputation to ensure that the # likelihoods and gradients are computed together, then check # for parameter changes before updating. 
- for i, fvar in enumerate(self._sp_f): + for i, fvar in enumerate(self._sym_f): self._arguments[fvar.name] = f - for i, yvar in enumerate(self._sp_y): + for i, yvar in enumerate(self._sym_y): self._arguments[yvar.name] = y - for theta in self._sp_theta: + for theta in self._sym_theta: self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) def pdf_link(self, inv_link_f, y, Y_metadata=None): @@ -150,10 +171,7 @@ if sympy_available: :returns: likelihood evaluated for this point :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - l = self._pdf_function(**self._arguments) - return np.prod(l) + return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=None)) def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ @@ -170,7 +188,10 @@ if sympy_available: """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - ll = self._log_pdf_function(**self._arguments) + if self.missing_data: + ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._missing_arguments), self._log_pdf_function(**self._arguments)) + else: + ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments)) return np.sum(ll) def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): @@ -188,7 +209,10 @@ if sympy_available: """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return self._first_derivative_code['f'](**self._arguments) + if self.missing_data: + return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_argments), self._first_derivative_code['f'](**self._argments)) + else: + return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments)) def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ @@ -212,36 +236,50 @@ if sympy_available: """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape 
self._arguments_update(inv_link_f, y) - return self._second_derivative_code['f'](**self._arguments) + if self.missing_data: + return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._arguments), self._second_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments)) def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return self._third_derivative_code['f'](**self._arguments) - raise NotImplementedError + if self.missing_data: + return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._arguments), self._third_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments)) def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - g = np.zeros((y.shape[0], len(self._sp_theta))) - for i, theta in enumerate(self._sp_theta): - g[:, i:i+1] = self._first_derivative_code[theta.name](**self._arguments) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments)) return g.sum(0) def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - g = np.zeros((y.shape[0], len(self._sp_theta))) - for i, theta in enumerate(self._sp_theta): - g[:, i:i+1] = self._second_derivative_code[theta.name](**self._arguments) +
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments)) return g def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - g = np.zeros((y.shape[0], len(self._sp_theta))) - for i, theta in enumerate(self._sp_theta): - g[:, i:i+1] = self._third_derivative_code[theta.name](**self._arguments) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments)) return g def predictive_mean(self, mu, sigma, Y_metadata=None): diff --git a/GPy/util/functions.py b/GPy/util/functions.py new file mode 100644 index 00000000..a9ee1084 --- /dev/null +++ b/GPy/util/functions.py @@ -0,0 +1,18 @@ +import numpy as np +from scipy.special import erf, erfcx +import sys +epsilon = sys.float_info.epsilon +lim_val = -np.log(epsilon) + +def cum_gaussian(x): + g=0.5*(1+erf(x/np.sqrt(2))) + return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g)) + +def ln_cum_gaussian(x): + return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x))) + +def clip_exp(x): + if any(x>=lim_val) or any(x<=-lim_val): + return np.exp(np.clip(x, -lim_val, lim_val)) + else: + return np.exp(x) diff --git a/GPy/util/symbolic.py 
b/GPy/util/symbolic.py index 5074a42c..5b3ac312 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -18,13 +18,16 @@ class ln_cum_gaussian(Function): def fdiff(self, argindex=1): x = self.args[0] - return 1/cum_gaussian(x)*gaussian(x) + return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi) @classmethod def eval(cls, x): if x.is_Number: return log(cum_gaussian(x)) + def _eval_is_real(self): + return self.args[0].is_real + class cum_gaussian(Function): nargs = 1 def fdiff(self, argindex=1): @@ -36,12 +39,17 @@ class cum_gaussian(Function): if x.is_Number: return 0.5*(1+erf(sqrt(2)/2*x)) + def _eval_is_real(self): + return self.args[0].is_real + class gaussian(Function): nargs = 1 @classmethod def eval(cls, x): return 1/sqrt(2*pi)*exp(-0.5*x*x) + def _eval_is_real(self): + return True class ln_diff_erf(Function): nargs = 2 From 22e4f8a1e83947c89f573db4715b7641ebe82d61 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Fri, 4 Apr 2014 13:14:07 +0100 Subject: [PATCH 48/51] not importable --- GPy/likelihoods/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 5d5d692a..d7ad5753 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -15,8 +15,8 @@ except ImportError: if sympy_available: # These are likelihoods that rely on symbolic. 
from symbolic import Symbolic - from sstudent_t import SstudentT + #from sstudent_t import SstudentT from negative_binomial import Negative_binomial - from skew_normal import Skew_normal - from skew_exponential import Skew_exponential - from null_category import Null_category + #from skew_normal import Skew_normal + #from skew_exponential import Skew_exponential + #from null_category import Null_category From 04a889b3a924c60e1d04ef304f12f5b668ffe86b Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Fri, 4 Apr 2014 13:47:02 +0100 Subject: [PATCH 49/51] making observables accessable --- GPy/core/__init__.py | 1 + GPy/core/parameterization/lists_and_dicts.py | 66 ++++++++----------- GPy/core/parameterization/observable_array.py | 2 +- GPy/core/parameterization/param.py | 2 +- GPy/core/parameterization/parameter_core.py | 27 +++----- GPy/core/parameterization/parameterized.py | 2 +- GPy/testing/pickle_tests.py | 4 +- 7 files changed, 45 insertions(+), 59 deletions(-) diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index a42d76ed..25651827 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -4,6 +4,7 @@ from model import * from parameterization.parameterized import adjust_name_for_printing, Parameterizable from parameterization.param import Param, ParamConcatenation +from parameterization.observable_array import ObsAr from gp import GP from sparse_gp import SparseGP diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index 6902c249..dd93c5ba 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -5,6 +5,7 @@ Created on 27 Feb 2014 ''' from collections import defaultdict +import weakref def intarray_default_factory(): import numpy as np @@ -41,49 +42,40 @@ class ObservablesList(object): def __init__(self): self._poc = [] - def remove(self, value): - return self._poc.remove(value) - - - def __delitem__(self, ind): - return 
self._poc.__delitem__(ind) - - - def __setitem__(self, ind, item): - return self._poc.__setitem__(ind, item) - - - def __getitem__(self, ind): - return self._poc.__getitem__(ind) - + def remove(self, priority, observable, callble): + """ + """ + self._poc.remove((priority, observable, callble)) def __repr__(self): return self._poc.__repr__() - - def append(self, obj): - return self._poc.append(obj) - - - def index(self, value): - return self._poc.index(value) - - - def extend(self, iterable): - return self._poc.extend(iterable) - - + def add(self, priority, observable, callble): + i = 0 + for i, [p, _, _] in enumerate(self._poc): + if p < priority: + break + self._poc.insert(i, (priority, weakref.ref(observable), callble)) + def __str__(self): - return self._poc.__str__() - + ret = [] + curr_p = None + for p, o, c in self: + curr = '' + if curr_p != p: + pre = "{!s}: ".format(p) + curr_pre = pre + else: curr_pre = " "*len(pre) + curr_p = p + curr += curr_pre + ret.append(curr + ", ".join(map(str, [o,c]))) + return '\n'.join(ret) def __iter__(self): - return self._poc.__iter__() - - - def insert(self, index, obj): - return self._poc.insert(index, obj) - + self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None] + for p, o, c in self._poc: + if o() is not None: + yield p, o(), c def __len__(self): return self._poc.__len__() @@ -106,6 +98,6 @@ class ObservablesList(object): def __setstate__(self, state): self._poc = [] for p, o, c in state: - self._poc.append((p,o,getattr(o, c))) + self.add(p,o,getattr(o, c)) pass diff --git a/GPy/core/parameterization/observable_array.py b/GPy/core/parameterization/observable_array.py index fc9d6cf2..56d33bfc 100644 --- a/GPy/core/parameterization/observable_array.py +++ b/GPy/core/parameterization/observable_array.py @@ -25,7 +25,7 @@ class ObsAr(np.ndarray, Pickleable, Observable): def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return - self._observer_callables_ = 
getattr(obj, '_observer_callables_', None) + self.observers = getattr(obj, 'observers', None) def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 60bdfe9d..4490a8ee 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -59,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr): import pydot node = pydot.Node(id(self), shape='record', label=self.name) G.add_node(node) - for o in self._observer_callables_.keys(): + for o in self.observers.keys(): label = o.name if hasattr(o, 'name') else str(o) observed_node = pydot.Node(id(o), label=label) G.add_node(observed_node) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 2dac9bf3..43bc7177 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -44,22 +44,23 @@ class Observable(object): def __init__(self, *args, **kwargs): super(Observable, self).__init__() from lists_and_dicts import ObservablesList - self._observer_callables_ = ObservablesList() + self.observers = ObservablesList() def add_observer(self, observer, callble, priority=0): - self._insert_sorted(priority, observer, callble) + self.observers.add(priority, observer, callble) def remove_observer(self, observer, callble=None): to_remove = [] - for p, obs, clble in self._observer_callables_: + for poc in self.observers: + _, obs, clble = poc if callble is not None: if (obs == observer) and (callble == clble): - to_remove.append((p, obs, clble)) + to_remove.append(poc) else: if obs is observer: - to_remove.append((p, obs, clble)) + to_remove.append(poc) for r in to_remove: - self._observer_callables_.remove(r) + self.observers.remove(*r) def notify_observers(self, which=None, min_priority=None): """ @@ -74,21 +75,13 @@ class Observable(object): if which is None: which = self if min_priority is None: - 
[callble(self, which=which) for _, _, callble in self._observer_callables_] + [callble(self, which=which) for _, _, callble in self.observers] else: - for p, _, callble in self._observer_callables_: + for p, _, callble in self.observers: if p <= min_priority: break callble(self, which=which) - def _insert_sorted(self, p, o, c): - ins = 0 - for pr, _, _ in self._observer_callables_: - if p > pr: - break - ins += 1 - self._observer_callables_.insert(ins, (p, o, c)) - #=============================================================================== # Foundation framework for parameterized and param objects: #=============================================================================== @@ -192,7 +185,7 @@ class Pickleable(object): def __getstate__(self): ignore_list = ([#'_parent_', '_parent_index_', - #'_observer_callables_', + #'observers', '_param_array_', '_gradient_array_', '_fixes_', '_Cacher_wrap__cachers'] #+ self.parameter_names(recursive=False) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 75085ca2..a794ab40 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -90,7 +90,7 @@ class Parameterized(Parameterizable): child_node = child.build_pydot(G) G.add_edge(pydot.Edge(node, child_node)) - for o in self._observer_callables_.keys(): + for o in self.observers.keys(): label = o.name if hasattr(o, 'name') else str(o) observed_node = pydot.Node(id(o), label=label) G.add_node(observed_node) diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index fc52581a..b888353c 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -191,13 +191,13 @@ class Test(ListDictTestCase): par.count = 0 par.add_observer(self, self._callback, 1) pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) - self.assertNotIn(par._observer_callables_[0], pcopy._observer_callables_) + self.assertNotIn(par.observers[0], 
pcopy.observers) pcopy = par.copy() pcopy.name = "copy" self.assertTrue(par.checkgrad()) self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.kern.checkgrad()) - self.assertIn(par._observer_callables_[0], pcopy._observer_callables_) + self.assertIn(par.observers[0], pcopy.observers) self.assertEqual(par.count, 3) self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad From 11059fb6152b36783780eaa1ef5d4b5aadce7bd0 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Fri, 4 Apr 2014 14:20:10 +0100 Subject: [PATCH 50/51] made observers accessible and observers now only weak reference the observables --- GPy/core/parameterization/lists_and_dicts.py | 29 +++++++++++++++----- GPy/core/parameterization/param.py | 2 +- GPy/testing/pickle_tests.py | 1 + 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index dd93c5ba..604d0a01 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -42,20 +42,29 @@ class ObservablesList(object): def __init__(self): self._poc = [] + def __getitem__(self, ind): + p,o,c = self._poc[ind] + return p, o(), c + def remove(self, priority, observable, callble): """ """ - self._poc.remove((priority, observable, callble)) + self.flush() + for i in range(len(self) - 1, -1, -1): + p,o,c = self[i] + if priority==p and observable==o and callble==c: + del self._poc[i] def __repr__(self): return self._poc.__repr__() def add(self, priority, observable, callble): - i = 0 - for i, [p, _, _] in enumerate(self._poc): - if p < priority: + ins = 0 + for pr, _, _ in self: + if priority > pr: break - self._poc.insert(i, (priority, weakref.ref(observable), callble)) + ins += 1 + self._poc.insert(ins, (priority, weakref.ref(observable), callble)) def __str__(self): ret = [] @@ -68,25 +77,31 @@ class ObservablesList(object): else: curr_pre = " "*len(pre) curr_p = p curr += curr_pre - ret.append(curr + ", 
".join(map(str, [o,c]))) + ret.append(curr + ", ".join(map(repr, [o,c]))) return '\n'.join(ret) - def __iter__(self): + def flush(self): self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None] + + def __iter__(self): + self.flush() for p, o, c in self._poc: if o() is not None: yield p, o(), c def __len__(self): + self.flush() return self._poc.__len__() def __deepcopy__(self, memo): + self.flush() s = ObservablesList() import copy s._poc = copy.deepcopy(self._poc, memo) return s def __getstate__(self): + self.flush() from ...util.caching import Cacher obs = [] for p, o, c in self: diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 4490a8ee..9c3d7bd3 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -324,7 +324,7 @@ class ParamConcatenation(object): if update: self.update_all_params() def values(self): - return numpy.hstack([p.param_array for p in self.params]) + return numpy.hstack([p.param_array.flat for p in self.params]) #=========================================================================== # parameter operations: #=========================================================================== diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index b888353c..d975aaa3 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -197,6 +197,7 @@ class Test(ListDictTestCase): self.assertTrue(par.checkgrad()) self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.kern.checkgrad()) + import ipdb;ipdb.set_trace() self.assertIn(par.observers[0], pcopy.observers) self.assertEqual(par.count, 3) self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad From 9d312ab8ffca515650173455be3b4416e6673e66 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Sat, 5 Apr 2014 00:10:35 +0100 Subject: [PATCH 51/51] bug fix: caching.py w.r.t. 
ignore_args --- GPy/util/caching.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index ced56727..bb162ee3 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -66,6 +66,7 @@ class Cacher(object): #first make sure the depth limit isn't exceeded if len(self.cached_inputs) == self.limit: args_ = self.cached_inputs.pop(0) + args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs] [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None] self.inputs_changed.pop(0) self.cached_outputs.pop(0)