diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 1add8268..6441561b 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -48,7 +48,7 @@ class GP(Model):
         self.Y_metadata = None
 
         assert isinstance(kernel, kern.Kern)
-        assert self.input_dim == kernel.input_dim
+        #assert self.input_dim == kernel.input_dim
         self.kern = kernel
 
         assert isinstance(likelihood, likelihoods.Likelihood)
@@ -68,8 +68,9 @@ class GP(Model):
 
     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
+        self.likelihood.update_gradients(np.diag(grad_dict['dL_dK']))
         self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
-
+
     def log_likelihood(self):
         return self._log_marginal_likelihood
diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py
index 27801e23..9ce0e8f6 100644
--- a/GPy/core/parameterization/array_core.py
+++ b/GPy/core/parameterization/array_core.py
@@ -16,7 +16,7 @@ class ObservableArray(np.ndarray, Observable):
     __array_priority__ = -1 # Never give back ObservableArray
     def __new__(cls, input_array):
         if not isinstance(input_array, ObservableArray):
-            obj = np.atleast_1d(input_array).view(cls)
+            obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
         else: obj = input_array
         cls.__name__ = "ObservableArray\n "
         return obj
diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index a78cf02d..351eacef 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -15,7 +15,6 @@ Observable Pattern for patameterization
 
 from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
-import itertools
 
 __updated__ = '2013-12-16'
 
@@ -43,6 +42,7 @@ class Observable(object):
     _updated = True
     def __init__(self, *args, **kwargs):
         self._observer_callables_ = []
+
    def __del__(self, *args, **kwargs):
        del self._observer_callables_
 
@@ -551,8 +551,8 @@
        return p
 
    def _set_params_transformed(self, p):
-        if p is self._param_array_:
-            p = p.copy()
+        #if p is self._param_array_:
+        p = p.copy()
        if self._has_fixes(): self._param_array_[self._fixes_] = p
        else: self._param_array_[:] = p
        self.untransform()
diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py
index 8bc7ca59..4c929cc8 100644
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@@ -66,10 +66,10 @@
     def __init__(self, means=None, variances=None, name=None, **kw):
         super(VariationalPosterior, self).__init__(name=name, **kw)
         self.mean = Param("mean", means)
-        self.ndim = self.mean.ndim
-        self.shape = self.mean.shape
         self.variance = Param("variance", variances, Logexp())
         self.add_parameters(self.mean, self.variance)
+        self.ndim = self.mean.ndim
+        self.shape = self.mean.shape
         self.num_data, self.input_dim = self.mean.shape
         if self.has_uncertain_inputs():
             assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
@@ -77,6 +77,18 @@
 
     def has_uncertain_inputs(self):
         return not self.variance is None
 
+    def __getitem__(self, s):
+        import copy
+        n = self.__new__(self.__class__)
+        dc = copy.copy(self.__dict__)
+        dc['mean'] = dc['mean'][s]
+        dc['variance'] = dc['variance'][s]
+        dc['shape'] = dc['mean'].shape
+        dc['ndim'] = dc['mean'].ndim
+        dc['num_data'], dc['input_dim'] = dc['mean'].shape[0], dc['mean'].shape[1] if dc['ndim'] > 1 else 1
+        n.__dict__ = dc
+        return n
+
 class NormalPosterior(VariationalPosterior):
     '''
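# Illustrative sketch (not part of the patch): the behaviour intended by the new
# VariationalPosterior.__getitem__ above. The array shapes and the NormalPosterior
# construction below are assumptions chosen for demonstration only.
import numpy as np
from GPy.core.parameterization.variational import NormalPosterior

q = NormalPosterior(means=np.random.randn(10, 2), variances=np.ones((10, 2)))
q_sub = q[:3]                        # row slicing goes through the new __getitem__
assert q_sub.mean.shape == (3, 2)    # mean and variance are sliced together
# q_sub.num_data / q_sub.input_dim are recomputed from the sliced mean (3 and 2 here)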
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index f4f34a5e..16b66676 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -64,8 +64,8 @@ class SparseGP(GP):
             self.kern.gradient += target
 
             #gradients wrt Z
-            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
-            self.Z.gradient += self.kern.gradients_Z_expectations(
+            self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations(
                                self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
         else:
             #gradients wrt kernel
@@ -77,8 +77,8 @@
             self.kern.gradient += target
 
             #gradients wrt Z
-            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
+            self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
+            self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
 
     def _raw_predict(self, Xnew, full_cov=False):
         """
diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
index 922b52f4..47f6ea09 100644
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@@ -49,9 +49,6 @@ class ExactGaussianInference(object):
 
         dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
 
-        #TODO: does this really live here?
-        likelihood.update_gradients(np.diag(dL_dK))
-
         return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 77fe057d..87dda365 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -1,12 +1,10 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-import sys
 import numpy as np
 import itertools
-from linear import Linear
 from ...core.parameterization import Parameterized
-from ...core.parameterization.param import Param
+from ...util.caching import Cache_this
 from kern import Kern
 
 class Add(Kern):
@@ -14,19 +12,24 @@
         assert all([isinstance(k, Kern) for k in subkerns])
         if tensor:
             input_dim = sum([k.input_dim for k in subkerns])
-            self.input_slices = []
+            self.active_dims = []
             n = 0
             for k in subkerns:
-                self.input_slices.append(slice(n, n+k.input_dim))
+                self.active_dims.append(slice(n, n+k.input_dim))
                 n += k.input_dim
         else:
-            assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
-            input_dim = subkerns[0].input_dim
-            self.input_slices = [slice(None) for k in subkerns]
+            #assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
+            #input_dim = subkerns[0].input_dim
+            #self.input_slices = [slice(None) for k in subkerns]
+            input_dim = reduce(np.union1d, map(lambda x: np.r_[x.active_dims], subkerns))
         super(Add, self).__init__(input_dim, 'add')
         self.add_parameters(*subkerns)
-
-
+
+    @property
+    def parts(self):
+        return self._parameters_
+
+    @Cache_this(limit=1, force_kwargs=('which_parts',))
     def K(self, X, X2=None):
         """
         Compute the kernel function.
@@ -37,13 +40,19 @@ class Add(Kern):
         handles this as X2 == X.
         """
         assert X.shape[1] == self.input_dim
-        if X2 is None:
-            return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
-        else:
-            return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
+        which_parts = None
+        if which_parts is None:
+            which_parts = self.parts
+        elif not isinstance(which_parts, (list, tuple)):
+            # if only one part is given
+            which_parts = [which_parts]
+        return sum([p.K(X, X2) for p in which_parts])
 
-    def update_gradients_full(self, dL_dK, X):
-        [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+
+    def update_gradients_diag(self, dL_dK, X):
+        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
 
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -55,16 +64,17 @@
         :param X2: Observed data inputs (optional, defaults to X)
         :type X2: np.ndarray (num_inducing x input_dim)"""
 
-        target = np.zeros_like(X)
-        if X2 is None:
-            [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        else:
-            [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        target = np.zeros(X.shape)
+        for p in self.parts:
+            target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
         return target
 
     def Kdiag(self, X):
+        which_parts = None
         assert X.shape[1] == self.input_dim
-        return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
+        if which_parts is None:
+            which_parts = self.parts
+        return sum([p.Kdiag(X) for p in which_parts])
 
 
     def psi0(self, Z, variational_posterior):
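# Illustrative sketch (not part of the patch): what the new Add.__init__ computes for
# input_dim when the parts declare their own active_dims. Plain arrays stand in for
# the subkernels' active_dims here; it mirrors the patch's use of the builtin reduce.
import numpy as np

active_dims_of_parts = [np.r_[0, 1], np.r_[1, 2]]       # e.g. two overlapping subkernels
input_dim = reduce(np.union1d, active_dims_of_parts)    # union of all active dimensions
# -> array([0, 1, 2]): the Add kernel spans every column any part uses, while each part
#    still only sees its own columns through the _slice_wrapper machinery in kern.py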
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 47166156..33b9ff08 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -2,13 +2,22 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import sys
-import numpy as np
-import itertools
-from ...core.parameterization import Parameterized
-from ...core.parameterization.param import Param
-
+from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
+from ...util.caching import Cache_this
+
+class KernCallsViaSlicerMeta(ParametersChangedMeta):
+    def __call__(self, *args, **kw):
+        instance = super(KernCallsViaSlicerMeta, self).__call__(*args, **kw)
+        instance.K = instance._slice_wrapper(instance.K)
+        instance.Kdiag = instance._slice_wrapper(instance.Kdiag, True)
+        instance.update_gradients_full = instance._slice_wrapper(instance.update_gradients_full, False, True)
+        instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True)
+        instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True)
+        instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True)
+        return instance
+
 class Kern(Parameterized):
+    __metaclass__ = KernCallsViaSlicerMeta
     def __init__(self, input_dim, name, *a, **kw):
         """
         The base class for a kernel: a positive definite function
@@ -20,11 +29,83 @@
         Do not instantiate.
         """
         super(Kern, self).__init__(name=name, *a, **kw)
-        self.input_dim = input_dim
-
+        if isinstance(input_dim, int):
+            self.active_dims = slice(0, input_dim)
+            self.input_dim = input_dim
+        else:
+            self.active_dims = input_dim
+            self.input_dim = len(self.active_dims)
+        self._sliced_X = False
+        self._sliced_X2 = False
+
+    @Cache_this(limit=10, ignore_args=(0,))
+    def _slice_X(self, X):
+        return X[:, self.active_dims]
+
+    def _slice_wrapper(self, operation, diag=False, derivative=False):
+        """
+        This method wraps the kernel's operations so that every kernel always sees only its own active input dimensions.
+        The different switches are:
+        diag: the operation takes a single X argument (there is no X2)
+        derivative: the first argument of the operation is dL_dK (or dL_dKdiag)
+        """
+        if derivative:
+            if diag:
+                def x_slice_wrapper(dL_dK, X, *args, **kw):
+                    X = self._slice_X(X) if not self._sliced_X else X
+                    self._sliced_X = True
+                    try:
+                        ret = operation(dL_dK, X, *args, **kw)
+                    except: raise
+                    finally:
+                        self._sliced_X = False
+                    return ret
+            else:
+                def x_slice_wrapper(dL_dK, X, X2=None, *args, **kw):
+                    X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2
+                    self._sliced_X = True
+                    self._sliced_X2 = True
+                    try:
+                        ret = operation(dL_dK, X, X2, *args, **kw)
+                    except: raise
+                    finally:
+                        self._sliced_X = False
+                        self._sliced_X2 = False
+                    return ret
+        else:
+            if diag:
+                def x_slice_wrapper(X, *args, **kw):
+                    X = self._slice_X(X) if not self._sliced_X else X
+                    self._sliced_X = True
+                    try:
+                        ret = operation(X, *args, **kw)
+                    except: raise
+                    finally:
+                        self._sliced_X = False
+                    return ret
+            else:
+                def x_slice_wrapper(X, X2=None, *args, **kw):
+                    X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2
+                    self._sliced_X = True
+                    self._sliced_X2 = True
+                    try:
+                        ret = operation(X, X2, *args, **kw)
+                    except: raise
+                    finally:
+                        self._sliced_X = False
+                        self._sliced_X2 = False
+                    return ret
+        x_slice_wrapper._operation = operation
+        x_slice_wrapper.__name__ = ("slicer(" + operation.__name__
+                                    + ("," + str(bool(diag)) if diag else '')
+                                    + (',' + str(bool(derivative)) if derivative else '')
+                                    + ')')
+        x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "")
+        return x_slice_wrapper
+
     def K(self, X, X2):
         raise NotImplementedError
-    def Kdiag(self, Xa):
+    def Kdiag(self, X):
         raise NotImplementedError
     def psi0(self, Z, variational_posterior):
         raise NotImplementedError
@@ -34,13 +115,16 @@
         raise NotImplementedError
     def gradients_X(self, dL_dK, X, X2):
         raise NotImplementedError
-    def gradients_X_diag(self, dL_dK, X):
+    def gradients_X_diag(self, dL_dKdiag, X):
         raise NotImplementedError
-
+
     def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
-
+    def update_gradients_diag(self, dL_dKdiag, X):
+        """Set the gradients of all parameters for the derivative of the diagonal of the covariance w.r.t. the kernel parameters."""
+        raise NotImplementedError
+
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         """
         Set the gradients of all parameters when doing inference with
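# Illustrative sketch (not part of the patch): the column-slicing idea that
# KernCallsViaSlicerMeta/_slice_wrapper above install on every kernel, reduced to a
# standalone helper. `sliced` and `toy_K` are hypothetical names used for demonstration.
import numpy as np

def sliced(operation, active_dims):
    # restrict X (and X2, if given) to active_dims before calling the wrapped operation
    def wrapper(X, X2=None):
        X = X[:, active_dims]
        X2 = X2[:, active_dims] if X2 is not None else None
        return operation(X, X2)
    return wrapper

toy_K = sliced(lambda X, X2: X.shape, active_dims=np.r_[0, 2])
print(toy_K(np.zeros((5, 4))))   # (5, 2): the operation only ever sees columns 0 and 2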
""" super(Kern, self).__init__(name=name, *a, **kw) - self.input_dim = input_dim - + if isinstance(input_dim, int): + self.active_dims = slice(0, input_dim) + self.input_dim = input_dim + else: + self.active_dims = input_dim + self.input_dim = len(self.active_dims) + self._sliced_X = False + self._sliced_X2 = False + + @Cache_this(limit=10, ignore_args = (0,)) + def _slice_X(self, X): + return X[:, self.active_dims] + + def _slice_wrapper(self, operation, diag=False, derivative=False): + """ + This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. + The different switches are: + diag: if X2 exists + derivative: if firest arg is dL_dK + """ + if derivative: + if diag: + def x_slice_wrapper(dL_dK, X, *args, **kw): + X = self._slice_X(X) if not self._sliced_X else X + self._sliced_X = True + try: + ret = operation(dL_dK, X, *args, **kw) + except: raise + finally: + self._sliced_X = False + return ret + else: + def x_slice_wrapper(dL_dK, X, X2=None, *args, **kw): + X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2 + self._sliced_X = True + self._sliced_X2 = True + try: + ret = operation(dL_dK, X, X2, *args, **kw) + except: raise + finally: + self._sliced_X = False + self._sliced_X2 = False + return ret + else: + if diag: + def x_slice_wrapper(X, *args, **kw): + X = self._slice_X(X) if not self._sliced_X else X + self._sliced_X = True + try: + ret = operation(X, *args, **kw) + except: raise + finally: + self._sliced_X = False + return ret + else: + def x_slice_wrapper(X, X2=None, *args, **kw): + X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2 + self._sliced_X = True + self._sliced_X2 = True + try: + ret = operation(X, X2, *args, **kw) + except: raise + finally: + self._sliced_X = False + self._sliced_X2 = False + return ret + x_slice_wrapper._operation = operation + x_slice_wrapper.__name__ = ("slicer("+operation.__name__ + +(","+str(bool(diag)) if diag else'') + +(','+str(bool(derivative)) if derivative else '') + +')') + x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "") + return x_slice_wrapper + def K(self, X, X2): raise NotImplementedError - def Kdiag(self, Xa): + def Kdiag(self, X): raise NotImplementedError def psi0(self, Z, variational_posterior): raise NotImplementedError @@ -34,13 +115,16 @@ class Kern(Parameterized): raise NotImplementedError def gradients_X(self, dL_dK, X, X2): raise NotImplementedError - def gradients_X_diag(self, dL_dK, X): + def gradients_X_diag(self, dL_dKdiag, X): raise NotImplementedError - + def update_gradients_full(self, dL_dK, X, X2): """Set the gradients of all parameters when doing full (N) inference.""" raise NotImplementedError - + def update_gradients_diag(self, dL_dKdiag, X): + """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters.""" + raise NotImplementedError + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): """ Set the gradients of all parameters when doing inference with diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 44e17d8a..725f8660 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -57,7 +57,7 @@ class Stationary(Kern): if lengthscale.size != input_dim: lengthscale = np.ones(input_dim)*lengthscale else: - lengthscale = np.ones(self.input_dim) + 
diff --git a/GPy/util/caching.py b/GPy/util/caching.py
index 250efe11..0b6f7234 100644
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@@ -9,24 +9,27 @@
 
     """
-    def __init__(self, operation, limit=5, ignore_args=()):
+    def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
         self.limit = int(limit)
         self.ignore_args = ignore_args
+        self.force_kwargs = force_kwargs
         self.operation=operation
         self.cached_inputs = []
         self.cached_outputs = []
         self.inputs_changed = []
 
-    def __call__(self, *args):
+    def __call__(self, *args, **kw):
         """
         A wrapper function for self.operation,
         """
         #ensure that specified arguments are ignored
+        items = sorted(kw.items(), key=lambda x: x[0])
+        oa_all = args + tuple(a for _,a in items)
         if len(self.ignore_args) != 0:
-            oa = [a for i,a in enumerate(args) if i not in self.ignore_args]
+            oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
         else:
-            oa = args
+            oa = oa_all
 
         # this makes sure we only add an observer once, and that None can be in args
         observable_args = []
@@ -37,8 +40,13 @@
         #make sure that all the found argument really are observable:
         #otherswise don't cache anything, pass args straight though
         if not all([isinstance(arg, Observable) for arg in observable_args]):
-            return self.operation(*args)
+            return self.operation(*args, **kw)
 
+        if len(self.force_kwargs) != 0:
+            # check if there are force args, which force reloading
+            for k in self.force_kwargs:
+                if k in kw and kw[k] is not None:
+                    return self.operation(*args, **kw)
         # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
         # return self.operation(*args)
@@ -48,7 +56,7 @@
             i = state.index(True)
             if self.inputs_changed[i]:
                 #(elements of) the args have changed since we last computed: update
-                self.cached_outputs[i] = self.operation(*args)
+                self.cached_outputs[i] = self.operation(*args, **kw)
                 self.inputs_changed[i] = False
                 return self.cached_outputs[i]
             else:
@@ -62,11 +70,11 @@
                 self.cached_outputs.pop(0)
 
             #compute
-            self.cached_inputs.append(args)
-            self.cached_outputs.append(self.operation(*args))
+            self.cached_inputs.append(oa_all)
+            self.cached_outputs.append(self.operation(*args, **kw))
             self.inputs_changed.append(False)
             [a.add_observer(self, self.on_cache_changed) for a in observable_args]
-            return self.cached_outputs[-1]#Max says return.
+            return self.cached_outputs[-1]#return
 
     def on_cache_changed(self, arg):
         """
@@ -90,15 +98,16 @@ class Cache_this(object):
     """
     A decorator which can be applied to bound methods in order to cache them
     """
-    def __init__(self, limit=5, ignore_args=()):
+    def __init__(self, limit=5, ignore_args=(), force_kwargs=()):
         self.limit = limit
         self.ignore_args = ignore_args
+        self.force_args = force_kwargs
         self.c = None
     def __call__(self, f):
-        def f_wrap(*args):
+        def f_wrap(*args, **kw):
             if self.c is None:
-                self.c = Cacher(f, self.limit, ignore_args=self.ignore_args)
-            return self.c(*args)
+                self.c = Cacher(f, self.limit, ignore_args=self.ignore_args, force_kwargs=self.force_args)
+            return self.c(*args, **kw)
         f_wrap._cacher = self
         f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
         return f_wrap
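# Illustrative sketch (not part of the patch): intent of the new force_kwargs option on
# Cache_this/Cacher. A keyword named in force_kwargs bypasses the cache whenever it is
# passed with a non-None value (this is how Add.K declares 'which_parts' above). The
# Demo class and its method are made-up names, not GPy API; note that caching itself
# only engages when the arguments are Observable (e.g. Param/ObservableArray).
from GPy.util.caching import Cache_this

class Demo(object):
    @Cache_this(limit=2, force_kwargs=('which_parts',))
    def covariance(self, X, which_parts=None):
        # recomputed (never served from cache) whenever which_parts is given and not None
        return X.sum() if which_parts is None else X[:, which_parts].sum()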