diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 2dcf0e14..13336ef5 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -44,6 +44,7 @@ class GP(Model): self.Y_metadata = None assert isinstance(kernel, kern.Kern) + assert self.input_dim == kernel.input_dim self.kern = kernel assert isinstance(likelihood, likelihoods.Likelihood) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f54c0117..016ecbf6 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri :param input_array: array which this parameter handles :type input_array: numpy.ndarray :param default_constraint: The default constraint for this parameter - :type default_constraint: + :type default_constraint: You can add/remove constraints by calling constrain on the parameter itself, e.g: @@ -59,7 +59,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri def __init__(self, name, input_array, default_constraint=None): super(Param, self).__init__(name=name, default_constraint=default_constraint) - + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return @@ -192,7 +192,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri return numpy.r_[a] return numpy.r_[:b] return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size))) - + #=========================================================================== # Convenience #=========================================================================== @@ -260,7 +260,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + clean_curr_slice += range(self._realshape_[i]) if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), @@ -426,4 +426,4 @@ class ParamConcatenation(object): start = False return "\n".join(strings) def __repr__(self): - return "\n".join(map(repr,self.params)) \ No newline at end of file + return "\n".join(map(repr,self.params)) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..5acdec58 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -18,8 +18,8 @@ class Observable(object): def remove_observer(self, observer): del self._observers_[observer] def _notify_observers(self): - [callble(self) for callble in self._observers_.itervalues()] - + [callble(self) for callble in self._observers_.values()] + class Pickleable(object): def _getstate(self): """ @@ -51,7 +51,7 @@ class Parentable(object): super(Parentable,self).__init__() self._direct_parent_ = direct_parent self._parent_index_ = parent_index - + def has_parent(self): return self._direct_parent_ is not None @@ -82,7 +82,7 @@ class Nameable(Parentable): from_name = self.name self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, from_name) + self._direct_parent_._name_changed(self, from_name) class Parameterizable(Parentable): @@ -90,7 +90,7 @@ class Parameterizable(Parentable): 
super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.array_core import ParamList _parameters_ = ParamList() - + def parameter_names(self, add_name=False): if add_name: return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] @@ -142,21 +142,21 @@ class Gradcheckable(Parentable): class Indexable(object): def _raveled_index(self): raise NotImplementedError, "Need to be able to get the raveled Index" - + def _internal_offset(self): return 0 - + def _offset_for(self, param): raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?" - + def _raveled_index_for(self, param): """ get the raveled index for a param that is an int array, containing the indexes for the flattened param inside this parameterized logic. """ - raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" - + raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" + class Constrainable(Nameable, Indexable, Parameterizable): def __init__(self, name, default_constraint=None): super(Constrainable,self).__init__(name) @@ -166,7 +166,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: self.constrain(self._default_constraint_) - + #=========================================================================== # Fixing Parameters: #=========================================================================== @@ -182,21 +182,21 @@ class Constrainable(Nameable, Indexable, Parameterizable): rav_i = self._highest_parent_._raveled_index_for(self) self._highest_parent_._set_fixed(rav_i) fix = constrain_fixed - + def unconstrain_fixed(self): """ This parameter will no longer be fixed. """ unconstrained = self.unconstrain(__fixed__) - self._highest_parent_._set_unfixed(unconstrained) + self._highest_parent_._set_unfixed(unconstrained) unfix = unconstrain_fixed - + def _set_fixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) self._fixes_[index] = FIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED - + def _set_unfixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) @@ -212,7 +212,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): self._fixes_[fixed_indices] = FIXED else: self._fixes_ = None - + def _has_fixes(self): return hasattr(self, "_fixes_") and self._fixes_ is not None @@ -222,17 +222,17 @@ class Constrainable(Nameable, Indexable, Parameterizable): def set_prior(self, prior, warning=True, update=True): repriorized = self.unset_priors() self._add_to_index_operations(self.priors, repriorized, prior, warning, update) - + def unset_priors(self, *priors): return self._remove_from_index_operations(self.priors, priors) - + def log_prior(self): """evaluate the prior""" if self.priors.size > 0: x = self._get_params() return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) return 0. - + def _log_prior_gradients(self): """evaluate the gradients of the priors""" import numpy as np @@ -242,7 +242,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] return ret return 0. 
- + #=========================================================================== # Constrain operations -> done #=========================================================================== @@ -269,7 +269,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): transformats of this parameter object. """ return self._remove_from_index_operations(self.constraints, transforms) - + def constrain_positive(self, warning=True, update=True): """ :param warning: print a warning if re-constraining parameters. @@ -314,7 +314,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): Remove (lower, upper) bounded constrain from this parameter/ """ self.unconstrain(Logistic(lower, upper)) - + def _parent_changed(self, parent): from index_operations import ParameterIndexOperationsView self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size) @@ -340,7 +340,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): removed = np.union1d(removed, unconstrained) if t is __fixed__: self._highest_parent_._set_unfixed(unconstrained) - + return removed diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 128dfca3..c72de182 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -53,20 +53,17 @@ class SparseGP(GP): self.add_parameter(self.Z, index=0) self.parameters_changed() - def _update_gradients_Z(self, add=False): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) + def _gradients_Z(self): + #The derivative of the bound wrt the inducing inputs Z (unless they're all fixed) if not self.Z.is_fixed: - if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) - else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) if self.X_variance is None: - self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) + self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) else: - self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) + self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) + self._gradients_Z() def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a7eb0adb..a5e8615d 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -22,18 +22,18 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, # generate GPLVM-like data X = _np.random.rand(num_inputs, input_dim) lengthscales = _np.random.rand(input_dim) - k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) + k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) #+ GPy.kern.white(input_dim, 0.01) ) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T - # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) - k = GPy.kern.linear(input_dim)# +
GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) - # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) - # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) - # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) p = .3 @@ -73,7 +73,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True): data = GPy.util.datasets.oil_100() Y = data['X'] # create simple GP model - kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) + kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6) m = GPy.models.GPLVM(Y, 6, kernel=kernel) m.data_labels = data['Y'].argmax(axis=1) if optimize: m.optimize('scg', messages=verbose) @@ -88,7 +88,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci Y = Y - Y.mean(0) Y /= Y.std(0) # Create the model - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -138,7 +138,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 (1 - var))) + .001 Z = _np.random.permutation(X)[:num_inducing] - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c @@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) Y = data['X'][:N] Yn = Gaussian(Y, normalize=True) m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) @@ -435,7 +435,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() # optimize - back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) + back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) 
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) @@ -470,7 +470,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() Q = 6 - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) # optimize m.ensure_default_constraints() diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 214e230f..630d74da 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,7 +1,7 @@ from _src.rbf import RBF from _src.white import White from _src.kern import Kern -Linear = 'foo' +from _src.linear import Linear #import bias #import Brownian #import coregionalize diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index edb82ef0..acc69fd4 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -34,7 +34,7 @@ class Add(Kern): :param X: the first set of inputs to the kernel :param X2: (optional) the second set of arguments to the kernel. If X2 is None, this is passed through to the 'part' object, which handles this as X2 == X. """ assert X.shape[1] == self.input_dim if X2 is None: @@ -48,9 +48,6 @@ class Add(Kern): def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - def gradients_X(self, dL_dK, X, X2=None): """Compute the gradient of the objective function with respect to X.
@@ -69,123 +66,125 @@ class Add(Kern): return target def Kdiag(self, X): - """Compute the diagonal of the covariance function for inputs X.""" assert X.shape[1] == self.input_dim return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) + def psi0(self, Z, mu, S): - target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.size) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): - target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S + return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) def psi1(self, Z, mu, S): - target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.size)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are num_samples,num_inducing,input_dim""" - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S + return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) def psi2(self, Z, mu, S): - """ - Computer the psi2 statistics for the covariance function.
- - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - - """ - target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) # compute the "cross" terms - # TODO: input_slices needed - crossterms = 0 + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #from fixed import Fixed - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) + for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2): + # white doesn't combine with anything + if isinstance(p1, White) or isinstance(p2, White): + pass + # rbf X bias + #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): + elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): + tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2]) + psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) + #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): + elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): + tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1]) + psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" + return psi2 - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #from fixed import Fixed - target += crossterms + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. + + + p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) + + + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #from fixed import Fixed + + target = np.zeros(Z.shape) + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+ else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. + + + target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) return target - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.size) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #from fixed import Fixed - # compute the "cross" terms - # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): - p1, p2 = self._parameters_[i1], self._parameters_[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] + target_mu = np.zeros(mu.shape) + target_S = np.zeros(S.shape) + for p1, is1 in zip(self._parameters_, self.input_slices): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) + #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. - return self._transform_gradients(target) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - # target *= 2 - - # compute the "cross" terms - # TODO: we need input_slices here.
- for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) + a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) + target_mu += a + target_S += b return target_mu, target_S def plot(self, *args, **kwargs): diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index b5b84305..dd87200e 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -9,7 +9,7 @@ from ...core.parameterization.param import Param class Kern(Parameterized): - def __init__(self,input_dim,name): + def __init__(self, input_dim, name): """ The base class for a kernel: a positive definite function which forms of a covariance function (kernel). @@ -22,21 +22,15 @@ class Kern(Parameterized): super(Kern, self).__init__(name) self.input_dim = input_dim - def K(self,X,X2,target): + def K(self, X, X2, target): raise NotImplementedError - def Kdiag(self,X,target): + def Kdiag(self, X, target): raise NotImplementedError - def _param_grad_helper(self,dL_dK,X,X2,target): + def _param_grad_helper(self, dL_dK, X, X2, target): raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max?? - # In the base case compute this by calling _param_grad_helper. Need to - # override for stationary covariances (for example) to save - # time. - for i in range(X.shape[0]): - self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) def psi0(self,Z,mu,S,target): raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): + def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target): raise NotImplementedError def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): raise NotImplementedError diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index b3765774..7f5d43d3 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -9,6 +9,7 @@ from ...util.linalg import tdot from ...util.misc import fast_array_equal, param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp +from ...util.caching import Cacher, cache_this class Linear(Kern): """ @@ -45,22 +46,35 @@ class Linear(Kern): variances = np.ones(self.input_dim) self.variances = Param('variances', variances, Logexp()) - #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape) self.add_parameter(self.variances) - self.variances.add_observer(self, self.update_variance) + self.variances.add_observer(self, self._on_changed) - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2 = np.empty(shape=(2, 1)) + def _on_changed(self, obj): + self._notify_observers() - def update_variance(self, v): - self.variances2 = np.square(self.variances) + @cache_this(limit=3, reset_on_self=True) + def K(self, X, X2=None): + if self.ARD: + if X2 is None: + return tdot(X*np.sqrt(self.variances)) + else: + rv = np.sqrt(self.variances) + return np.dot(X*rv, (X2*rv).T) + else: + return self._dot_product(X, X2) * self.variances - def on_input_change(self, X): - self._K_computations(X, None) + @cache_this(limit=3, reset_on_self=False) + def _dot_product(self, X, X2=None): + if X2 is None: + return tdot(X) + else: + return np.dot(X, X2.T) + + def Kdiag(self, X): + return np.sum(self.variances *
np.square(X), -1) def update_gradients_full(self, dL_dK, X): - self.variances.gradient[:] = 0 + self.variances.gradient = np.zeros(self.variances.size) self._param_grad_helper(dL_dK, X, None, self.variances.gradient) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): @@ -68,7 +82,7 @@ class Linear(Kern): if self.ARD: self.variances.gradient = tmp.sum(0) else: - self.variances.gradient = tmp.sum() + self.variances.gradient = np.atleast_1d(tmp.sum()) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) @@ -85,25 +99,8 @@ class Linear(Kern): if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) else: self.variances.gradient += tmp.sum() #from Kmm - self._K_computations(Z, None) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - def K(self, X, X2, target): - if self.ARD: - XX = X * np.sqrt(self.variances) - if X2 is None: - target += tdot(XX) - else: - XX2 = X2 * np.sqrt(self.variances) - target += np.dot(XX, XX2.T) - else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += self.variances * self._dot_product - - def Kdiag(self, X, target): - np.add(target, np.sum(self.variances * np.square(X), -1), target) - def _param_grad_helper(self, dL_dK, X, X2, target): if self.ARD: if X2 is None: @@ -112,18 +109,16 @@ class Linear(Kern): product = X[:, None, :] * X2[None, :, :] target += (dL_dK[:, :, None] * product).sum(0).sum(0) else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += np.sum(self._dot_product * dL_dK) + target += np.sum(self._dot_product(X, X2) * dL_dK) - def gradients_X(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2=None): if X2 is None: - target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) + return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) else: - target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) + return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) - def dKdiag_dX(self,dL_dKdiag,X,target): - target += 2.*self.variances*dL_dKdiag[:,None]*X + def gradients_X_diag(self, dL_dKdiag, X): + return 2.*self.variances*dL_dKdiag[:,None]*X #---------------------------------------# # PSI statistics # @@ -273,15 +268,15 @@ class Linear(Kern): # Precomputations # #---------------------------------------# - def _K_computations(self, X, X2): - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): - self._X = X.copy() - if X2 is None: - self._dot_product = tdot(param_to_array(X)) - self._X2 = None - else: - self._X2 = X2.copy() - self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) + #def _K_computations(self, X, X2): + #if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): + #self._X = X.copy() + #if X2 is None: + ##self._dot_product = tdot(param_to_array(X)) + #self._X2 = None + #else: + #self._X2 = X2.copy() + #self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) def _psi_computations(self, Z, mu, S): # here are the "statistics" for psi1 and psi2 diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 67637770..1d033f70 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -2,9 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from kern import Kern -from coregionalize import Coregionalize import numpy as np -import hashlib class Prod(Kern): """ @@ -17,7 +15,7 @@ class Prod(Kern): :rtype: kernel 
object """ - def __init__(self,k1,k2,tensor=False): + def __init__(self, k1, k2, tensor=False): if tensor: super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name) self.slice1 = slice(0,k1.input_dim) @@ -25,64 +23,43 @@ class Prod(Kern): else: assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension." super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name) - self.slice1 = slice(0,self.input_dim) - self.slice2 = slice(0,self.input_dim) + self.slice1 = slice(0, self.input_dim) + self.slice2 = slice(0, self.input_dim) self.k1 = k1 self.k2 = k2 self.add_parameters(self.k1, self.k2) - #initialize cache - self._X, self._X2 = np.empty(shape=(2,1)) - self._params = None - def K(self, X, X2=None): - self._K_computations(X, X2) - return self._K1 * self._K2 + if X2 is None: + return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None) + else: + return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2]) def Kdiag(self, X): return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) def update_gradients_full(self, dL_dK, X): - self._K_computations(X, None) - self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) - self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) + self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1]) + self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2]) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] ) self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] ) def gradients_X(self, dL_dK, X, X2=None): - """derivative of the covariance matrix with respect to X.""" - self._K_computations(X, X2) target = np.zeros(X.shape) if X2 is None: - target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None) - target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None) + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None) else: - target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1]) - target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2]) - + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2]) return target - def dKdiag_dX(self, dL_dKdiag, X, target): - K1 = np.zeros(X.shape[0]) - K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],K1) - self.k2.Kdiag(X[:,self.slice2],K2) + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2]) + return target - 
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) - - def _K_computations(self, X, X2): - if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): - self._X = X.copy() - self._params == self._get_params().copy() - if X2 is None: - self._X2 = None - self._K1 = self.k1.K(X[:,self.slice1],None) - self._K2 = self.k2.K(X[:,self.slice2],None) - else: - self._X2 = X2.copy() - self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1]) - self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2]) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 02640fdc..0508436f 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -79,17 +79,18 @@ class RBF(Kern): ret[:] = self.variance return ret - #TODO: remove TARGET! - def psi0(self, Z, mu, S, target): - target += self.variance + def psi0(self, Z, mu, S): + ret = np.empty(mu.shape[0], dtype=np.float64) + ret[:] = self.variance + return ret - def psi1(self, Z, mu, S, target): + def psi1(self, Z, mu, S): self._psi_computations(Z, mu, S) - target += self._psi1 + return self._psi1 - def psi2(self, Z, mu, S, target): + def psi2(self, Z, mu, S): self._psi_computations(Z, mu, S) - target += self._psi2 + return self._psi2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) @@ -154,6 +155,37 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + + #psi1 + denominator = (self.lengthscale2 * (self._psi1_denom)) + dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) + grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) + + #psi2 + term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim + term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim + dZ = self._psi2[:, :, :, None] * (term1[None] + term2) + grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) + + return grad + + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + #psi1 + tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom + grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) + grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) + + tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom + grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) + grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) + + return grad_mu, grad_S + + + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) @@ -171,36 +203,7 @@ class RBF(Kern): # PSI statistics # #---------------------------------------# - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self._psi_computations(Z, mu, S) - denominator = (self.lengthscale2 * (self._psi1_denom)) - dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - self._psi_computations(Z, mu, S) - tmp = 
self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim - dZ = self._psi2[:, :, :, None] * (term1[None] + term2) - target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom - target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) - target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - #---------------------------------------# + #---------------------------------------# # Precomputations # #---------------------------------------# @@ -362,6 +365,7 @@ class RBF(Kern): #include #include """ + mu = param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 914ca4ae..5fb1ca59 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = state.pop() SparseGP._setstate(self, state) - def dL_dmuS(self): - dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance) - dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) - dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 - dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - - return dL_dmu, dL_dS - def KL_divergence(self): var_mean = np.square(self.X).sum() var_S = np.sum(self.X_variance - np.log(self.X_variance)) return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) + super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.KL_divergence() - dL_dmu, dL_dS = self.dL_dmuS() + dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) # dL: self.q.mean.gradient = dL_dmu diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 51ba56f3..1f10cd64 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -1,46 +1,89 @@ -from ..core.parameterization.array_core import ObservableArray, ParamList +from ..core.parameterization.parameter_core import Observable +from ..core.parameterization.array_core import ParamList + class Cacher(object): - def __init__(self, operation, limit=5): + def __init__(self, operation, limit=5, reset_on_first=False): self.limit = 
int(limit) + self._reset_on_first = reset_on_first self.operation=operation - self.cached_inputs = ParamList([]) + self.cached_inputs = [] self.cached_outputs = [] self.inputs_changed = [] - def __call__(self, X): - assert isinstance(X, ObservableArray) - if X in self.cached_inputs: - i = self.cached_inputs.index(X) + def __call__(self, *args): + if self._reset_on_first: + assert isinstance(args[0], Observable) + args[0].add_observer(args[0], self.reset) + cached_args = args + else: + cached_args = args[1:] + + + if not all([isinstance(arg, Observable) for arg in cached_args]): + return self.operation(*args) + if cached_args in self.cached_inputs: + i = self.cached_inputs.index(cached_args) if self.inputs_changed[i]: - self.cached_outputs[i] = self.operation(X) + self.cached_outputs[i] = self.operation(*args) self.inputs_changed[i] = False return self.cached_outputs[i] else: if len(self.cached_inputs) == self.limit: - X_ = self.cached_inputs.pop(0) - X_.remove_observer(self) + args_ = self.cached_inputs.pop(0) + [a.remove_observer(self) for a in args_] self.inputs_changed.pop(0) self.cached_outputs.pop(0) - self.cached_inputs.append(X) - self.cached_outputs.append(self.operation(X)) + self.cached_inputs.append(cached_args) + self.cached_outputs.append(self.operation(*args)) self.inputs_changed.append(False) - X.add_observer(self, self.on_cache_changed) + [a.add_observer(self, self.on_cache_changed) for a in args] return self.cached_outputs[-1] - def on_cache_changed(self, X): - #print id(X) - Xbase = X - while Xbase is not None: - try: - i = self.cached_inputs.index(X) - break - except ValueError: - Xbase = X.base - continue - self.inputs_changed[i] = True + def on_cache_changed(self, arg): + self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] + + def reset(self, obj): + [[a.remove_observer(self) for a in args] for args in self.cached_inputs] + self.cached_inputs = [] + self.cached_outputs = [] + self.inputs_changed = [] + + + + +def cache_this(limit=5, reset_on_self=False): + def limited_cache(f): + c = Cacher(f, limit, reset_on_first=reset_on_self) + def f_wrap(*args): + return c(*args) + f_wrap._cacher = c + return f_wrap + return limited_cache + + + + + + + + + + + + + #Xbase = X + #while Xbase is not None: + #try: + #i = self.cached_inputs.index(X) + #break + #except ValueError: + #Xbase = X.base + #continue + #self.inputs_changed[i] = True + + -
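Reviewer note (illustration only, not part of the patch): the new Cacher/cache_this machinery in GPy/util/caching.py is exercised in GPy/kern/_src/linear.py, where K and _dot_product are decorated. Below is a minimal sketch of the intended usage pattern; the kernel class is hypothetical. It assumes the decorated method's array arguments are Observable (e.g. Param/ObservableArray instances) — for plain ndarrays the Cacher simply falls through to an uncached call — and that reset_on_self=True additionally registers a reset callback on the bound object, so the whole cache is dropped whenever that object notifies its observers (cf. Linear._on_changed).

    import numpy as np
    from GPy.kern._src.kern import Kern
    from GPy.util.caching import cache_this

    class CachedDot(Kern):
        """Hypothetical kernel sketching the caching decorator usage."""
        def __init__(self, input_dim):
            super(CachedDot, self).__init__(input_dim, 'cached_dot')

        @cache_this(limit=3, reset_on_self=False)
        def K(self, X, X2=None):
            # the dot product is recomputed only when a cached Observable input
            # signals a change; up to `limit` distinct input tuples are kept
            return np.dot(X, (X if X2 is None else X2).T)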