diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 9ce0e8f6..e3a5b137 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -14,21 +14,19 @@ class ObservableArray(np.ndarray, Observable): takes exactly one argument, which is this array itself. """ __array_priority__ = -1 # Never give back ObservableArray - def __new__(cls, input_array): + def __new__(cls, input_array, *a, **kw): if not isinstance(input_array, ObservableArray): obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) else: obj = input_array cls.__name__ = "ObservableArray\n " + super(ObservableArray, obj).__init__(*a, **kw) return obj - - def __init__(self, *a, **kw): - super(ObservableArray, self).__init__(*a, **kw) - + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return self._observer_callables_ = getattr(obj, '_observer_callables_', None) - + def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) @@ -50,10 +48,10 @@ class ObservableArray(np.ndarray, Observable): if self._s_not_empty(s): super(ObservableArray, self).__setitem__(s, val) self.notify_observers(self[s]) - + def __getslice__(self, start, stop): return self.__getitem__(slice(start, stop)) - + def __setslice__(self, start, stop, val): return self.__setitem__(slice(start, stop), val) @@ -85,7 +83,7 @@ class ObservableArray(np.ndarray, Observable): self.notify_observers() return r - + def __ifloordiv__(self, *args, **kwargs): r = np.ndarray.__ifloordiv__(self, *args, **kwargs) self.notify_observers() diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 3ebeb566..a2dc9514 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -3,7 +3,7 @@ import itertools import numpy -from parameter_core import OptimizationHandlable, Gradcheckable, adjust_name_for_printing +from parameter_core import OptimizationHandlable, adjust_name_for_printing from array_core import ObservableArray ###### printing @@ -43,13 +43,12 @@ class Param(OptimizationHandlable, ObservableArray): _fixes_ = None _parameters_ = [] def __new__(cls, name, input_array, default_constraint=None): - obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) + obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, default_constraint=default_constraint)) cls.__name__ = "Param" obj._current_slice_ = (slice(obj.shape[0]),) obj._realshape_ = obj.shape obj._realsize_ = obj.size obj._realndim_ = obj.ndim - obj._updated_ = False from lists_and_dicts import SetDict obj._tied_to_me_ = SetDict() obj._tied_to_ = [] @@ -86,7 +85,6 @@ class Param(OptimizationHandlable, ObservableArray): self._realshape_ = getattr(obj, '_realshape_', None) self._realsize_ = getattr(obj, '_realsize_', None) self._realndim_ = getattr(obj, '_realndim_', None) - self._updated_ = getattr(obj, '_updated_', None) self._original_ = getattr(obj, '_original_', None) self._name = getattr(obj, 'name', None) self._gradient_array_ = getattr(obj, '_gradient_array_', None) @@ -121,14 +119,12 @@ class Param(OptimizationHandlable, ObservableArray): self._realndim_, self._tied_to_me_, self._tied_to_, - self._updated_, ) ) def __setstate__(self, state): super(Param, self).__setstate__(state[0]) state = list(state[1]) - self._updated_ = state.pop() self._tied_to_ = state.pop() self._tied_to_me_ = state.pop() 
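The ObservableArray rewrite above leans on numpy's subclassing contract: __init__ is never invoked for views or slices, so per-instance state has to be attached in __new__ and propagated in __array_finalize__. A minimal, self-contained sketch of that pattern with plain numpy (the class name is illustrative; only the attribute name is taken from the patch):

    import numpy as np

    class TrackedArray(np.ndarray):
        def __new__(cls, input_array):
            # writeable, C-contiguous float64 buffer, re-viewed as the subclass
            obj = np.atleast_1d(np.require(input_array, dtype=np.float64,
                                           requirements=['W', 'C'])).view(cls)
            obj._observer_callables_ = []
            return obj

        def __array_finalize__(self, obj):
            # called for explicit construction, views and slices alike;
            # obj is None only when coming straight from __new__
            if obj is None:
                return
            self._observer_callables_ = getattr(obj, '_observer_callables_', None)

    a = TrackedArray([1., 2., 3.])
    # slices go through __array_finalize__, so they inherit the observer list
    assert a[1:]._observer_callables_ is a._observer_callables_
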
self._realndim_ = state.pop() diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 351eacef..38fe0526 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -41,10 +41,8 @@ class Observable(object): """ _updated = True def __init__(self, *args, **kwargs): + super(Observable, self).__init__(*args, **kwargs) self._observer_callables_ = [] - - def __del__(self, *args, **kwargs): - del self._observer_callables_ def add_observer(self, observer, callble, priority=0): self._insert_sorted(priority, observer, callble) @@ -161,7 +159,9 @@ class Parentable(object): """ _parent_ = None _parent_index_ = None - + def __init__(self, *args, **kwargs): + super(Parentable, self).__init__(*args, **kwargs) + def has_parent(self): """ Return whether this parentable object currently has a parent. @@ -205,6 +205,7 @@ class Gradcheckable(Parentable): """ def __init__(self, *a, **kw): super(Gradcheckable, self).__init__(*a, **kw) + def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3): """ Check the gradient of this parameter with respect to the highest parent's @@ -272,6 +273,9 @@ class Indexable(object): Enable enraveled indexes and offsets for this object. The raveled index of an object is the index for its parameters in a flattened int array. """ + def __init__(self, *a, **kw): + super(Indexable, self).__init__(*a, **kw) + def _raveled_index(self): """ Flattened array of ints, specifying the index of this object. @@ -534,8 +538,11 @@ class OptimizationHandlable(Constrainable, Observable): """ This enables optimization handles on an Object as done in GPy 0.4. - transformed: make sure the transformations and constraints etc are handled + `..._transformed`: make sure the transformations and constraints etc are handled """ + def __init__(self, name, default_constraint=None, *a, **kw): + super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) + def transform(self): [np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] @@ -551,8 +558,8 @@ class OptimizationHandlable(Constrainable, Observable): return p def _set_params_transformed(self, p): - #if p is self._param_array_: - p = p.copy() + if p is self._param_array_: + p = p.copy() if self._has_fixes(): self._param_array_[self._fixes_] = p else: self._param_array_[:] = p self.untransform() @@ -625,6 +632,24 @@ class OptimizationHandlable(Constrainable, Observable): [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...) + #=========================================================================== + # For shared memory arrays. 
This does nothing in Param, but sets the memory + # for all parameterized objects + #=========================================================================== + def _propagate_param_grad(self, parray, garray): + pi_old_size = 0 + for pi in self._parameters_: + pislice = slice(pi_old_size, pi_old_size+pi.size) + + self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat + self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat + + pi._param_array_.data = parray[pislice].data + pi._gradient_array_.data = garray[pislice].data + + pi._propagate_param_grad(parray[pislice], garray[pislice]) + pi_old_size += pi.size + class Parameterizable(OptimizationHandlable): def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) @@ -811,22 +836,21 @@ class Parameterizable(OptimizationHandlable): p._parent_index_ = i pslice = slice(old_size, old_size+p.size) - pi_old_size = old_size - for pi in p.flattened_parameters: - pislice = slice(pi_old_size, pi_old_size+pi.size) - - self._param_array_[pislice] = pi._param_array_.flat - self._gradient_array_[pislice] = pi._gradient_array_.flat - - pi._param_array_.data = self._param_array_[pislice].data - pi._gradient_array_.data = self._gradient_array_[pislice].data - - pi_old_size += pi.size + # first connect all children + p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice]) + + # then connect children to self + self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C') + self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C') + + if not p._param_array_.flags['C_CONTIGUOUS']: + import ipdb;ipdb.set_trace() p._param_array_.data = self._param_array_[pslice].data p._gradient_array_.data = self._gradient_array_[pslice].data self._param_slices_.append(pslice) + self._add_parameter_name(p, ignore_added_names=ignore_added_names) old_size += p.size diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 6d06018a..a98f0098 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -65,8 +65,8 @@ class Parameterized(Parameterizable, Pickleable): # **Never** call parameters_changed() yourself __metaclass__ = ParametersChangedMeta #=========================================================================== - def __init__(self, name=None, *a, **kw): - super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw) + def __init__(self, name=None, parameters=[], *a, **kw): + super(Parameterized, self).__init__(name=name, *a, **kw) self._in_init_ = True self._parameters_ = ArrayList() self.size = sum(p.size for p in self._parameters_) @@ -76,6 +76,7 @@ class Parameterized(Parameterizable, Pickleable): self._param_slices_ = [] self._connect_parameters() del self._in_init_ + self.add_parameters(*parameters) def build_pydot(self, G=None): import pydot # @UnresolvedImport @@ -205,25 +206,29 @@ class Parameterized(Parameterizable, Pickleable): return found_params def __getitem__(self, name, paramlist=None): - if paramlist is None: - paramlist = self.grep_param_names(name) - if len(paramlist) < 1: raise AttributeError, name - if len(paramlist) == 1: - if isinstance(paramlist[-1], Parameterized): - paramlist = paramlist[-1].flattened_parameters - if len(paramlist) != 1: - return ParamConcatenation(paramlist) - return paramlist[-1] - return 
ParamConcatenation(paramlist) + if isinstance(name, (int, slice, tuple, np.ndarray)): + return self._param_array_[name] + else: + if paramlist is None: + paramlist = self.grep_param_names(name) + if len(paramlist) < 1: raise AttributeError, name + if len(paramlist) == 1: + if isinstance(paramlist[-1], Parameterized): + paramlist = paramlist[-1].flattened_parameters + if len(paramlist) != 1: + return ParamConcatenation(paramlist) + return paramlist[-1] + return ParamConcatenation(paramlist) def __setitem__(self, name, value, paramlist=None): if isinstance(name, (slice, tuple, np.ndarray)): self._param_array_[name] = value + self.notify_observers() else: try: param = self.__getitem__(name, paramlist) except AttributeError as a: raise a param[:] = value - + def __setattr__(self, name, val): # override the default behaviour, if setting a param, so broadcasting can by used if hasattr(self, '_parameters_'): diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 4c929cc8..01706922 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -63,14 +63,15 @@ class SpikeAndSlabPrior(VariationalPrior): class VariationalPosterior(Parameterized): - def __init__(self, means=None, variances=None, name=None, **kw): - super(VariationalPosterior, self).__init__(name=name, **kw) + def __init__(self, means=None, variances=None, name=None, *a, **kw): + super(VariationalPosterior, self).__init__(name=name, *a, **kw) self.mean = Param("mean", means) self.variance = Param("variance", variances, Logexp()) - self.add_parameters(self.mean, self.variance) self.ndim = self.mean.ndim self.shape = self.mean.shape self.num_data, self.input_dim = self.mean.shape + self.add_parameters(self.mean, self.variance) + self.num_data, self.input_dim = self.mean.shape if self.has_uncertain_inputs(): assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion" @@ -78,17 +79,23 @@ class VariationalPosterior(Parameterized): return not self.variance is None def __getitem__(self, s): - import copy - n = self.__new__(self.__class__) - dc = copy.copy(self.__dict__) - dc['mean'] = dc['mean'][s] - dc['variance'] = dc['variance'][s] - dc['shape'] = dc['mean'].shape - dc['ndim'] = dc['ndim'] - dc['num_data'], dc['input_dim'] = self.mean.shape[0], self.mean.shape[1] if dc['ndim'] > 1 else 1 - n.__dict__ = dc - return n - + if isinstance(s, (int, slice, tuple, list, np.ndarray)): + import copy + n = self.__new__(self.__class__, self.name) + dc = self.__dict__.copy() + dc['mean'] = self.mean[s] + dc['variance'] = self.variance[s] + dc['_parameters_'] = copy.copy(self._parameters_) + n.__dict__.update(dc) + n._parameters_[dc['mean']._parent_index_] = dc['mean'] + n._parameters_[dc['variance']._parent_index_] = dc['variance'] + n.ndim = n.mean.ndim + n.shape = n.mean.shape + n.num_data = n.mean.shape[0] + n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1 + return n + else: + return super(VariationalPrior, self).__getitem__(s) class NormalPosterior(VariationalPosterior): ''' diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 818dff69..dfd922f0 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -324,14 +324,14 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) - 
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist] - - k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2)) - m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) - m.ensure_default_constraints() - - for i, bgplvm in enumerate(m.bgplvms): - m['{}_noise'.format(i)] = bgplvm.likelihood.Y.var() / 500. + + #Ylist = [Ylist[0]] + k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))] + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw) + + m['.*noise'] = [Y.var()/500. for Y in Ylist] + #for i, Y in enumerate(Ylist): + # m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500. if optimize: print "Optimizing Model:" diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 47f6ea09..561f0d1a 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -46,7 +46,7 @@ class ExactGaussianInference(object): alpha, _ = dpotrs(LW, YYT_factor, lower=1) log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor)) - + dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi) return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK} diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index d35080f6..52f44cdf 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -77,7 +77,8 @@ class VarDTC(object): num_inducing = Z.shape[0] num_data = Y.shape[0] # kernel computations, using BGPLVM notation - Kmm = kern.K(Z) + + Kmm = kern.K(Z) +np.eye(Z.shape[0]) * self.const_jitter Lm = jitchol(Kmm+np.eye(Z.shape[0])*self.const_jitter) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 87dda365..3514a224 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -12,24 +12,19 @@ class Add(Kern): assert all([isinstance(k, Kern) for k in subkerns]) if tensor: input_dim = sum([k.input_dim for k in subkerns]) - self.self.active_dims = [] + self.input_slices = [] n = 0 for k in subkerns: - self.self.active_dims.append(slice(n, n+k.input_dim)) + self.input_slices.append(slice(n, n+k.input_dim)) n += k.input_dim else: - #assert all([k.input_dim == subkerns[0].input_dim for k in subkerns]) - #input_dim = subkerns[0].input_dim - #self.input_slices = [slice(None) for k in subkerns] - input_dim = reduce(np.union1d, map(lambda x: np.r_[x.active_dims], subkerns)) + assert all([k.input_dim == subkerns[0].input_dim for k in subkerns]) + input_dim = subkerns[0].input_dim + self.input_slices = [slice(None) for k in subkerns] super(Add, self).__init__(input_dim, 'add') self.add_parameters(*subkerns) - - @property - def parts(self): - return self._parameters_ - - @Cache_this(limit=1, force_kwargs=('which_parts',)) + + def K(self, X, X2=None): """ Compute the kernel function. 
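The VarDTC hunk above adds diagonal jitter to Kmm before the Cholesky factorisation. The point of that pattern is numerical: a Gram matrix that is only positive semi-definite factorises reliably once its diagonal is lifted by a small constant. A plain-numpy sketch of the idea (the jitter value below is illustrative, not the patch's const_jitter):

    import numpy as np

    def jittered_cholesky(K, jitter=1e-6):
        # lift the diagonal so a (numerically) semi-definite matrix factorises
        return np.linalg.cholesky(K + np.eye(K.shape[0]) * jitter)

    Z = np.random.randn(6, 2)
    Kmm = Z.dot(Z.T)                  # rank-2 Gram matrix of 6 points
    L = jittered_cholesky(Kmm)        # plain np.linalg.cholesky would usually fail here
    assert np.allclose(L.dot(L.T), Kmm + 1e-6 * np.eye(6), atol=1e-8)
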
@@ -78,19 +73,18 @@ class Add(Kern): def psi0(self, Z, variational_posterior): - return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0) + return np.sum([p.psi0(Z[:, i_s], variational_posterior[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0) def psi1(self, Z, variational_posterior): - return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) + return np.sum([p.psi1(Z[:, i_s], variational_posterior[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) def psi2(self, Z, variational_posterior): - psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) + psi2 = np.sum([p.psi2(Z[:, i_s], variational_posterior[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) # compute the "cross" terms - from white import White + from static import White, Bias from rbf import RBF #from rbf_inv import RBFInv - from bias import Bias from linear import Linear #ffrom fixed import Fixed @@ -101,24 +95,20 @@ class Add(Kern): # rbf X bias #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): - tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2]) + tmp = p2.psi1(Z[:,i2], variational_posterior[:, i_s]) psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): - tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1]) + tmp = p1.psi1(Z[:,i1], variational_posterior[:, i_s]) psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) else: raise NotImplementedError, "psi2 cannot be computed for this kernel" return psi2 def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - from white import White - from rbf import RBF - #from rbf_inv import RBFInv - #from bias import Bias - from linear import Linear - #ffrom fixed import Fixed - + from static import White, Bias + mu, S = variational_posterior.mean, variational_posterior.variance + for p1, is1 in zip(self._parameters_, self.input_slices): #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2! @@ -131,20 +121,15 @@ class Add(Kern): elif isinstance(p2, Bias): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], variational_posterior[:, is1]) * 2. - p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) + p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z[:,is1], variational_posterior[:, is1]) def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - from white import White - from rbf import RBF - #from rbf_inv import rbfinv - from bias import Bias - from linear import Linear - #ffrom fixed import fixed - + from static import White, Bias + target = np.zeros(Z.shape) for p1, is1 in zip(self._parameters_, self.input_slices): @@ -158,22 +143,17 @@ class Add(Kern): elif isinstance(p2, Bias): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], variational_posterior[:, is2]) * 2. 
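In the psi2 cross-terms above, the Bias x (RBF | Linear) contribution is assembled by broadcasting the psi1 statistic into a symmetric (N, M, M) tensor. The broadcasting step in isolation, with made-up shapes standing in for the kernel calls:

    import numpy as np

    N, M = 4, 3
    tmp = np.random.randn(N, M)        # stands in for p2.psi1(Z, variational_posterior)
    bias_variance = 0.5                # stands in for p1.variance

    cross = bias_variance * (tmp[:, :, None] + tmp[:, None, :])
    assert cross.shape == (N, M, M)
    # symmetric in the two inducing-point axes, as psi2 must be
    assert np.allclose(cross, np.swapaxes(cross, 1, 2))
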
- target += p1.gradients_z_variational(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) + target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z[:,is1], variational_posterior[:, is1]) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - from white import white - from rbf import rbf - #from rbf_inv import rbfinv - #from bias import bias - from linear import linear - #ffrom fixed import fixed - - target_mu = np.zeros(mu.shape) - target_S = np.zeros(S.shape) + from static import White, Bias + + target_mu = np.zeros(variational_posterior.shape) + target_S = np.zeros(variational_posterior.shape) for p1, is1 in zip(self._parameters_, self.input_slices): #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! @@ -181,15 +161,15 @@ class Add(Kern): for p2, is2 in zip(self._parameters_, self.input_slices): if p2 is p1: continue - if isinstance(p2, white): + if isinstance(p2, White): continue - elif isinstance(p2, bias): + elif isinstance(p2, Bias): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], variational_posterior[:, is2]) * 2. - a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) + a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z[:,is1], variational_posterior[:, is1]) target_mu += a target_S += b return target_mu, target_S diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 33b9ff08..96bab646 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -173,7 +173,7 @@ class Kern(Parameterized): """ Returns the sensitivity for each dimension of this kernel. """ - return self.kern.input_sensitivity() + return np.zeros(self.input_dim) def __add__(self, other): """ Overloading of the '+' operator. 
for more control, see self.add """ diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index 135e3f9e..f344357c 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -55,7 +55,7 @@ class White(Static): def psi2(self, Z, variational_posterior): return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) - def update_gradients_full(self, dL_dK, X): + def update_gradients_full(self, dL_dK, X, X2=None): self.variance.gradient = np.trace(dL_dK) def update_gradients_diag(self, dL_dKdiag, X): @@ -79,10 +79,10 @@ class Bias(Static): self.variance.gradient = dL_dK.sum() def update_gradients_diag(self, dL_dKdiag, X): - self.variance.gradient = dL_dK.sum() + self.variance.gradient = dL_dKdiag.sum() def psi2(self, Z, variational_posterior): - ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) + ret = np.empty((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) ret[:] = self.variance**2 return ret diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index dd1c44ba..b547f2d1 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -5,15 +5,15 @@ import numpy as np import itertools import pylab -from ..core import Model, SparseGP +from ..core import Model from ..util.linalg import PCA from ..kern import Kern -from bayesian_gplvm import BayesianGPLVM from ..core.parameterization.variational import NormalPosterior, NormalPrior -from ..inference.latent_function_inference.var_dtc import VarDTCMissingData -from ..likelihoods.gaussian import Gaussian +from ..core.parameterization import Param, Parameterized +from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC +from ..likelihoods import Gaussian -class MRD2(Model): +class MRD(Model): """ Apply MRD to all given datasets Y in Ylist. @@ -43,61 +43,110 @@ class MRD2(Model): :param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use :param str name: the name of this model + :param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None """ def __init__(self, Ylist, input_dim, X=None, X_variance=None, initx = 'PCA', initz = 'permute', num_inducing=10, Z=None, kernel=None, - inference_method=None, likelihood=None, name='mrd'): - super(MRD2, self).__init__(name) + inference_method=None, likelihood=None, name='mrd', Ynames=None): + super(MRD, self).__init__(name) # sort out the kernels if kernel is None: from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, name='Y_{}'.format(i)) for i in range(len(Ylist))] + self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))] elif isinstance(kernel, Kern): - self.kern = [kernel.copy(name='Y_{}'.format(i)) for i in range(len(Ylist))] + self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))] else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" 
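The static.py hunk fixes Bias.update_gradients_diag to sum dL_dKdiag rather than the undefined dL_dK. The underlying identity: for K = variance * ones((N, N)), the chain rule gives dL/dvariance as the plain sum of the incoming gradient. A quick finite-difference sanity check of that identity (plain numpy, values illustrative):

    import numpy as np

    N = 5
    dL_dK = np.random.randn(N, N)
    variance = 0.7
    L = lambda v: np.sum(dL_dK * (v * np.ones((N, N))))   # objective as a function of the variance

    eps = 1e-6
    numeric = (L(variance + eps) - L(variance - eps)) / (2 * eps)
    assert np.isclose(numeric, dL_dK.sum())
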
- + self.kern = kernel self.input_dim = input_dim self.num_inducing = num_inducing - + + self.Ylist = Ylist self._in_init_ = True X = self._init_X(initx, Ylist) - self.Z = self._init_Z(initz, X) + self.Z = Param('inducing inputs', self._init_Z(initz, X)) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N if X_variance is None: - X_variance = np.random.uniform(0,.2,X.shape) + X_variance = np.random.uniform(0, .2, X.shape) self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) if likelihood is None: - likelihood = Gaussian() + self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] + else: self.likelihood = likelihood if inference_method is None: - if any(np.any(np.isnan(y)) for y in Ylist): - self.inference_method = VarDTCMissingData(limit=len(Ylist)) + self.inference_method= [] + for y in Ylist: + if np.any(np.isnan(y)): + self.inference_method.append(VarDTCMissingData(limit=1)) + else: + self.inference_method.append(VarDTC(limit=1)) + else: + self.inference_method = inference_method + self.inference_method.set_limit(len(Ylist)) - self.Ylist = Ylist - + self.add_parameters(self.X, self.Z) + + if Ynames is None: + Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] + + for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood): + p = Parameterized(name=n) + p.add_parameter(k) + p.add_parameter(l) + setattr(self, 'Y{}'.format(i), p) + self.add_parameter(p) + self._in_init_ = False + def parameters_changed(self): - for y in self.Ylist: - pass + self._log_marginal_likelihood = 0 + self.posteriors = [] + self.Z.gradient = 0. + self.X.mean.gradient = 0. + self.X.variance.gradient = 0. + + for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): + posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) + + self.posteriors.append(posterior) + self._log_marginal_likelihood += lml + + # likelihood gradients + l.update_gradients(grad_dict.pop('partial_for_likelihood')) + + #gradients wrt kernel + dL_dKmm = grad_dict.pop('dL_dKmm') + k.update_gradients_full(dL_dKmm, self.Z, None) + target = k.gradient.copy() + k.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict) + k.gradient += target - def _init_X(self, init='PCA', likelihood_list=None): - if likelihood_list is None: - likelihood_list = self.likelihood_list - Ylist = [] - for likelihood_or_Y in likelihood_list: - if type(likelihood_or_Y) is np.ndarray: - Ylist.append(likelihood_or_Y) - else: - Ylist.append(likelihood_or_Y.Y) - del likelihood_list + #gradients wrt Z + self.Z.gradient += k.gradients_X(dL_dKmm, self.Z) + self.Z.gradient += k.gradients_Z_expectations( + grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + + dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict) + self.X.mean.gradient += dL_dmean + self.X.variance.gradient += dL_dS + + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.X) + self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) + + def log_likelihood(self): + return self._log_marginal_likelihood + + def _init_X(self, init='PCA', Ylist=None): + if Ylist is None: + Ylist = self.Ylist if init in "PCA_concat": X = PCA(np.hstack(Ylist), self.input_dim)[0] elif init in "PCA_single": @@ -106,7 +155,6 @@ class MRD2(Model): X[:, qs] = PCA(Y, len(qs))[0] else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], 
self.input_dim) - self.X = X return X def _init_Z(self, init="permute", X=None): @@ -116,259 +164,8 @@ class MRD2(Model): Z = np.random.permutation(X.copy())[:self.num_inducing] elif init in "random": Z = np.random.randn(self.num_inducing, self.input_dim) * X.var() - self.Z = Z return Z -class MRD(Model): - """ - Do MRD on given Datasets in Ylist. - All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn, - N must be shared across datasets though. - - :param likelihood_list: list of observed datasets (:py:class:`~GPy.likelihoods.gaussian.Gaussian` if not supplied directly) - :type likelihood_list: [:py:class:`~GPy.likelihoods.likelihood.likelihood` | :py:class:`ndarray`] - :param names: names for different gplvm models - :type names: [str] - :param input_dim: latent dimensionality - :type input_dim: int - :param initx: initialisation method for the latent space : - - * 'concat' - PCA on concatenation of all datasets - * 'single' - Concatenation of PCA on datasets, respectively - * 'random' - Random draw from a normal - - :type initx: ['concat'|'single'|'random'] - :param initz: initialisation method for inducing inputs - :type initz: 'permute'|'random' - :param X: Initial latent space - :param X_variance: Initial latent space variance - :param Z: initial inducing inputs - :param num_inducing: number of inducing inputs to use - :param kernels: list of kernels or kernel shared for all BGPLVMS - :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) - - """ - def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None, - kernels=None, initx='PCA', - initz='permute', _debug=False, **kw): - if names is None: - self.names = ["{}".format(i) for i in range(len(likelihood_or_Y_list))] - - # sort out the kernels - if kernels is None: - kernels = [None] * len(likelihood_or_Y_list) - elif isinstance(kernels, Kern): - kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))] - else: - assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output" - assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!" 
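For reference, the rewritten class is driven the same way as the patched mrd_simulation example further up: one kernel per view, a shared latent space and inducing inputs, and regex assignment for the per-view noise. A condensed, hedged usage sketch with synthetic data (the import path is an assumption; kernel construction and the noise assignment are taken from the example in this patch):

    import numpy as np
    from GPy import kern
    from GPy.models.mrd import MRD          # import path assumed

    N, Q = 30, 3
    Ylist = [np.random.randn(N, D) for D in (8, 5)]        # two views sharing N rows
    k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in Ylist]

    m = MRD(Ylist, input_dim=Q, num_inducing=6, kernel=k, initx="", initz='permute')
    m['.*noise'] = [Y.var() / 500. for Y in Ylist]
    print(m.log_likelihood())
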
- assert not ('kernel' in kw), "pass kernels through `kernels` argument" - - self.input_dim = input_dim - self._debug = _debug - self.num_inducing = num_inducing - - self._in_init_ = True - X = self._init_X(initx, likelihood_or_Y_list) - Z = self._init_Z(initz, X) - self.num_inducing = Z.shape[0] # ensure M==N if M>N - - self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] - del self._in_init_ - - self.gref = self.bgplvms[0] - nparams = np.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms]) - self.nparams = nparams.cumsum() - - self.num_data = self.gref.num_data - - self.NQ = self.num_data * self.input_dim - self.MQ = self.num_inducing * self.input_dim - - Model.__init__(self) - self.ensure_default_constraints() - - def _getstate(self): - return Model._getstate(self) + [self.names, - self.bgplvms, - self.gref, - self.nparams, - self.input_dim, - self.num_inducing, - self.num_data, - self.NQ, - self.MQ] - - def _setstate(self, state): - self.MQ = state.pop() - self.NQ = state.pop() - self.num_data = state.pop() - self.num_inducing = state.pop() - self.input_dim = state.pop() - self.nparams = state.pop() - self.gref = state.pop() - self.bgplvms = state.pop() - self.names = state.pop() - Model._setstate(self, state) - - @property - def X(self): - return self.gref.X - @X.setter - def X(self, X): - try: - self.propagate_param(X=X) - except AttributeError: - if not self._in_init_: - raise AttributeError("bgplvm list not initialized") - @property - def Z(self): - return self.gref.Z - @Z.setter - def Z(self, Z): - try: - self.propagate_param(Z=Z) - except AttributeError: - if not self._in_init_: - raise AttributeError("bgplvm list not initialized") - @property - def X_variance(self): - return self.gref.X_variance - @X_variance.setter - def X_variance(self, X_var): - try: - self.propagate_param(X_variance=X_var) - except AttributeError: - if not self._in_init_: - raise AttributeError("bgplvm list not initialized") - @property - def likelihood_list(self): - return [g.likelihood.Y for g in self.bgplvms] - @likelihood_list.setter - def likelihood_list(self, likelihood_list): - for g, Y in itertools.izip(self.bgplvms, likelihood_list): - g.likelihood.Y = Y - - @property - def auto_scale_factor(self): - """ - set auto_scale_factor for all gplvms - :param b: auto_scale_factor - :type b: - """ - return self.gref.auto_scale_factor - @auto_scale_factor.setter - def auto_scale_factor(self, b): - self.propagate_param(auto_scale_factor=b) - - def propagate_param(self, **kwargs): - for key, val in kwargs.iteritems(): - for g in self.bgplvms: - g.__setattr__(key, val) - - def randomize(self, initx='concat', initz='permute', *args, **kw): - super(MRD, self).randomize(*args, **kw) - self._init_X(initx, self.likelihood_list) - self._init_Z(initz, self.X) - - #def _get_latent_param_names(self): - def _get_param_names(self): - n1 = self.gref._get_param_names() - n1var = n1[:self.NQ * 2 + self.MQ] - # return n1var - # - #def _get_kernel_names(self): - map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), - itertools.izip(ns, - itertools.repeat(name))) - return list(itertools.chain(n1var, *(map_names(\ - SparseGP._get_param_names(g)[self.MQ:], n) \ - for g, n in zip(self.bgplvms, self.names)))) - # kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names)) - # return kernel_names - - #def _get_param_names(self): - # X_names = 
sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) - # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) - # n1var = self._get_latent_param_names() - # kernel_names = self._get_kernel_names() - # return list(itertools.chain(n1var, *kernel_names)) - - #def _get_print_names(self): - # return list(itertools.chain(*self._get_kernel_names())) - - def _get_params(self): - """ - return parameter list containing private and shared parameters as follows: - - ================================================================= - | mu | S | Z || theta1 | theta2 | .. | thetaN | - ================================================================= - """ - X = self.gref.X.ravel() - X_var = self.gref.X_variance.ravel() - Z = self.gref.Z.ravel() - thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms] - params = np.hstack([X, X_var, Z, np.hstack(thetas)]) - return params - -# def _set_var_params(self, g, X, X_var, Z): -# g.X = X.reshape(self.num_data, self.input_dim) -# g.X_variance = X_var.reshape(self.num_data, self.input_dim) -# g.Z = Z.reshape(self.num_inducing, self.input_dim) -# -# def _set_kern_params(self, g, p): -# g.kern._set_params(p[:g.kern.num_params]) -# g.likelihood._set_params(p[g.kern.num_params:]) - - def _set_params(self, x): - start = 0; end = self.NQ - X = x[start:end] - start = end; end += start - X_var = x[start:end] - start = end; end += self.MQ - Z = x[start:end] - thetas = x[end:] - - # set params for all: - for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]): - g._set_params(np.hstack([X, X_var, Z, thetas[s:e]])) -# self._set_var_params(g, X, X_var, Z) -# self._set_kern_params(g, thetas[s:e].copy()) -# g._compute_kernel_matrices() -# if self.auto_scale_factor: -# g.scale_factor = np.sqrt(g.psi2.sum(0).mean() * g.likelihood.precision) -# # self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision) -# g._computations() - - - def update_likelihood_approximation(self): # TODO: object oriented vs script base - for bgplvm in self.bgplvms: - bgplvm.update_likelihood_approximation() - - def log_likelihood(self): - ll = -self.gref.KL_divergence() - for g in self.bgplvms: - ll += SparseGP.log_likelihood(g) - return ll - - def _log_likelihood_gradients(self): - dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms)) - dKLmu, dKLdS = self.gref.dKL_dmuS() - dLdmu -= dKLmu - dLdS -= dKLdS - dLdmuS = np.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() - dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) - - return np.hstack((dLdmuS, - dldzt1, - np.hstack([np.hstack([g.dL_dtheta(), - g.likelihood._gradients(\ - partial=g.partial_for_likelihood)]) \ - for g in self.bgplvms]))) - - - def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False): if axes is None: fig = pylab.figure(num=fignum) diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index d2a236dd..631f2ec2 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -651,7 +651,7 @@ class LaplaceTests(unittest.TestCase): m2['.*white'].constrain_fixed(1e-6) m2['.*rbf.variance'].constrain_bounded(1e-4, 10) m2.randomize() - + if debug: print m1 print m2 @@ -663,9 +663,8 @@ class LaplaceTests(unittest.TestCase): if debug: print m1 print m2 - - m2.parameters_changed() - #m2._set_params(m1._get_params()) + + m2[:] = 
m1[:] #Predict for training points to get posterior mean and variance post_mean, post_var, _, _ = m1.predict(X) @@ -701,8 +700,9 @@ class LaplaceTests(unittest.TestCase): np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) #Check marginals are the same with random m1.randomize() - #m2._set_params(m1._get_params()) - m2.parameters_changed() + import ipdb;ipdb.set_trace() + m2[:] = m1[:] + np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) #Check they are checkgradding
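The test change above relies on the new integer/slice indexing from parameterized.py: m[:] reads the flat parameter array and m[:] = values writes it back, which is what makes copying one model's parameters into another a one-liner. A hedged sketch of that round-trip, assuming GPRegression has been ported to the new parameterization (it is only a convenient stand-in here, not part of this patch):

    import numpy as np
    import GPy

    X = np.random.randn(20, 1)
    Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
    m1 = GPy.models.GPRegression(X, Y)
    m2 = GPy.models.GPRegression(X, Y)

    m1.randomize()
    m2[:] = m1[:]                      # copy the flat parameter vector across models
    assert np.allclose(m1[:], m2[:])
    print(m1['.*variance'])            # string keys still go through the regex lookup
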