diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 8f99ad9f..185cc149 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -67,11 +67,9 @@ class GP(Model): self.add_parameter(self.likelihood) def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, **self.Y_metadata) - self.likelihood.update_gradients(np.diag(grad_dict['dL_dK']), **self.Y_metadata) - #self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata) - #self.likelihood.update_gradients(np.diag(grad_dict['dL_dK'])) - self.kern.update_gradients_full(grad_dict['dL_dK'], self.X) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, **self.Y_metadata) + self.likelihood.update_gradients(np.diag(self.grad_dict['dL_dK']), **self.Y_metadata) + self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) def log_likelihood(self): return self._log_marginal_likelihood diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index d1122f79..2a61c970 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -16,7 +16,7 @@ Observable Pattern for patameterization from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np -__updated__ = '2014-03-12' +__updated__ = '2014-03-13' class HierarchyError(Exception): """ @@ -644,10 +644,10 @@ class OptimizationHandlable(Constrainable, Observable): self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat - + pi._param_array_.data = parray[pislice].data pi._gradient_array_.data = garray[pislice].data - + 
pi._propagate_param_grad(parray[pislice], garray[pislice]) pi_old_size += pi.size @@ -660,11 +660,11 @@ class Parameterizable(OptimizationHandlable): self._param_array_ = np.empty(self.size, dtype=np.float64) self._gradient_array_ = np.empty(self.size, dtype=np.float64) self._added_names_ = set() - + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): """ Get the names of all parameters of this model. - + :param bool add_self: whether to add the own name in front of names :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names :param bool recursive: whether to traverse through hierarchy and append leaf node names @@ -675,11 +675,11 @@ class Parameterizable(OptimizationHandlable): else: names = [adjust(x.name) for x in self._parameters_] if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) return names - + @property def num_params(self): return len(self._parameters_) - + def _add_parameter_name(self, param, ignore_added_names=False): pname = adjust_name_for_printing(param.name) if ignore_added_names: @@ -694,7 +694,7 @@ class Parameterizable(OptimizationHandlable): elif pname not in dir(self): self.__dict__[pname] = param self._added_names_.add(pname) - + def _remove_parameter_name(self, param=None, pname=None): assert param is None or pname is None, "can only delete either param by name, or the name of a param" pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) @@ -706,14 +706,14 @@ class Parameterizable(OptimizationHandlable): def _name_changed(self, param, old_name): self._remove_parameter_name(None, old_name) self._add_parameter_name(param) - + #========================================================================= # Gradient handling #========================================================================= @property def gradient(self): return self._gradient_array_ - + @gradient.setter def gradient(self, val): self._gradient_array_[:] = val @@ 
-734,13 +734,13 @@ class Parameterizable(OptimizationHandlable): # def _set_gradient(self, g): # [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] #=========================================================================== - + def add_parameter(self, param, index=None, _ignore_added_names=False): """ :param parameters: the parameters to add :type parameters: list of or one :py:class:`GPy.core.param.Param` :param [index]: index of where to put parameters - + :param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field Add all parameters to this param class, you can insert parameters @@ -771,9 +771,9 @@ class Parameterizable(OptimizationHandlable): self.constraints.update(param.constraints, start) self.priors.update(param.priors, start) self._parameters_.insert(index, param) - + param.add_observer(self, self._pass_through_notify_observers, -np.inf) - + self.size += param.size self._connect_parameters(ignore_added_names=_ignore_added_names) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index a98f0098..8551c831 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -101,7 +101,6 @@ class Parameterized(Parameterizable, Pickleable): return G return node - def _getstate(self): """ Get the current state of the class, diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 3f00b22d..fdebdfac 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -46,9 +46,6 @@ class Add(CombinationKernel): def update_gradients_diag(self, dL_dK, X): [p.update_gradients_diag(dL_dK, X) for p in self.parts] - def update_gradients_diag(self, dL_dKdiag, X): - [p.update_gradients_diag(dL_dKdiag, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - def gradients_X(self, dL_dK, X, X2=None): """Compute the gradient of the objective function with respect to X. 
diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 252a7bc3..5588fdb2 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -40,24 +40,26 @@ class IndependentOutputs(Kern): the rest of the columns of X are passed to the underlying kernel for computation (in blocks). """ - def __init__(self, kern, name='independ'): - super(IndependentOutputs, self).__init__(kern.input_dim+1, name) + def __init__(self, active_dim, kern, name='independ'): + assert isinstance(active_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" + super(IndependentOutputs, self).__init__(np.r_[0:max(max(kern.active_dims)+1, active_dim+1)], name) + self.index_dim = active_dim self.kern = kern self.add_parameters(self.kern) def K(self,X ,X2=None): - X, slices = X[:,:-1], index_to_slices(X[:,-1]) + slices = index_to_slices(X[:,self.index_dim]) if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [[np.copyto(target[s,s], self.kern.K(X[s], None)) for s in slices_i] for slices_i in slices] + [[np.copyto(target[s,s], self.kern.K(X[s,:], None)) for s in slices_i] for slices_i in slices] else: - X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) + slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[[np.copyto(target[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + [[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] return target def Kdiag(self,X): - X, slices = X[:,:-1], index_to_slices(X[:,-1]) + slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape[0]) [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] return target @@ -66,20 +68,19 @@ class IndependentOutputs(Kern): target = np.zeros(self.kern.size) def 
collate_grads(dL, X, X2): self.kern.update_gradients_full(dL,X,X2) - self.kern._collect_gradient(target) + target[:] += self.kern.gradient - X,slices = X[:,:-1],index_to_slices(X[:,-1]) + slices = index_to_slices(X[:,self.index_dim]) if X2 is None: [[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices] else: - X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1]) + slices2 = index_to_slices(X2[:,self.index_dim]) [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] - - self.kern._set_gradient(target) + self.kern.gradient = target def gradients_X(self,dL_dK, X, X2=None): target = np.zeros_like(X) - X, slices = X[:,:-1],index_to_slices(X[:,-1]) + slices = index_to_slices(X[:,self.index_dim]) if X2 is None: [[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s],X[s],None)) for s in slices_i] for slices_i in slices] else: @@ -88,7 +89,7 @@ class IndependentOutputs(Kern): return target def gradients_X_diag(self, dL_dKdiag, X): - X, slices = X[:,:-1], index_to_slices(X[:,-1]) + slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape) [[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] return target @@ -97,10 +98,10 @@ class IndependentOutputs(Kern): target = np.zeros(self.kern.size) def collate_grads(dL, X): self.kern.update_gradients_diag(dL,X) - self.kern._collect_gradient(target) + target[:] += self.kern.gradient - X,slices = X[:,:-1],index_to_slices(X[:,-1]) + slices = index_to_slices(X[:,self.index_dim]) [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] - self.kern._set_gradient(target) + self.kern.gradient = target class Hierarchical(Kern): """ diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index f8f2d588..0aa414ca 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -147,11 +147,14 @@ class Kern(Parameterized): """ Here we overload the '*' operator. 
See self.prod for more information""" return self.prod(other) - #def __pow__(self, other): - # """ - # Shortcut for tensor `prod`. - # """ - # return self.prod(other, tensor=True) + def __pow__(self, other): + """ + Shortcut for tensor `prod`. + """ + assert self.active_dims == range(self.input_dim), "Can only use kernels, which have their input_dims defined from 0" + assert other.active_dims == range(other.input_dim), "Can only use kernels, which have their input_dims defined from 0" + other.active_dims += self.input_dim + return self.prod(other) def prod(self, other, name=None): """ @@ -192,7 +195,8 @@ class Kern(Parameterized): class CombinationKernel(Kern): def __init__(self, kernels, name): assert all([isinstance(k, Kern) for k in kernels]) - input_dim = reduce(np.union1d, (x.active_dims for x in kernels)) + ma = reduce(lambda a,b: max(a, max(b)), (x.active_dims for x in kernels), 0) + input_dim = np.r_[0:ma+1] super(CombinationKernel, self).__init__(input_dim, name) self.add_parameters(*kernels) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index a253c63d..34e5a17e 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -15,4 +15,4 @@ from mrd import MRD from gradient_checker import GradientChecker from ss_gplvm import SSGPLVM from gp_coregionalized_regression import GPCoregionalizedRegression -from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression +#.py file not included!!! #from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index ba270dad..5f7e3265 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -41,7 +41,7 @@ class GPLVM(GP): def parameters_changed(self): super(GPLVM, self).parameters_changed() - self.X.gradient = self.kern.gradients_X(self.dL_dK, self.X, None) + self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) def _getstate(self): return GP._getstate(self)