diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 31210aa1..defb0cef 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -103,13 +103,7 @@ class GP(GPBase):
 #        else:
 #            tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
 #        return tmp
-
-    def dL_dtheta(self):
-        return self.kern.dK_dtheta(self.dL_dK, self.X)
-
-    def dL_dlikelihood(self):
-        return self.likelihood._gradients(partial=np.diag(self.dL_dK))
-
+
     def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
         """
         Internal helper function for making predictions, does not account
diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 547985ef..88c76f4c 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -12,8 +12,8 @@ class GPBase(Model):
     Gaussian process base model for holding shared behaviour between
     sparse_GP and GP models.
     """
 
-    def __init__(self, X, likelihood, kernel, normalize_X=False):
-        super(GPBase, self).__init__()
+    def __init__(self, X, likelihood, kernel, normalize_X=False, name=''):
+        super(GPBase, self).__init__(name)
         self.X = ObservableArray(X)
         assert len(self.X.shape) == 2
@@ -44,6 +44,13 @@ class GPBase(Model):
         self.kern.parameters_changed()
         self.likelihood.parameters_changed()
 
+    def dL_dtheta(self):
+        return self.kern.dK_dtheta(self.dL_dK, self.X)
+
+    def dL_dlikelihood(self):
+        return self.likelihood._gradients(partial=np.diag(self.dL_dK))
+
+
     def getstate(self):
         """
         Get the current state of the class, here we return everything that is needed to recompute the model.
diff --git a/GPy/core/model.py b/GPy/core/model.py
index 0e136920..305dea4b 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -18,8 +18,8 @@ import itertools
 class Model(Parameterized):
     _fail_count = 0 # Count of failed optimization steps (see objective)
     _allowed_failures = 10 # number of allowed failures
-    def __init__(self):
-        super(Model, self).__init__()#Parameterized.__init__(self)
+    def __init__(self, name):
+        super(Model, self).__init__(name)#Parameterized.__init__(self)
         self.priors = []
         self._priors = ParameterIndexOperations()
         self.optimization_runs = []
@@ -488,7 +488,6 @@ class Model(Parameterized):
             names = self._get_param_names_transformed()
         except NotImplementedError:
             names = ['Variable %i' % i for i in range(len(x))]
-        import ipdb;ipdb.set_trace()
         # Prepare for pretty-printing
         header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
         max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
diff --git a/GPy/core/parameter.py b/GPy/core/parameter.py
index dcf26c3b..0ae7c0d7 100644
--- a/GPy/core/parameter.py
+++ b/GPy/core/parameter.py
@@ -49,7 +49,10 @@ class ObservableArray(ListArray):
         [callble(self) for callble in self._observers_.itervalues()]
     def __setitem__(self, s, val):
         if not numpy.all(numpy.equal(self[s], val)):
-            numpy.put(self,s,val)
+            if isinstance(s, slice):
+                super(ObservableArray, self).__setitem__(s, val)
+            else:
+                numpy.put(self,s,val)
         self._notify_observers()
     def __getslice__(self, start, stop):
         return self.__getitem__(slice(start, stop))
@@ -84,7 +87,7 @@ class Param(ObservableArray, Nameable, Pickleable):
     def __new__(cls, name, input_array, *args, **kwargs):
         obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
         obj._direct_parent_ = None
-        obj._name_ = name
+        #obj.name = name
         obj._parent_index_ = None
         obj._highest_parent_ = None
         obj._current_slice_ = (slice(obj.shape[0]),)
@@ -103,7 +106,8 @@ class Param(ObservableArray, Nameable, Pickleable):
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return
-        self._name_ = getattr(obj, '_name_', None)
+        super(Param, self).__array_finalize__(obj)
+        self.name = getattr(obj, 'name', None)
         self._current_slice_ = getattr(obj, '_current_slice_', None)
         self._direct_parent_ = getattr(obj, '_direct_parent_', None)
         self._parent_index_ = getattr(obj, '_parent_index_', None)
@@ -124,7 +128,7 @@ class Param(ObservableArray, Nameable, Pickleable):
     def __reduce__(self):
         func, args, state = super(Param, self).__reduce__()
         return func, args, (state,
-                            (self._name_,
+                            (self.name,
                              self._direct_parent_,
                              self._parent_index_,
                              self._highest_parent_,
@@ -150,13 +154,15 @@ class Param(ObservableArray, Nameable, Pickleable):
         self._highest_parent_ = state.pop()
         self._parent_index_ = state.pop()
         self._direct_parent_ = state.pop()
-        self._name_ = state.pop()
+        self.name = state.pop()
     #===========================================================================
     # get/set parameters
     #===========================================================================
     def _set_params(self, param):
         self.flat = param
+        self._notify_observers()
         self._notify_tied_parameters()
+
     def _get_params(self):
         return self.flat
 #    @property
@@ -166,13 +172,13 @@ class Param(ObservableArray, Nameable, Pickleable):
 #        This can be a callable without parameters. The callable will be called
 #        every time the name property is accessed.
 #        """
-#        if callable(self._name_):
-#            return self._name_()
-#        return self._name_
+#        if callable(self.name):
+#            return self.name()
+#        return self.name
 #    @name.setter
 #    def name(self, new_name):
 #        from_name = self.name
-#        self._name_ = new_name
+#        self.name = new_name
 #        self._direct_parent_._name_changed(self, from_name)
     @property
     def _parameters_(self):
diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py
index 669f0d72..31a44949 100644
--- a/GPy/core/parameterized.py
+++ b/GPy/core/parameterized.py
@@ -121,6 +121,7 @@ class Parameterized(Nameable, Pickleable):
         self._connect_parameters()
         self.gradient_mapping = {}
         del self._in_init_
+
 
     @property
     def constraints(self):
@@ -169,10 +170,14 @@ class Parameterized(Nameable, Pickleable):
         Add all parameters to this parameter class, you can insert parameters
         at any given index using the :py:func:`list.insert` syntax
         """
-        if index is None:
-            self._parameters_.append(parameter)
-        else:
+        if parameter in self._parameters_ and index is not None:
+            del self._parameters_[parameter._parent_index_]
             self._parameters_.insert(index, parameter)
+        elif parameter not in self._parameters_:
+            if index is None:
+                self._parameters_.append(parameter)
+            else:
+                self._parameters_.insert(index, parameter)
         self._connect_parameters()
         if gradient:
             self.gradient_mapping[parameter] = gradient
@@ -226,17 +231,18 @@ class Parameterized(Nameable, Pickleable):
 #                    if fast_array_equal(v,p):
 #                        self.__dict__[k] = p
 #            except: # parameter comparison, just for convenience
-#                pass
-            if p.name in self.__dict__:
-                if not p is self.__dict__[p.name]:
-                    not_unique.append(p.name)
-                    del self.__dict__[p.name]
-                elif not (p.name in not_unique):
-                    self.__dict__[p.name] = p
+#                pass
+            pname = p.name.replace(" ", "_").replace(".","_")
+            if pname in self.__dict__:
+                if not p is self.__dict__[pname]:
+                    not_unique.append(pname)
+                    del self.__dict__[pname]
+                elif not (pname in not_unique):
+                    self.__dict__[pname] = p
         sizes = numpy.cumsum([0] + self._parameter_sizes_)
         self.size = sizes[-1]
         self._param_slices_ = [slice(start, stop) for start,stop in zip(sizes, sizes[1:])]
-        self.parameters_changed()
+#        self.parameters_changed()
     #===========================================================================
     # Pickling operations
     #===========================================================================
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 8029b920..52029d6a 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -7,6 +7,7 @@ from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, c
 from scipy import linalg
 from ..likelihoods import Gaussian, EP,EP_Mixed_Noise
 from gp_base import GPBase
+from GPy.core.parameter import Param
 
 class SparseGP(GPBase):
     """
@@ -30,7 +31,7 @@ class SparseGP(GPBase):
     """
 
     def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
-        GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
+        GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, name="sparse GP")
 
         self.Z = Z
         self.num_inducing = Z.shape[0]
@@ -50,6 +51,14 @@ class SparseGP(GPBase):
         if self.has_uncertain_inputs:
             self.X_variance /= np.square(self._Xscale)
 
+        self.Z = Param('inducing input', self.Z)
+        self.add_parameter(self.Z, gradient=self.dL_dZ, index=0)
+        self.add_parameter(self.kern, gradient=self.dL_dtheta)
+
+        self._compute_kernel_matrices()
+        self.Z.add_observer(self, lambda Z: self._compute_kernel_matrices())
+        #self.Z._notify_observers()
+
         self._const_jitter = None
 
     def getstate(self):
@@ -197,13 +206,17 @@ class SparseGP(GPBase):
         D = 0.5 * self.data_fit
         return A + B + C + D + self.likelihood.Z
 
-    def _set_params(self, p):
-        self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
-        self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
-        self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
+    #def _set_params(self, p):
+    def parameters_changed(self):
+        #self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
+        #self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
+        #self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
+        #self._compute_kernel_matrices()
         self._compute_kernel_matrices()
+        import ipdb;ipdb.set_trace()
         self._computations()
         self.Cpsi1V = None
+        super(SparseGP, self).parameters_changed()
 
     def _get_params(self):
         return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 592299ef..8f8b3d6e 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -378,7 +378,7 @@ def silhouette(max_iters=100):
     print(m)
     return m
 
-def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
+def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, checkgrad=True):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
     X = np.random.uniform(-3., 3., (num_samples, 1))
@@ -388,9 +388,10 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
 
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
-
-    m.checkgrad(verbose=1)
-    m.optimize('tnc', messages=1, max_iters=max_iters)
+    if checkgrad:
+        m.checkgrad(verbose=1)
+    if optimize:
+        m.optimize('tnc', messages=1, max_iters=max_iters)
     m.plot()
 
     return m
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 7c18fc87..521904aa 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -402,7 +402,7 @@ class kern(Parameterized):
         """Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
         assert X.shape[1] == self.input_dim
         assert dL_dKdiag.size == X.shape[0]
-        target = np.zeros(self.num_params)
+        target = np.zeros(self.size)
         [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
         return self._transform_gradients(target)
 
@@ -418,7 +418,7 @@ class kern(Parameterized):
         return target
 
     def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
-        target = np.zeros(self.num_params)
+        target = np.zeros(self.size)
         [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
         return self._transform_gradients(target)
 
@@ -433,7 +433,7 @@ class kern(Parameterized):
         return target
 
     def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
-        target = np.zeros((self.num_params))
+        target = np.zeros((self.size))
         [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
         return self._transform_gradients(target)
 
@@ -480,7 +480,7 @@ class kern(Parameterized):
 
     def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
         """Gradient of the psi2 statistics with respect to the parameters."""
-        target = np.zeros(self.num_params)
+        target = np.zeros(self.size)
         [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
 
         # compute the "cross" terms
diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py
index 8e824404..adc54c6d 100644
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@@ -55,6 +55,8 @@ class RBF(Kernpart):
         self.lengthscale.add_observer(self, self.update_lengthscale)
         self.add_parameters(self.variance, self.lengthscale)
 
+        self.update_lengthscale(self.lengthscale)
+        self.parameters_changed()
         # initialize cache
         #self._Z, self._mu, self._S = np.empty(shape=(3, 1))
         #self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
@@ -65,7 +67,8 @@ class RBF(Kernpart):
             'extra_link_args' : ['-lgomp']}
 
     def on_input_change(self, X):
-        self._K_computations(X, None)
+        #self._K_computations(X, None)
+        pass
 
     def update_lengthscale(self, l):
         self.lengthscale2 = np.square(self.lengthscale)
@@ -74,8 +77,8 @@ class RBF(Kernpart):
         # reset cached results
         #self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
         #self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
-        #self._X, self._X2 = np.empty(shape=(2, 1))
-        #self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
+        self._X, self._X2 = np.empty(shape=(2, 1))
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
         pass
 #    def _get_params(self):
 #        return np.hstack((self.variance, self.lengthscale))
@@ -98,17 +101,16 @@ class RBF(Kernpart):
 #        return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
 
     def K(self, X, X2, target):
-        if self._X is None or X.base is not self._X.base or X2 is not None:
-            import pdb;pdb.set_trace()
-            self._K_computations(X, X2)
+        #if self._X is None or X.base is not self._X.base or X2 is not None:
+        self._K_computations(X, X2)
         target += self.variance * self._K_dvar
 
     def Kdiag(self, X, target):
         np.add(target, self.variance, target)
 
     def dK_dtheta(self, dL_dK, X, X2, target):
-        if self._X is None or X.base is not self._X.base or X2 is not None:
-            self._K_computations(X, X2)
+        #if self._X is None or X.base is not self._X.base or X2 is not None:
+        self._K_computations(X, X2)
         target[0] += np.sum(self._K_dvar * dL_dK)
         if self.ARD:
             dvardLdK = self._K_dvar * dL_dK
@@ -156,8 +158,8 @@ class RBF(Kernpart):
         target[0] += np.sum(dL_dKdiag)
 
     def dK_dX(self, dL_dK, X, X2, target):
-        if self._X is None or X.base is not self._X.base or X2 is not None:
-            self._K_computations(X, X2)
+        #if self._X is None or X.base is not self._X.base or X2 is not None:
+        self._K_computations(X, X2)
         if X2 is None:
             _K_dist = 2*(X[:, None, :] - X[None, :, :])
         else:
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 21b4f4d3..f844b5c6 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -37,8 +37,7 @@ class Gaussian(likelihood):
         self._variance = variance + 1
 
         self.add_parameter(self.variance)
-
-
+        self.parameters_changed()
 #        self._set_params(np.asarray(variance))
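
Reviewer note on the `ObservableArray.__setitem__` hunk: `numpy.put` only understands flat integer indices, so routing every write through it broke slice assignment (e.g. `m.Z[2:5] = ...`); the new branch defers slices to `ndarray.__setitem__`. Below is a minimal, self-contained sketch of the observer pattern this patch relies on. The names follow `GPy/core/parameter.py`, but the class body is illustrative rather than GPy's actual implementation, and `invalidate_cache` is a hypothetical stand-in for `SparseGP._compute_kernel_matrices()`:

```python
import numpy as np

class ObservableArray(np.ndarray):
    """Sketch only: enough to show slice handling plus observer notification."""

    def __new__(cls, input_array):
        obj = np.asarray(input_array).view(cls)
        obj._observers_ = {}
        return obj

    def __array_finalize__(self, obj):
        if obj is None:
            return
        # Views made from an existing instance share its observer registry.
        self._observers_ = getattr(obj, '_observers_', {})

    def add_observer(self, observer, callble):
        # 'callble' spelling kept from the source for easy comparison.
        self._observers_[observer] = callble

    def _notify_observers(self):
        for callble in self._observers_.values():
            callble(self)

    def __setitem__(self, s, val):
        if not np.all(np.equal(self[s], val)):
            if isinstance(s, slice):
                # np.put cannot take a slice object; use ndarray.__setitem__,
                # which is exactly the fix in the hunk above.
                super(ObservableArray, self).__setitem__(s, val)
            else:
                np.put(self, s, val)
        self._notify_observers()

def invalidate_cache(arr):
    # Hypothetical observer; in the patch, SparseGP registers a lambda that
    # calls self._compute_kernel_matrices() whenever Z changes.
    print("array changed: %s" % arr)

Z = ObservableArray(np.arange(4.0))
Z.add_observer("sparse_gp", invalidate_cache)
Z[1:3] = [10.0, 11.0]  # slice write: only works with the patched branch
Z[0] = 42.0            # integer write: still goes through np.put
```

Note that writes which leave the values unchanged skip the update but still notify, mirroring the patched code, where `_notify_observers()` sits outside the equality guard.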
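The `SparseGP.__init__` hunk establishes the new registration idiom: wrap the raw inducing inputs in a `Param`, register it and the kernel with per-parameter gradient callables, and hang an observer on `Z` so the kernel matrices are recomputed on change. The patch shows the bookkeeping (`gradient_mapping`, `_param_slices_` built from cumulative sizes) but not a consumer of it, so the following is a hypothetical sketch of how a container could stitch per-parameter gradients into one flat vector. `TinyParameterized` and `log_likelihood_gradients` are invented names, and the mapping is keyed by `id()` here because plain ndarrays are not hashable:

```python
import numpy as np

class TinyParameterized(object):
    """Hypothetical consumer of add_parameter(..., gradient=...); mirrors the
    bookkeeping visible in GPy/core/parameterized.py but is not GPy code."""

    def __init__(self):
        self._parameters_ = []
        self.gradient_mapping = {}

    def add_parameter(self, parameter, gradient=None, index=None):
        # Re-adding an existing parameter moves it; identity comparison avoids
        # the ambiguous truth value of ndarray '=='.
        self._parameters_ = [p for p in self._parameters_ if p is not parameter]
        if index is None:
            self._parameters_.append(parameter)
        else:
            self._parameters_.insert(index, parameter)
        self._connect_parameters()
        if gradient is not None:
            self.gradient_mapping[id(parameter)] = gradient

    def _connect_parameters(self):
        # One flat slice per parameter, as _param_slices_ does in the patch.
        sizes = np.cumsum([0] + [p.size for p in self._parameters_])
        self.size = int(sizes[-1])
        self._param_slices_ = [slice(a, b) for a, b in zip(sizes, sizes[1:])]

    def log_likelihood_gradients(self):
        # Assemble the full gradient from the per-parameter callables.
        grad = np.empty(self.size)
        for p, ps in zip(self._parameters_, self._param_slices_):
            grad[ps] = np.asarray(self.gradient_mapping[id(p)]()).ravel()
        return grad

# Usage, shaped after the SparseGP hunk: Z is inserted at index 0 so its
# gradient block comes first, exactly as the old _set_params layout assumed.
theta = np.ones(2)         # stand-in for kernel hyperparameters
Z = np.random.randn(5, 1)  # stand-in for the inducing inputs
model = TinyParameterized()
model.add_parameter(theta, gradient=lambda: np.array([0.1, -0.2]))
model.add_parameter(Z, gradient=lambda: np.zeros(Z.size), index=0)
print(model.log_likelihood_gradients())  # 7 entries: Z block, then theta
```

This also illustrates why the patch keeps `index=0` for `Z`: the flat gradient ordering must agree with whatever `_get_params` concatenates, and the old `_set_params` sliced `Z` off the front of `p`.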