diff --git a/GPy/core/gp.py b/GPy/core/gp.py index d7f07a47..5be3e944 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -7,7 +7,7 @@ import warnings from .. import kern from ..util.linalg import dtrtrs from model import Model -from parameterization import ObservableArray +from parameterization import ObsAr from .. import likelihoods from ..likelihoods.gaussian import Gaussian from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation @@ -31,21 +31,19 @@ class GP(Model): super(GP, self).__init__(name) assert X.ndim == 2 - if isinstance(X, (ObservableArray, VariationalPosterior)): + if isinstance(X, (ObsAr, VariationalPosterior)): self.X = X - else: self.X = ObservableArray(X) + else: self.X = ObsAr(X) self.num_data, self.input_dim = self.X.shape assert Y.ndim == 2 - self.Y = ObservableArray(Y) + self.Y = ObsAr(Y) assert Y.shape[0] == self.num_data _, self.output_dim = self.Y.shape - if Y_metadata is None: - self.Y_metadata = {} - else: - self.Y_metadata = Y_metadata + #TODO: check the type of this is okay? + self.Y_metadata = Y_metadata assert isinstance(kernel, kern.Kern) #assert self.input_dim == kernel.input_dim @@ -76,25 +74,27 @@ class GP(Model): def _raw_predict(self, _Xnew, full_cov=False): """ - Internal helper function for making predictions, does not account - for normalization or likelihood + For making predictions, does not account for normalization or likelihood full_cov is a boolean which defines whether the full covariance matrix of the prediction is computed. If full_cov is False (default), only the diagonal of the covariance is returned. 
+ $$ + p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df + = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*} + \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} + $$ + """ Kx = self.kern.K(_Xnew, self.X).T - #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1) WiKx = np.dot(self.posterior.woodbury_inv, Kx) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = self.kern.K(_Xnew) - #var = Kxx - tdot(LiKx.T) - var = np.dot(Kx.T, WiKx) + var = Kxx - np.dot(Kx.T, WiKx) else: Kxx = self.kern.Kdiag(_Xnew) - #var = Kxx - np.sum(LiKx*LiKx, 0) var = Kxx - np.sum(WiKx*Kx, 0) var = var.reshape(-1, 1) diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index ef0af16c..efd9476f 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -10,11 +10,11 @@ class Mapping(Parameterized): Base model for shared behavior between models that can act like a mapping. """ - def __init__(self, input_dim, output_dim): + def __init__(self, input_dim, output_dim, name='mapping'): self.input_dim = input_dim self.output_dim = output_dim - super(Mapping, self).__init__() + super(Mapping, self).__init__(name=name) # Model.__init__(self) # All leaf nodes should call self._set_params(self._get_params()) at # the end diff --git a/GPy/core/parameterization/__init__.py b/GPy/core/parameterization/__init__.py index ccbac39d..8e9aa094 100644 --- a/GPy/core/parameterization/__init__.py +++ b/GPy/core/parameterization/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from param import Param, ObservableArray +from param import Param, ObsAr from parameterized import Parameterized diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index e3a5b137..a120f004 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -1,25 +1,25 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -__updated__ = '2013-12-16' +__updated__ = '2014-03-17' import numpy as np from parameter_core import Observable -class ObservableArray(np.ndarray, Observable): +class ObsAr(np.ndarray, Observable): """ An ndarray which reports changes to its observers. The observers can add themselves with a callable, which will be called every time this array changes. The callable takes exactly one argument, which is this array itself. """ - __array_priority__ = -1 # Never give back ObservableArray + __array_priority__ = -1 # Never give back ObsAr def __new__(cls, input_array, *a, **kw): - if not isinstance(input_array, ObservableArray): + if not isinstance(input_array, ObsAr): obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) else: obj = input_array - cls.__name__ = "ObservableArray\n " - super(ObservableArray, obj).__init__(*a, **kw) + #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing + super(ObsAr, obj).__init__(*a, **kw) return obj def __array_finalize__(self, obj): @@ -30,6 +30,14 @@ class ObservableArray(np.ndarray, Observable): def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) + def __reduce__(self): + func, args, state = np.ndarray.__reduce__(self) + return func, args, (state, Observable._getstate(self)) + + def __setstate__(self, state): + np.ndarray.__setstate__(self, state[0]) + Observable._setstate(self, state[1]) + def _s_not_empty(self, s): # this 
checks whether there is something picked by this slice. return True @@ -46,7 +54,7 @@ class ObservableArray(np.ndarray, Observable): def __setitem__(self, s, val): if self._s_not_empty(s): - super(ObservableArray, self).__setitem__(s, val) + super(ObsAr, self).__setitem__(s, val) self.notify_observers(self[s]) def __getslice__(self, start, stop): @@ -56,7 +64,7 @@ class ObservableArray(np.ndarray, Observable): return self.__setitem__(slice(start, stop), val) def __copy__(self, *args): - return ObservableArray(self.view(np.ndarray).copy()) + return ObsAr(self.view(np.ndarray).copy()) def copy(self, *args): return self.__copy__(*args) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 2ede8436..b73e7dfa 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -4,7 +4,7 @@ import itertools import numpy from parameter_core import OptimizationHandlable, adjust_name_for_printing -from array_core import ObservableArray +from array_core import ObsAr ###### printing __constraints_name__ = "Constraint" @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(OptimizationHandlable, ObservableArray): +class Param(OptimizationHandlable, ObsAr): """ Parameter object for GPy models. 
@@ -226,7 +226,7 @@ class Param(OptimizationHandlable, ObservableArray): # Constrainable #=========================================================================== def _ensure_fixes(self): - self._fixes_ = numpy.ones(self._realsize_, dtype=bool) + if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool) #=========================================================================== # Convenience @@ -269,6 +269,8 @@ class Param(OptimizationHandlable, ObservableArray): @property def _ties_str(self): return [''] + def _ties_for(self, ravi): + return [['N/A']]*ravi.size def __repr__(self, *args, **kwargs): name = "\033[1m{x:s}\033[0;0m:\n".format( x=self.hierarchy_name()) @@ -312,7 +314,7 @@ class Param(OptimizationHandlable, ObservableArray): ravi = self._raveled_index(filter_) if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi) if prirs is None: prirs = self.priors.properties_for(ravi) - if ties is None: ties = [['N/A']]*self.size + if ties is None: ties = self._ties_for(ravi) ties = [' '.join(map(lambda x: x, t)) for t in ties] if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__) if lx is None: lx = self._max_len_values() diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index f58143bd..48fe69c2 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -16,7 +16,7 @@ Observable Pattern for patameterization from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np -__updated__ = '2014-03-14' +__updated__ = '2014-03-18' class HierarchyError(Exception): """ @@ -56,7 +56,7 @@ class InterfacePickleFunctions(object): """ raise NotImplementedError, "To be able to use pickling you need to implement this method" -class Pickleable(object): +class Pickleable(InterfacePickleFunctions): """ Make an object pickleable (See python doc 
'pickling'). @@ -95,7 +95,7 @@ class Pickleable(object): def _has_get_set_state(self): return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__) -class Observable(InterfacePickleFunctions): +class Observable(Pickleable): """ Observable pattern for parameterization. @@ -155,6 +155,7 @@ class Observable(InterfacePickleFunctions): def _getstate(self): return [self._observer_callables_] + def _setstate(self, state): self._observer_callables_ = state.pop() @@ -376,7 +377,7 @@ class Constrainable(Nameable, Indexable): # Ensure that the fixes array is set: # Parameterized: ones(self.size) # Param: ones(self._realsize_ - self._fixes_ = np.ones(self.size, dtype=bool) + if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) def _set_fixed(self, index): self._ensure_fixes() @@ -397,7 +398,7 @@ class Constrainable(Nameable, Indexable): self._fixes_ = None def _has_fixes(self): - return hasattr(self, "_fixes_") and self._fixes_ is not None + return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size #=========================================================================== # Prior Operations @@ -540,12 +541,12 @@ class Constrainable(Nameable, Indexable): print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name) which.add(what, self._raveled_index()) - def _remove_from_index_operations(self, which, what): + def _remove_from_index_operations(self, which, transforms): """ Helper preventing copy code. Remove given what (transform prior etc) from which param index ops. 
""" - if len(what) == 0: + if len(transforms) == 0: transforms = which.properties() removed = np.empty((0,), dtype=int) for t in transforms: @@ -566,24 +567,32 @@ class OptimizationHandlable(Constrainable): super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) def transform(self): - [np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] def untransform(self): - [np.put(self._param_array_, ind, c.f(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] def _get_params_transformed(self): # transformed parameters (apply transformation rules) p = self._param_array_.copy() [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] - if self._has_fixes(): + if self.has_parent() and self.constraints[__fixed__].size != 0: + fixes = np.ones(self.size).astype(bool) + fixes[self.constraints[__fixed__]] = FIXED + return p[fixes] + elif self._has_fixes(): return p[self._fixes_] return p def _set_params_transformed(self, p): if p is self._param_array_: p = p.copy() - if self._has_fixes(): self._param_array_[self._fixes_] = p - else: self._param_array_[:] = p + if self.has_parent() and self.constraints[__fixed__].size != 0: + fixes = np.ones(self.size).astype(bool) + fixes[self.constraints[__fixed__]] = FIXED + self._param_array_.flat[fixes] = p + elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p + else: self._param_array_.flat = p self.untransform() self._trigger_params_changed() @@ -661,8 +670,8 @@ class OptimizationHandlable(Constrainable): for pi in self._parameters_: pislice = slice(pi_old_size, 
pi_old_size+pi.size) - self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat - self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat + self._param_array_[pislice] = pi._param_array_.flat#, requirements=['C', 'W']).flat + self._gradient_array_[pislice] = pi._gradient_array_.flat#, requirements=['C', 'W']).flat pi._param_array_.data = parray[pislice].data pi._gradient_array_.data = garray[pislice].data @@ -769,11 +778,11 @@ class Parameterizable(OptimizationHandlable): Add all parameters to this param class, you can insert parameters at any given index using the :func:`list.insert` syntax """ - # if param.has_parent(): - # raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short()) if param in self._parameters_ and index is not None: self.remove_parameter(param) self.add_parameter(param, index) + #elif param.has_parent(): + # raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short()) elif param not in self._parameters_: if param.has_parent(): parent = param._parent_ @@ -797,13 +806,19 @@ class Parameterizable(OptimizationHandlable): param.add_observer(self, self._pass_through_notify_observers, -np.inf) - self.size += param.size + parent = self + while parent is not None: + parent.size += param.size + parent = parent._parent_ + + self._connect_parameters() + + self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) + self._highest_parent_._notify_parent_change() + self._highest_parent_._connect_fixes() - self._connect_parameters(ignore_added_names=_ignore_added_names) - self._notify_parent_change() - self._connect_fixes() else: - raise RuntimeError, """Parameter exists already added and no copy made""" + raise HierarchyError, """Parameter exists already and no copy made""" def add_parameters(self, 
*parameters): @@ -829,17 +844,18 @@ class Parameterizable(OptimizationHandlable): param.remove_observer(self, self._pass_through_notify_observers) self.constraints.shift_left(start, param.size) - self._connect_fixes() self._connect_parameters() self._notify_parent_change() parent = self._parent_ while parent is not None: - parent._connect_fixes() - parent._connect_parameters() - parent._notify_parent_change() + parent.size -= param.size parent = parent._parent_ + self._highest_parent_._connect_parameters() + self._highest_parent_._connect_fixes() + self._highest_parent_._notify_parent_change() + def _connect_parameters(self, ignore_added_names=False): # connect parameterlist to this parameterized object # This just sets up the right connection for the params objects @@ -862,8 +878,8 @@ class Parameterizable(OptimizationHandlable): # first connect all children p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice]) # then connect children to self - self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C') - self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C') + self._param_array_[pslice] = p._param_array_.flat#, requirements=['C', 'W']).ravel(order='C') + self._gradient_array_[pslice] = p._gradient_array_.flat#, requirements=['C', 'W']).ravel(order='C') if not p._param_array_.flags['C_CONTIGUOUS']: import ipdb;ipdb.set_trace() diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index 5cda8d46..506d80cd 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -7,10 +7,10 @@ from domains import _POSITIVE,_NEGATIVE, _BOUNDED import weakref import sys -#_lim_val = -np.log(sys.float_info.epsilon) _exp_lim_val = np.finfo(np.float64).max -_lim_val = np.log(_exp_lim_val) +_lim_val = 36.0 +epsilon = np.finfo(np.float64).resolution 
#=============================================================================== # Fixing constants @@ -54,19 +54,19 @@ class Transformation(object): class Logexp(Transformation): domain = _POSITIVE def f(self, x): - return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x))) def finv(self, f): return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.)) def gradfactor(self, f): - return np.where(f>_lim_val, 1., 1 - np.exp(-f)) + return np.where(f>_lim_val, 1., 1. - np.exp(-f)) def initialize(self, f): if np.any(f < 0.): print "Warning: changing parameters to satisfy constraints" return np.abs(f) def __str__(self): return '+ve' - + class LogexpNeg(Transformation): domain = _POSITIVE @@ -98,7 +98,7 @@ class NegativeLogexp(Transformation): return -self.logexp.initialize(f) # np.abs(f) def __str__(self): return '-ve' - + class LogexpClipped(Logexp): max_bound = 1e100 min_bound = 1e-10 diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index a0b09564..7bf0ca2a 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -64,8 +64,8 @@ class SparseGP(GP): self.kern.gradient += target #gradients wrt Z - self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z) - self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations( + self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) + self.Z.gradient += self.kern.gradients_Z_expectations( self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) else: #gradients wrt kernel @@ -76,8 +76,8 @@ class SparseGP(GP): self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) self.kern.gradient += target #gradients wrt Z - self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) - 
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) + self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) def _raw_predict(self, Xnew, full_cov=False): """ @@ -88,8 +88,9 @@ class SparseGP(GP): mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = self.kern.K(Xnew) - #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) - var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) + var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) + #var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) + var = var.squeeze() else: Kxx = self.kern.Kdiag(Xnew) var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py index 87548553..c575bb33 100644 --- a/GPy/examples/__init__.py +++ b/GPy/examples/__init__.py @@ -6,4 +6,4 @@ import regression import dimensionality_reduction import tutorials import stochastic -import non_gaussian \ No newline at end of file +import non_gaussian diff --git a/GPy/examples/coreg_example.py b/GPy/examples/coreg_example.py index 8f4cfcc6..66ba143d 100644 --- a/GPy/examples/coreg_example.py +++ b/GPy/examples/coreg_example.py @@ -23,13 +23,10 @@ K = Bias.prod(Coreg,name='X') #K.coregion.W = 0 #print K.coregion.W - #print Bias.K(_X,_X) #print K.K(X,X) - #pb.matshow(K.K(X,X)) - Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")] kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H') kern.B.W = 0 @@ -37,16 +34,22 @@ kern.B.kappa = 1. 
#kern.B.W.fix() #kern.B.kappa.fix() #m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern) -m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], kernel=kern) + + +Z1 = np.array([1.5,2.5])[:,None] + +m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern) #m.optimize() m.checkgrad(verbose=1) +""" fig = pb.figure() ax0 = fig.add_subplot(211) ax1 = fig.add_subplot(212) slices = GPy.util.multioutput.get_slices([Y1,Y2]) m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0) #m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1) +""" diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index dfd922f0..ea997d63 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -160,6 +160,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy from matplotlib import pyplot as plt + from ..util.misc import param_to_array _np.random.seed(0) data = GPy.util.datasets.oil() @@ -173,11 +174,11 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) if plot: - y = m.Y[0, :] + y = m.Y fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes) data_show = GPy.plotting.matplot_dep.visualize.vector_show(y) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) raw_input('Press enter to finish') plt.close(fig) diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_gaussian.py index cc87b360..c0fcd693 100644 --- 
a/GPy/examples/non_gaussian.py +++ b/GPy/examples/non_gaussian.py @@ -158,7 +158,7 @@ def boston_example(optimize=True, plot=True): #Gaussian GP print "Gauss GP" mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) - mgp.constrain_fixed('white', 1e-5) + mgp.constrain_fixed('.*white', 1e-5) mgp['rbf_len'] = rbf_len mgp['noise'] = noise print mgp @@ -176,7 +176,7 @@ def boston_example(optimize=True, plot=True): g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood) mg.constrain_positive('noise_variance') - mg.constrain_fixed('white', 1e-5) + mg.constrain_fixed('.*white', 1e-5) mg['rbf_len'] = rbf_len mg['noise'] = noise print mg @@ -194,10 +194,10 @@ def boston_example(optimize=True, plot=True): t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) - mstu_t.constrain_fixed('white', 1e-5) - mstu_t.constrain_bounded('t_noise', 0.0001, 1000) + mstu_t.constrain_fixed('.*white', 1e-5) + mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000) mstu_t['rbf_len'] = rbf_len - mstu_t['t_noise'] = noise + mstu_t['.*t_noise'] = noise print mstu_t if optimize: mstu_t.optimize(optimizer=optimizer, messages=messages) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 190af93b..2a4b91b3 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -25,80 +25,51 @@ def olympic_marathon_men(optimize=True, plot=True): return m -def coregionalization_toy2(optimize=True, plot=True): +def coregionalization_toy(optimize=True, plot=True): """ A simple demonstration of coregionalization on two sinusoidal functions. 
""" #build a design matrix with a column of integers indicating the output X1 = np.random.rand(50, 1) * 8 X2 = np.random.rand(30, 1) * 5 - index = np.vstack((np.zeros_like(X1), np.ones_like(X2))) - X = np.hstack((np.vstack((X1, X2)), index)) #build a suitable set of observed variables Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2. - Y = np.vstack((Y1, Y2)) - #build the kernel - k1 = GPy.kern.RBF(1) + GPy.kern.Bias(1) - k2 = GPy.kern.Coregionalize(2,1) - k = k1**k2 - m = GPy.models.GPRegression(X, Y, kernel=k) + m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2]) if optimize: m.optimize('bfgs', max_iters=100) if plot: - m.plot(fixed_inputs=[(1,0)]) - m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) - + slices = GPy.util.multioutput.get_slices([X1,X2]) + m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0}) + m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca()) return m -#FIXME: Needs recovering once likelihoods are consolidated -#def coregionalization_toy(optimize=True, plot=True): -# """ -# A simple demonstration of coregionalization on two sinusoidal functions. -# """ -# X1 = np.random.rand(50, 1) * 8 -# X2 = np.random.rand(30, 1) * 5 -# X = np.vstack((X1, X2)) -# Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 -# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 -# Y = np.vstack((Y1, Y2)) -# -# k1 = GPy.kern.RBF(1) -# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) -# m.constrain_fixed('.*rbf_var', 1.) 
-# m.optimize(max_iters=100) -# -# fig, axes = pb.subplots(2,1) -# m.plot(fixed_inputs=[(1,0)],ax=axes[0]) -# m.plot(fixed_inputs=[(1,1)],ax=axes[1]) -# axes[0].set_title('Output 0') -# axes[1].set_title('Output 1') -# return m - def coregionalization_sparse(optimize=True, plot=True): """ A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations. """ - #fetch the data from the non sparse examples - m = coregionalization_toy2(optimize=False, plot=False) - X, Y = m.X, m.Y + #build a design matrix with a column of integers indicating the output + X1 = np.random.rand(50, 1) * 8 + X2 = np.random.rand(30, 1) * 5 - k = GPy.kern.RBF(1)**GPy.kern.Coregionalize(2) + #build a suitable set of observed variables + Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 + Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2. - #construct a model - m = GPy.models.SparseGPRegression(X,Y, num_inducing=25, kernel=k) - m.Z[:,1].fix() # don't optimize the inducing input indexes + m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2]) if optimize: - m.optimize('bfgs', max_iters=100, messages=1) + m.optimize('bfgs', max_iters=100) if plot: - m.plot(fixed_inputs=[(1,0)]) - m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) + slices = GPy.util.multioutput.get_slices([X1,X2]) + m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0}) + m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca()) + pb.ylim(-3,) return m diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index 5ebc5e53..1a84da6b 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -19,19 +19,15 @@ class DTC(object): def __init__(self): self.const_jitter = 1e-6 - def inference(self, kern, X, Z, likelihood, Y): + def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): assert 
X_variance is None, "cannot use X_variance with DTC. Try varDTC." - #TODO: MAX! fix this! - from ...util.misc import param_to_array - Y = param_to_array(Y) - num_inducing, _ = Z.shape num_data, output_dim = Y.shape #make sure the noise is not hetero - beta = 1./np.squeeze(likelihood.variance) - if beta.size <1: + beta = 1./likelihood.gaussian_variance(Y_metadata) + if beta.size > 1: raise NotImplementedError, "no hetero noise with this implementation of DTC" Kmm = kern.K(Z) @@ -91,19 +87,15 @@ class vDTC(object): def __init__(self): self.const_jitter = 1e-6 - def inference(self, kern, X, X_variance, Z, likelihood, Y): + def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata): assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." - #TODO: MAX! fix this! - from ...util.misc import param_to_array - Y = param_to_array(Y) - num_inducing, _ = Z.shape num_data, output_dim = Y.shape #make sure the noise is not hetero - beta = 1./np.squeeze(likelihood.variance) - if beta.size <1: + beta = 1./likelihood.gaussian_variance(Y_metadata) + if beta.size > 1: raise NotImplementedError, "no hetero noise with this implementation of DTC" Kmm = kern.K(Z) @@ -112,7 +104,7 @@ class vDTC(object): U = Knm Uy = np.dot(U.T,Y) - #factor Kmm + #factor Kmm Kmmi, L, Li, _ = pdinv(Kmm) # Compute A diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index c4ed0a0f..bd3fcefb 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -3,6 +3,7 @@ from posterior import Posterior from ...util.linalg import pdinv, dpotrs, tdot +from ...util import diag import numpy as np log_2_pi = np.log(2*np.pi) @@ -41,7 +42,9 @@ class ExactGaussianInference(object): K = kern.K(X) - Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata)) + Ky = K.copy() + 
diag.add(Ky, likelihood.gaussian_variance(Y_metadata)) + Wi, LW, LWi, W_logdet = pdinv(Ky) alpha, _ = dpotrs(LW, YYT_factor, lower=1) diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 514a6dc7..ff60d2e3 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -11,9 +11,9 @@ class EP(object): :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :type epsilon: float - :param eta: Power EP thing TODO: Ricardo: what, exactly? + :param eta: parameter for fractional EP updates. :type eta: float64 - :param delta: Power EP thing TODO: Ricardo: what, exactly? + :param delta: damping EP updates factor. :type delta: float64 """ self.epsilon, self.eta, self.delta = epsilon, eta, delta diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index c4147d06..de47e5d5 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -17,14 +17,14 @@ class FITC(object): """ const_jitter = 1e-6 - def inference(self, kern, X, Z, likelihood, Y): + def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): num_inducing, _ = Z.shape num_data, output_dim = Y.shape #make sure the noise is not hetero - sigma_n = np.squeeze(likelihood.variance) - if sigma_n.size <1: + sigma_n = likelihood.gaussian_variance(Y_metadata) + if sigma_n.size >1: raise NotImplementedError, "no hetero noise with this implementation of FITC" Kmm = kern.K(Z) diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 4f529d5e..12315a29 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -51,12 +51,11 @@ 
class Laplace(object): Ki_f_init = self._previous_Ki_fhat f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) - self.f_hat = f_hat self.Ki_fhat = Ki_fhat self.K = K.copy() #Compute hessian and other variables at mode - log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) + log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) self._previous_Ki_fhat = Ki_fhat.copy() return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} @@ -86,13 +85,13 @@ class Laplace(object): #define the objective function (to be maximised) def obj(Ki_f, f): - return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata) + return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata) difference = np.inf iteration = 0 while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter: - W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata) - grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata) + W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata) + grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata) W_f = W*f @@ -136,13 +135,12 @@ class Laplace(object): At the mode, compute the hessian and effective covariance matrix. 
returns: logZ : approximation to the marginal likelihood - woodbury_vector : variable required for calculating the approximation to the covariance matrix woodbury_inv : variable required for calculating the approximation to the covariance matrix dL_dthetaL : array of derivatives (1 x num_kernel_params) dL_dthetaL : array of derivatives (1 x num_likelihood_params) """ #At this point get the hessian matrix (or vector as W is diagonal) - W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata) + W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) @@ -151,11 +149,10 @@ class Laplace(object): Ki_W_i = K - C.T.dot(C) #Could this be wrong? #compute the log marginal - log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L))) + log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) #Compute vival matrices for derivatives - dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat - woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata) + dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9. 
#BiK, _ = dpotrs(L, K, lower=1) #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df @@ -169,7 +166,7 @@ class Laplace(object): explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i) #Implicit - implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i) + implicit_part = np.dot(Ki_f, dL_dfhat.T).dot(I_KW_i) dL_dK = explicit_part + implicit_part else: @@ -179,7 +176,7 @@ class Laplace(object): #compute dL_dthetaL# #################### if likelihood.size > 0 and not likelihood.is_fixed: - dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata) + dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata) num_params = likelihood.size # make space for one derivative for each likelihood parameter @@ -200,7 +197,7 @@ class Laplace(object): else: dL_dthetaL = np.zeros(likelihood.size) - return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL + return log_marginal, K_Wi_i, dL_dK, dL_dthetaL def _compute_B_statistics(self, K, W, log_concave): """ diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index b8fbcee4..309cb7e5 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -73,20 +73,37 @@ class Posterior(object): @property def mean(self): + """ + Posterior mean + $$ + K_{xx}v + v := \texttt{Woodbury vector} + $$ + """ if self._mean is None: self._mean = np.dot(self._K, self.woodbury_vector) return self._mean @property def covariance(self): + """ + Posterior covariance + $$ + K_{xx} - K_{xx}W_{xx}^{-1}K_{xx} + W_{xx} := \texttt{Woodbury inv} + $$ + """ if self._covariance is None: #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) - self._covariance = self._K - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T + self._covariance = self._K - 
(np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) - return self._covariance.squeeze() + return self._covariance @property def precision(self): + """ + Inverse of posterior covariance + """ if self._precision is None: cov = np.atleast_3d(self.covariance) self._precision = np.zeros(cov.shape) # if one covariance per dimension @@ -96,8 +113,15 @@ class Posterior(object): @property def woodbury_chol(self): + """ + return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix + $$ + L_{W}L_{W}^{\top} = W^{-1} + W^{-1} := \texttt{Woodbury inv} + $$ + """ if self._woodbury_chol is None: - #compute woodbury chol from + #compute woodbury chol from if self._woodbury_inv is not None: winv = np.atleast_3d(self._woodbury_inv) self._woodbury_chol = np.zeros(winv.shape) @@ -121,6 +145,13 @@ class Posterior(object): @property def woodbury_inv(self): + """ + The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as + $$ + (K_{xx} + \Sigma_{xx})^{-1} + \Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance} + $$ + """ if self._woodbury_inv is None: self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1) #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1) @@ -129,17 +160,22 @@ class Posterior(object): @property def woodbury_vector(self): + """ + Woodbury vector in the gaussian likelihood case only is defined as + $$ + (K_{xx} + \Sigma)^{-1}Y + \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} + $$ + """ if self._woodbury_vector is None: self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean) return self._woodbury_vector @property def K_chol(self): + """ + Cholesky of the prior covariance K + """ if self._K_chol is None: self._K_chol = jitchol(self._K) return self._K_chol - - - - - diff --git 
a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index a7166c8d..ee2d6250 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -176,7 +176,6 @@ class VarDTC(object): #construct a posterior object post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) - return post, log_marginal, grad_dict class VarDTCMissingData(object): @@ -365,7 +364,7 @@ class VarDTCMissingData(object): return post, log_marginal, grad_dict def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): - dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten() + dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten() dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) if het_noise: diff --git a/GPy/inference/optimization/BayesOpt.py b/GPy/inference/optimization/BayesOpt.py new file mode 100644 index 00000000..2e54a23b --- /dev/null +++ b/GPy/inference/optimization/BayesOpt.py @@ -0,0 +1,63 @@ +import numpy as np +from scipy.stats import norm +import matplotlib.pyplot as plt + + +####### Preliminary BO with standard acquisition functions ############################### +# Types of BO +# MM: Maximum (or minimum) mean +# MPI: Maximum posterior improvement +# MUI: Maximum upper interval + +def BOacquisition(X,Y,model,type_bo="MPI",type_objective="max",par_mpi = 0,z_mui=1.96,plot=True,n_eval = 500): + + # Only works in dimension 1 + # Grid where the GP will be evaluated + X_star = np.linspace(min(X)-10,max(X)+10,n_eval) + X_star = X_star[:,None] + + # Posterior GP evaluated on the grid + fest = model.predict(X_star) + + # Calculate the acquisition function + ## IF Maximize + if type_objective == "max": + if type_bo ==
"MPI": # add others here + acqu = norm.cdf((fest[0]-(1+par_mpi)*max(fest[0])) / fest[1]) + acqu = acqu/(2*max(acqu)) + if type_bo == "MM": + acqu = fest[0]/max(fest[0]) + acqu = acqu/(2*max(acqu)) + if type_bo == "MUI": + acqu = fest[0]+z_mui*np.sqrt(fest[1]) + acqu = acqu/(2*max(acqu)) + optimal_loc = np.argmax(acqu) + x_new = X_star[optimal_loc] + + ## IF Minimize + if type_objective == "min": + if type_bo == "MPI": # add others here + acqu = 1-norm.cdf((fest[0]-(1+par_mpi)*min(fest[0])) / fest[1]) + acqu = acqu/(2*max(acqu)) + if type_bo == "MM": + acqu = 1-fest[0]/max(fest[0]) + acqu = acqu/(2*max(acqu)) + if type_bo == "MUI": + acqu = -fest[0]+z_mui*np.sqrt(fest[1]) + acqu = acqu/(2*max(acqu)) + optimal_loc = np.argmax(acqu) + x_new = X_star[optimal_loc] + + # Plot GP posterior, collected data and the acquisition function + if plot: + plt.plot(X,Y , 'p') + plt.title('Acquisition function') + model.plot() + plt.plot(X_star, acqu, 'r--') + + + # Return the point where we should take the new sample + return x_new + ############################################################### + + diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 5dd6e554..0e265a64 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -9,4 +9,6 @@ from _src.mlp import MLP from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.independent_outputs import IndependentOutputs, Hierarchical from _src.coregionalize import Coregionalize -from _src.ssrbf import SSRBF +from _src.ssrbf import SSRBF # TODO: ZD: did you remove this? +from _src.ODE_UY import ODE_UY + diff --git a/GPy/kern/_src/ODE_UY.py b/GPy/kern/_src/ODE_UY.py new file mode 100644 index 00000000..510b4f7c --- /dev/null +++ b/GPy/kern/_src/ODE_UY.py @@ -0,0 +1,282 @@ +# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from kern import Kern +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +import numpy as np +from independent_outputs import index_to_slices + +class ODE_UY(Kern): + def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'): + assert input_dim ==2, "only defined for 2 input dims" + super(ODE_UY, self).__init__(input_dim, active_dims, name) + + self.variance_Y = Param('variance_Y', variance_Y, Logexp()) + self.variance_U = Param('variance_U', variance_Y, Logexp()) + self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp()) + self.lengthscale_U = Param('lengthscale_U', lengthscale_Y, Logexp()) + + self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U) + + def K(self, X, X2=None): + # model : a * dy/dt + b * y = U + #lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay + + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + if X2 is None: + X2,slices2 = X,slices + K = np.zeros((X.shape[0], X.shape[0])) + else: + X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) + K = np.zeros((X.shape[0], X2.shape[0])) + + + #rdist = X[:,0][:,None] - X2[:,0][:,None].T + rdist = X - X2.T + ly=1/self.lengthscale_Y + lu=np.sqrt(3)/self.lengthscale_U + #iu=self.input_lengthU #dimention of U + Vu=self.variance_U + Vy=self.variance_Y + #Vy=ly/2 + #stop + + + # kernel for kuu matern3/2 + kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist)) + + # kernel for kyy + k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2 + k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 + k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 ) + kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist)) + + + # cross covariance function + kyu3 = lambda 
dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly))) + #kyu3 = lambda dist: 0 + + k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) ) + #k1cros = lambda dist:0 + + k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 ) + #k2cros = lambda dist:0 + + Vyu=np.sqrt(Vy*ly*2) + + # cross covariance kuy + kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy + kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy + # cross covariance kyu + kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu + kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu + + + for i, s1 in enumerate(slices): + for j, s2 in enumerate(slices2): + for ss1 in s1: + for ss2 in s2: + if i==0 and j==0: + K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2])) + elif i==0 and j==1: + #K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) ) + K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) ) + elif i==1 and j==1: + K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2])) + else: + #K[ss1,ss2]= 0 + #K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) ) + K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) ) + return K + + + + def Kdiag(self, X): + """Compute the diagonal of the covariance matrix associated to X.""" + Kdiag = np.zeros(X.shape[0]) + ly=1/self.lengthscale_Y + lu=np.sqrt(3)/self.lengthscale_U + + Vu = self.variance_U + Vy=self.variance_Y + + k1 = (2*lu+ly)/(lu+ly)**2 + k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2 + k3 = 1/(lu+ly) + (lu)/(lu+ly)**2 + + slices = index_to_slices(X[:,-1]) + + for i, ss1 in enumerate(slices): + for s1 in ss1: + if i==0: + Kdiag[s1]+= self.variance_U + elif i==1: + Kdiag[s1]+= Vu*Vy*(k1+k2+k3) + else: + raise ValueError, "invalid input/output index" + #Kdiag[slices[0][0]]+= self.variance_U #matern32 diag + 
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag + return Kdiag + + + def update_gradients_full(self, dL_dK, X, X2=None): + """derivative of the covariance matrix with respect to the parameters.""" + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + if X2 is None: + X2,slices2 = X,slices + else: + X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) + #rdist = X[:,0][:,None] - X2[:,0][:,None].T + + rdist = X - X2.T + ly=1/self.lengthscale_Y + lu=np.sqrt(3)/self.lengthscale_U + + Vu=self.variance_U + Vy=self.variance_Y + Vyu = np.sqrt(Vy*ly*2) + dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy) + dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly) + + rd=rdist.shape + dktheta1 = np.zeros(rd) + dktheta2 = np.zeros(rd) + dkUdvar = np.zeros(rd) + dkYdvar = np.zeros(rd) + + # dk dtheta for UU + UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist) + UUdtheta2 = lambda dist: 0 + #UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist) + UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist)) + + # dk dtheta for YY + + dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3 + + dk2theta1 = lambda dist: (1.0)*( + np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2) + +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3) + +np.exp(-dist*ly)*2*(ly-lu)**(-2) + +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3) + ) + + dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist) + + #dktheta1 = lambda dist: self.variance_U*self.variance_Y*(dk1theta1+dk2theta1+dk3theta1) + + + + + dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) ) + + dk2theta2 =lambda dist: 1*( + np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) ) + +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) 
-1+(2*lu-ly)*(-2)*(ly-lu)**(-1) ) + ) + + dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3 + + #dktheta2 = lambda dist: self.variance_U*self.variance_Y*(dk1theta2 + dk2theta2 +dk3theta2) + + # kyy kernel + + k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2 + k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 + k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 ) + #dkdvar = k1+k2+k3 + + + + # cross covariance function + kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly))) + + k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) ) + + k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 ) + # cross covariance kuy + kuyp = lambda dist:(kyu3(dist)) #t>0 kuy + kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy + # cross covariance kyu + kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu + kyun = lambda dist:(kyu3(-dist)) #t<0 kyu + + # dk dtheta for UY + + + dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) ) + dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\ + (lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) ) + + dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) ) + dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \ + np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) ) + + dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \ + lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\ + 
lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\ + lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) ) + + dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\ + lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\ + np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\ + np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\ + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 ) + + dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy + dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy + # cross covariance kyu + dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu + dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu + + dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy + dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy + # cross covariance kyu + dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu + dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu + + + for i, s1 in enumerate(slices): + for j, s2 in enumerate(slices2): + for ss1 in s1: + for ss2 in s2: + if i==0 and j==0: + #target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2])) + dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2])) + dktheta2[ss1,ss2] = 0 + dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2])) + dkYdvar[ss1,ss2] = 0 + elif i==0 and j==1: + ########target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) + #np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) + #dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) ) + #dktheta2[ss1,ss2] 
= np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) ) + dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) ) + dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) ) + dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) ) + dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) ) + elif i==1 and j==1: + #target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2])) + dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) + dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) + dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) ) + dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) ) + else: + #######target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) ) + #dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ) + #dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ) + dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , 
Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) ) + dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2])) + dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) ) + dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2])) + + #stop + self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu + + self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy + + self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu + + self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly + diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 97afd1f0..57e611ed 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -23,7 +23,7 @@ class Add(CombinationKernel): If a list of parts (of this kernel!) `which_parts` is given, only the parts of the list are taken to compute the covariance. 
""" - assert X.shape[1] == self.input_dim + assert X.shape[1] > max(np.r_[self.active_dims]) if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -33,7 +33,7 @@ class Add(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def Kdiag(self, X, which_parts=None): - assert X.shape[1] == self.input_dim + assert X.shape[1] > max(np.r_[self.active_dims]) if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -58,7 +58,12 @@ class Add(CombinationKernel): :type X2: np.ndarray (num_inducing x input_dim)""" target = np.zeros(X.shape) - [target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] + [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts] + return target + + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] return target def psi0(self, Z, variational_posterior): @@ -131,7 +136,7 @@ class Add(CombinationKernel): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. - target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): @@ -151,8 +156,8 @@ class Add(CombinationKernel): else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. 
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) - target_mu[:, p1.active_dims] += a - target_S[:, p1.active_dims] += b + target_mu += a + target_S += b return target_mu, target_S def _getstate(self): @@ -165,4 +170,11 @@ class Add(CombinationKernel): def _setstate(self, state): super(Add, self)._setstate(state) - + def add(self, other, name='sum'): + if isinstance(other, Add): + other_params = other._parameters_[:] + for p in other_params: + other.remove_parameter(p) + self.add_parameters(*other_params) + else: self.add_parameter(other) + return self \ No newline at end of file diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 85cb95bc..cf015d02 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -8,7 +8,7 @@ import itertools def index_to_slices(index): """ - take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. + take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. e.g. >>> index = np.asarray([0,0,0,1,1,1,2,2,2]) @@ -39,73 +39,102 @@ class IndependentOutputs(CombinationKernel): The index of the functions is given by the last column in the input X the rest of the columns of X are passed to the underlying kernel for computation (in blocks). - - Kern is wrapped with a slicer metaclass + + :param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels + the indices in the index_dim, index the kernels you gave! 
""" - def __init__(self, kern, index_dim=-1, name='independ'): + def __init__(self, kernels, index_dim=-1, name='independ'): assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" - super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name) + if not isinstance(kernels, list): + self.single_kern = True + self.kern = kernels + kernels = [kernels] + else: + self.single_kern = False + self.kern = kernels + super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) self.index_dim = index_dim - self.kern = kern - #self.add_parameters(self.kern) + self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0]) def K(self,X ,X2=None): slices = index_to_slices(X[:,self.index_dim]) if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)] else: slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] return target def Kdiag(self,X): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape[0]) - [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] return target def update_gradients_full(self,dL_dK,X,X2=None): - target = np.zeros(self.kern.size) - 
def collate_grads(dL, X, X2): - self.kern.update_gradients_full(dL,X,X2) - target[:] += self.kern.gradient - slices = index_to_slices(X[:,self.index_dim]) + if self.single_kern: target = np.zeros(self.kern.size) + else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + def collate_grads(kern, i, dL, X, X2): + kern.update_gradients_full(dL,X,X2) + if self.single_kern: target[:] += kern.gradient + else: target[i][:] += kern.gradient if X2 is None: - [[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))] else: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] - self.kern.gradient = target + [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))] + if self.single_kern: kern.gradient = target + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): target = np.zeros(X.shape) - slices = index_to_slices(X[:,self.index_dim]) if X2 is None: - [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + # TODO: make use of index_to_slices + values = np.unique(X[:,self.index_dim]) + slices = [X[:,self.index_dim]==i for i in values] + [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) + for kern, s in zip(self.kerns, slices)] + #slices = index_to_slices(X[:,self.index_dim]) + #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) + # for s in slices_i] for kern, slices_i 
in zip(self.kerns, slices)] + #import ipdb;ipdb.set_trace() + #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), + # np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss])) + # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)] else: - slices2 = index_to_slices(X2[:,self.index_dim]) - [[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + values = np.unique(X[:,self.index_dim]) + slices = [X[:,self.index_dim]==i for i in values] + slices2 = [X2[:,self.index_dim]==i for i in values] + [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) + for kern, s, s2 in zip(self.kerns, slices, slices2)] + # TODO: make work with index_to_slices + #slices = index_to_slices(X[:,self.index_dim]) + #slices2 = index_to_slices(X2[:,self.index_dim]) + #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] return target def gradients_X_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape) - [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] + [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] return target def update_gradients_diag(self, dL_dKdiag, X): - target = np.zeros(self.kern.size) - def collate_grads(dL, X): - self.kern.update_gradients_diag(dL,X) - target[:] += self.kern.gradient slices = index_to_slices(X[:,self.index_dim]) - [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] - self.kern.gradient = target + if self.single_kern: target = 
np.zeros(self.kern.size) + else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + def collate_grads(kern, i, dL, X): + kern.update_gradients_diag(dL,X) + if self.single_kern: target[:] += kern.gradient + else: target[i][:] += kern.gradient + [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))] + if self.single_kern: kern.gradient = target + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] -class Hierarchical(Kern): +class Hierarchical(CombinationKernel): """ A kernel which can reopresent a simple hierarchical model. @@ -116,7 +145,7 @@ class Hierarchical(Kern): The index of the functions is given by additional columns in the input X. """ - def __init__(self, kerns, name='hierarchy'): + def __init__(self, kerns, name='hierarchy'): assert all([k.input_dim==kerns[0].input_dim for k in kerns]) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) self.kerns = kerns diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 5924d250..31fa8690 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -140,12 +140,7 @@ class Kern(Parameterized): """ assert isinstance(other, Kern), "only kernels can be added to kernels..." from add import Add - kernels = [] - if isinstance(self, Add): kernels.extend(self._parameters_) - else: kernels.append(self) - if isinstance(other, Add): kernels.extend(other._parameters_) - else: kernels.append(other) - return Add(kernels, name=name) + return Add([self, other], name=name) def __mul__(self, other): """ Here we overload the '*' operator.
See self.prod for more information""" diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index ff33cc24..c355ccad 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -4,6 +4,7 @@ Created on 11 Mar 2014 @author: maxz ''' from ...core.parameterization.parameterized import ParametersChangedMeta +import numpy as np class KernCallsViaSlicerMeta(ParametersChangedMeta): def __call__(self, *args, **kw): @@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) - instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True) - instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True) + instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True) + instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True) instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) - instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True) - instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True) + 
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True) + instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True) instance.parameters_changed() return instance -def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False): +def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False): """ This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. The different switches are: @@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False """ if derivative: if diag: - def x_slice_wrapper(dL_dK, X): + def x_slice_wrapper(dL_dKdiag, X): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret = np.zeros(X.shape) X = kern._slice_X(X) if not kern._sliced_X else X + # if the return value is of shape X.shape, we need to make sure to return the right shape kern._sliced_X += 1 try: - ret = operation(dL_dK, X) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X) + else: ret = operation(dL_dKdiag, X) except: raise finally: @@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret elif psi_stat: def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 + # if the return value is of shape X.shape, we need to make sure to return the right shape try: - ret = operation(dL_dpsi0, 
dL_dpsi1, dL_dpsi2, Z, variational_posterior) + if ret_X_not_sliced: + ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) + r2 = ret[:2] + ret[0] = ret1 + ret[1] = ret2 + ret[0][:, kern.active_dims] = r2[0] + ret[1][:, kern.active_dims] = r2[1] + del r2 + else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: @@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret elif psi_stat_Z: def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: ret = np.zeros(Z.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 try: - ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + if ret_X_not_sliced: + ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: @@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret else: def x_slice_wrapper(dL_dK, X, X2=None): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret = np.zeros(X.shape) X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 kern._sliced_X += 1 try: - ret = operation(dL_dK, X, X2) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) + else: ret = operation(dL_dK, X, X2) except: raise finally: diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 15e23d5c..7d9eeac2 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -312,5 +312,4 @@ class Linear(Kern): return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]! 
def input_sensitivity(self): - if self.ARD: return self.variances - else: return self.variances.repeat(self.input_dim) + return np.ones(self.input_dim) * self.variances diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index f3b2b50f..e00f38c3 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -51,15 +51,15 @@ class Prod(CombinationKernel): def gradients_X(self, dL_dK, X, X2=None): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) - target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) + target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) + target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) return target def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) - target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) + target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) + target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) return target diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index a9e837a9..b6fea5ef 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -152,12 +152,7 @@ class Stationary(Kern): This term appears in derviatives. 
""" dist = self._scaled_dist(X, X2).copy() - if X2 is None: - nondiag = util.diag.offdiag_view(dist) - nondiag[:] = 1./nondiag - return dist - else: - return 1./np.where(dist != 0., dist, np.inf) + return 1./np.where(dist != 0., dist, np.inf) def gradients_X(self, dL_dK, X, X2=None): """ diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 6c22e3d1..371fbe63 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -95,7 +95,7 @@ class Bernoulli(Likelihood): else: return np.nan - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): """ Likelihood function given link(f) @@ -106,7 +106,7 @@ class Bernoulli(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in bernoulli + :param Y_metadata: Y_metadata not used in bernoulli :returns: likelihood evaluated for this point :rtype: float @@ -118,7 +118,7 @@ class Bernoulli(Likelihood): objective = np.where(y, link_f, 1.-link_f) return np.exp(np.sum(np.log(objective))) - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): """ Log Likelihood function given link(f) @@ -129,7 +129,7 @@ class Bernoulli(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in bernoulli + :param Y_metadata: Y_metadata not used in bernoulli :returns: log likelihood evaluated at points link(f) :rtype: float """ @@ -140,7 +140,7 @@ class Bernoulli(Likelihood): np.seterr(**state) return np.sum(objective) - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ Gradient of the pdf at y, given link(f) w.r.t link(f) @@ -151,7 +151,7 @@ class Bernoulli(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in bernoulli + :param Y_metadata: Y_metadata not used in bernoulli :returns: 
gradient of log likelihood evaluated at points link(f) :rtype: Nx1 array """ @@ -162,7 +162,7 @@ class Bernoulli(Likelihood): np.seterr(**state) return grad - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): """ Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) @@ -175,7 +175,7 @@ class Bernoulli(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in bernoulli + :param Y_metadata: Y_metadata not used in bernoulli :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :rtype: Nx1 array @@ -190,7 +190,7 @@ class Bernoulli(Likelihood): np.seterr(**state) return d2logpdf_dlink2 - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) @@ -201,7 +201,7 @@ class Bernoulli(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in bernoulli + :param Y_metadata: Y_metadata not used in bernoulli :returns: third derivative of log likelihood evaluated at points link(f) :rtype: Nx1 array """ diff --git a/GPy/likelihoods/exponential.py b/GPy/likelihoods/exponential.py index 8d2e8cdc..1dd548f6 100644 --- a/GPy/likelihoods/exponential.py +++ b/GPy/likelihoods/exponential.py @@ -18,13 +18,12 @@ class Exponential(Likelihood): L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! 
$$ """ - def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): - super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance) + def __init__(self,gp_link=None): + if gp_link is None: + gp_link = link_functions.Log() + super(Exponential, self).__init__(gp_link, 'ExpLikelihood') - def _preprocess_values(self,Y): - return Y - - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): """ Likelihood function given link(f) @@ -35,16 +34,15 @@ class Exponential(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in exponential distribution + :param Y_metadata: Y_metadata which is not used in exponential distribution :returns: likelihood evaluated for this point :rtype: float """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape log_objective = link_f*np.exp(-y*link_f) return np.exp(np.sum(np.log(log_objective))) - #return np.exp(np.sum(-y/link_f - np.log(link_f) )) - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): """ Log Likelihood Function given link(f) @@ -55,17 +53,16 @@ class Exponential(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in exponential distribution + :param Y_metadata: Y_metadata which is not used in exponential distribution :returns: likelihood evaluated for this point :rtype: float """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape log_objective = np.log(link_f) - y*link_f - #logpdf_link = np.sum(-np.log(link_f) - y/link_f) return np.sum(log_objective) - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ Gradient of the log likelihood function at y, given link(f) w.r.t link(f) @@ -76,7 +73,7 @@ class Exponential(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - 
:param extra_data: extra_data which is not used in exponential distribution + :param Y_metadata: Y_metadata which is not used in exponential distribution :returns: gradient of likelihood evaluated at points :rtype: Nx1 array @@ -86,7 +83,7 @@ class Exponential(Likelihood): #grad = y/(link_f**2) - 1./link_f return grad - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) @@ -99,7 +96,7 @@ class Exponential(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in exponential distribution + :param Y_metadata: Y_metadata which is not used in exponential distribution :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -112,7 +109,7 @@ class Exponential(Likelihood): #hess = -2*y/(link_f**3) + 1/(link_f**2) return hess - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) @@ -123,7 +120,7 @@ class Exponential(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in exponential distribution + :param Y_metadata: Y_metadata which is not used in exponential distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ @@ -132,18 +129,6 @@ class Exponential(Likelihood): #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) return d3lik_dlink3 - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.gp_link.transf(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - return self.gp_link.transf(gp)**2 - def samples(self, gp): """ Returns a set of samples of observations based 
on a given value of the latent variable. diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py index 0ac70a9f..a6436616 100644 --- a/GPy/likelihoods/gamma.py +++ b/GPy/likelihoods/gamma.py @@ -1,11 +1,12 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade +# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np from scipy import stats,special import scipy as sp -from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +from ..core.parameterization import Param import link_functions from likelihood import Likelihood @@ -18,14 +19,16 @@ class Gamma(Likelihood): \\alpha_{i} = \\beta y_{i} """ - def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.): - self.beta = beta - super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance) + def __init__(self,gp_link=None,beta=1.): + if gp_link is None: + gp_link = link_functions.Log() + super(Gamma, self).__init__(gp_link, 'Gamma') - def _preprocess_values(self,Y): - return Y + self.beta = Param('beta', beta) + self.add_parameter(self.beta) + self.beta.fix()#TODO: gradients! - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): """ Likelihood function given link(f) @@ -37,7 +40,7 @@ class Gamma(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in gamma distribution :returns: likelihood evaluated for this point :rtype: float """ @@ -47,7 +50,7 @@ class Gamma(Likelihood): objective = (y**(alpha - 1.)
* np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha) return np.exp(np.sum(np.log(objective))) - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): """ Log Likelihood Function given link(f) @@ -59,7 +62,7 @@ class Gamma(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in gamma distribution :returns: likelihood evaluated for this point :rtype: float @@ -71,7 +74,7 @@ class Gamma(Likelihood): log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y return np.sum(log_objective) - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ Gradient of the log likelihood function at y, given link(f) w.r.t link(f) @@ -83,7 +86,7 @@ class Gamma(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in gamma distribution + :param Y_metadata: Y_metadata which is not used in gamma distribution :returns: gradient of likelihood evaluated at points :rtype: Nx1 array @@ -94,7 +97,7 @@ class Gamma(Likelihood): #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta return grad - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t link(f) i.e.
second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) @@ -108,7 +111,7 @@ class Gamma(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in gamma distribution + :param Y_metadata: Y_metadata which is not used in gamma distribution :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -122,7 +125,7 @@ class Gamma(Likelihood): #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta return hess - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) @@ -134,22 +137,10 @@ class Gamma(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in gamma distribution + :param Y_metadata: Y_metadata which is not used in gamma distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) return d3lik_dlink3 - - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.gp_link.transf(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - return self.gp_link.transf(gp)/self.beta diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 0c73e485..6f08b4b4 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -35,12 +35,7 @@ class Gaussian(Likelihood): if gp_link is None: gp_link = link_functions.Identity() - if isinstance(gp_link, link_functions.Identity): - analytical_variance = 
True - analytical_mean = True - else: - analytical_variance = False - analytical_mean = False + assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link" super(Gaussian, self).__init__(gp_link, name=name) @@ -51,14 +46,12 @@ class Gaussian(Likelihood): self.log_concave = True def betaY(self,Y,Y_metadata=None): + #TODO: ~Ricardo this does not live here return Y/self.gaussian_variance(Y_metadata) def gaussian_variance(self, Y_metadata=None): return self.variance - def covariance_matrix(self, Y, Y_metadata=None): - return np.eye(Y.shape[0]) * self.variance - def update_gradients(self, grad): self.variance.gradient = grad @@ -99,10 +92,10 @@ class Gaussian(Likelihood): def predictive_variance(self, mu, sigma, predictive_mean=None): return self.variance + sigma**2 - def predictive_quantiles(self, mu, var, quantiles, Y_metadata): - return [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles] + def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None): + return [stats.norm.ppf(q/100.)*np.sqrt(var + self.variance) + mu for q in quantiles] - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): """ Likelihood function given link(f) @@ -113,14 +106,14 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: likelihood evaluated for this point :rtype: float """ #Assumes no covariance, exp, sum, log for numerical stability return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance))))) - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): """ Log likelihood function given link(f) @@ -131,7 +124,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param 
Y_metadata: Y_metadata not used in gaussian :returns: log likelihood evaluated for this point :rtype: float """ @@ -141,7 +134,7 @@ class Gaussian(Likelihood): return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi)) - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ Gradient of the pdf at y, given link(f) w.r.t link(f) @@ -152,7 +145,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: gradient of log likelihood evaluated at points link(f) :rtype: Nx1 array """ @@ -161,7 +154,7 @@ class Gaussian(Likelihood): grad = s2_i*y - s2_i*link_f return grad - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): """ Hessian at y, given link_f, w.r.t link_f. i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) @@ -175,7 +168,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :rtype: Nx1 array @@ -188,7 +181,7 @@ class Gaussian(Likelihood): hess = -(1.0/self.variance)*np.ones((N, 1)) return hess - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) @@ -199,7 +192,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: third derivative of log likelihood evaluated at points link(f) :rtype: Nx1 array """ @@ 
-208,7 +201,7 @@ class Gaussian(Likelihood): d3logpdf_dlink3 = np.zeros((N,1)) return d3logpdf_dlink3 - def dlogpdf_link_dvar(self, link_f, y, extra_data=None): + def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): """ Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) @@ -219,7 +212,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :rtype: float """ @@ -230,7 +223,7 @@ class Gaussian(Likelihood): dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e)) return np.sum(dlik_dsigma) # Sure about this sum? - def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): + def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): """ Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance) @@ -241,7 +234,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :rtype: Nx1 array """ @@ -250,7 +243,7 @@ class Gaussian(Likelihood): dlik_grad_dsigma = -s_4*y + s_4*link_f return dlik_grad_dsigma - def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): + def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): """ Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance) @@ -261,7 +254,7 @@ class Gaussian(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param Y_metadata: Y_metadata not used in gaussian :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter :rtype: Nx1 array """ 
@@ -271,16 +264,16 @@ class Gaussian(Likelihood): d2logpdf_dlink2_dvar = np.ones((N,1))*s_4 return d2logpdf_dlink2_dvar - def dlogpdf_link_dtheta(self, f, y, extra_data=None): - dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) + def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): + dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) return np.asarray([[dlogpdf_dvar]]) - def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): - dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) + def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): + dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) return dlogpdf_dlink_dvar - def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): - d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) + def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): + d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) return d2logpdf_dlink2_dvar def _mean(self, gp): diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index e7bad74a..aabe93ef 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -153,6 +153,10 @@ class Likelihood(Parameterized): return mean + def _conditional_mean(self, f): + """Quadrature calculation of the conditional mean: E(Y_star|f)""" + raise NotImplementedError, "implement this function to make predictions" + def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): """ Numerical approximation to the predictive variance: V(Y_star) @@ -204,31 +208,31 @@ class Likelihood(Parameterized): # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 return exp_var + var_exp - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): raise NotImplementedError - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): raise 
NotImplementedError - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, Y_metadata=None): raise NotImplementedError - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): raise NotImplementedError - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_link_dtheta(self, link_f, y, extra_data=None): + def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None): + def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None): raise NotImplementedError - def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None): + def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None): raise NotImplementedError - def pdf(self, f, y, extra_data=None): + def pdf(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the likelihood (pdf) using it @@ -239,14 +243,14 @@ class Likelihood(Parameterized): :type f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param Y_metadata: Y_metadata which is not used in student t distribution - not used :returns: likelihood evaluated for this point :rtype: float """ link_f = self.gp_link.transf(f) - return self.pdf_link(link_f, y, extra_data=extra_data) + return self.pdf_link(link_f, y, Y_metadata=Y_metadata) - def logpdf(self, f, y, extra_data=None): + def logpdf(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the log likelihood (log pdf) using it @@ -257,14 +261,14 @@ class Likelihood(Parameterized): :type f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param Y_metadata: Y_metadata which is not used in student t distribution - not 
used :returns: log likelihood evaluated for this point :rtype: float """ link_f = self.gp_link.transf(f) - return self.logpdf_link(link_f, y, extra_data=extra_data) + return self.logpdf_link(link_f, y, Y_metadata=Y_metadata) - def dlogpdf_df(self, f, y, extra_data=None): + def dlogpdf_df(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the derivative of log likelihood using it Uses the Faa di Bruno's formula for the chain rule @@ -276,16 +280,16 @@ class Likelihood(Parameterized): :type f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param Y_metadata: Y_metadata which is not used in student t distribution - not used :returns: derivative of log likelihood evaluated for this point :rtype: 1xN array """ link_f = self.gp_link.transf(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) return chain_1(dlogpdf_dlink, dlink_df) - def d2logpdf_df2(self, f, y, extra_data=None): + def d2logpdf_df2(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the second derivative of log likelihood using it Uses the Faa di Bruno's formula for the chain rule @@ -297,18 +301,18 @@ class Likelihood(Parameterized): :type f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param Y_metadata: Y_metadata which is not used in student t distribution - not used :returns: second derivative of log likelihood evaluated for this point (diagonal only) :rtype: 1xN array """ link_f = self.gp_link.transf(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) + d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, 
extra_data=extra_data) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) d2link_df2 = self.gp_link.d2transf_df2(f) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) - def d3logpdf_df3(self, f, y, extra_data=None): + def d3logpdf_df3(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the third derivative of log likelihood using it Uses the Faa di Bruno's formula for the chain rule @@ -320,44 +324,44 @@ class Likelihood(Parameterized): :type f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param Y_metadata: Y_metadata which is not used in student t distribution - not used :returns: third derivative of log likelihood evaluated for this point :rtype: float """ link_f = self.gp_link.transf(f) - d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data) + d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) + d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2link_df2 = self.gp_link.d2transf_df2(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) d3link_df3 = self.gp_link.d3transf_df3(f) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) - def dlogpdf_dtheta(self, f, y, extra_data=None): + def dlogpdf_dtheta(self, f, y, Y_metadata=None): """ TODO: Doc strings """ if self.size > 0: link_f = self.gp_link.transf(f) - return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data) + return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata) else: #Is no parameters so return an empty array for its derivatives return np.zeros([1, 0]) - def dlogpdf_df_dtheta(self, f, y, extra_data=None): + def 
dlogpdf_df_dtheta(self, f, y, Y_metadata=None): """ TODO: Doc strings """ if self.size > 0: link_f = self.gp_link.transf(f) dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) return chain_1(dlogpdf_dlink_dtheta, dlink_df) else: #Is no parameters so return an empty array for its derivatives return np.zeros([f.shape[0], 0]) - def d2logpdf_df2_dtheta(self, f, y, extra_data=None): + def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): """ TODO: Doc strings """ @@ -365,17 +369,17 @@ class Likelihood(Parameterized): link_f = self.gp_link.transf(f) dlink_df = self.gp_link.dtransf_df(f) d2link_df2 = self.gp_link.d2transf_df2(f) - d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) else: #Is no parameters so return an empty array for its derivatives return np.zeros([f.shape[0], 0]) - def _laplace_gradients(self, f, y, extra_data=None): - dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data) - dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data) - d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data) + def _laplace_gradients(self, f, y, Y_metadata=None): + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata) + dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata) + d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata) #Parameters are stacked vertically. 
Must be listed in same order as 'get_param_names' # ensure we have gradients for every parameter we want to optimize @@ -390,7 +394,7 @@ class Likelihood(Parameterized): def predictive_values(self, mu, var, full_cov=False, Y_metadata=None): """ - Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction. + Compute mean, variance of the predictive distibution. :param mu: mean of the latent variable, f, of posterior :param var: variance of the latent variable, f, of posterior @@ -407,10 +411,7 @@ class Likelihood(Parameterized): #compute the quantiles by sampling!!! N_samp = 1000 s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu - #ss_f = s.flatten() - #ss_y = self.samples(ss_f, Y_metadata) ss_y = self.samples(s, Y_metadata) - #ss_y = ss_y.reshape(mu.shape[0], N_samp) return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] diff --git a/GPy/likelihoods/mixed_noise.py b/GPy/likelihoods/mixed_noise.py index b4960f3a..c2435508 100644 --- a/GPy/likelihoods/mixed_noise.py +++ b/GPy/likelihoods/mixed_noise.py @@ -11,7 +11,7 @@ import itertools class MixedNoise(Likelihood): def __init__(self, likelihoods_list, name='mixed_noise'): - + #NOTE at the moment this likelihood only works for using a list of gaussians super(Likelihood, self).__init__(name=name) self.add_parameters(*likelihoods_list) @@ -24,10 +24,11 @@ class MixedNoise(Likelihood): variance = np.zeros(ind.size) for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))): variance[ind==j] = lik.variance - return variance[:,None] + return variance def betaY(self,Y,Y_metadata): - return Y/self.gaussian_variance(Y_metadata=Y_metadata) + #TODO not here. 
+ return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None] def update_gradients(self, gradients): self.gradient = gradients @@ -38,34 +39,27 @@ class MixedNoise(Likelihood): return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))]) def predictive_values(self, mu, var, full_cov=False, Y_metadata=None): - if all([isinstance(l, Gaussian) for l in self.likelihoods_list]): - ind = Y_metadata['output_index'].flatten() - _variance = np.array([self.likelihoods_list[j].variance for j in ind ]) - if full_cov: - var += np.eye(var.shape[0])*_variance - else: - var += _variance - return mu, var + ind = Y_metadata['output_index'].flatten() + _variance = np.array([self.likelihoods_list[j].variance for j in ind ]) + if full_cov: + var += np.eye(var.shape[0])*_variance else: - raise NotImplementedError + var += _variance + return mu, var - def predictive_variance(self, mu, sigma, **other_shit): - if isinstance(noise_index,int): - _variance = self.variance[noise_index] - else: - _variance = np.array([ self.variance[j] for j in noise_index ])[:,None] + def predictive_variance(self, mu, sigma, Y_metadata): + _variance = self.gaussian_variance(Y_metadata) return _variance + sigma**2 - - def covariance_matrix(self, Y, Y_metadata): - #assert all([isinstance(l, Gaussian) for l in self.likelihoods_list]) - #ind = Y_metadata['output_index'].flatten() - #variance = np.zeros(Y.shape[0]) - #for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))): - # variance[ind==j] = lik.variance - #return np.diag(variance) - return np.diag(self.gaussian_variance(Y_metadata).flatten()) - + def predictive_quantiles(self, mu, var, quantiles, Y_metadata): + ind = Y_metadata['output_index'].flatten() + outputs = np.unique(ind) + Q = np.zeros( (mu.size,len(quantiles)) ) + for j in outputs: + q = self.likelihoods_list[j].predictive_quantiles(mu[ind==j,:], + var[ind==j,:],quantiles,Y_metadata=None) + Q[ind==j,:] = np.hstack(q) + return [q[:,None] for q in Q.T] 
def samples(self, gp, Y_metadata): """ @@ -84,4 +78,3 @@ class MixedNoise(Likelihood): _ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()]) Ysim[flt,:] = _ysim.reshape(n1,N2) return Ysim - diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py index c0e2c81f..862f873f 100644 --- a/GPy/likelihoods/poisson.py +++ b/GPy/likelihoods/poisson.py @@ -25,10 +25,13 @@ class Poisson(Likelihood): super(Poisson, self).__init__(gp_link, name='Poisson') - def _preprocess_values(self,Y): - return Y + def _conditional_mean(self, f): + """ + the expected value of y given a value of f + """ + return self.gp_link.transf(gp) - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): """ Likelihood function given link(f) @@ -39,14 +42,14 @@ class Poisson(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in poisson distribution :returns: likelihood evaluated for this point :rtype: float """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape return np.prod(stats.poisson.pmf(y,link_f)) - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): """ Log Likelihood Function given link(f) @@ -57,7 +60,7 @@ class Poisson(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in poisson distribution :returns: likelihood evaluated for this point :rtype: float @@ -65,7 +68,7 @@ class Poisson(Likelihood): assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, 
Y_metadata=None): """ Gradient of the log likelihood function at y, given link(f) w.r.t link(f) @@ -76,7 +79,7 @@ class Poisson(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in poisson distribution :returns: gradient of likelihood evaluated at points :rtype: Nx1 array @@ -84,7 +87,7 @@ class Poisson(Likelihood): assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape return y/link_f - 1 - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) @@ -97,7 +100,7 @@ class Poisson(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in poisson distribution :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -112,7 +115,7 @@ class Poisson(Likelihood): #transf = self.gp_link.transf(gp) #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) @@ -123,7 +126,7 @@ class Poisson(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in poisson distribution + :param Y_metadata: Y_metadata which is not used in poisson distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index 15fd9fa0..47efd443 100644 --- a/GPy/likelihoods/student_t.py 
+++ b/GPy/likelihoods/student_t.py @@ -46,7 +46,7 @@ class StudentT(Likelihood): self.sigma2.gradient = grads[0] self.v.gradient = grads[1] - def pdf_link(self, link_f, y, extra_data=None): + def pdf_link(self, link_f, y, Y_metadata=None): """ Likelihood function given link(f) @@ -57,7 +57,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: likelihood evaluated for this point :rtype: float """ @@ -70,7 +70,7 @@ class StudentT(Likelihood): ) return np.prod(objective) - def logpdf_link(self, link_f, y, extra_data=None): + def logpdf_link(self, link_f, y, Y_metadata=None): """ Log Likelihood Function given link(f) @@ -81,7 +81,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: likelihood evaluated for this point :rtype: float @@ -99,7 +99,7 @@ class StudentT(Likelihood): ) return np.sum(objective) - def dlogpdf_dlink(self, link_f, y, extra_data=None): + def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ Gradient of the log likelihood function at y, given link(f) w.r.t link(f) @@ -110,7 +110,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: gradient of likelihood evaluated at points :rtype: Nx1 array @@ -120,7 +120,7 @@ class StudentT(Likelihood): grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad - def d2logpdf_dlink2(self, link_f, y, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t 
link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) @@ -133,7 +133,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -146,7 +146,7 @@ class StudentT(Likelihood): hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess - def d3logpdf_dlink3(self, link_f, y, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) @@ -157,7 +157,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ @@ -168,7 +168,7 @@ class StudentT(Likelihood): ) return d3lik_dlink3 - def dlogpdf_link_dvar(self, link_f, y, extra_data=None): + def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): """ Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) @@ -179,7 +179,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ @@ -188,7 +188,7 @@ class StudentT(Likelihood): dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) return np.sum(dlogpdf_dvar) - def dlogpdf_dlink_dvar(self, link_f, 
y, extra_data=None): + def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): """ Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) @@ -199,7 +199,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ @@ -208,7 +208,7 @@ class StudentT(Likelihood): dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlogpdf_dlink_dvar - def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): + def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): """ Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) @@ -219,7 +219,7 @@ class StudentT(Likelihood): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution + :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ @@ -230,25 +230,22 @@ class StudentT(Likelihood): ) return d2logpdf_dlink2_dvar - def dlogpdf_link_dtheta(self, f, y, extra_data=None): - dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) + def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): + dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet return np.hstack((dlogpdf_dvar, dlogpdf_dv)) - def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): - dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) + def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): + dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) 
#FIXME: Not done yet return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) - def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): - d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) + def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): + d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): - """ - Compute mean of the prediction - """ return self.gp_link.transf(mu) # only true in link is monotoci, which it is. def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index 5846903d..075b8556 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -3,6 +3,7 @@ import numpy as np from ..core.mapping import Mapping +from ..core.parameterization import Param class Linear(Mapping): """ @@ -16,38 +17,22 @@ class Linear(Mapping): :type X: ndarray :param output_dim: dimension of output. 
:type output_dim: int - + """ - def __init__(self, input_dim=1, output_dim=1): - self.name = 'linear' - Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) - self.num_params = self.output_dim*(self.input_dim + 1) - self.W = np.array((self.input_dim, self.output_dim)) - self.bias = np.array(self.output_dim) - self.randomize() - - def _get_param_names(self): - return sum([['W_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.input_dim)], []) + ['bias_%i' % d for d in range(self.output_dim)] - - def _get_params(self): - return np.hstack((self.W.flatten(), self.bias)) - - def _set_params(self, x): - self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy() - self.bias = x[self.input_dim*self.output_dim:].copy() - def randomize(self): - self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1) - self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1) + def __init__(self, input_dim=1, output_dim=1, name='linear_map'): + Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) + self.W = Param('W',np.array((self.input_dim, self.output_dim))) + self.bias = Param('bias',np.array(self.output_dim)) + self.add_parameters(self.W, self.bias) def f(self, X): return np.dot(X,self.W) + self.bias def df_dtheta(self, dL_df, X): - self._df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T - self._df_dbias = (dL_df.sum(0)) - return np.hstack((self._df_dW.flatten(), self._df_dbias)) - - def df_dX(self, dL_df, X): - return (dL_df[:, None, :]*self.W[None, :, :]).sum(2) - + df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T + df_dbias = (dL_df.sum(0)) + return np.hstack((df_dW.flatten(), df_dbias)) + + def dL_dX(self, dL_df, X): + return (dL_df[:, None, :]*self.W[None, :, :]).sum(2) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 4c6b51a3..fb821d64 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ 
-75,15 +75,19 @@ class BayesianGPLVM(SparseGP): # update for the KL divergence self.variational_prior.update_gradients_KL(self.X) - def plot_latent(self, plot_inducing=True, *args, **kwargs): - """ - See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent - """ + def plot_latent(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, plot_inducing=True, legend=True, + plot_limits=None, + aspect='auto', updates=False, **kwargs): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import dim_reduction_plots - return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs) + return dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, **kwargs) def do_test_latents(self, Y): """ diff --git a/GPy/models/gp_coregionalized_regression.py b/GPy/models/gp_coregionalized_regression.py index 6d478fd9..5854fe63 100644 --- a/GPy/models/gp_coregionalized_regression.py +++ b/GPy/models/gp_coregionalized_regression.py @@ -36,7 +36,7 @@ class GPCoregionalizedRegression(GP): #Kernel if kernel is None: - kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name) + kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name) #Likelihood likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list) diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 5e83db09..86e64a54 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -20,14 +20,14 @@ class GPRegression(GP): """ - def __init__(self, X, Y, kernel=None): + def __init__(self, X, Y, kernel=None, Y_metadata=None): if kernel is None: kernel = kern.RBF(X.shape[1]) likelihood = 
likelihoods.Gaussian() - super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression') + super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata) def _getstate(self): return GP._getstate(self) diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index 5f7e3265..b85540ce 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -67,12 +67,22 @@ class GPLVM(GP): assert self.likelihood.Y.shape[1] == 2 pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None] - mu, var, upper, lower = self.predict(Xnew) + mu, _ = self.predict(Xnew) pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) - def plot_latent(self, *args, **kwargs): + def plot_latent(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, legend=True, + plot_limits=None, + aspect='auto', updates=False, **kwargs): + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ..plotting.matplot_dep import dim_reduction_plots - return dim_reduction_plots.plot_latent(self, *args, **kwargs) + return dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, False, legend, + plot_limits, aspect, updates, **kwargs) + def plot_magnification(self, *args, **kwargs): return util.plot_latent.plot_magnification(self, *args, **kwargs) diff --git a/GPy/models/sparse_gp_coregionalized_regression.py b/GPy/models/sparse_gp_coregionalized_regression.py index a97696d2..ea6ec9f1 100644 --- a/GPy/models/sparse_gp_coregionalized_regression.py +++ b/GPy/models/sparse_gp_coregionalized_regression.py @@ -43,14 +43,14 @@ class SparseGPCoregionalizedRegression(SparseGP): #Kernel if kernel is None: - kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name) + kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name) #Likelihood likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list) #Inducing inputs list if len(Z_list): - assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.' + assert len(Z_list) == Ny, 'Number of outputs do not match length of inducing inputs list.' 
else: if isinstance(num_inducing,np.int): num_inducing = [num_inducing] * Ny diff --git a/GPy/testing/old_tests/bcgplvm_tests.py b/GPy/old_tests/bcgplvm_tests.py similarity index 100% rename from GPy/testing/old_tests/bcgplvm_tests.py rename to GPy/old_tests/bcgplvm_tests.py diff --git a/GPy/testing/old_tests/cgd_tests.py b/GPy/old_tests/cgd_tests.py similarity index 100% rename from GPy/testing/old_tests/cgd_tests.py rename to GPy/old_tests/cgd_tests.py diff --git a/GPy/testing/gp_transformation_tests.py b/GPy/old_tests/gp_transformation_tests.py similarity index 100% rename from GPy/testing/gp_transformation_tests.py rename to GPy/old_tests/gp_transformation_tests.py diff --git a/GPy/testing/gplvm_tests.py b/GPy/old_tests/gplvm_tests.py similarity index 100% rename from GPy/testing/gplvm_tests.py rename to GPy/old_tests/gplvm_tests.py diff --git a/GPy/testing/mapping_tests.py b/GPy/old_tests/mapping_tests.py similarity index 96% rename from GPy/testing/mapping_tests.py rename to GPy/old_tests/mapping_tests.py index cd28e71a..d501df1d 100644 --- a/GPy/testing/mapping_tests.py +++ b/GPy/old_tests/mapping_tests.py @@ -4,7 +4,7 @@ import unittest import numpy as np import GPy - + class MappingTests(unittest.TestCase): @@ -23,12 +23,11 @@ class MappingTests(unittest.TestCase): def test_mlpmapping(self): verbose = False - mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2) + mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2) self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose)) self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose)) - if __name__ == "__main__": print "Running unit tests, please be (very) patient..." 
unittest.main() diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/old_tests/psi_stat_expectation_tests.py similarity index 100% rename from GPy/testing/psi_stat_expectation_tests.py rename to GPy/old_tests/psi_stat_expectation_tests.py diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/old_tests/psi_stat_gradient_tests.py similarity index 100% rename from GPy/testing/psi_stat_gradient_tests.py rename to GPy/old_tests/psi_stat_gradient_tests.py diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/old_tests/sparse_gplvm_tests.py similarity index 100% rename from GPy/testing/sparse_gplvm_tests.py rename to GPy/old_tests/sparse_gplvm_tests.py diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index bf9297b9..57d932cc 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -30,7 +30,8 @@ def most_significant_input_dimensions(model, which_indices): def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True, - aspect='auto', updates=False): + plot_limits=None, + aspect='auto', updates=False, **kwargs): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance @@ -38,6 +39,8 @@ def plot_latent(model, labels=None, which_indices=None, if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) + else: + fig = ax.figure Tango.reset() if labels is None: @@ -57,15 +60,28 @@ def plot_latent(model, labels=None, which_indices=None, def plot_function(x): Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full[:, [input_1, input_2]] = x - mu, var, low, up = model.predict(Xtest_full) + _, var = model.predict(Xtest_full) var = var[:, :1] return np.log(var) #Create an IMshow controller that can 
re-plot the latent space shading at a good resolution + if plot_limits is None: + xmin, ymin = X[:, [input_1, input_2]].min(0) + xmax, ymax = X[:, [input_1, input_2]].max(0) + x_r, y_r = xmax-xmin, ymax-ymin + xmin -= .1*x_r + xmax += .1*x_r + ymin -= .1*y_r + ymax += .1*y_r + else: + try: + xmin, xmax, ymin, ymax = plot_limits + except (TypeError, ValueError) as e: + raise e.__class__, "Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits) view = ImshowController(ax, plot_function, - tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)), + (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', - cmap=pb.cm.binary) + cmap=pb.cm.binary, **kwargs) # make sure labels are in order of input: ulabels = [] @@ -99,18 +115,31 @@ def plot_latent(model, labels=None, which_indices=None, if not np.all(labels == 1.) and legend: ax.legend(loc=0, numpoints=1) - #ax.set_xlim(xmin[0], xmax[0]) - #ax.set_ylim(xmin[1], xmax[1]) ax.grid(b=False) # remove the grid if present, it doesn't look good ax.set_aspect('auto') # set a nice aspect ratio if plot_inducing: Z = param_to_array(model.Z) ax.plot(Z[:, input_1], Z[:, input_2], '^w') + + ax.set_xlim((xmin, xmax)) + ax.set_ylim((ymin, ymax)) + try: + fig.canvas.draw() + fig.tight_layout() + fig.canvas.draw() + except Exception as e: + print "Could not invoke tight layout: {}".format(e) + pass + if updates: - ax.figure.canvas.show() + try: + ax.figure.canvas.show() + except Exception as e: + print "Could not invoke show: {}".format(e) raw_input('Enter to continue') + view.deactivate() return ax def plot_magnification(model, labels=None, which_indices=None, @@ -186,7 +215,7 @@ def plot_magnification(model, labels=None, which_indices=None, ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') if updates: - ax.figure.canvas.show() + fig.canvas.show() raw_input('Enter to continue') pb.title('Magnification Factor') diff --git 
a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py index d5aaefd2..62b622c5 100644 --- a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py +++ b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py @@ -33,7 +33,7 @@ class AxisChangedController(AxisEventController): Constructor ''' super(AxisChangedController, self).__init__(ax) - self._lim_ratio_threshold = update_lim or .8 + self._lim_ratio_threshold = update_lim or .95 self._x_lim = self.ax.get_xlim() self._y_lim = self.ax.get_ylim() @@ -80,6 +80,10 @@ class AxisChangedController(AxisEventController): class BufferedAxisChangedController(AxisChangedController): def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs): """ + Buffered axis changed controller. Controls the buffer and handles update events for when the axes changed. + + Updated plotting will be after first reload (first time will be within plot limits, after that the limits will be buffered) + :param plot_function: function to use for creating image for plotting (return ndarray-like) plot_function gets called with (2D!) 
Xtest grid if replotting required @@ -91,11 +95,13 @@ class BufferedAxisChangedController(AxisChangedController): """ super(BufferedAxisChangedController, self).__init__(ax, update_lim=update_lim) self.plot_function = plot_function - xmin, xmax = self._x_lim # self._compute_buffered(*self._x_lim) - ymin, ymax = self._y_lim # self._compute_buffered(*self._y_lim) + xmin, ymin, xmax, ymax = plot_limits#self._x_lim # self._compute_buffered(*self._x_lim) + # imshow acts on the limits of the plot, this is why we need to override the limits here, to make sure the right plot limits are used: + self._x_lim = xmin, xmax + self._y_lim = ymin, ymax self.resolution = resolution self._not_init = False - self.view = self._init_view(self.ax, self.recompute_X(), xmin, xmax, ymin, ymax, **kwargs) + self.view = self._init_view(self.ax, self.recompute_X(buffered=False), xmin, xmax, ymin, ymax, **kwargs) self._not_init = True def update(self, ax): @@ -111,14 +117,16 @@ class BufferedAxisChangedController(AxisChangedController): def update_view(self, view, X, xmin, xmax, ymin, ymax): raise NotImplementedError('update view given in here') - def get_grid(self): - xmin, xmax = self._compute_buffered(*self._x_lim) - ymin, ymax = self._compute_buffered(*self._y_lim) + def get_grid(self, buffered=True): + if buffered: comp = self._compute_buffered + else: comp = lambda a,b: (a,b) + xmin, xmax = comp(*self._x_lim) + ymin, ymax = comp(*self._y_lim) x, y = numpy.mgrid[xmin:xmax:1j * self.resolution, ymin:ymax:1j * self.resolution] return numpy.hstack((x.flatten()[:, None], y.flatten()[:, None])) - def recompute_X(self): - X = self.plot_function(self.get_grid()) + def recompute_X(self, buffered=True): + X = self.plot_function(self.get_grid(buffered)) if isinstance(X, (tuple, list)): for x in X: x.shape = [self.resolution, self.resolution] diff --git a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/imshow_controller.py 
b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/imshow_controller.py index b473dd96..de1114a2 100644 --- a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/imshow_controller.py +++ b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/imshow_controller.py @@ -9,7 +9,7 @@ import numpy class ImshowController(BufferedAxisChangedController): - def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.5, **kwargs): + def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.8, **kwargs): """ :param plot_function: function to use for creating image for plotting (return ndarray-like) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index cbb213b1..b626758f 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -123,6 +123,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add inducing inputs (if a sparse model is used) if hasattr(model,"Z"): #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] + if isinstance(model,SparseGPCoregionalizedRegression): + Z = Z[Z[:,-1] == Y_metadata['output_index'],:] Zu = Z[:,free_dims] z_height = ax.get_ylim()[0] plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12) diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index fb085f39..f8bcc9f9 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -4,6 +4,8 @@ import GPy import numpy as np import matplotlib as mpl import time +from ...util.misc import param_to_array +from GPy.core.parameterization.variational import VariationalPosterior try: import visual visual_available = True @@ -72,12 +74,13 @@ class vector_show(matplotlib_show): """ def __init__(self, vals, axes=None): matplotlib_show.__init__(self, vals, axes) - self.handle = 
self.axes.plot(np.arange(0, len(vals))[:, None], self.vals.T)[0] + self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals) def modify(self, vals): self.vals = vals.copy() - xdata, ydata = self.handle.get_data() - self.handle.set_data(xdata, self.vals.T) + for handle, vals in zip(self.handle, self.vals.T): + xdata, ydata = handle.get_data() + handle.set_data(xdata, vals) self.axes.figure.canvas.draw() @@ -91,8 +94,12 @@ class lvm(matplotlib_show): :param latent_axes: the axes where the latent visualization should be plotted. """ if vals == None: - vals = model.X[0] - + if isinstance(model.X, VariationalPosterior): + vals = param_to_array(model.X.mean) + else: + vals = param_to_array(model.X) + + vals = param_to_array(vals) matplotlib_show.__init__(self, vals, axes=latent_axes) if isinstance(latent_axes,mpl.axes.Axes): diff --git a/GPy/testing/fitc.py b/GPy/testing/fitc.py new file mode 100644 index 00000000..58f009d2 --- /dev/null +++ b/GPy/testing/fitc.py @@ -0,0 +1,34 @@ +# Copyright (c) 2014, James Hensman +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import unittest +import numpy as np +import GPy + +class FITCtest(unittest.TestCase): + def setUp(self): + ###################################### + # # 1 dimensional example + + N = 20 + # sample inputs and outputs + self.X1D = np.random.uniform(-3., 3., (N, 1)) + self.Y1D = np.sin(self.X1D) + np.random.randn(N, 1) * 0.05 + + ###################################### + # # 2 dimensional example + + # sample inputs and outputs + self.X2D = np.random.uniform(-3., 3., (N, 2)) + self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(N, 1) * 0.05 + + def test_fitc_1d(self): + m = GPy.models.SparseGPRegression(self.X1D, self.Y1D) + m.inference_method=GPy.inference.latent_function_inference.FITC() + self.assertTrue(m.checkgrad()) + + def test_fitc_2d(self): + m = GPy.models.SparseGPRegression(self.X2D, self.Y2D) + 
m.inference_method=GPy.inference.latent_function_inference.FITC() + self.assertTrue(m.checkgrad()) + diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 3b17a3d0..9ed218d8 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX): -def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False): +def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None): """ This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite @@ -109,19 +109,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb """ pass_checks = True - if X==None: + if X is None: X = np.random.randn(10, kern.input_dim) if output_ind is not None: X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) - if X2==None: + if X2 is None: X2 = np.random.randn(20, kern.input_dim) if output_ind is not None: X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) if verbose: print("Checking covariance function is positive definite.") - #if isinstance(kern, GPy.kern.IndependentOutputs): - #import ipdb; ipdb.set_trace() # XXX BREAKPOINT result = Kern_check_model(kern, X=X).is_positive_semi_definite() if result and verbose: print("Check passed.") @@ -154,7 +152,12 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if verbose: print("Checking gradients of Kdiag(X) wrt theta.") - result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) + try: + result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("update_gradients_diag not implemented for " + kern.name) if result and verbose: print("Check passed.") if not result: @@ -166,7 +169,10 @@ def check_kernel_gradient_functions(kern, 
X=None, X2=None, output_ind=None, verb if verbose: print("Checking gradients of K(X, X) wrt X.") try: - result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) + testmodel = Kern_check_dK_dX(kern, X=X, X2=None) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) except NotImplementedError: result=True if verbose: @@ -175,14 +181,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Check passed.") if not result: print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) + testmodel.checkgrad(verbose=True) pass_checks = False return False if verbose: print("Checking gradients of K(X, X2) wrt X.") try: - result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) + testmodel = Kern_check_dK_dX(kern, X=X, X2=X2) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) except NotImplementedError: result=True if verbose: @@ -190,8 +199,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) + print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") + testmodel.checkgrad(verbose=True) pass_checks = False return False @@ -236,9 +245,22 @@ class KernelGradientTestsContinuous(unittest.TestCase): def test_Add(self): k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) + k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Add_dims(self): + k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) + k.randomize() + self.assertRaises(AssertionError, k.K, self.X) + k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) + k.randomize() + # assert it runs: + try: + k.K(self.X) + except AssertionError: + raise AssertionError, "k.K(X) should run on self.D-1 dimension" + def test_Matern52(self): k = GPy.kern.Matern52(self.D) k.randomize() @@ -302,29 +324,57 @@ class KernelTestsMiscellaneous(unittest.TestCase): class KernelTestsNonContinuous(unittest.TestCase): def setUp(self): - N = 100 - N1 = 110 - self.D = 2 - D = self.D - self.X = np.random.randn(N,D) - self.X2 = np.random.randn(N1,D) - #self.X_block = np.zeros((N+N1, D+D+1)) - #self.X_block[0:N, 0:D] = self.X - #self.X_block[N:N+N1, D:D+D] = self.X2 - #self.X_block[0:N, -1] = 0 - #self.X_block[N:N+N1, -1] = 1 - self.X_block = np.zeros((N+N1, D+1)) - self.X_block[0:N, 0:D] = self.X - self.X_block[N:N+N1, 0:D] = self.X2 - self.X_block[0:N, -1] = 0 - self.X_block[N:N+N1, -1] = 1 - self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :] + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + self.D = 3 + self.X = np.random.randn(N, self.D+1) + indices = np.random.random_integers(0, 2, size=N) + self.X[indices==0, -1] = 0 + self.X[indices==1, -1] = 1 + self.X[indices==2, -1] = 2 + #self.X = self.X[self.X[:, 
-1].argsort(), :] + self.X2 = np.random.randn((N0+N1)*2, self.D+1) + self.X2[:(N0*2), -1] = 0 + self.X2[(N0*2):, -1] = 1 def test_IndependentOutputs(self): k = GPy.kern.RBF(self.D) - kern = GPy.kern.IndependentOutputs(k, -1) - self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, verbose=verbose)) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) + + def test_ODE_UY(self): + kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) + X = self.X[self.X[:,-1]!=2] + X2 = self.X2[self.X2[:,-1]!=2] + self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) + if __name__ == "__main__": print "Running unit tests, please be (very) patient..." 
- unittest.main() + #unittest.main() + np.random.seed(0) + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + D = 3 + X = np.random.randn(N, D+1) + indices = np.random.random_integers(0, 2, size=N) + X[indices==0, -1] = 0 + X[indices==1, -1] = 1 + X[indices==2, -1] = 2 + #X = X[X[:, -1].argsort(), :] + X2 = np.random.randn((N0+N1)*2, D+1) + X2[:(N0*2), -1] = 0 + X2[(N0*2):, -1] = 1 + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) + k = GPy.kern.RBF(D) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 461a00c4..341b61d4 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -255,21 +255,21 @@ class TestNoiseModels(object): "Y": self.binary_Y, "ep": False # FIXME: Should be True when we have it working again }, - #"Exponential_default": { - #"model": GPy.likelihoods.exponential(), - #"link_f_constraints": [constrain_positive], - #"Y": self.positive_Y, - #"laplace": True, - #}, - #"Poisson_default": { - #"model": GPy.likelihoods.poisson(), - #"link_f_constraints": [constrain_positive], - #"Y": self.integer_Y, - #"laplace": True, - #"ep": False #Should work though... - #}, - #"Gamma_default": { - #"model": GPy.likelihoods.gamma(), + "Exponential_default": { + "model": GPy.likelihoods.Exponential(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True, + }, + "Poisson_default": { + "model": GPy.likelihoods.Poisson(), + "link_f_constraints": [constrain_positive], + "Y": self.integer_Y, + "laplace": True, + "ep": False #Should work though... 
+ }#, + #GAMMA needs some work!"Gamma_default": { + #"model": GPy.likelihoods.Gamma(), #"link_f_constraints": [constrain_positive], #"Y": self.positive_Y, #"laplace": True @@ -589,7 +589,8 @@ class LaplaceTests(unittest.TestCase): self.var = np.random.rand(1) self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var) - self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var) + #TODO: gaussians with on Identity link. self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var) + self.gauss = GPy.likelihoods.Gaussian(variance=self.var) #Make a bigger step as lower bound can be quite curved self.step = 1e-6 @@ -604,7 +605,6 @@ class LaplaceTests(unittest.TestCase): def test_gaussian_d2logpdf_df2_2(self): print "\n{}".format(inspect.stack()[0][3]) self.Y = None - self.gauss = None self.N = 2 self.D = 1 @@ -613,7 +613,6 @@ class LaplaceTests(unittest.TestCase): noise = np.random.randn(*self.X.shape)*self.real_std self.Y = np.sin(self.X*2*np.pi) + noise self.f = np.random.rand(self.N, 1) - self.gauss = GPy.likelihoods.Gaussian(variance=self.var) dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/model_tests.py similarity index 83% rename from GPy/testing/unit_tests.py rename to GPy/testing/model_tests.py index a7ebe6fe..2767b559 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/model_tests.py @@ -6,6 +6,60 @@ import unittest import numpy as np import GPy +class MiscTests(unittest.TestCase): + def setUp(self): + self.N = 20 + self.N_new = 50 + self.D = 1 + self.X = np.random.uniform(-3., 3., (self.N, 1)) + self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05 + self.X_new = np.random.uniform(-3., 3., (self.N_new, 1)) + + def test_raw_predict(self): + k = GPy.kern.RBF(1) + m = GPy.models.GPRegression(self.X, self.Y, kernel=k) + m.randomize() + Kinv = 
np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.Gaussian_noise.variance) + K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new)) + mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(self.Y) + + mu, covar = m._raw_predict(self.X_new, full_cov=True) + self.assertEquals(mu.shape, (self.N_new, self.D)) + self.assertEquals(covar.shape, (self.N_new, self.N_new)) + np.testing.assert_almost_equal(K_hat, covar) + np.testing.assert_almost_equal(mu_hat, mu) + + mu, var = m._raw_predict(self.X_new) + self.assertEquals(mu.shape, (self.N_new, self.D)) + self.assertEquals(var.shape, (self.N_new, 1)) + np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) + np.testing.assert_almost_equal(mu_hat, mu) + + def test_sparse_raw_predict(self): + k = GPy.kern.RBF(1) + m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k) + m.randomize() + Z = m.Z[:] + X = self.X[:] + + #Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression + Kinv = m.posterior.woodbury_inv + K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new)) + + mu, covar = m._raw_predict(self.X_new, full_cov=True) + self.assertEquals(mu.shape, (self.N_new, self.D)) + self.assertEquals(covar.shape, (self.N_new, self.N_new)) + np.testing.assert_almost_equal(K_hat, covar) + #np.testing.assert_almost_equal(mu_hat, mu) + + mu, var = m._raw_predict(self.X_new) + self.assertEquals(mu.shape, (self.N_new, self.D)) + self.assertEquals(var.shape, (self.N_new, 1)) + np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) + #np.testing.assert_almost_equal(mu_hat, mu) + + + class GradientTests(unittest.TestCase): def setUp(self): ###################################### @@ -198,6 +252,7 @@ class GradientTests(unittest.TestCase): m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k) self.assertTrue(m.checkgrad()) + @unittest.expectedFailure def test_GP_EP_probit(self): N = 20 X = np.hstack([np.random.normal(5, 2, N / 2), 
np.random.normal(10, 2, N / 2)])[:, None] @@ -207,6 +262,7 @@ class GradientTests(unittest.TestCase): m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) + @unittest.expectedFailure def test_sparse_EP_DTC_probit(self): N = 20 X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] @@ -221,6 +277,7 @@ class GradientTests(unittest.TestCase): m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) + @unittest.expectedFailure def test_generalized_FITC(self): N = 20 X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py index ebda1630..f8be4a48 100644 --- a/GPy/testing/observable_tests.py +++ b/GPy/testing/observable_tests.py @@ -8,7 +8,7 @@ from GPy.core.parameterization.parameterized import Parameterized from GPy.core.parameterization.param import Param import numpy -# One trigger in init +# One trigger in init _trigger_start = -1 class ParamTestParent(Parameterized): @@ -21,11 +21,9 @@ class ParameterizedTest(Parameterized): params_changed_count = _trigger_start def parameters_changed(self): self.params_changed_count += 1 - def _set_params(self, params, trigger_parent=True): - Parameterized._set_params(self, params, trigger_parent=trigger_parent) class Test(unittest.TestCase): - + def setUp(self): self.parent = ParamTestParent('test parent') self.par = ParameterizedTest('test model') @@ -41,12 +39,12 @@ class Test(unittest.TestCase): self.parent.add_parameter(self.par) self.parent.add_parameter(self.par2) - + self._observer_triggered = None self._trigger_count = 0 self._first = None self._second = None - + def _trigger(self, which): self._observer_triggered = float(which) self._trigger_count += 1 @@ -54,18 +52,18 @@ class Test(unittest.TestCase): self._second = self._trigger else: self._first = self._trigger - + def _trigger_priority(self, which): if self._first is not None: self._second = 
self._trigger_priority else: self._first = self._trigger_priority - + def test_observable(self): self.par.add_observer(self, self._trigger, -1) self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet') self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param') - + self.p[0,1] = 3 # trigger observers self.assertEqual(self._observer_triggered, 3, 'observer should have triggered') self.assertEqual(self._trigger_count, 1, 'observer should have triggered once') @@ -78,14 +76,14 @@ class Test(unittest.TestCase): self.assertEqual(self._trigger_count, 1, 'observer should have triggered once') self.assertEqual(self.par.params_changed_count, 2, 'params changed second') self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param') - + self.par.add_observer(self, self._trigger, -1) self.p[2,1] = 4 self.assertEqual(self._observer_triggered, 4, 'observer should have triggered') self.assertEqual(self._trigger_count, 2, 'observer should have triggered once') self.assertEqual(self.par.params_changed_count, 3, 'params changed second') self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param') - + self.par.remove_observer(self, self._trigger) self.p[0,1] = 3 self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered') @@ -99,7 +97,7 @@ class Test(unittest.TestCase): self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 1, 'now params changed') self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) - + self.par._param_array_[:] = 2 self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 2, 'now params changed') @@ -125,13 +123,13 @@ class Test(unittest.TestCase): self.par.remove_observer(self) self._first = self._second = None - + 
self.par.add_observer(self, self._trigger, 1) self.par.add_observer(self, self._trigger_priority, 0) self.par.notify_observers(0) self.assertEqual(self._first, self._trigger, 'priority should be second') self.assertEqual(self._second, self._trigger_priority, 'priority should be second') - + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.testName'] diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 5b718cbd..dc59449f 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -7,16 +7,16 @@ import unittest import GPy import numpy as np from GPy.core.parameterization.parameter_core import HierarchyError -from GPy.core.parameterization.array_core import ObservableArray +from GPy.core.parameterization.array_core import ObsAr class ArrayCoreTest(unittest.TestCase): def setUp(self): self.X = np.random.normal(1,1, size=(100,10)) - self.obsX = ObservableArray(self.X) + self.obsX = ObsAr(self.X) def test_init(self): - X = ObservableArray(self.X) - X2 = ObservableArray(X) + X = ObsAr(self.X) + X2 = ObsAr(X) self.assertIs(X, X2, "no new Observable array, when Observable is given") def test_slice(self): @@ -34,9 +34,9 @@ class ParameterizedTest(unittest.TestCase): self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) self.test1 = GPy.core.Parameterized("test model") - self.test1.add_parameter(self.white) - self.test1.add_parameter(self.rbf, 0) - self.test1.add_parameter(self.param) + self.test1.kern = self.rbf+self.white + self.test1.add_parameter(self.test1.kern) + self.test1.add_parameter(self.param, 0) x = np.linspace(-2,6,4)[:,None] y = np.sin(x) @@ -45,22 +45,24 @@ class ParameterizedTest(unittest.TestCase): def test_add_parameter(self): self.assertEquals(self.rbf._parent_index_, 0) self.assertEquals(self.white._parent_index_, 1) + self.assertEquals(self.param._parent_index_, 0) pass def test_fixes(self): self.white.fix(warning=False) - 
self.test1.remove_parameter(self.test1.param) + self.test1.remove_parameter(self.param) self.assertTrue(self.test1._has_fixes()) from GPy.core.parameterization.transformations import FIXED, UNFIXED self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED]) - - self.test1.add_parameter(self.white, 0) + self.test1.kern.add_parameter(self.white, 0) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) + self.test1.kern.rbf.fix() + self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3) def test_remove_parameter(self): from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp self.white.fix() - self.test1.remove_parameter(self.white) + self.test1.kern.remove_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) @@ -81,7 +83,12 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) - self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1]) + self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) + + def test_remove_parameter_param_array_grad_array(self): + val = self.test1.kern._param_array_.copy() + self.test1.kern.remove_parameter(self.white) + self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist()) def test_add_parameter_already_in_hirarchy(self): self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) @@ -91,34 +98,51 @@ class ParameterizedTest(unittest.TestCase): self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) from GPy.core.parameterization.transformations import Logexp - kern = 
self.rbf+self.white + kern = self.test1.kern + self.test1.remove_parameter(kern) self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3)) def test_constraints(self): self.rbf.constrain(GPy.transformations.Square(), False) - self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2)) - self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2]) + self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) + self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size]) - self.test1.remove_parameter(self.rbf) + self.test1.kern.remove_parameter(self.rbf) self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), []) def test_constraints_views(self): - self.assertEqual(self.white.constraints._offset, 2) - self.assertEqual(self.rbf.constraints._offset, 0) - self.assertEqual(self.param.constraints._offset, 3) + self.assertEqual(self.white.constraints._offset, self.param.size+self.rbf.size) + self.assertEqual(self.rbf.constraints._offset, self.param.size) + self.assertEqual(self.param.constraints._offset, 0) def test_fixing_randomize(self): - self.white.fix(warning=False) - val = float(self.test1.white.variance) + self.white.fix(warning=True) + val = float(self.white.variance) self.test1.randomize() self.assertEqual(val, self.white.variance) + def test_randomize(self): + ps = self.test1.param.view(np.ndarray).copy() + self.test1.param.randomize() + self.assertFalse(np.all(ps==self.test1.param)) + + def test_fixing_randomize_parameter_handling(self): + self.rbf.fix(warning=True) + val = float(self.rbf.variance) + self.test1.kern.randomize() + self.assertEqual(val, self.rbf.variance) + def test_fixing_optimize(self): self.testmodel.kern.lengthscale.fix() val = float(self.testmodel.kern.lengthscale) self.testmodel.randomize() self.assertEqual(val, 
self.testmodel.kern.lengthscale) + def test_printing(self): + print self.test1 + print self.param + print self.test1[''] + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_add_parameter'] unittest.main() \ No newline at end of file diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py index d9e8b704..b769d6a5 100644 --- a/GPy/util/multioutput.py +++ b/GPy/util/multioutput.py @@ -56,8 +56,6 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'): warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name) - #K = kernel * GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B') - #K = kernel ** GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa, name= 'B') K['.*variance'] = 1. K['.*variance'].fix() return K diff --git a/setup.py b/setup.py index 9ccf3990..ace1d8b2 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup(name = 'GPy', license = "BSD 3-clause", keywords = "machine-learning gaussian-processes kernels", url = "http://sheffieldml.github.com/GPy/", - packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods', 'GPy.testing', 'GPy.util.latent_space_visualizations', 'GPy.util.latent_space_visualizations.controllers', 'GPy.likelihoods.noise_models', 'GPy.kern.parts', 'GPy.mappings'], + packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"], 
package_dir={'GPy': 'GPy'}, package_data = {'GPy': ['GPy/examples']}, py_modules = ['GPy.__init__'], @@ -29,6 +29,4 @@ setup(name = 'GPy', }, classifiers=[ "License :: OSI Approved :: BSD License"], - #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', - # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], )