diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py
index c0dd3fea..80661de0 100644
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@@ -4,40 +4,40 @@
 import itertools
 import numpy
 from parameter_core import Constrainable, adjust_name_for_printing
-from array_core import ObservableArray, ParamList
+from array_core import ObservableArray
 ###### printing
 __constraints_name__ = "Constraint"
 __index_name__ = "Index"
 __tie_name__ = "Tied to"
-__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all
+__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all
 __print_threshold__ = 5
-######
+######
 class Float(numpy.float64, Constrainable):
     def __init__(self, f, base):
-        super(Float, self).__init__(f)
+        super(Float,self).__init__(f)
         self._base = base
-
-
+
+
 class Param(ObservableArray, Constrainable):
     """
     Parameter object for GPy models.
     :param name: name of the parameter to be printed
     :param input_array: array which this parameter handles
-
+
     You can add/remove constraints by calling constrain on the parameter itself, e.g:
-
+
     - self[:,1].constrain_positive()
     - self[0].tie_to(other)
     - self.untie()
     - self[:3,:].unconstrain()
     - self[1].fix()
-
+
     Fixing parameters will fix them to the value they are right now. If you change the fixed value, it will be fixed to the new value!
-
+
     See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
     This ndarray can be stored in lists and checked if it is in.
@@ -46,11 +46,11 @@ class Param(ObservableArray, Constrainable):
     >>> x = np.random.normal(size=(10,3))
     >>> x in [[1], x, [3]]
     True
-
+
     WARNING: This overrides the functionality of x==y!!!
     Use numpy.equal(x,y) for element-wise equality testing.
     """
-    __array_priority__ = 0 # Never give back Param
+    __array_priority__ = 0 # Never give back Param
     _fixes_ = None
     def __new__(cls, name, input_array, *args, **kwargs):
         obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
@@ -68,7 +68,7 @@ class Param(ObservableArray, Constrainable):
     def __init__(self, name, input_array):
         super(Param, self).__init__(name=name)
-
+
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return
@@ -85,7 +85,7 @@ class Param(ObservableArray, Constrainable):
         self._original_ = getattr(obj, '_original_', None)
         self._name = getattr(obj, 'name', None)
         self.gradient = getattr(obj, 'gradient', None)
-
+
     def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(numpy.ndarray)
     #===========================================================================
@@ -129,13 +129,13 @@ class Param(ObservableArray, Constrainable):
         self.flat = param
         self._notify_tied_parameters()
         self._notify_observers()
-
+
     def _get_params(self):
         return self.flat
 # @property
 # def name(self):
 #     """
-#     Name of this parameter.
+#     Name of this parameter.
 #     This can be a callable without parameters. The callable will be called
 #     every time the name property is accessed.
 #     """
@@ -158,7 +158,7 @@ class Param(ObservableArray, Constrainable):
     def constrain_fixed(self, warning=True):
         """
         Constrain this paramter to be fixed to the current value it carries.
-
+
         :param warning: print a warning for overwriting constraints.
""" self._highest_parent_._fix(self, warning) @@ -185,7 +185,7 @@ class Param(ObservableArray, Constrainable): Note: For now only one parameter can have ties, so all of a parameter will be removed, when re-tieing! """ - # Note: this method will tie to the parameter which is the last in + #Note: this method will tie to the parameter which is the last in # the chain of ties. Thus, if you tie to a tied parameter, # this tie will be created to the parameter the param is tied # to. @@ -195,12 +195,12 @@ class Param(ObservableArray, Constrainable): if param.size != 1: raise NotImplementedError, "Broadcast tying is not implemented yet" try: - if self._original_: + if self._original_: self[:] = param - else: # this happens when indexing created a copy of the array + else: # this happens when indexing created a copy of the array self._direct_parent_._get_original(self)[self._current_slice_] = param except ValueError: - raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape)) + raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape)) if param is self: raise RuntimeError, 'Cyclic tieing is not allowed' # if len(param._tied_to_) > 0: @@ -288,7 +288,7 @@ class Param(ObservableArray, Constrainable): def unset_prior(self, *priors): """ :param priors: priors to remove from this parameter - + Remove all priors from this parameter """ self._highest_parent_._remove_prior(self, *priors) @@ -319,8 +319,8 @@ class Param(ObservableArray, Constrainable): if numpy.all(si == Ellipsis): continue if isinstance(si, slice): - a = si.indices(self._realshape_[i])[0] - elif isinstance(si, (list, numpy.ndarray, tuple)): + a = si.indices(self._realshape_[i])[0] + elif isinstance(si, (list,numpy.ndarray,tuple)): a = si[0] else: a = si if a < 0: @@ -475,20 +475,20 @@ class ParamConcatenation(object): self.params.append(p) self._param_sizes = [p.size for p in self.params] startstops = numpy.cumsum([0] + self._param_sizes) - self._param_slices_ = [slice(start, stop) for start, stop in zip(startstops, startstops[1:])] + self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])] #=========================================================================== # Get/set items, enable broadcasting #=========================================================================== def __getitem__(self, s): - ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; - params = [p._get_params()[ind[ps]] for p, ps in zip(self.params, self._param_slices_) if numpy.any(p._get_params()[ind[ps]])] - if len(params) == 1: return params[0] + ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; + params = [p._get_params()[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._get_params()[ind[ps]])] + if len(params)==1: return params[0] return ParamConcatenation(params) def __setitem__(self, s, val, update=True): - ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; + ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; vals = self._vals(); vals[s] = val; del val - [numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters() - for p, ps in zip(self.params, self._param_slices_)] + [numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters() + for p, ps in zip(self.params, self._param_slices_)] if update: self.params[0]._highest_parent_.parameters_changed() def _vals(self): @@ -496,38 +496,55 
@@ class ParamConcatenation(object): #=========================================================================== # parameter operations: #=========================================================================== + def update_all_params(self): + self.params[0]._highest_parent_.parameters_changed() + def constrain(self, constraint, warning=True): - [param.constrain(constraint) for param in self.params] + [param.constrain(constraint, update=False) for param in self.params] + self.update_all_params() constrain.__doc__ = Param.constrain.__doc__ + def constrain_positive(self, warning=True): - [param.constrain_positive(warning) for param in self.params] + [param.constrain_positive(warning, update=False) for param in self.params] + self.update_all_params() constrain_positive.__doc__ = Param.constrain_positive.__doc__ + def constrain_fixed(self, warning=True): [param.constrain_fixed(warning) for param in self.params] constrain_fixed.__doc__ = Param.constrain_fixed.__doc__ fix = constrain_fixed + def constrain_negative(self, warning=True): - [param.constrain_negative(warning) for param in self.params] + [param.constrain_negative(warning, update=False) for param in self.params] + self.update_all_params() constrain_negative.__doc__ = Param.constrain_negative.__doc__ + def constrain_bounded(self, lower, upper, warning=True): - [param.constrain_bounded(lower, upper, warning) for param in self.params] + [param.constrain_bounded(lower, upper, warning, update=False) for param in self.params] + self.update_all_params() constrain_bounded.__doc__ = Param.constrain_bounded.__doc__ + def unconstrain(self, *constraints): [param.unconstrain(*constraints) for param in self.params] unconstrain.__doc__ = Param.unconstrain.__doc__ + def unconstrain_negative(self): [param.unconstrain_negative() for param in self.params] unconstrain_negative.__doc__ = Param.unconstrain_negative.__doc__ + def unconstrain_positive(self): [param.unconstrain_positive() for param in self.params] unconstrain_positive.__doc__ = Param.unconstrain_positive.__doc__ + def unconstrain_fixed(self): [param.unconstrain_fixed() for param in self.params] unconstrain_fixed.__doc__ = Param.unconstrain_fixed.__doc__ unfix = unconstrain_fixed + def unconstrain_bounded(self, lower, upper): [param.unconstrain_bounded(lower, upper) for param in self.params] unconstrain_bounded.__doc__ = Param.unconstrain_bounded.__doc__ + def untie(self, *ties): [param.untie(*ties) for param in self.params] __lt__ = lambda self, val: self._vals() < val @@ -547,20 +564,20 @@ class ParamConcatenation(object): lx = max([p._max_len_values() for p in params]) li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)]) lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)]) - strings = [p.__str__(cm, i, tm, lc, lx, li, lt) for p, cm, i, tm in itertools.izip(params, constr_matrices, indices, ties_matrices)] + strings = [p.__str__(cm, i, tm, lc, lx, li, lt) for p, cm, i, tm in itertools.izip(params,constr_matrices,indices,ties_matrices)] return "\n".join(strings) - return "\n{}\n".format(" -" + "- | -".join(['-' * l for l in [li, lx, lc, lt]])).join(strings) + return "\n{}\n".format(" -"+"- | -".join(['-'*l for l in [li,lx,lc,lt]])).join(strings) def __repr__(self): - return "\n".join(map(repr, self.params)) - + return "\n".join(map(repr,self.params)) + if __name__ == '__main__': - + from GPy.core.parameterized import Parameterized from GPy.core.parameter import Param - # X = numpy.random.randn(2,3,1,5,2,4,3) - X = 
numpy.random.randn(3, 2) + #X = numpy.random.randn(2,3,1,5,2,4,3) + X = numpy.random.randn(3,2) print "random done" p = Param("q_mean", X) p1 = Param("q_variance", numpy.random.rand(*p.shape)) @@ -568,23 +585,23 @@ if __name__ == '__main__': p3 = Param("variance", numpy.random.rand()) p4 = Param("lengthscale", numpy.random.rand(2)) - + m = Parameterized() rbf = Parameterized(name='rbf') - - rbf.add_parameter(p3, p4) - m.add_parameter(p, p1, rbf) - + + rbf.add_parameter(p3,p4) + m.add_parameter(p,p1,rbf) + print "setting params" - # print m.q_v[3:5,[1,4,5]] + #print m.q_v[3:5,[1,4,5]] print "constraining variance" - # m[".*variance"].constrain_positive() - # print "constraining rbf" - # m.rbf_l.constrain_positive() - # m.q_variance[1,[0,5,11,19,2]].tie_to(m.rbf_v) - # m.rbf_v.tie_to(m.rbf_l[0]) - # m.rbf_l[0].tie_to(m.rbf_l[1]) - # m.q_v.tie_to(m.rbf_v) + #m[".*variance"].constrain_positive() + #print "constraining rbf" + #m.rbf_l.constrain_positive() + #m.q_variance[1,[0,5,11,19,2]].tie_to(m.rbf_v) + #m.rbf_v.tie_to(m.rbf_l[0]) + #m.rbf_l[0].tie_to(m.rbf_l[1]) + #m.q_v.tie_to(m.rbf_v) # m.rbf_l.tie_to(m.rbf_va) # pt = numpy.array(params._get_params_transformed()) # ptr = numpy.random.randn(*pt.shape) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index a06211fe..51d9a110 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -20,7 +20,7 @@ class Observable(object): def _notify_observers(self): [callble(self) for callble in self._observers_.itervalues()] - + class Pickleable(object): def _getstate(self): """ @@ -36,9 +36,9 @@ class Pickleable(object): Set the state (memento pattern) of this class to the given state. Usually this is just the counterpart to _getstate, such that an object is a copy of another when calling - + copy = .__new__(*args,**kw)._setstate(._getstate()) - + See python doc "pickling" (`__getstate__` and `__setstate__`) for details. """ raise NotImplementedError, "To be able to use pickling you need to implement this method" @@ -52,7 +52,8 @@ class Parentable(object): super(Parentable,self).__init__() self._direct_parent_ = direct_parent self._parent_index_ = parent_index - + self._highest_parent_ = highest_parent + def has_parent(self): return self._direct_parent_ is not None @@ -74,10 +75,10 @@ class Nameable(Parentable): @name.setter def name(self, name): from_name = self.name - self._name = name + self._name = name if self.has_parent(): self._direct_parent_._name_changed(self, from_name) - + class Constrainable(Nameable): def __init__(self, name): super(Constrainable,self).__init__(name) @@ -89,7 +90,7 @@ class Constrainable(Nameable): :param transform: the :py:class:`GPy.core.transformations.Transformation` to constrain the this parameter to. :param warning: print a warning if re-constraining parameters. - + Constrain the parameter to the given :py:class:`GPy.core.transformations.Transformation`. """ @@ -102,37 +103,37 @@ class Constrainable(Nameable): self._add_constrain(p, transform, warning) if update: self.parameters_changed() - - def constrain_positive(self, warning=True): + + def constrain_positive(self, warning=True, update=True): """ :param warning: print a warning if re-constraining parameters. - + Constrain this parameter to the default positive constraint. 
""" - self.constrain(Logexp(), warning) + self.constrain(Logexp(), warning=warning, update=update) - def constrain_negative(self, warning=True): + def constrain_negative(self, warning=True, update=True): """ :param warning: print a warning if re-constraining parameters. - + Constrain this parameter to the default negative constraint. """ - self.constrain(NegativeLogexp(), warning) + self.constrain(NegativeLogexp(), warning=warning, update=update) - def constrain_bounded(self, lower, upper, warning=True): + def constrain_bounded(self, lower, upper, warning=True, update=True): """ :param lower, upper: the limits to bound this parameter to :param warning: print a warning if re-constraining parameters. - + Constrain this parameter to lie within the given range. """ - self.constrain(Logistic(lower, upper), warning) + self.constrain(Logistic(lower, upper), warning=warning, update=update) def unconstrain(self, *transforms): """ :param transforms: The transformations to unconstrain from. - - remove all :py:class:`GPy.core.transformations.Transformation` + + remove all :py:class:`GPy.core.transformations.Transformation` transformats of this parameter object. """ if self.has_parent(): @@ -143,20 +144,20 @@ class Constrainable(Nameable): def unconstrain_positive(self): """ - Remove positive constraint of this parameter. + Remove positive constraint of this parameter. """ self.unconstrain(Logexp()) def unconstrain_negative(self): """ - Remove negative constraint of this parameter. + Remove negative constraint of this parameter. """ self.unconstrain(NegativeLogexp()) def unconstrain_bounded(self, lower, upper): """ :param lower, upper: the limits to unbound this parameter from - + Remove (lower, upper) bounded constrain from this parameter/ """ self.unconstrain(Logistic(lower, upper)) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 2f00fbe8..61097951 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -248,6 +248,16 @@ class Parameterized(Constrainable, Pickleable, Observable): cPickle.dump(self, f, protocol) def copy(self): """Returns a (deep) copy of the current model """ + #dc = dict() + #for k, v in self.__dict__.iteritems(): + #if k not in ['_highest_parent_', '_direct_parent_']: + #dc[k] = copy.deepcopy(v) + + #dc = copy.deepcopy(self.__dict__) + #dc['_highest_parent_'] = None + #dc['_direct_parent_'] = None + #s = self.__class__.new() + #s.__dict__ = dc return copy.deepcopy(self) def __getstate__(self): if self._has_get_set_state(): @@ -413,6 +423,8 @@ class Parameterized(Constrainable, Pickleable, Observable): #=========================================================================== # Convenience for fixed, tied checking of param: #=========================================================================== + def fixed_indices(self): + return np.array([x.is_fixed for x in self._parameters_]) def _is_fixed(self, param): # returns if the whole param is fixed if not self._has_fixes(): @@ -442,7 +454,8 @@ class Parameterized(Constrainable, Pickleable, Observable): # if removing constraints before adding new is not wanted, just delete the above line! 
             self.constraints.add(transform, rav_i)
         param = self._get_original(param)
-        param._set_params(transform.initialize(param._get_params()))
+        if not (transform == __fixed__):
+            param._set_params(transform.initialize(param._get_params()), update=False)
         if warning and any(reconstrained):
             # if you want to print the whole params object, which was reconstrained use:
             # m = str(param[self._backtranslate_index(param, reconstrained)])
diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py
index fd2c3ee5..c4cab1e9 100644
--- a/GPy/core/parameterization/transformations.py
+++ b/GPy/core/parameterization/transformations.py
@@ -4,9 +4,9 @@
 import numpy as np
 from domains import _POSITIVE,_NEGATIVE, _BOUNDED
-import sys
+import sys
 import weakref
-_lim_val = -np.log(sys.float_info.epsilon)
+_lim_val = -np.log(sys.float_info.epsilon)
 class Transformation(object):
     domain = None
@@ -94,7 +94,7 @@ class LogexpClipped(Logexp):
     def __str__(self):
         return '+ve_c'
-
+
 class Exponent(Transformation):
     # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
     domain = _POSITIVE
@@ -162,9 +162,11 @@ class Logistic(Transformation):
     def initialize(self, f):
         if np.any(np.logical_or(f < self.lower, f > self.upper)):
             print "Warning: changing parameters to satisfy constraints"
-        return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
+        #return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
+        #FIXME: Max, zeros_like right?
+        return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
     def __str__(self):
         return '{},{}'.format(self.lower, self.upper)
-
+
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index 6e252406..82313eab 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -32,7 +32,7 @@ class LaplaceInference(object):
         self._mode_finding_tolerance = 1e-7
         self._mode_finding_max_iter = 40
         self.bad_fhat = True
-
+        self._previous_Ki_fhat = None
     def inference(self, kern, X, likelihood, Y, Y_metadata=None):
         """
@@ -50,16 +50,17 @@ class LaplaceInference(object):
             Ki_f_init = np.zeros_like(Y)
         else:
             Ki_f_init = self._previous_Ki_fhat
+
         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
         #Compute hessian and other variables at mode
-        log_marginal, Ki_W_i, K_Wi_i, dL_dK, woodbury_vector = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, Y_metadata)
+        log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
-        #likelihood.gradient = self.likelihood_gradients()
         kern.update_gradients_full(dL_dK, X)
+        likelihood.update_gradients(dL_dthetaL)
         self._previous_Ki_fhat = Ki_fhat.copy()
-        return Posterior(woodbury_vector=woodbury_vector, woodbury_inv = K_Wi_i, K=K), log_marginal, {'dL_dK':dL_dK}
+        return Posterior(woodbury_vector=woodbury_vector, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK}
     def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
         """
@@ -133,13 +134,15 @@ class LaplaceInference(object):
         return f, Ki_f
-
-    def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, Y_metadata):
+    def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
         """
         At the mode, compute the hessian and effective covariance matrix.
         returns: logZ : approximation to the marginal likelihood
-                 Cov : the approximation to the covariance matrix
+                 woodbury_vector : variable required for calculating the approximation to the covariance matrix
+                 woodbury_inv : variable required for calculating the approximation to the covariance matrix
+                 dL_dthetaL : array of derivatives (1 x num_kernel_params)
+                 dL_dthetaL : array of derivatives (1 x num_likelihood_params)
         """
         #At this point get the hessian matrix (or vector as W is diagonal)
         W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
@@ -153,45 +156,62 @@ class LaplaceInference(object):
         #compute the log marginal
         log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
-        #compute dL_dK
-        explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
-
-        #Implicit
-        d3lik_d3fhat = likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata)
-        dL_dfhat = 0.5*(np.diag(Ki_W_i)[:, None]*d3lik_d3fhat) #why isn't this -0.5? s2 in R&W p126 line 9.
+        #Compute vival matrices for derivatives
+        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
         woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
-        implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(np.eye(Y.shape[0]) - np.dot(K, K_Wi_i))
+        dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
+        #BiK, _ = dpotrs(L, K, lower=1)
+        #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
+        I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
-        dL_dK = explicit_part + implicit_part
-
-        return log_marginal, Ki_W_i, K_Wi_i, dL_dK, woodbury_vector
-
-
-    def likelihood_gradients(self):
-        """
-        Gradients with respect to likelihood parameters (dL_dthetaL)
-
-        :rtype: array of derivatives (1 x num_likelihood_params)
-        """
-        dL_dfhat, I_KW_i = self._shared_gradients_components()
-        dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
-
-        num_params = len(self._get_param_names())
-        # make space for one derivative for each likelihood parameter
-        dL_dthetaL = np.zeros(num_params)
-        for thetaL_i in range(num_params):
+        ####################
+        #compute dL_dK#
+        ####################
+        if kern.size > 0 and not kern.is_fixed:
             #Explicit
-            dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
-                            #- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
-                            + np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
-                            )
+            explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
             #Implicit
-            dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
-            dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
-            dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
+            implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
-        return dL_dthetaL
+            dL_dK = explicit_part + implicit_part
+        else:
+            dL_dK = np.zeros(likelihood.size)
+
+        ####################
+        #compute dL_dthetaL#
+        ####################
+        if likelihood.size > 0 and not likelihood.is_fixed:
+            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
+
+            num_params = likelihood.size
+            # make space for one derivative for each likelihood parameter
+            dL_dthetaL = np.zeros(num_params)
+            for thetaL_i in range(num_params):
+                #Explicit
+                dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
+                                # The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+                                + 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
+                                )
+
+                #Implicit
+                dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
+                #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
+                dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
+                dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
+
+        else:
+            dL_dthetaL = np.zeros(likelihood.size)
+
+        return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
+
+
+    #def likelihood_gradients(self, f_hat, K, Y, Ki_W_i, dL_dfhat, I_KW_i, likelihood, Y_metadata):
+        #"""
+        #Gradients with respect to likelihood parameters (dL_dthetaL)
+
+        #:rtype: array of derivatives (1 x num_likelihood_params)
+        #"""
     def _compute_B_statistics(self, K, W, log_concave):
         """
@@ -219,7 +239,7 @@ class LaplaceInference(object):
         LiW12, _ = dtrtrs(L, np.diagflat(W_12), lower=1, trans=0)
         K_Wi_i = np.dot(LiW12.T, LiW12) # R = W12BiW12, in R&W p 126, eq 5.25
-        #here's a better way to compute the required matrix.
+        #here's a better way to compute the required matrix.
         # you could do the model finding witha backsub, instead of a dot...
         #L2 = L/W_12
         #K_Wi_i_2 , _= dpotri(L2)
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index c047e573..e6be2261 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -3,9 +3,9 @@
 #TODO
 """
-A lot of this code assumes that the link functio nis the identity.
+A lot of this code assumes that the link functio nis the identity.
-I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
+I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
 Furthermore, exact Guassian inference can only be done for the identity link, so we should be asserting so for
 all calls which relate to that.
@@ -130,7 +130,10 @@ class Gaussian(Likelihood):
         :rtype: float
         """
         assert np.asarray(link_f).shape == np.asarray(y).shape
-        return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
+        N = y.shape[0]
+        ln_det_cov = N*np.log(self.variance)
+
+        return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
     def dlogpdf_dlink(self, link_f, y, extra_data=None):
         """
@@ -175,7 +178,8 @@ class Gaussian(Likelihood):
        (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
         """
         assert np.asarray(link_f).shape == np.asarray(y).shape
-        hess = -(1.0/self.variance)*np.ones((self.N, 1))
+        N = y.shape[0]
+        hess = -(1.0/self.variance)*np.ones((N, 1))
         return hess
     def d3logpdf_dlink3(self, link_f, y, extra_data=None):
         """
@@ -194,7 +198,8 @@ class Gaussian(Likelihood):
         :rtype: Nx1 array
         """
         assert np.asarray(link_f).shape == np.asarray(y).shape
-        d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
+        N = y.shape[0]
+        d3logpdf_dlink3 = np.zeros((N,1))
         return d3logpdf_dlink3
     def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
         """
@@ -215,7 +220,8 @@ class Gaussian(Likelihood):
         assert np.asarray(link_f).shape == np.asarray(y).shape
         e = y - link_f
         s_4 = 1.0/(self.variance**2)
-        dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
+        N = y.shape[0]
+        dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
         return np.sum(dlik_dsigma) # Sure about this sum?
     def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
@@ -255,7 +261,8 @@ class Gaussian(Likelihood):
         """
         assert np.asarray(link_f).shape == np.asarray(y).shape
         s_4 = 1.0/(self.variance**2)
-        d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
+        N = y.shape[0]
+        d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
         return d2logpdf_dlink2_dvar
     def dlogpdf_link_dtheta(self, f, y, extra_data=None):
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index b0ecfc37..701a5a2f 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -13,12 +13,12 @@
 from ..core.parameterization import Parameterized
 class Likelihood(Parameterized):
     """
-    Likelihood base class, used to defing p(y|f).
+    Likelihood base class, used to defing p(y|f).
     All instances use _inverse_ link functions, which can be swapped out.
     It is expected that inherriting classes define a default inverse link function
-    To use this class, inherrit and define missing functionality.
+    To use this class, inherrit and define missing functionality.
     Inherriting classes *must* implement:
        pdf_link : a bound method which turns the output of the link function into the pdf
@@ -27,7 +27,7 @@ class Likelihood(Parameterized):
     To enable use with EP, inherriting classes *must* define:
        TODO: a suitable derivative function for any parameters of the class
     It is also desirable to define:
-       moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
+       moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
     To enable use with Laplace approximation, inherriting classes *must* define:
        Some derivative functions *AS TODO*
@@ -36,7 +36,7 @@ class Likelihood(Parameterized):
     """
     def __init__(self, gp_link, name):
-        super(Likelihood, self).__init__(name)
+        super(Likelihood, self).__init__(name)
         assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
         self.gp_link = gp_link
         self.log_concave = False
@@ -44,6 +44,10 @@ class Likelihood(Parameterized):
     def _gradients(self,partial):
         return np.zeros(0)
+    def update_gradients(self, partial):
+        if self.size > 0:
+            raise NotImplementedError('Must be implemented for likelihoods with parameters to be optimized')
+
     def _preprocess_values(self,Y):
         """
         In case it is needed, this function assess the output values or makes any pertinent transformation on them.
@@ -303,31 +307,31 @@ class Likelihood(Parameterized):
         """
         TODO: Doc strings
         """
-        if len(self._get_param_names()) > 0:
+        if self.size > 0:
             link_f = self.gp_link.transf(f)
             return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
         else:
             #Is no parameters so return an empty array for its derivatives
-            return np.empty([1, 0])
+            return np.zeros([1, 0])
     def dlogpdf_df_dtheta(self, f, y, extra_data=None):
         """
         TODO: Doc strings
         """
-        if len(self._get_param_names()) > 0:
+        if self.size > 0:
             link_f = self.gp_link.transf(f)
             dlink_df = self.gp_link.dtransf_df(f)
             dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
             return chain_1(dlogpdf_dlink_dtheta, dlink_df)
         else:
             #Is no parameters so return an empty array for its derivatives
-            return np.empty([f.shape[0], 0])
+            return np.zeros([f.shape[0], 0])
     def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
         """
         TODO: Doc strings
         """
-        if len(self._get_param_names()) > 0:
+        if self.size > 0:
             link_f = self.gp_link.transf(f)
             dlink_df = self.gp_link.dtransf_df(f)
             d2link_df2 = self.gp_link.d2transf_df2(f)
@@ -336,7 +340,7 @@ class Likelihood(Parameterized):
             return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
         else:
             #Is no parameters so return an empty array for its derivatives
-            return np.empty([f.shape[0], 0])
+            return np.zeros([f.shape[0], 0])
     def _laplace_gradients(self, f, y, extra_data=None):
         dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
@@ -345,9 +349,12 @@ class Likelihood(Parameterized):
         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
         # ensure we have gradients for every parameter we want to optimize
-        assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
-        assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
-        assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
+        try:
+            assert len(dlogpdf_dtheta) == self.size #1 x num_param array
+            assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
+            assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
+        except Exception as e:
+            import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
         return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py
index 587e1b23..e815a399 100644
--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@@ -8,6 +8,7 @@
 import link_functions
 from scipy import stats, integrate
 from scipy.special import gammaln, gamma
 from likelihood import Likelihood
+from ..core.parameterization import Param
 class StudentT(Likelihood):
     """
@@ -19,26 +20,30 @@ class StudentT(Likelihood):
         p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
     """
-    def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
-        self.v = deg_free
-        self.sigma2 = sigma2
+    def __init__(self,gp_link=None, deg_free=5, sigma2=2):
+        if gp_link is None:
+            gp_link = link_functions.Identity()
+
+        super(StudentT, self).__init__(gp_link, name='Student_T')
+
+        self.sigma2 = Param('t_noise', float(sigma2))
+        self.v = Param('deg_free', float(deg_free))
+        self.add_parameter(self.sigma2)
+        self.add_parameter(self.v)
+        self.v.constrain_fixed()
-        self._set_params(np.asarray(sigma2))
-        super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
         self.log_concave = False
-    def _get_params(self):
-        return np.asarray(self.sigma2)
+    def parameters_changed(self):
+        self.variance = (self.v / float(self.v - 2)) * self.sigma2
-    def _get_param_names(self):
-        return ["t_noise_std2"]
-
-    def _set_params(self, x):
-        self.sigma2 = float(x)
-
-    @property
-    def variance(self, extra_data=None):
-        return (self.v / float(self.v - 2)) * self.sigma2
+    def update_gradients(self, partial):
+        """
+        Pull out the gradients, be careful as the order must match the order
+        in which the parameters are added
+        """
+        self.sigma2.gradient = partial[0]
+        self.v.gradient = partial[1]
     def pdf_link(self, link_f, y, extra_data=None):
         """
@@ -82,10 +87,14 @@ class StudentT(Likelihood):
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         e = y - link_f
+        #FIXME:
+        #Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
+        #But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
+        #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
         objective = (+ gammaln((self.v + 1) * 0.5)
-                     - gammaln(self.v * 0.5)
-                     - 0.5*np.log(self.sigma2 * self.v * np.pi)
-                     - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
+                     - gammaln(self.v * 0.5)
+                     - 0.5*np.log(self.sigma2 * self.v * np.pi)
+                     - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
                      )
         return np.sum(objective)
@@ -222,15 +231,18 @@ class StudentT(Likelihood):
     def dlogpdf_link_dtheta(self, f, y, extra_data=None):
         dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
-        return np.asarray([[dlogpdf_dvar]])
+        dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
+        return np.hstack((dlogpdf_dvar, dlogpdf_dv))
     def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
         dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
-        return dlogpdf_dlink_dvar
+        dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
+        return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
     def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
         d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
-        return d2logpdf_dlink2_dvar
+        d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
+        return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
     def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
         """
diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py
index 775334ac..b7c78449 100644
--- a/GPy/models/gradient_checker.py
+++ b/GPy/models/gradient_checker.py
@@ -1,9 +1,10 @@
 # ## Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from GPy.core.model import Model
+from ..core.model import Model
 import itertools
 import numpy
+from ..core.parameterization import Param
 def get_shape(x):
     if isinstance(x, numpy.ndarray):
@@ -24,42 +25,42 @@ class GradientChecker(Model):
         """
         :param f: Function to check gradient for
         :param df: Gradient of function to check
-        :param x0:
+        :param x0:
             Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
            Can be a list of arrays, if takes a list of arrays. This list will be passed
            to f and df in the same order as given here.
            If only one argument, make sure not to pass a list!!!
-
+
         :type x0: [array-like] | array-like | float | int
         :param names:
            Names to print, when performing gradcheck. If a list was passed to x0
            a list of names with the same length is expected.
         :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
-
+
         Examples:
         ---------
             from GPy.models import GradientChecker
            N, M, Q = 10, 5, 3
-
+
            Sinusoid:
-
+
                X = numpy.random.rand(N, Q)
                grad = GradientChecker(numpy.sin,numpy.cos,X,'x')
                grad.checkgrad(verbose=1)
-
+
            Using GPy:
-
+
                X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q)
                kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True)
-                grad = GradientChecker(kern.K,
+                grad = GradientChecker(kern.K,
                                        lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x),
                                        x0 = X.copy(),
-                                       names='X')
+                                       names='X')
                grad.checkgrad(verbose=1)
                grad.randomize()
-                grad.checkgrad(verbose=1)
+                grad.checkgrad(verbose=1)
         """
-        Model.__init__(self)
+        Model.__init__(self, 'GradientChecker')
         if isinstance(x0, (list, tuple)) and names is None:
             self.shapes = [get_shape(xi) for xi in x0]
             self.names = ['X{i}'.format(i=i) for i in range(len(x0))]
@@ -72,8 +73,10 @@ class GradientChecker(Model):
         else:
             self.names = names
             self.shapes = [get_shape(x0)]
+
         for name, xi in zip(self.names, at_least_one_element(x0)):
-            self.__setattr__(name, xi)
+            self.__setattr__(name, Param(name, xi))
+            self.add_parameter(self.__getattribute__(name))
 #        self._param_names = []
 #        for name, shape in zip(self.names, self.shapes):
 #            self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
@@ -93,20 +96,18 @@ class GradientChecker(Model):
     def _log_likelihood_gradients(self):
         return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
+    #def _get_params(self):
+        #return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
-    def _get_params(self):
-        return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
+    #def _set_params(self, x):
+        #current_index = 0
+        #for name, shape in zip(self.names, self.shapes):
+            #current_size = numpy.prod(shape)
+            #self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
+            #current_index += current_size
-
-    def _set_params(self, x):
-        current_index = 0
-        for name, shape in zip(self.names, self.shapes):
-            current_size = numpy.prod(shape)
-            self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
-            current_index += current_size
-
-    def _get_param_names(self):
-        _param_names = []
-        for name, shape in zip(self.names, self.shapes):
-            _param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
-        return _param_names
+    #def _get_param_names(self):
+        #_param_names = []
+        #for name, shape in zip(self.names, self.shapes):
+            #_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
+        #return _param_names
diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py
index d14c9a41..d344e23d 100644
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@@ -4,10 +4,11 @@
 import GPy
 from GPy.models import GradientChecker
 import functools
 import inspect
-from GPy.likelihoods.noise_models import gp_transformations
+from GPy.likelihoods import link_functions
+from ..core.parameterization import Param
 from functools import partial
 #np.random.seed(300)
-np.random.seed(7)
+#np.random.seed(7)
 def dparam_partial(inst_func, *args):
     """
@@ -22,12 +23,14 @@ def dparam_partial(inst_func, *args):
     the f or Y that are being used in the function whilst we tweak the param
     """
-    def param_func(param, inst_func, args):
-        inst_func.im_self._set_params(param)
+    def param_func(param_val, param_name, inst_func, args):
+        #inst_func.im_self._set_params(param)
+        #inst_func.im_self.add_parameter(Param(param_name, param_val))
+        inst_func.im_self[param_name] = param_val
         return inst_func(*args)
     return functools.partial(param_func, inst_func=inst_func, args=args)
-def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False):
+def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, randomize=False, verbose=False):
     """
     checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
     However if we are holding other parameters fixed and moving something else
@@ -38,27 +41,34 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
     The number of parameters and N is the number of data
     Need to take a slice out from f and a slice out of df
     """
-    #print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
-            #func.__name__, dfunc.__name__)
+    print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
+                                             func.__name__, dfunc.__name__)
     partial_f = dparam_partial(func, *args)
     partial_df = dparam_partial(dfunc, *args)
     gradchecking = True
-    for param in params:
-        fnum = np.atleast_1d(partial_f(param)).shape[0]
-        dfnum = np.atleast_1d(partial_df(param)).shape[0]
+    zipped_params = zip(params, params_names)
+    for param_ind, (param_val, param_name) in enumerate(zipped_params):
+        #Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
+        fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
+        dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
         for fixed_val in range(dfnum):
             #dlik and dlik_dvar gives back 1 value for each
             f_ind = min(fnum, fixed_val+1) - 1
             print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
             #Make grad checker with this param moving, note that set_params is NOT being called
             #The parameter is being set directly with __setattr__
-            grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
-                    lambda x : np.atleast_1d(partial_df(x))[fixed_val],
-                    param, 'p')
-            #This is not general for more than one param...
+            #Check only the parameter and function value we wish to check at a time
+            grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
+                                   lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
+                                   param_val, [param_name])
+
             if constraints is not None:
-                for constraint in constraints:
-                    constraint('p', grad)
+                for constrain_param, constraint in constraints:
+                    if grad.grep_param_names(constrain_param):
+                        constraint(constrain_param, grad)
+                    else:
+                        print "parameter didn't exist"
+                    print constrain_param, " ", constraint
             if randomize:
                 grad.randomize()
             if verbose:
@@ -107,17 +117,20 @@ class TestNoiseModels(object):
         ####################################################
         # Constraint wrappers so we can just list them off #
         ####################################################
+        def constrain_fixed(regex, model):
+            model[regex].constrain_fixed()
+
         def constrain_negative(regex, model):
-            model.constrain_negative(regex)
+            model[regex].constrain_negative()
         def constrain_positive(regex, model):
-            model.constrain_positive(regex)
+            model[regex].constrain_positive()
         def constrain_bounded(regex, model, lower, upper):
             """
             Used like: partial(constrain_bounded, lower=0, upper=1)
             """
-            model.constrain_bounded(regex, lower, upper)
+            model[regex].constrain_bounded(lower, upper)
         """
        Dictionary where we nest models we would like to check
            ...
        }
        """
         noise_models = {"Student_t_default": {
-                            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
+                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                             "grad_params": {
                                 "names": ["t_noise"],
                                 "vals": [self.var],
-                                "constraints": [constrain_positive]
+                                "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                                #"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
                                 },
                             "laplace": True
                             },
                         "Student_t_1_var": {
-                            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
+                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                             "grad_params": {
                                 "names": ["t_noise"],
                                 "vals": [1.0],
-                                "constraints": [constrain_positive]
+                                "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
                                 },
                             "laplace": True
                             },
                         "Student_t_small_var": {
-                            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
+                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                             "grad_params": {
                                 "names": ["t_noise"],
                                 "vals": [0.01],
-                                "constraints": [constrain_positive]
+                                "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
                                 },
                             "laplace": True
                             },
                         "Student_t_large_var": {
-                            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
+                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                             "grad_params": {
                                 "names": ["t_noise"],
                                 "vals": [10.0],
-                                "constraints": [constrain_positive]
+                                "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
                                 },
                             "laplace": True
                             },
                         "Student_t_approx_gauss": {
-                            "model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var),
+                            "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
                             "grad_params": {
                                 "names": ["t_noise"],
                                 "vals": [self.var],
-                                "constraints": [constrain_positive]
+                                "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
                                 },
                             "laplace": True
                             },
                         "Student_t_log": {
-                            "model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var),
+                            "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
                             "grad_params": {
                                 "names": ["t_noise"],
                                 "vals": [self.var],
-                                "constraints": [constrain_positive]
+                                "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
                                 },
                             "laplace": True
                             },
                         "Gaussian_default": {
-                            "model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N),
+                            "model": GPy.likelihoods.Gaussian(variance=self.var),
                             "grad_params": {
-                                "names": ["noise_model_variance"],
+                                "names": ["variance"],
                                 "vals": [self.var],
-                                "constraints": [constrain_positive]
+                                "constraints": [("variance", constrain_positive)]
                                 },
                             "laplace": True,
                             "ep": True
                             },
                         #"Gaussian_log": {
-                            #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
+                            #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
                             #"grad_params": {
                                 #"names": ["noise_model_variance"],
                                 #"vals": [self.var],
@@ -207,7 +221,7 @@ class TestNoiseModels(object):
                             #"laplace": True
                         #},
                         #"Gaussian_probit": {
-                            #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
+                            #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
                             #"grad_params": {
                                 #"names": ["noise_model_variance"],
                                 #"vals": [self.var],
@@ -216,7 +230,7 @@ class TestNoiseModels(object):
                             #"laplace": True
                         #},
                         #"Gaussian_log_ex": {
-                            #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
+                            #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
                             #"grad_params": {
                                 #"names": ["noise_model_variance"],
                                 #"vals": [self.var],
@@ -225,31 +239,31 @@ class TestNoiseModels(object):
                             #"laplace": True
                         #},
                         "Bernoulli_default": {
-                            "model": GPy.likelihoods.bernoulli(),
+                            "model": GPy.likelihoods.Bernoulli(),
                             "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
                             "laplace": True,
                             "Y": self.binary_Y,
                             "ep": True
                             },
-                        "Exponential_default": {
-                            "model": GPy.likelihoods.exponential(),
-                            "link_f_constraints": [constrain_positive],
-                            "Y": self.positive_Y,
-                            "laplace": True,
-                        },
-                        "Poisson_default": {
-                            "model": GPy.likelihoods.poisson(),
-                            "link_f_constraints": [constrain_positive],
-                            "Y": self.integer_Y,
-                            "laplace": True,
-                            "ep": False #Should work though...
-                        },
-                        "Gamma_default": {
-                            "model": GPy.likelihoods.gamma(),
-                            "link_f_constraints": [constrain_positive],
-                            "Y": self.positive_Y,
-                            "laplace": True
-                        }
+                        #"Exponential_default": {
+                            #"model": GPy.likelihoods.exponential(),
+                            #"link_f_constraints": [constrain_positive],
+                            #"Y": self.positive_Y,
+                            #"laplace": True,
+                        #},
+                        #"Poisson_default": {
+                            #"model": GPy.likelihoods.poisson(),
+                            #"link_f_constraints": [constrain_positive],
+                            #"Y": self.integer_Y,
+                            #"laplace": True,
+                            #"ep": False #Should work though...
+                        #},
+                        #"Gamma_default": {
+                            #"model": GPy.likelihoods.gamma(),
+                            #"link_f_constraints": [constrain_positive],
+                            #"Y": self.positive_Y,
+                            #"laplace": True
+                        #}
                         }
         for name, attributes in noise_models.iteritems():
@@ -286,8 +300,8 @@ class TestNoiseModels(object):
             else:
                 ep = False
-            if len(param_vals) > 1:
-                raise NotImplementedError("Cannot support multiple params in likelihood yet!")
+            #if len(param_vals) > 1:
+                #raise NotImplementedError("Cannot support multiple params in likelihood yet!")
             #Required by all
             #Normal derivatives
@@ -302,13 +316,13 @@ class TestNoiseModels(object):
             yield self.t_d3logpdf_df3, model, Y, f
             yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
             #Params
-            yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints
-            yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints
-            yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints
+            yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints
+            yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints
+            yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints
             #Link params
-            yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints
-            yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints
-            yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints
+            yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints
+            yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints
+            yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints
             #laplace likelihood gradcheck
             yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
@@ -370,33 +384,33 @@ class TestNoiseModels(object):
    # df_dparams #
    ##############
    @with_setup(setUp, tearDown)
-    def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
+    def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
        assert (
            dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
-                params, args=(f, Y), constraints=param_constraints,
-                randomize=True, verbose=True)
+                params, params_names, args=(f, Y), constraints=param_constraints,
+                randomize=False, verbose=True)
        )
    @with_setup(setUp, tearDown)
-    def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
+    def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
        assert (
            dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
-                params, args=(f, Y), constraints=param_constraints,
-                randomize=True, verbose=True)
+                params, params_names, args=(f, Y), constraints=param_constraints,
+                randomize=False, verbose=True)
        )
    @with_setup(setUp, tearDown)
-    def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
+    def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
        assert (
            dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
-                params, args=(f, Y), constraints=param_constraints,
-                randomize=True, verbose=True)
+                params, params_names, args=(f, Y), constraints=param_constraints,
+                randomize=False, verbose=True)
        )
    ################
@@ -454,32 +468,32 @@ class TestNoiseModels(object):
    # dlink_dparams #
    #################
    @with_setup(setUp, tearDown)
-    def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints):
+    def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
        assert (
            dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
-                params, args=(f, Y), constraints=param_constraints,
+                params, param_names, args=(f, Y), constraints=param_constraints,
                randomize=False, verbose=True)
        )
    @with_setup(setUp, tearDown)
-    def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints):
+    def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
        assert (
            dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
-                params, args=(f, Y), constraints=param_constraints,
+                params, param_names, args=(f, Y), constraints=param_constraints,
                randomize=False, verbose=True)
        )
    @with_setup(setUp, tearDown)
-    def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints):
+    def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
        assert (
            dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
-                params, args=(f, Y), constraints=param_constraints,
+                params, param_names, args=(f, Y), constraints=param_constraints,
                randomize=False, verbose=True)
        )
@@ -493,18 +507,23 @@ class TestNoiseModels(object):
         Y = Y/Y.max()
         white_var = 1e-6
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model)
-        m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood)
+        laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
+        m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
         m.ensure_default_constraints()
-        m.constrain_fixed('white', white_var)
+        m['white'].constrain_fixed(white_var)
-        for param_num in range(len(param_names)):
-            name = param_names[param_num]
-            m[name] = param_vals[param_num]
-            constraints[param_num](name, m)
+        #Set constraints
+        for constrain_param, constraint in constraints:
+            constraint(constrain_param, m)
         print m
         m.randomize()
+
+        #Set params
+        for param_num in range(len(param_names)):
+            name = param_names[param_num]
+            m[name] = param_vals[param_num]
+
         #m.optimize(max_iters=8)
         print m
         m.checkgrad(verbose=1, step=step)
@@ -525,10 +544,10 @@ class TestNoiseModels(object):
         Y = Y/Y.max()
         white_var = 1e-6
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
-        m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
+        ep_inf = GPy.inference.latent_function_inference.EP()
+        m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
         m.ensure_default_constraints()
-        m.constrain_fixed('white', white_var)
+        m['white'].constrain_fixed(white_var)
         for param_num in range(len(param_names)):
             name = param_names[param_num]
@@ -559,8 +578,8 @@ class LaplaceTests(unittest.TestCase):
         self.var = 0.2
         self.var = np.random.rand(1)
-        self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var)
-        self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N)
+        self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
+        self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
         #Make a bigger step as lower bound can be quite curved
         self.step = 1e-6
@@ -584,7 +603,7 @@ class LaplaceTests(unittest.TestCase):
         noise = np.random.randn(*self.X.shape)*self.real_std
         self.Y = np.sin(self.X*2*np.pi) + noise
         self.f = np.random.rand(self.N, 1)
-        self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
+        self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
         dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
         d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
@@ -605,23 +624,27 @@ class LaplaceTests(unittest.TestCase):
         #Yc = Y.copy()
         #Yc[75:80] += 1
         kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        kernel2 = kernel1.copy()
+        #FIXME: Make sure you can copy kernels when params is fixed
+        #kernel2 = kernel1.copy()
+        kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
-        m1.constrain_fixed('white', 1e-6)
-        m1['noise'] = initial_var_guess
-        m1.constrain_bounded('noise', 1e-4, 10)
-        m1.constrain_bounded('rbf', 1e-4, 10)
+        gauss_distr1 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
+        exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
+        m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
+        m1['white'].constrain_fixed(1e-6)
+        m1['variance'] = initial_var_guess
+        m1['variance'].constrain_bounded(1e-4, 10)
+        m1['rbf'].constrain_bounded(1e-4, 10)
         m1.ensure_default_constraints()
         m1.randomize()
-        gauss_distr = GPy.likelihoods.gaussian(variance=initial_var_guess, D=1, N=Y.shape[0])
-        laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), gauss_distr)
-        m2 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel2, likelihood=laplace_likelihood)
+        gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
+        laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+        m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
         m2.ensure_default_constraints()
-        m2.constrain_fixed('white', 1e-6)
-        m2.constrain_bounded('rbf', 1e-4, 10)
-        m2.constrain_bounded('noise', 1e-4, 10)
+        m2['white'].constrain_fixed(1e-6)
+        m2['rbf'].constrain_bounded(1e-4, 10)
+        m2['variance'].constrain_bounded(1e-4, 10)
         m2.randomize()
         if debug:
@@ -667,7 +690,7 @@ class LaplaceTests(unittest.TestCase):
         #Check Y's are the same
-        np.testing.assert_almost_equal(Y, m2.likelihood.Y, decimal=5)
+        np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
         #Check marginals are the same
         np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
         #Check marginals are the same with random