merge changes

This commit is contained in:
Zhenwen Dai 2014-03-21 22:10:24 +00:00
commit 8a83845937
65 changed files with 1197 additions and 600 deletions

View file

@ -7,7 +7,7 @@ import warnings
from .. import kern from .. import kern
from ..util.linalg import dtrtrs from ..util.linalg import dtrtrs
from model import Model from model import Model
from parameterization import ObservableArray from parameterization import ObsAr
from .. import likelihoods from .. import likelihoods
from ..likelihoods.gaussian import Gaussian from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
@ -31,20 +31,18 @@ class GP(Model):
super(GP, self).__init__(name) super(GP, self).__init__(name)
assert X.ndim == 2 assert X.ndim == 2
if isinstance(X, (ObservableArray, VariationalPosterior)): if isinstance(X, (ObsAr, VariationalPosterior)):
self.X = X self.X = X
else: self.X = ObservableArray(X) else: self.X = ObsAr(X)
self.num_data, self.input_dim = self.X.shape self.num_data, self.input_dim = self.X.shape
assert Y.ndim == 2 assert Y.ndim == 2
self.Y = ObservableArray(Y) self.Y = ObsAr(Y)
assert Y.shape[0] == self.num_data assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape _, self.output_dim = self.Y.shape
if Y_metadata is None: #TODO: check the type of this is okay?
self.Y_metadata = {}
else:
self.Y_metadata = Y_metadata self.Y_metadata = Y_metadata
assert isinstance(kernel, kern.Kern) assert isinstance(kernel, kern.Kern)
@ -76,25 +74,27 @@ class GP(Model):
def _raw_predict(self, _Xnew, full_cov=False): def _raw_predict(self, _Xnew, full_cov=False):
""" """
Internal helper function for making predictions, does not account For making predictions, does not account for normalization or likelihood
for normalization or likelihood
full_cov is a boolean which defines whether the full covariance matrix full_cov is a boolean which defines whether the full covariance matrix
of the prediction is computed. If full_cov is False (default), only the of the prediction is computed. If full_cov is False (default), only the
diagonal of the covariance is returned. diagonal of the covariance is returned.
$$
p(f*|X*, X, Y) = \int_{-\infty}^{\infty} p(f*|f,X*) p(f|X,Y) df
= N(f* | K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{x*x}(K_{xx} + \Sigma)^{-1}K_{xx*})
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
""" """
Kx = self.kern.K(_Xnew, self.X).T Kx = self.kern.K(_Xnew, self.X).T
#LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
WiKx = np.dot(self.posterior.woodbury_inv, Kx) WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
Kxx = self.kern.K(_Xnew) Kxx = self.kern.K(_Xnew)
#var = Kxx - tdot(LiKx.T) var = Kxx - np.dot(Kx.T, WiKx)
var = np.dot(Kx.T, WiKx)
else: else:
Kxx = self.kern.Kdiag(_Xnew) Kxx = self.kern.Kdiag(_Xnew)
#var = Kxx - np.sum(LiKx*LiKx, 0)
var = Kxx - np.sum(WiKx*Kx, 0) var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1) var = var.reshape(-1, 1)

View file

@ -10,11 +10,11 @@ class Mapping(Parameterized):
Base model for shared behavior between models that can act like a mapping. Base model for shared behavior between models that can act like a mapping.
""" """
def __init__(self, input_dim, output_dim): def __init__(self, input_dim, output_dim, name='mapping'):
self.input_dim = input_dim self.input_dim = input_dim
self.output_dim = output_dim self.output_dim = output_dim
super(Mapping, self).__init__() super(Mapping, self).__init__(name=name)
# Model.__init__(self) # Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at # All leaf nodes should call self._set_params(self._get_params()) at
# the end # the end

View file

@ -1,5 +1,5 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from param import Param, ObservableArray from param import Param, ObsAr
from parameterized import Parameterized from parameterized import Parameterized

View file

@ -1,25 +1,25 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
__updated__ = '2013-12-16' __updated__ = '2014-03-17'
import numpy as np import numpy as np
from parameter_core import Observable from parameter_core import Observable
class ObservableArray(np.ndarray, Observable): class ObsAr(np.ndarray, Observable):
""" """
An ndarray which reports changes to its observers. An ndarray which reports changes to its observers.
The observers can add themselves with a callable, which The observers can add themselves with a callable, which
will be called every time this array changes. The callable will be called every time this array changes. The callable
takes exactly one argument, which is this array itself. takes exactly one argument, which is this array itself.
""" """
__array_priority__ = -1 # Never give back ObservableArray __array_priority__ = -1 # Never give back ObsAr
def __new__(cls, input_array, *a, **kw): def __new__(cls, input_array, *a, **kw):
if not isinstance(input_array, ObservableArray): if not isinstance(input_array, ObsAr):
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
else: obj = input_array else: obj = input_array
cls.__name__ = "ObservableArray\n " #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
super(ObservableArray, obj).__init__(*a, **kw) super(ObsAr, obj).__init__(*a, **kw)
return obj return obj
def __array_finalize__(self, obj): def __array_finalize__(self, obj):
@ -30,6 +30,14 @@ class ObservableArray(np.ndarray, Observable):
def __array_wrap__(self, out_arr, context=None): def __array_wrap__(self, out_arr, context=None):
return out_arr.view(np.ndarray) return out_arr.view(np.ndarray)
def __reduce__(self):
func, args, state = np.ndarray.__reduce__(self)
return func, args, (state, Observable._getstate(self))
def __setstate__(self, state):
np.ndarray.__setstate__(self, state[0])
Observable._setstate(self, state[1])
def _s_not_empty(self, s): def _s_not_empty(self, s):
# this checks whether there is something picked by this slice. # this checks whether there is something picked by this slice.
return True return True
@ -46,7 +54,7 @@ class ObservableArray(np.ndarray, Observable):
def __setitem__(self, s, val): def __setitem__(self, s, val):
if self._s_not_empty(s): if self._s_not_empty(s):
super(ObservableArray, self).__setitem__(s, val) super(ObsAr, self).__setitem__(s, val)
self.notify_observers(self[s]) self.notify_observers(self[s])
def __getslice__(self, start, stop): def __getslice__(self, start, stop):
@ -56,7 +64,7 @@ class ObservableArray(np.ndarray, Observable):
return self.__setitem__(slice(start, stop), val) return self.__setitem__(slice(start, stop), val)
def __copy__(self, *args): def __copy__(self, *args):
return ObservableArray(self.view(np.ndarray).copy()) return ObsAr(self.view(np.ndarray).copy())
def copy(self, *args): def copy(self, *args):
return self.__copy__(*args) return self.__copy__(*args)

View file

@ -4,7 +4,7 @@
import itertools import itertools
import numpy import numpy
from parameter_core import OptimizationHandlable, adjust_name_for_printing from parameter_core import OptimizationHandlable, adjust_name_for_printing
from array_core import ObservableArray from array_core import ObsAr
###### printing ###### printing
__constraints_name__ = "Constraint" __constraints_name__ = "Constraint"
@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5 __print_threshold__ = 5
###### ######
class Param(OptimizationHandlable, ObservableArray): class Param(OptimizationHandlable, ObsAr):
""" """
Parameter object for GPy models. Parameter object for GPy models.
@ -226,7 +226,7 @@ class Param(OptimizationHandlable, ObservableArray):
# Constrainable # Constrainable
#=========================================================================== #===========================================================================
def _ensure_fixes(self): def _ensure_fixes(self):
self._fixes_ = numpy.ones(self._realsize_, dtype=bool) if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
#=========================================================================== #===========================================================================
# Convenience # Convenience
@ -269,6 +269,8 @@ class Param(OptimizationHandlable, ObservableArray):
@property @property
def _ties_str(self): def _ties_str(self):
return [''] return ['']
def _ties_for(self, ravi):
return [['N/A']]*ravi.size
def __repr__(self, *args, **kwargs): def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format( name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.hierarchy_name()) x=self.hierarchy_name())
@ -312,7 +314,7 @@ class Param(OptimizationHandlable, ObservableArray):
ravi = self._raveled_index(filter_) ravi = self._raveled_index(filter_)
if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi) if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
if prirs is None: prirs = self.priors.properties_for(ravi) if prirs is None: prirs = self.priors.properties_for(ravi)
if ties is None: ties = [['N/A']]*self.size if ties is None: ties = self._ties_for(ravi)
ties = [' '.join(map(lambda x: x, t)) for t in ties] ties = [' '.join(map(lambda x: x, t)) for t in ties]
if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__) if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
if lx is None: lx = self._max_len_values() if lx is None: lx = self._max_len_values()

View file

@ -16,7 +16,7 @@ Observable Pattern for patameterization
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np import numpy as np
__updated__ = '2014-03-14' __updated__ = '2014-03-18'
class HierarchyError(Exception): class HierarchyError(Exception):
""" """
@ -56,7 +56,7 @@ class InterfacePickleFunctions(object):
""" """
raise NotImplementedError, "To be able to use pickling you need to implement this method" raise NotImplementedError, "To be able to use pickling you need to implement this method"
class Pickleable(object): class Pickleable(InterfacePickleFunctions):
""" """
Make an object pickleable (See python doc 'pickling'). Make an object pickleable (See python doc 'pickling').
@ -95,7 +95,7 @@ class Pickleable(object):
def _has_get_set_state(self): def _has_get_set_state(self):
return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__) return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
class Observable(InterfacePickleFunctions): class Observable(Pickleable):
""" """
Observable pattern for parameterization. Observable pattern for parameterization.
@ -155,6 +155,7 @@ class Observable(InterfacePickleFunctions):
def _getstate(self): def _getstate(self):
return [self._observer_callables_] return [self._observer_callables_]
def _setstate(self, state): def _setstate(self, state):
self._observer_callables_ = state.pop() self._observer_callables_ = state.pop()
@ -376,7 +377,7 @@ class Constrainable(Nameable, Indexable):
# Ensure that the fixes array is set: # Ensure that the fixes array is set:
# Parameterized: ones(self.size) # Parameterized: ones(self.size)
# Param: ones(self._realsize_ # Param: ones(self._realsize_
self._fixes_ = np.ones(self.size, dtype=bool) if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
def _set_fixed(self, index): def _set_fixed(self, index):
self._ensure_fixes() self._ensure_fixes()
@ -397,7 +398,7 @@ class Constrainable(Nameable, Indexable):
self._fixes_ = None self._fixes_ = None
def _has_fixes(self): def _has_fixes(self):
return hasattr(self, "_fixes_") and self._fixes_ is not None return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size
#=========================================================================== #===========================================================================
# Prior Operations # Prior Operations
@ -540,12 +541,12 @@ class Constrainable(Nameable, Indexable):
print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name) print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
which.add(what, self._raveled_index()) which.add(what, self._raveled_index())
def _remove_from_index_operations(self, which, what): def _remove_from_index_operations(self, which, transforms):
""" """
Helper preventing copy code. Helper preventing copy code.
Remove given what (transform prior etc) from which param index ops. Remove given what (transform prior etc) from which param index ops.
""" """
if len(what) == 0: if len(transforms) == 0:
transforms = which.properties() transforms = which.properties()
removed = np.empty((0,), dtype=int) removed = np.empty((0,), dtype=int)
for t in transforms: for t in transforms:
@ -566,24 +567,32 @@ class OptimizationHandlable(Constrainable):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
def transform(self): def transform(self):
[np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] [np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def untransform(self): def untransform(self):
[np.put(self._param_array_, ind, c.f(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] [np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def _get_params_transformed(self): def _get_params_transformed(self):
# transformed parameters (apply transformation rules) # transformed parameters (apply transformation rules)
p = self._param_array_.copy() p = self._param_array_.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return p[fixes]
elif self._has_fixes():
return p[self._fixes_] return p[self._fixes_]
return p return p
def _set_params_transformed(self, p): def _set_params_transformed(self, p):
if p is self._param_array_: if p is self._param_array_:
p = p.copy() p = p.copy()
if self._has_fixes(): self._param_array_[self._fixes_] = p if self.has_parent() and self.constraints[__fixed__].size != 0:
else: self._param_array_[:] = p fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self._param_array_.flat[fixes] = p
elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p
else: self._param_array_.flat = p
self.untransform() self.untransform()
self._trigger_params_changed() self._trigger_params_changed()
@ -661,8 +670,8 @@ class OptimizationHandlable(Constrainable):
for pi in self._parameters_: for pi in self._parameters_:
pislice = slice(pi_old_size, pi_old_size+pi.size) pislice = slice(pi_old_size, pi_old_size+pi.size)
self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat self._param_array_[pislice] = pi._param_array_.flat#, requirements=['C', 'W']).flat
self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat self._gradient_array_[pislice] = pi._gradient_array_.flat#, requirements=['C', 'W']).flat
pi._param_array_.data = parray[pislice].data pi._param_array_.data = parray[pislice].data
pi._gradient_array_.data = garray[pislice].data pi._gradient_array_.data = garray[pislice].data
@ -769,11 +778,11 @@ class Parameterizable(OptimizationHandlable):
Add all parameters to this param class, you can insert parameters Add all parameters to this param class, you can insert parameters
at any given index using the :func:`list.insert` syntax at any given index using the :func:`list.insert` syntax
""" """
# if param.has_parent():
# raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
if param in self._parameters_ and index is not None: if param in self._parameters_ and index is not None:
self.remove_parameter(param) self.remove_parameter(param)
self.add_parameter(param, index) self.add_parameter(param, index)
#elif param.has_parent():
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self._parameters_: elif param not in self._parameters_:
if param.has_parent(): if param.has_parent():
parent = param._parent_ parent = param._parent_
@ -797,13 +806,19 @@ class Parameterizable(OptimizationHandlable):
param.add_observer(self, self._pass_through_notify_observers, -np.inf) param.add_observer(self, self._pass_through_notify_observers, -np.inf)
self.size += param.size parent = self
while parent is not None:
parent.size += param.size
parent = parent._parent_
self._connect_parameters()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
self._connect_parameters(ignore_added_names=_ignore_added_names)
self._notify_parent_change()
self._connect_fixes()
else: else:
raise RuntimeError, """Parameter exists already added and no copy made""" raise HierarchyError, """Parameter exists already and no copy made"""
def add_parameters(self, *parameters): def add_parameters(self, *parameters):
@ -829,17 +844,18 @@ class Parameterizable(OptimizationHandlable):
param.remove_observer(self, self._pass_through_notify_observers) param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size) self.constraints.shift_left(start, param.size)
self._connect_fixes()
self._connect_parameters() self._connect_parameters()
self._notify_parent_change() self._notify_parent_change()
parent = self._parent_ parent = self._parent_
while parent is not None: while parent is not None:
parent._connect_fixes() parent.size -= param.size
parent._connect_parameters()
parent._notify_parent_change()
parent = parent._parent_ parent = parent._parent_
self._highest_parent_._connect_parameters()
self._highest_parent_._connect_fixes()
self._highest_parent_._notify_parent_change()
def _connect_parameters(self, ignore_added_names=False): def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object # connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects # This just sets up the right connection for the params objects
@ -862,8 +878,8 @@ class Parameterizable(OptimizationHandlable):
# first connect all children # first connect all children
p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice]) p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
# then connect children to self # then connect children to self
self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C') self._param_array_[pslice] = p._param_array_.flat#, requirements=['C', 'W']).ravel(order='C')
self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C') self._gradient_array_[pslice] = p._gradient_array_.flat#, requirements=['C', 'W']).ravel(order='C')
if not p._param_array_.flags['C_CONTIGUOUS']: if not p._param_array_.flags['C_CONTIGUOUS']:
import ipdb;ipdb.set_trace() import ipdb;ipdb.set_trace()

View file

@ -7,10 +7,10 @@ from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import weakref import weakref
import sys import sys
#_lim_val = -np.log(sys.float_info.epsilon)
_exp_lim_val = np.finfo(np.float64).max _exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val) _lim_val = 36.0
epsilon = np.finfo(np.float64).resolution
#=============================================================================== #===============================================================================
# Fixing constants # Fixing constants
@ -54,12 +54,12 @@ class Transformation(object):
class Logexp(Transformation): class Logexp(Transformation):
domain = _POSITIVE domain = _POSITIVE
def f(self, x): def f(self, x):
return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x))) #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f): def finv(self, f):
return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.)) return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
def gradfactor(self, f): def gradfactor(self, f):
return np.where(f>_lim_val, 1., 1 - np.exp(-f)) return np.where(f>_lim_val, 1., 1. - np.exp(-f))
def initialize(self, f): def initialize(self, f):
if np.any(f < 0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print "Warning: changing parameters to satisfy constraints"

View file

@ -64,8 +64,8 @@ class SparseGP(GP):
self.kern.gradient += target self.kern.gradient += target
#gradients wrt Z #gradients wrt Z
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations( self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else: else:
#gradients wrt kernel #gradients wrt kernel
@ -76,8 +76,8 @@ class SparseGP(GP):
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += target self.kern.gradient += target
#gradients wrt Z #gradients wrt Z
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False): def _raw_predict(self, Xnew, full_cov=False):
""" """
@ -88,8 +88,9 @@ class SparseGP(GP):
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew) Kxx = self.kern.K(Xnew)
#var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) #var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = var.squeeze()
else: else:
Kxx = self.kern.Kdiag(Xnew) Kxx = self.kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T

View file

@ -23,13 +23,10 @@ K = Bias.prod(Coreg,name='X')
#K.coregion.W = 0 #K.coregion.W = 0
#print K.coregion.W #print K.coregion.W
#print Bias.K(_X,_X) #print Bias.K(_X,_X)
#print K.K(X,X) #print K.K(X,X)
#pb.matshow(K.K(X,X)) #pb.matshow(K.K(X,X))
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")] Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H') kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H')
kern.B.W = 0 kern.B.W = 0
@ -37,16 +34,22 @@ kern.B.kappa = 1.
#kern.B.W.fix() #kern.B.W.fix()
#kern.B.kappa.fix() #kern.B.kappa.fix()
#m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern) #m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], kernel=kern)
Z1 = np.array([1.5,2.5])[:,None]
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern)
#m.optimize() #m.optimize()
m.checkgrad(verbose=1) m.checkgrad(verbose=1)
"""
fig = pb.figure() fig = pb.figure()
ax0 = fig.add_subplot(211) ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212) ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2]) slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0) m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
#m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1) #m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
"""

View file

@ -160,6 +160,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy import GPy
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from ..util.misc import param_to_array
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
@ -173,11 +174,11 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
if plot: if plot:
y = m.Y[0, :] y = m.Y
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes) m.plot_latent(ax=latent_axes)
data_show = GPy.plotting.matplot_dep.visualize.vector_show(y) data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close(fig) plt.close(fig)

View file

@ -158,7 +158,7 @@ def boston_example(optimize=True, plot=True):
#Gaussian GP #Gaussian GP
print "Gauss GP" print "Gauss GP"
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.constrain_fixed('white', 1e-5) mgp.constrain_fixed('.*white', 1e-5)
mgp['rbf_len'] = rbf_len mgp['rbf_len'] = rbf_len
mgp['noise'] = noise mgp['noise'] = noise
print mgp print mgp
@ -176,7 +176,7 @@ def boston_example(optimize=True, plot=True):
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood) mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
mg.constrain_positive('noise_variance') mg.constrain_positive('noise_variance')
mg.constrain_fixed('white', 1e-5) mg.constrain_fixed('.*white', 1e-5)
mg['rbf_len'] = rbf_len mg['rbf_len'] = rbf_len
mg['noise'] = noise mg['noise'] = noise
print mg print mg
@ -194,10 +194,10 @@ def boston_example(optimize=True, plot=True):
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.constrain_fixed('white', 1e-5) mstu_t.constrain_fixed('.*white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000) mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000)
mstu_t['rbf_len'] = rbf_len mstu_t['rbf_len'] = rbf_len
mstu_t['t_noise'] = noise mstu_t['.*t_noise'] = noise
print mstu_t print mstu_t
if optimize: if optimize:
mstu_t.optimize(optimizer=optimizer, messages=messages) mstu_t.optimize(optimizer=optimizer, messages=messages)

View file

@ -25,80 +25,51 @@ def olympic_marathon_men(optimize=True, plot=True):
return m return m
def coregionalization_toy2(optimize=True, plot=True): def coregionalization_toy(optimize=True, plot=True):
""" """
A simple demonstration of coregionalization on two sinusoidal functions. A simple demonstration of coregionalization on two sinusoidal functions.
""" """
#build a design matrix with a column of integers indicating the output #build a design matrix with a column of integers indicating the output
X1 = np.random.rand(50, 1) * 8 X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5 X2 = np.random.rand(30, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
#build a suitable set of observed variables #build a suitable set of observed variables
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2. Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
Y = np.vstack((Y1, Y2))
#build the kernel m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])
k1 = GPy.kern.RBF(1) + GPy.kern.Bias(1)
k2 = GPy.kern.Coregionalize(2,1)
k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k)
if optimize: if optimize:
m.optimize('bfgs', max_iters=100) m.optimize('bfgs', max_iters=100)
if plot: if plot:
m.plot(fixed_inputs=[(1,0)]) slices = GPy.util.multioutput.get_slices([X1,X2])
m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
return m return m
#FIXME: Needs recovering once likelihoods are consolidated
#def coregionalization_toy(optimize=True, plot=True):
# """
# A simple demonstration of coregionalization on two sinusoidal functions.
# """
# X1 = np.random.rand(50, 1) * 8
# X2 = np.random.rand(30, 1) * 5
# X = np.vstack((X1, X2))
# Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
# Y = np.vstack((Y1, Y2))
#
# k1 = GPy.kern.RBF(1)
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
# m.constrain_fixed('.*rbf_var', 1.)
# m.optimize(max_iters=100)
#
# fig, axes = pb.subplots(2,1)
# m.plot(fixed_inputs=[(1,0)],ax=axes[0])
# m.plot(fixed_inputs=[(1,1)],ax=axes[1])
# axes[0].set_title('Output 0')
# axes[1].set_title('Output 1')
# return m
def coregionalization_sparse(optimize=True, plot=True): def coregionalization_sparse(optimize=True, plot=True):
""" """
A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations. A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
""" """
#fetch the data from the non sparse examples #build a design matrix with a column of integers indicating the output
m = coregionalization_toy2(optimize=False, plot=False) X1 = np.random.rand(50, 1) * 8
X, Y = m.X, m.Y X2 = np.random.rand(30, 1) * 5
k = GPy.kern.RBF(1)**GPy.kern.Coregionalize(2) #build a suitable set of observed variables
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
#construct a model m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])
m = GPy.models.SparseGPRegression(X,Y, num_inducing=25, kernel=k)
m.Z[:,1].fix() # don't optimize the inducing input indexes
if optimize: if optimize:
m.optimize('bfgs', max_iters=100, messages=1) m.optimize('bfgs', max_iters=100)
if plot: if plot:
m.plot(fixed_inputs=[(1,0)]) slices = GPy.util.multioutput.get_slices([X1,X2])
m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
pb.ylim(-3,)
return m return m

View file

@ -19,19 +19,15 @@ class DTC(object):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance) beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size <1: if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC" raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z) Kmm = kern.K(Z)
@ -91,19 +87,15 @@ class vDTC(object):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance) beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size <1: if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC" raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z) Kmm = kern.K(Z)

View file

@ -3,6 +3,7 @@
from posterior import Posterior from posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np import numpy as np
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
@ -41,7 +42,9 @@ class ExactGaussianInference(object):
K = kern.K(X) K = kern.K(X)
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata)) Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1) alpha, _ = dpotrs(LW, YYT_factor, lower=1)

View file

@ -11,9 +11,9 @@ class EP(object):
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float :type epsilon: float
:param eta: Power EP thing TODO: Ricardo: what, exactly? :param eta: parameter for fractional EP updates.
:type eta: float64 :type eta: float64
:param delta: Power EP thing TODO: Ricardo: what, exactly? :param delta: damping EP updates factor.
:type delta: float64 :type delta: float64
""" """
self.epsilon, self.eta, self.delta = epsilon, eta, delta self.epsilon, self.eta, self.delta = epsilon, eta, delta

View file

@ -17,14 +17,14 @@ class FITC(object):
""" """
const_jitter = 1e-6 const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#make sure the noise is not hetero #make sure the noise is not hetero
sigma_n = np.squeeze(likelihood.variance) sigma_n = likelihood.gaussian_variance(Y_metadata)
if sigma_n.size <1: if sigma_n.size >1:
raise NotImplementedError, "no hetero noise with this implementation of FITC" raise NotImplementedError, "no hetero noise with this implementation of FITC"
Kmm = kern.K(Z) Kmm = kern.K(Z)

View file

@ -51,12 +51,11 @@ class Laplace(object):
Ki_f_init = self._previous_Ki_fhat Ki_f_init = self._previous_Ki_fhat
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat self.f_hat = f_hat
self.Ki_fhat = Ki_fhat self.Ki_fhat = Ki_fhat
self.K = K.copy() self.K = K.copy()
#Compute hessian and other variables at mode #Compute hessian and other variables at mode
log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
self._previous_Ki_fhat = Ki_fhat.copy() self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
@ -86,13 +85,13 @@ class Laplace(object):
#define the objective function (to be maximised) #define the objective function (to be maximised)
def obj(Ki_f, f): def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata) return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata)
difference = np.inf difference = np.inf
iteration = 0 iteration = 0
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter: while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata) W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata) grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
W_f = W*f W_f = W*f
@ -136,13 +135,12 @@ class Laplace(object):
At the mode, compute the hessian and effective covariance matrix. At the mode, compute the hessian and effective covariance matrix.
returns: logZ : approximation to the marginal likelihood returns: logZ : approximation to the marginal likelihood
woodbury_vector : variable required for calculating the approximation to the covariance matrix
woodbury_inv : variable required for calculating the approximation to the covariance matrix woodbury_inv : variable required for calculating the approximation to the covariance matrix
dL_dthetaL : array of derivatives (1 x num_kernel_params) dL_dthetaL : array of derivatives (1 x num_kernel_params)
dL_dthetaL : array of derivatives (1 x num_likelihood_params) dL_dthetaL : array of derivatives (1 x num_likelihood_params)
""" """
#At this point get the hessian matrix (or vector as W is diagonal) #At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata) W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
@ -151,11 +149,10 @@ class Laplace(object):
Ki_W_i = K - C.T.dot(C) #Could this be wrong? Ki_W_i = K - C.T.dot(C) #Could this be wrong?
#compute the log marginal #compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L))) log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
#Compute vival matrices for derivatives #Compute vival matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9. dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
#BiK, _ = dpotrs(L, K, lower=1) #BiK, _ = dpotrs(L, K, lower=1)
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
@ -169,7 +166,7 @@ class Laplace(object):
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i) explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
#Implicit #Implicit
implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i) implicit_part = np.dot(Ki_f, dL_dfhat.T).dot(I_KW_i)
dL_dK = explicit_part + implicit_part dL_dK = explicit_part + implicit_part
else: else:
@ -179,7 +176,7 @@ class Laplace(object):
#compute dL_dthetaL# #compute dL_dthetaL#
#################### ####################
if likelihood.size > 0 and not likelihood.is_fixed: if likelihood.size > 0 and not likelihood.is_fixed:
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata) dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata)
num_params = likelihood.size num_params = likelihood.size
# make space for one derivative for each likelihood parameter # make space for one derivative for each likelihood parameter
@ -200,7 +197,7 @@ class Laplace(object):
else: else:
dL_dthetaL = np.zeros(likelihood.size) dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave): def _compute_B_statistics(self, K, W, log_concave):
""" """

View file

@ -73,20 +73,37 @@ class Posterior(object):
@property @property
def mean(self): def mean(self):
"""
Posterior mean
$$
K_{xx}v
v := \texttt{Woodbury vector}
$$
"""
if self._mean is None: if self._mean is None:
self._mean = np.dot(self._K, self.woodbury_vector) self._mean = np.dot(self._K, self.woodbury_vector)
return self._mean return self._mean
@property @property
def covariance(self): def covariance(self):
"""
Posterior covariance
$$
K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
W_{xx} := \texttt{Woodbury inv}
$$
"""
if self._covariance is None: if self._covariance is None:
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
self._covariance = self._K - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
return self._covariance.squeeze() return self._covariance
@property @property
def precision(self): def precision(self):
"""
Inverse of posterior covariance
"""
if self._precision is None: if self._precision is None:
cov = np.atleast_3d(self.covariance) cov = np.atleast_3d(self.covariance)
self._precision = np.zeros(cov.shape) # if one covariance per dimension self._precision = np.zeros(cov.shape) # if one covariance per dimension
@ -96,6 +113,13 @@ class Posterior(object):
@property @property
def woodbury_chol(self): def woodbury_chol(self):
"""
return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
$$
L_{W}L_{W}^{\top} = W^{-1}
W^{-1} := \texttt{Woodbury inv}
$$
"""
if self._woodbury_chol is None: if self._woodbury_chol is None:
#compute woodbury chol from #compute woodbury chol from
if self._woodbury_inv is not None: if self._woodbury_inv is not None:
@ -121,6 +145,13 @@ class Posterior(object):
@property @property
def woodbury_inv(self): def woodbury_inv(self):
"""
The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
$$
(K_{xx} + \Sigma_{xx})^{-1}
\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_inv is None: if self._woodbury_inv is None:
self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1) self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
#self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1) #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
@ -129,17 +160,22 @@ class Posterior(object):
@property @property
def woodbury_vector(self): def woodbury_vector(self):
"""
Woodbury vector in the gaussian likelihood case only is defined as
$$
(K_{xx} + \Sigma)^{-1}Y
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_vector is None: if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean) self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
return self._woodbury_vector return self._woodbury_vector
@property @property
def K_chol(self): def K_chol(self):
"""
Cholesky of the prior covariance K
"""
if self._K_chol is None: if self._K_chol is None:
self._K_chol = jitchol(self._K) self._K_chol = jitchol(self._K)
return self._K_chol return self._K_chol

View file

@ -176,7 +176,6 @@ class VarDTC(object):
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
class VarDTCMissingData(object): class VarDTCMissingData(object):
@ -365,7 +364,7 @@ class VarDTCMissingData(object):
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten() dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if het_noise: if het_noise:

View file

@ -0,0 +1,63 @@
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
####### Preliminary BO with standard acquisition functions ###############################
# Types of BO
# MM: Maximum (or minimum) mean
# MPI: Maximum posterior improvement
# MUI: Maximum upper interval
def BOacquisition(X,Y,model,type_bo="MPI",type_objective="max",par_mpi = 0,z_mui=1.96,plot=True,n_eval = 500):
    """
    Pick the next sampling location for 1-D Bayesian optimisation.

    The model is evaluated on a regular grid spanning ``[min(X)-10, max(X)+10]``
    and the grid point that maximises the chosen acquisition function is
    returned. Only works for one-dimensional inputs.

    :param X: locations already sampled (only its minimum and maximum are used
        to build the grid; also plotted when ``plot`` is true)
    :param Y: observations at X (only used for plotting)
    :param model: object with a ``predict(X_star)`` method whose first two
        outputs are indexed as ``[0]`` (mean) and ``[1]``; the second output is
        treated as the predictive variance -- NOTE(review): confirm against the
        model class in use
    :param type_bo: acquisition function, one of
        "MM" (maximum/minimum mean), "MPI" (maximum posterior improvement),
        "MUI" (maximum upper interval)
    :param type_objective: "max" to maximise or "min" to minimise
    :param par_mpi: relative margin applied to the incumbent in "MPI"
    :param z_mui: number of standard deviations added to the mean in "MUI"
    :param plot: if true, plot the data, the model and the acquisition function
    :param n_eval: number of grid points
    :returns: the grid location (array of shape (1,)) with the highest acquisition value
    :raises ValueError: if ``type_bo`` or ``type_objective`` is not recognised
    """
    # Fail early with a clear message instead of an unbound-name error later on.
    if type_objective not in ("max", "min"):
        raise ValueError("type_objective must be 'max' or 'min', got %r" % (type_objective,))
    if type_bo not in ("MPI", "MM", "MUI"):
        raise ValueError("type_bo must be 'MPI', 'MM' or 'MUI', got %r" % (type_bo,))

    # Grid where the GP will be evaluated. np.min/np.max return scalars, so the
    # grid is always an (n_eval, 1) column whatever the shape of X.
    X_star = np.linspace(np.min(X) - 10, np.max(X) + 10, n_eval)[:, None]

    # Posterior GP evaluated on the grid
    fest = model.predict(X_star)
    mean = fest[0]
    # The improvement probability and the upper interval are both expressed in
    # standard deviations, so convert the variance once.
    std = np.sqrt(fest[1])

    # Calculate the acquisition function
    if type_objective == "max":
        if type_bo == "MPI":
            acqu = norm.cdf((mean - (1 + par_mpi) * np.max(mean)) / std)
        elif type_bo == "MM":
            acqu = mean / np.max(mean)
        else:  # "MUI"
            acqu = mean + z_mui * std
    else:
        if type_bo == "MPI":
            acqu = 1 - norm.cdf((mean - (1 + par_mpi) * np.min(mean)) / std)
        elif type_bo == "MM":
            acqu = 1 - mean / np.max(mean)
        else:  # "MUI"
            acqu = -mean + z_mui * std

    # Rescale so that the largest value is 0.5 (keeps the curve readable when
    # drawn on top of the model plot).
    acqu = acqu / (2 * np.max(acqu))
    x_new = X_star[np.argmax(acqu)]

    # Plot GP posterior, collected data and the acquisition function
    if plot:
        plt.plot(X, Y, 'p')
        plt.title('Acquisition function')
        model.plot()
        plt.plot(X_star, acqu, 'r--')

    # Return the point where we should take the new sample
    return x_new
###############################################################

View file

@ -9,4 +9,6 @@ from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical from _src.independent_outputs import IndependentOutputs, Hierarchical
from _src.coregionalize import Coregionalize from _src.coregionalize import Coregionalize
from _src.ssrbf import SSRBF from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
from _src.ODE_UY import ODE_UY

282
GPy/kern/_src/ODE_UY.py Normal file
View file

@ -0,0 +1,282 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_UY(Kern):
    """
    Joint kernel over two outputs U and Y linked by the first-order ODE

        a * dy/dt + b * y = U

    The last column of every input matrix is an output index (0 selects U,
    1 selects Y); the remaining column is the input location. The U-U block
    is a Matern-3/2 covariance; the Y-Y and cross blocks are the closed forms
    built from it in :meth:`K`.

    NOTE: several terms divide by ``(ly - lu)``, with ``ly = 1/lengthscale_Y``
    and ``lu = sqrt(3)/lengthscale_U``, so the expressions are singular when
    those two rates coincide.

    :param input_dim: must be 2 (one input column plus the index column)
    :param variance_U: initial variance of U
    :param variance_Y: initial variance of Y
    :param lengthscale_U: initial lengthscale of U
    :param lengthscale_Y: initial lengthscale of Y
    :param active_dims: forwarded to the ``Kern`` constructor
    :param name: name of the kernel
    """

    def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
        assert input_dim ==2, "only defined for 2 input dims"
        super(ODE_UY, self).__init__(input_dim, active_dims, name)

        # Each parameter is initialised from its own constructor argument
        # (the U parameters previously picked up the Y values by mistake).
        self.variance_Y = Param('variance_Y', variance_Y, Logexp())
        self.variance_U = Param('variance_U', variance_U, Logexp())
        self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
        self.lengthscale_U = Param('lengthscale_U', lengthscale_U, Logexp())

        self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)

    def K(self, X, X2=None):
        """
        Compute the covariance matrix between X and X2 (or X and itself).

        :param X: inputs; last column is the output index
        :param X2: optional second set of inputs in the same layout
        :returns: array of shape (X.shape[0], X2.shape[0])
        """
        # model : a * dy/dt + b * y = U
        #lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay

        # Split off the index column; index_to_slices presumably groups the
        # rows belonging to each output index -- verify against its definition.
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
            K = np.zeros((X.shape[0], X.shape[0]))
        else:
            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
            K = np.zeros((X.shape[0], X2.shape[0]))

        # signed pairwise differences between input locations
        rdist = X - X2.T
        ly=1/self.lengthscale_Y
        lu=np.sqrt(3)/self.lengthscale_U
        Vu=self.variance_U
        Vy=self.variance_Y

        # kernel for kuu matern3/2
        kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))

        # kernel for kyy
        k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
        k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
        k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
        kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))

        # cross covariance function
        kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
        k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
        k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )

        Vyu=np.sqrt(Vy*ly*2)

        # cross covariance kuy
        kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy
        kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy
        # cross covariance kyu
        kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
        kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu

        # Fill the matrix block by block: (i, j) are the output indices of the
        # row and column groups.
        for i, s1 in enumerate(slices):
            for j, s2 in enumerate(slices2):
                for ss1 in s1:
                    for ss2 in s2:
                        if i==0 and j==0:
                            K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
                        elif i==0 and j==1:
                            K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
                        elif i==1 and j==1:
                            K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
                        else:
                            K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
        return K

    def Kdiag(self, X):
        """Compute the diagonal of the covariance matrix associated to X.

        :raises ValueError: if the index column contains an output index other than 0 or 1
        """
        Kdiag = np.zeros(X.shape[0])
        ly=1/self.lengthscale_Y
        lu=np.sqrt(3)/self.lengthscale_U

        Vu = self.variance_U
        Vy=self.variance_Y

        # the kyy terms of K evaluated at zero distance
        k1 = (2*lu+ly)/(lu+ly)**2
        k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
        k3 = 1/(lu+ly) + (lu)/(lu+ly)**2

        slices = index_to_slices(X[:,-1])

        for i, ss1 in enumerate(slices):
            for s1 in ss1:
                if i==0:
                    Kdiag[s1]+= self.variance_U
                elif i==1:
                    Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
                else:
                    raise ValueError("invalid input/output index")
        return Kdiag

    def update_gradients_full(self, dL_dK, X, X2=None):
        """derivative of the covariance matrix with respect to the parameters.

        Stores the result in the ``gradient`` attribute of the four parameters.

        :param dL_dK: derivative of the objective with respect to K, same shape as K(X, X2)
        :param X: inputs; last column is the output index
        :param X2: optional second set of inputs in the same layout
        """
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
        else:
            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
        rdist = X - X2.T
        ly=1/self.lengthscale_Y
        lu=np.sqrt(3)/self.lengthscale_U

        Vu=self.variance_U
        Vy=self.variance_Y
        Vyu = np.sqrt(Vy*ly*2)
        # derivatives of Vyu with respect to ly and Vy
        dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
        dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)

        # theta1 is lu and theta2 is ly; the chain rule to the lengthscales is
        # applied when the gradients are assigned at the end.
        rd=rdist.shape
        dktheta1 = np.zeros(rd)
        dktheta2 = np.zeros(rd)
        dkUdvar = np.zeros(rd)
        dkYdvar = np.zeros(rd)

        # dk dtheta for UU
        UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
        UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))

        # dk dtheta for YY
        dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3

        dk2theta1 = lambda dist: (1.0)*(
            np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
            +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
            +np.exp(-dist*ly)*2*(ly-lu)**(-2)
            +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
            )

        dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)

        dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )

        dk2theta2 =lambda dist: 1*(
            np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
            +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
            )

        dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3

        # kyy kernel
        k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
        k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
        k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )

        # cross covariance function
        kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
        k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
        k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )

        # cross covariance kuy (without the Vu*Vyu scaling, applied below)
        kuyp = lambda dist:(kyu3(dist)) #t>0 kuy
        kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy
        # cross covariance kyu
        kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
        kyun = lambda dist:(kyu3(-dist)) #t<0 kyu

        # dk dtheta for UY
        dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
        dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
            (lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )

        dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
        dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
            np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )

        dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
            lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
            lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
            lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )

        dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
            lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
            np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
            np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
            lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )

        dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy
        dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy
        # cross covariance kyu
        dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu
        dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu

        dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy
        dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy
        # cross covariance kyu
        dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu
        dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu

        # Fill the derivative matrices block by block, mirroring K.
        for i, s1 in enumerate(slices):
            for j, s2 in enumerate(slices2):
                for ss1 in s1:
                    for ss2 in s2:
                        if i==0 and j==0:
                            dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
                            dktheta2[ss1,ss2] = 0
                            dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
                            dkYdvar[ss1,ss2] = 0
                        elif i==0 and j==1:
                            dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) )
                            dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) )
                            dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) )
                            dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) )
                        elif i==1 and j==1:
                            dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
                            dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
                            dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
                            dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
                        else:
                            dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) )
                            dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2]))
                            dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) )
                            dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))

        self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
        self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy

        # chain rule: d lu / d lengthscale_U = -sqrt(3)/lengthscale_U**2 and
        # d ly / d lengthscale_Y = -1/lengthscale_Y**2
        self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
        self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly

View file

@ -23,7 +23,7 @@ class Add(CombinationKernel):
If a list of parts (of this kernel!) `which_parts` is given, only If a list of parts (of this kernel!) `which_parts` is given, only
the parts of the list are taken to compute the covariance. the parts of the list are taken to compute the covariance.
""" """
assert X.shape[1] == self.input_dim assert X.shape[1] > max(np.r_[self.active_dims])
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)): elif not isinstance(which_parts, (list, tuple)):
@ -33,7 +33,7 @@ class Add(CombinationKernel):
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None): def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim assert X.shape[1] > max(np.r_[self.active_dims])
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)): elif not isinstance(which_parts, (list, tuple)):
@ -58,7 +58,12 @@ class Add(CombinationKernel):
:type X2: np.ndarray (num_inducing x input_dim)""" :type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape) target = np.zeros(X.shape)
[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target return target
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -131,7 +136,7 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -151,8 +156,8 @@ class Add(CombinationKernel):
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu[:, p1.active_dims] += a target_mu += a
target_S[:, p1.active_dims] += b target_S += b
return target_mu, target_S return target_mu, target_S
def _getstate(self): def _getstate(self):
@ -165,4 +170,11 @@ class Add(CombinationKernel):
def _setstate(self, state): def _setstate(self, state):
super(Add, self)._setstate(state) super(Add, self)._setstate(state)
# Absorb `other` into this Add kernel in place and return this same object
# (no new kernel object is constructed in this method).
# - When `other` is itself an Add, every entry of other._parameters_ is removed
#   from `other` and re-attached to self via add_parameters.
# - Otherwise `other` is attached as a single parameter via add_parameter.
# `name` is accepted but is not used anywhere in this body.
def add(self, other, name='sum'):
if isinstance(other, Add):
# Iterate over a copy of the list; presumably remove_parameter mutates
# other._parameters_ while we loop -- TODO confirm.
other_params = other._parameters_[:]
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self

View file

@ -40,72 +40,101 @@ class IndependentOutputs(CombinationKernel):
The index of the functions is given by the last column in the input X The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the underlying kernel for computation (in blocks). the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
Kern is wrapped with a slicer metaclass :param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels
the indices in the index_dim, index the kernels you gave!
""" """
def __init__(self, kern, index_dim=-1, name='independ'): def __init__(self, kernels, index_dim=-1, name='independ'):
assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces"
super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name) if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim self.index_dim = index_dim
self.kern = kern self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
#self.add_parameters(self.kern)
def K(self,X ,X2=None): def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target return target
def Kdiag(self,X): def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target return target
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2):
self.kern.update_gradients_full(dL,X,X2)
target[:] += self.kern.gradient
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
if X2 is None: if X2 is None:
[[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
self.kern.gradient = target if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
slices = index_to_slices(X[:,self.index_dim])
if X2 is None: if X2 is None:
[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices] # TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(self.kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
[[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(self.kerns, slices, slices2)]
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape) target = np.zeros(X.shape)
[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target return target
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
target = np.zeros(self.kern.size)
def collate_grads(dL, X):
self.kern.update_gradients_diag(dL,X)
target[:] += self.kern.gradient
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
[[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] if self.single_kern: target = np.zeros(self.kern.size)
self.kern.gradient = target else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
def collate_grads(kern, i, dL, X):
kern.update_gradients_diag(dL,X)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
class Hierarchical(Kern): class Hierarchical(CombinationKernel):
""" """
A kernel which can represent a simple hierarchical model. A kernel which can represent a simple hierarchical model.
@ -116,7 +145,7 @@ class Hierarchical(Kern):
The index of the functions is given by additional columns in the input X. The index of the functions is given by additional columns in the input X.
""" """
def __init__(self, kerns, name='hierarchy'): def __init__(self, kern, name='hierarchy'):
assert all([k.input_dim==kerns[0].input_dim for k in kerns]) assert all([k.input_dim==kerns[0].input_dim for k in kerns])
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
self.kerns = kerns self.kerns = kerns

View file

@ -140,12 +140,7 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add from add import Add
kernels = [] return Add([self, other], name=name)
if isinstance(self, Add): kernels.extend(self._parameters_)
else: kernels.append(self)
if isinstance(other, Add): kernels.extend(other._parameters_)
else: kernels.append(other)
return Add(kernels, name=name)
def __mul__(self, other): def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""

View file

@ -4,6 +4,7 @@ Created on 11 Mar 2014
@author: maxz @author: maxz
''' '''
from ...core.parameterization.parameterized import ParametersChangedMeta from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np
class KernCallsViaSlicerMeta(ParametersChangedMeta): class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __call__(self, *args, **kw): def __call__(self, *args, **kw):
@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True) instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True) instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True) instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True) instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
instance.parameters_changed() instance.parameters_changed()
return instance return instance
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False): def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
""" """
This method wraps the functions in kernel to make sure all kernels always see their respective input dimension. This method wraps the functions in kernel to make sure all kernels always see their respective input dimension.
The different switches are: The different switches are:
@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
""" """
if derivative: if derivative:
if diag: if diag:
def x_slice_wrapper(dL_dK, X): def x_slice_wrapper(dL_dKdiag, X):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret = np.zeros(X.shape)
X = kern._slice_X(X) if not kern._sliced_X else X X = kern._slice_X(X) if not kern._sliced_X else X
# if the return value is of shape X.shape, we need to make sure to return the right shape
kern._sliced_X += 1 kern._sliced_X += 1
try: try:
ret = operation(dL_dK, X) if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
else: ret = operation(dL_dKdiag, X)
except: except:
raise raise
finally: finally:
@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret return ret
elif psi_stat: elif psi_stat:
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1 kern._sliced_X += 1
# if the return value is of shape X.shape, we need to make sure to return the right shape
try: try:
ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) if ret_X_not_sliced:
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
r2 = ret[:2]
ret[0] = ret1
ret[1] = ret2
ret[0][:, kern.active_dims] = r2[0]
ret[1][:, kern.active_dims] = r2[1]
del r2
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except: except:
raise raise
finally: finally:
@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret return ret
elif psi_stat_Z: elif psi_stat_Z:
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced: ret = np.zeros(Z.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1 kern._sliced_X += 1
try: try:
ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) if ret_X_not_sliced:
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except: except:
raise raise
finally: finally:
@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret return ret
else: else:
def x_slice_wrapper(dL_dK, X, X2=None): def x_slice_wrapper(dL_dK, X, X2=None):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret = np.zeros(X.shape)
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
kern._sliced_X += 1 kern._sliced_X += 1
try: try:
ret = operation(dL_dK, X, X2) if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
else: ret = operation(dL_dK, X, X2)
except: except:
raise raise
finally: finally:

View file

@ -312,5 +312,4 @@ class Linear(Kern):
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]! return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
def input_sensitivity(self): def input_sensitivity(self):
if self.ARD: return self.variances return np.ones(self.input_dim) * self.variances
else: return self.variances.repeat(self.input_dim)

View file

@ -51,15 +51,15 @@ class Prod(CombinationKernel):
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
return target return target

View file

@ -152,11 +152,6 @@ class Stationary(Kern):
This term appears in derivatives. This term appears in derivatives.
""" """
dist = self._scaled_dist(X, X2).copy() dist = self._scaled_dist(X, X2).copy()
if X2 is None:
nondiag = util.diag.offdiag_view(dist)
nondiag[:] = 1./nondiag
return dist
else:
return 1./np.where(dist != 0., dist, np.inf) return 1./np.where(dist != 0., dist, np.inf)
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):

View file

@ -95,7 +95,7 @@ class Bernoulli(Likelihood):
else: else:
return np.nan return np.nan
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -106,7 +106,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
@ -118,7 +118,7 @@ class Bernoulli(Likelihood):
objective = np.where(y, link_f, 1.-link_f) objective = np.where(y, link_f, 1.-link_f)
return np.exp(np.sum(np.log(objective))) return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
""" """
Log Likelihood function given link(f) Log Likelihood function given link(f)
@ -129,7 +129,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: log likelihood evaluated at points link(f) :returns: log likelihood evaluated at points link(f)
:rtype: float :rtype: float
""" """
@ -140,7 +140,7 @@ class Bernoulli(Likelihood):
np.seterr(**state) np.seterr(**state)
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log pdf at y, given link(f) w.r.t link(f) Gradient of the log pdf at y, given link(f) w.r.t link(f)
@ -151,7 +151,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -162,7 +162,7 @@ class Bernoulli(Likelihood):
np.seterr(**state) np.seterr(**state)
return grad return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
""" """
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
@ -175,7 +175,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array :rtype: Nx1 array
@ -190,7 +190,7 @@ class Bernoulli(Likelihood):
np.seterr(**state) np.seterr(**state)
return d2logpdf_dlink2 return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -201,7 +201,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """

View file

@ -18,13 +18,12 @@ class Exponential(Likelihood):
L(x) = \lambda \exp(-\lambda Y_i) L(x) = \lambda \exp(-\lambda Y_i)
$$ $$
""" """
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): def __init__(self,gp_link=None):
super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance) if gp_link is None:
gp_link = link_functions.Log()
super(Exponential, self).__init__(gp_link, 'ExpLikelihood')
def _preprocess_values(self,Y): def pdf_link(self, link_f, y, Y_metadata=None):
return Y
def pdf_link(self, link_f, y, extra_data=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -35,16 +34,15 @@ class Exponential(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution :param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = link_f*np.exp(-y*link_f) log_objective = link_f*np.exp(-y*link_f)
return np.exp(np.sum(np.log(log_objective))) return np.exp(np.sum(np.log(log_objective)))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
@ -55,17 +53,16 @@ class Exponential(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution :param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f log_objective = np.log(link_f) - y*link_f
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return np.sum(log_objective) return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -76,7 +73,7 @@ class Exponential(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution :param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
@ -86,7 +83,7 @@ class Exponential(Likelihood):
#grad = y/(link_f**2) - 1./link_f #grad = y/(link_f**2) - 1./link_f
return grad return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -99,7 +96,7 @@ class Exponential(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution :param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -112,7 +109,7 @@ class Exponential(Likelihood):
#hess = -2*y/(link_f**3) + 1/(link_f**2) #hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -123,7 +120,7 @@ class Exponential(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution :param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -132,18 +129,6 @@ class Exponential(Likelihood):
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3 return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)**2
def samples(self, gp): def samples(self, gp):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -1,11 +1,12 @@
# Copyright (c) 2012, 2013 Ricardo Andrade # Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
from ..core.parameterization import Param
import link_functions import link_functions
from likelihood import Likelihood from likelihood import Likelihood
@ -18,14 +19,16 @@ class Gamma(Likelihood):
\\alpha_{i} = \\beta y_{i} \\alpha_{i} = \\beta y_{i}
""" """
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.): def __init__(self,gp_link=None,beta=1.):
self.beta = beta if gp_link is None:
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance) gp_link = link_functions.Log()
super(Gamma, self).__init__(gp_link, 'Gamma')
def _preprocess_values(self,Y): self.beta = Param('beta', beta)
return Y self.add_parameter(self.beta)
self.beta.fix()#TODO: gradients!
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -37,7 +40,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution :param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
@ -47,7 +50,7 @@ class Gamma(Likelihood):
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha) objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
return np.exp(np.sum(np.log(objective))) return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
@ -59,7 +62,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution :param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
@ -71,7 +74,7 @@ class Gamma(Likelihood):
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective) return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -83,7 +86,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution :param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
@ -94,7 +97,7 @@ class Gamma(Likelihood):
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
return grad return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -108,7 +111,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution :param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -122,7 +125,7 @@ class Gamma(Likelihood):
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
return hess return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -134,22 +137,10 @@ class Gamma(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution :param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3 return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)/self.beta

View file

@ -35,12 +35,7 @@ class Gaussian(Likelihood):
if gp_link is None: if gp_link is None:
gp_link = link_functions.Identity() gp_link = link_functions.Identity()
if isinstance(gp_link, link_functions.Identity): assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
analytical_variance = True
analytical_mean = True
else:
analytical_variance = False
analytical_mean = False
super(Gaussian, self).__init__(gp_link, name=name) super(Gaussian, self).__init__(gp_link, name=name)
@ -51,14 +46,12 @@ class Gaussian(Likelihood):
self.log_concave = True self.log_concave = True
def betaY(self,Y,Y_metadata=None): def betaY(self,Y,Y_metadata=None):
#TODO: ~Ricardo this does not live here
return Y/self.gaussian_variance(Y_metadata) return Y/self.gaussian_variance(Y_metadata)
def gaussian_variance(self, Y_metadata=None): def gaussian_variance(self, Y_metadata=None):
return self.variance return self.variance
def covariance_matrix(self, Y, Y_metadata=None):
return np.eye(Y.shape[0]) * self.variance
def update_gradients(self, grad): def update_gradients(self, grad):
self.variance.gradient = grad self.variance.gradient = grad
@ -99,10 +92,10 @@ class Gaussian(Likelihood):
def predictive_variance(self, mu, sigma, predictive_mean=None): def predictive_variance(self, mu, sigma, predictive_mean=None):
return self.variance + sigma**2 return self.variance + sigma**2
def predictive_quantiles(self, mu, var, quantiles, Y_metadata): def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
return [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles] return [stats.norm.ppf(q/100.)*np.sqrt(var + self.variance) + mu for q in quantiles]
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -113,14 +106,14 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
#Assumes no covariance, exp, sum, log for numerical stability #Assumes no covariance, exp, sum, log for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance))))) return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
""" """
Log likelihood function given link(f) Log likelihood function given link(f)
@ -131,7 +124,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
@ -141,7 +134,7 @@ class Gaussian(Likelihood):
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi)) return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log pdf at y, given link(f) w.r.t link(f) Gradient of the log pdf at y, given link(f) w.r.t link(f)
@ -152,7 +145,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -161,7 +154,7 @@ class Gaussian(Likelihood):
grad = s2_i*y - s2_i*link_f grad = s2_i*y - s2_i*link_f
return grad return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
""" """
Hessian at y, given link_f, w.r.t link_f. Hessian at y, given link_f, w.r.t link_f.
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
@ -175,7 +168,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array :rtype: Nx1 array
@ -188,7 +181,7 @@ class Gaussian(Likelihood):
hess = -(1.0/self.variance)*np.ones((N, 1)) hess = -(1.0/self.variance)*np.ones((N, 1))
return hess return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -199,7 +192,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -208,7 +201,7 @@ class Gaussian(Likelihood):
d3logpdf_dlink3 = np.zeros((N,1)) d3logpdf_dlink3 = np.zeros((N,1))
return d3logpdf_dlink3 return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None): def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
@ -219,7 +212,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float :rtype: float
""" """
@ -230,7 +223,7 @@ class Gaussian(Likelihood):
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e)) dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum? return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
""" """
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance) Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
@ -241,7 +234,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -250,7 +243,7 @@ class Gaussian(Likelihood):
dlik_grad_dsigma = -s_4*y + s_4*link_f dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
""" """
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
@ -261,7 +254,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data not used in gaussian :param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -271,16 +264,16 @@ class Gaussian(Likelihood):
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4 d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return np.asarray([[dlogpdf_dvar]]) return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def _mean(self, gp): def _mean(self, gp):

View file

@ -153,6 +153,10 @@ class Likelihood(Parameterized):
return mean return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError, "implement this function to make predictions"
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
""" """
Numerical approximation to the predictive variance: V(Y_star) Numerical approximation to the predictive variance: V(Y_star)
@ -204,31 +208,31 @@ class Likelihood(Parameterized):
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
return exp_var + var_exp return exp_var + var_exp
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None): def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None): def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None): def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def pdf(self, f, y, extra_data=None): def pdf(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the likelihood (pdf) using it Evaluates the link function link(f) then computes the likelihood (pdf) using it
@ -239,14 +243,14 @@ class Likelihood(Parameterized):
:type f: Nx1 array :type f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data passed on unchanged to pdf_link :param Y_metadata: Y_metadata passed on unchanged to pdf_link
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, extra_data=extra_data) return self.pdf_link(link_f, y, Y_metadata=Y_metadata)
def logpdf(self, f, y, extra_data=None): def logpdf(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
@ -257,14 +261,14 @@ class Likelihood(Parameterized):
:type f: Nx1 array :type f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data passed on unchanged to logpdf_link :param Y_metadata: Y_metadata passed on unchanged to logpdf_link
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, extra_data=extra_data) return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, extra_data=None): def dlogpdf_df(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the derivative of log likelihood using it Evaluates the link function link(f) then computes the derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule Uses the Faa di Bruno's formula for the chain rule
@ -276,16 +280,16 @@ class Likelihood(Parameterized):
:type f: Nx1 array :type f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data passed on unchanged to dlogpdf_dlink :param Y_metadata: Y_metadata passed on unchanged to dlogpdf_dlink
:returns: derivative of log likelihood evaluated for this point :returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df) return chain_1(dlogpdf_dlink, dlink_df)
def d2logpdf_df2(self, f, y, extra_data=None): def d2logpdf_df2(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the second derivative of log likelihood using it Evaluates the link function link(f) then computes the second derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule Uses the Faa di Bruno's formula for the chain rule
@ -297,18 +301,18 @@ class Likelihood(Parameterized):
:type f: Nx1 array :type f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data passed on unchanged to d2logpdf_dlink2 and dlogpdf_dlink :param Y_metadata: Y_metadata passed on unchanged to d2logpdf_dlink2 and dlogpdf_dlink
:returns: second derivative of log likelihood evaluated for this point (diagonal only) :returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
def d3logpdf_df3(self, f, y, extra_data=None): def d3logpdf_df3(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the third derivative of log likelihood using it Evaluates the link function link(f) then computes the third derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule Uses the Faa di Bruno's formula for the chain rule
@ -320,44 +324,44 @@ class Likelihood(Parameterized):
:type f: Nx1 array :type f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data passed on unchanged to d3logpdf_dlink3, d2logpdf_dlink2 and dlogpdf_dlink :param Y_metadata: Y_metadata passed on unchanged to d3logpdf_dlink3, d2logpdf_dlink2 and dlogpdf_dlink
:returns: third derivative of log likelihood evaluated for this point :returns: third derivative of log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data) d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f) d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
def dlogpdf_dtheta(self, f, y, extra_data=None): def dlogpdf_dtheta(self, f, y, Y_metadata=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data) return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
else: else:
#There are no parameters, so return an empty array for the derivatives #There are no parameters, so return an empty array for the derivatives
return np.zeros([1, 0]) return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None): def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df) return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else: else:
#There are no parameters, so return an empty array for the derivatives #There are no parameters, so return an empty array for the derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None): def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
@ -365,17 +369,17 @@ class Likelihood(Parameterized):
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data) d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else: else:
#There are no parameters, so return an empty array for the derivatives #There are no parameters, so return an empty array for the derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None): def _laplace_gradients(self, f, y, Y_metadata=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data) dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data) dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data) d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names' #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize # ensure we have gradients for every parameter we want to optimize
@ -390,7 +394,7 @@ class Likelihood(Parameterized):
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None): def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
""" """
Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction. Compute mean, variance of the predictive distribution.
:param mu: mean of the latent variable, f, of posterior :param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior :param var: variance of the latent variable, f, of posterior
@ -407,10 +411,7 @@ class Likelihood(Parameterized):
#compute the quantiles by sampling!!! #compute the quantiles by sampling!!!
N_samp = 1000 N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata) ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]

View file

@ -11,7 +11,7 @@ import itertools
class MixedNoise(Likelihood): class MixedNoise(Likelihood):
def __init__(self, likelihoods_list, name='mixed_noise'): def __init__(self, likelihoods_list, name='mixed_noise'):
#NOTE at the moment this likelihood only works for using a list of gaussians
super(Likelihood, self).__init__(name=name) super(Likelihood, self).__init__(name=name)
self.add_parameters(*likelihoods_list) self.add_parameters(*likelihoods_list)
@ -24,10 +24,11 @@ class MixedNoise(Likelihood):
variance = np.zeros(ind.size) variance = np.zeros(ind.size)
for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))): for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
variance[ind==j] = lik.variance variance[ind==j] = lik.variance
return variance[:,None] return variance
def betaY(self,Y,Y_metadata): def betaY(self,Y,Y_metadata):
return Y/self.gaussian_variance(Y_metadata=Y_metadata) #TODO not here.
return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None]
def update_gradients(self, gradients): def update_gradients(self, gradients):
self.gradient = gradients self.gradient = gradients
@ -38,7 +39,6 @@ class MixedNoise(Likelihood):
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))]) return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None): def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
ind = Y_metadata['output_index'].flatten() ind = Y_metadata['output_index'].flatten()
_variance = np.array([self.likelihoods_list[j].variance for j in ind ]) _variance = np.array([self.likelihoods_list[j].variance for j in ind ])
if full_cov: if full_cov:
@ -46,26 +46,20 @@ class MixedNoise(Likelihood):
else: else:
var += _variance var += _variance
return mu, var return mu, var
else:
raise NotImplementedError
def predictive_variance(self, mu, sigma, **other_shit): def predictive_variance(self, mu, sigma, Y_metadata):
if isinstance(noise_index,int): _variance = self.gaussian_variance(Y_metadata)
_variance = self.variance[noise_index]
else:
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
return _variance + sigma**2 return _variance + sigma**2
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
def covariance_matrix(self, Y, Y_metadata): ind = Y_metadata['output_index'].flatten()
#assert all([isinstance(l, Gaussian) for l in self.likelihoods_list]) outputs = np.unique(ind)
#ind = Y_metadata['output_index'].flatten() Q = np.zeros( (mu.size,len(quantiles)) )
#variance = np.zeros(Y.shape[0]) for j in outputs:
#for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))): q = self.likelihoods_list[j].predictive_quantiles(mu[ind==j,:],
# variance[ind==j] = lik.variance var[ind==j,:],quantiles,Y_metadata=None)
#return np.diag(variance) Q[ind==j,:] = np.hstack(q)
return np.diag(self.gaussian_variance(Y_metadata).flatten()) return [q[:,None] for q in Q.T]
def samples(self, gp, Y_metadata): def samples(self, gp, Y_metadata):
""" """
@ -84,4 +78,3 @@ class MixedNoise(Likelihood):
_ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()]) _ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
Ysim[flt,:] = _ysim.reshape(n1,N2) Ysim[flt,:] = _ysim.reshape(n1,N2)
return Ysim return Ysim

View file

@ -25,10 +25,13 @@ class Poisson(Likelihood):
super(Poisson, self).__init__(gp_link, name='Poisson') super(Poisson, self).__init__(gp_link, name='Poisson')
def _preprocess_values(self,Y): def _conditional_mean(self, f):
return Y """
the expected value of y given a value of f
"""
return self.gp_link.transf(gp)
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -39,14 +42,14 @@ class Poisson(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution :param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.prod(stats.poisson.pmf(y,link_f)) return np.prod(stats.poisson.pmf(y,link_f))
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
@ -57,7 +60,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution :param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
@ -65,7 +68,7 @@ class Poisson(Likelihood):
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -76,7 +79,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution :param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
@ -84,7 +87,7 @@ class Poisson(Likelihood):
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return y/link_f - 1 return y/link_f - 1
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -97,7 +100,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution :param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -112,7 +115,7 @@ class Poisson(Likelihood):
#transf = self.gp_link.transf(gp) #transf = self.gp_link.transf(gp)
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -123,7 +126,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution :param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """

View file

@ -46,7 +46,7 @@ class StudentT(Likelihood):
self.sigma2.gradient = grads[0] self.sigma2.gradient = grads[0]
self.v.gradient = grads[1] self.v.gradient = grads[1]
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -57,7 +57,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
@ -70,7 +70,7 @@ class StudentT(Likelihood):
) )
return np.prod(objective) return np.prod(objective)
def logpdf_link(self, link_f, y, extra_data=None): def logpdf_link(self, link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
@ -81,7 +81,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
@ -99,7 +99,7 @@ class StudentT(Likelihood):
) )
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -110,7 +110,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
@ -120,7 +120,7 @@ class StudentT(Likelihood):
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -133,7 +133,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -146,7 +146,7 @@ class StudentT(Likelihood):
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -157,7 +157,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -168,7 +168,7 @@ class StudentT(Likelihood):
) )
return d3lik_dlink3 return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None): def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
""" """
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
@ -179,7 +179,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
@ -188,7 +188,7 @@ class StudentT(Likelihood):
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar) return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
""" """
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
@ -199,7 +199,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -208,7 +208,7 @@ class StudentT(Likelihood):
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
""" """
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
@ -219,7 +219,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
@ -230,25 +230,22 @@ class StudentT(Likelihood):
) )
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dvar, dlogpdf_dv)) return np.hstack((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
"""
Compute mean of the prediction
"""
return self.gp_link.transf(mu) # only true in link is monotoci, which it is. return self.gp_link.transf(mu) # only true in link is monotoci, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):

View file

@ -3,6 +3,7 @@
import numpy as np import numpy as np
from ..core.mapping import Mapping from ..core.mapping import Mapping
from ..core.parameterization import Param
class Linear(Mapping): class Linear(Mapping):
""" """
@ -19,35 +20,19 @@ class Linear(Mapping):
""" """
def __init__(self, input_dim=1, output_dim=1): def __init__(self, input_dim=1, output_dim=1, name='linear_map'):
self.name = 'linear' Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) self.W = Param('W',np.array((self.input_dim, self.output_dim)))
self.num_params = self.output_dim*(self.input_dim + 1) self.bias = Param('bias',np.array(self.output_dim))
self.W = np.array((self.input_dim, self.output_dim)) self.add_parameters(self.W, self.bias)
self.bias = np.array(self.output_dim)
self.randomize()
def _get_param_names(self):
return sum([['W_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.input_dim)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.W.flatten(), self.bias))
def _set_params(self, x):
self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
self.bias = x[self.input_dim*self.output_dim:].copy()
def randomize(self):
self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
def f(self, X): def f(self, X):
return np.dot(X,self.W) + self.bias return np.dot(X,self.W) + self.bias
def df_dtheta(self, dL_df, X): def df_dtheta(self, dL_df, X):
self._df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0)) df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dW.flatten(), self._df_dbias)) return np.hstack((df_dW.flatten(), df_dbias))
def df_dX(self, dL_df, X): def dL_dX(self, dL_df, X):
return (dL_df[:, None, :]*self.W[None, :, :]).sum(2) return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)

View file

@ -75,15 +75,19 @@ class BayesianGPLVM(SparseGP):
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)
def plot_latent(self, plot_inducing=True, *args, **kwargs): def plot_latent(self, labels=None, which_indices=None,
""" resolution=50, ax=None, marker='o', s=40,
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent fignum=None, plot_inducing=True, legend=True,
""" plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs) return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, **kwargs)
def do_test_latents(self, Y): def do_test_latents(self, Y):
""" """

View file

@ -36,7 +36,7 @@ class GPCoregionalizedRegression(GP):
#Kernel #Kernel
if kernel is None: if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name) kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name)
#Likelihood #Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list) likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)

View file

@ -20,14 +20,14 @@ class GPRegression(GP):
""" """
def __init__(self, X, Y, kernel=None): def __init__(self, X, Y, kernel=None, Y_metadata=None):
if kernel is None: if kernel is None:
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression') super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
def _getstate(self): def _getstate(self):
return GP._getstate(self) return GP._getstate(self)

View file

@ -67,12 +67,22 @@ class GPLVM(GP):
assert self.likelihood.Y.shape[1] == 2 assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None] Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, var, upper, lower = self.predict(Xnew) mu, _ = self.predict(Xnew)
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, *args, **kwargs): def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, *args, **kwargs) return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, False, legend,
plot_limits, aspect, updates, **kwargs)
def plot_magnification(self, *args, **kwargs): def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs) return util.plot_latent.plot_magnification(self, *args, **kwargs)

View file

@ -43,14 +43,14 @@ class SparseGPCoregionalizedRegression(SparseGP):
#Kernel #Kernel
if kernel is None: if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name) kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name)
#Likelihood #Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list) likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)
#Inducing inputs list #Inducing inputs list
if len(Z_list): if len(Z_list):
assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.' assert len(Z_list) == Ny, 'Number of outputs do not match length of inducing inputs list.'
else: else:
if isinstance(num_inducing,np.int): if isinstance(num_inducing,np.int):
num_inducing = [num_inducing] * Ny num_inducing = [num_inducing] * Ny

View file

@ -28,7 +28,6 @@ class MappingTests(unittest.TestCase):
self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose)) self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
unittest.main() unittest.main()

View file

@ -30,7 +30,8 @@ def most_significant_input_dimensions(model, which_indices):
def plot_latent(model, labels=None, which_indices=None, def plot_latent(model, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=False, legend=True, fignum=None, plot_inducing=False, legend=True,
aspect='auto', updates=False): plot_limits=None,
aspect='auto', updates=False, **kwargs):
""" """
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance :param resolution: the resolution of the grid on which to evaluate the predictive variance
@ -38,6 +39,8 @@ def plot_latent(model, labels=None, which_indices=None,
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
else:
fig = ax.figure
Tango.reset() Tango.reset()
if labels is None: if labels is None:
@ -57,15 +60,28 @@ def plot_latent(model, labels=None, which_indices=None,
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
mu, var, low, up = model.predict(Xtest_full) _, var = model.predict(Xtest_full)
var = var[:, :1] var = var[:, :1]
return np.log(var) return np.log(var)
#Create an IMshow controller that can re-plot the latent space shading at a good resolution #Create an IMshow controller that can re-plot the latent space shading at a good resolution
if plot_limits is None:
xmin, ymin = X[:, [input_1, input_2]].min(0)
xmax, ymax = X[:, [input_1, input_2]].max(0)
x_r, y_r = xmax-xmin, ymax-ymin
xmin -= .1*x_r
xmax += .1*x_r
ymin -= .1*y_r
ymax += .1*y_r
else:
try:
xmin, xmax, ymin, ymax = plot_limits
except (TypeError, ValueError) as e:
raise e.__class__, "Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits)
view = ImshowController(ax, plot_function, view = ImshowController(ax, plot_function,
tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)), (xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear', resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.binary) cmap=pb.cm.binary, **kwargs)
# make sure labels are in order of input: # make sure labels are in order of input:
ulabels = [] ulabels = []
@ -99,8 +115,6 @@ def plot_latent(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend: if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1) ax.legend(loc=0, numpoints=1)
#ax.set_xlim(xmin[0], xmax[0])
#ax.set_ylim(xmin[1], xmax[1])
ax.grid(b=False) # remove the grid if present, it doesn't look good ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio ax.set_aspect('auto') # set a nice aspect ratio
@ -108,9 +122,24 @@ def plot_latent(model, labels=None, which_indices=None,
Z = param_to_array(model.Z) Z = param_to_array(model.Z)
ax.plot(Z[:, input_1], Z[:, input_2], '^w') ax.plot(Z[:, input_1], Z[:, input_2], '^w')
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))
try:
fig.canvas.draw()
fig.tight_layout()
fig.canvas.draw()
except Exception as e:
print "Could not invoke tight layout: {}".format(e)
pass
if updates: if updates:
try:
ax.figure.canvas.show() ax.figure.canvas.show()
except Exception as e:
print "Could not invoke show: {}".format(e)
raw_input('Enter to continue') raw_input('Enter to continue')
view.deactivate()
return ax return ax
def plot_magnification(model, labels=None, which_indices=None, def plot_magnification(model, labels=None, which_indices=None,
@ -186,7 +215,7 @@ def plot_magnification(model, labels=None, which_indices=None,
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
if updates: if updates:
ax.figure.canvas.show() fig.canvas.show()
raw_input('Enter to continue') raw_input('Enter to continue')
pb.title('Magnification Factor') pb.title('Magnification Factor')

View file

@ -33,7 +33,7 @@ class AxisChangedController(AxisEventController):
Constructor Constructor
''' '''
super(AxisChangedController, self).__init__(ax) super(AxisChangedController, self).__init__(ax)
self._lim_ratio_threshold = update_lim or .8 self._lim_ratio_threshold = update_lim or .95
self._x_lim = self.ax.get_xlim() self._x_lim = self.ax.get_xlim()
self._y_lim = self.ax.get_ylim() self._y_lim = self.ax.get_ylim()
@ -80,6 +80,10 @@ class AxisChangedController(AxisEventController):
class BufferedAxisChangedController(AxisChangedController): class BufferedAxisChangedController(AxisChangedController):
def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs): def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs):
""" """
Buffered axis changed controller. Controls the buffer and handles update events for when the axes changed.
Updated plotting will be after first reload (first time will be within plot limits, after that the limits will be buffered)
:param plot_function: :param plot_function:
function to use for creating image for plotting (return ndarray-like) function to use for creating image for plotting (return ndarray-like)
plot_function gets called with (2D!) Xtest grid if replotting required plot_function gets called with (2D!) Xtest grid if replotting required
@ -91,11 +95,13 @@ class BufferedAxisChangedController(AxisChangedController):
""" """
super(BufferedAxisChangedController, self).__init__(ax, update_lim=update_lim) super(BufferedAxisChangedController, self).__init__(ax, update_lim=update_lim)
self.plot_function = plot_function self.plot_function = plot_function
xmin, xmax = self._x_lim # self._compute_buffered(*self._x_lim) xmin, ymin, xmax, ymax = plot_limits#self._x_lim # self._compute_buffered(*self._x_lim)
ymin, ymax = self._y_lim # self._compute_buffered(*self._y_lim) # imshow acts on the limits of the plot, this is why we need to override the limits here, to make sure the right plot limits are used:
self._x_lim = xmin, xmax
self._y_lim = ymin, ymax
self.resolution = resolution self.resolution = resolution
self._not_init = False self._not_init = False
self.view = self._init_view(self.ax, self.recompute_X(), xmin, xmax, ymin, ymax, **kwargs) self.view = self._init_view(self.ax, self.recompute_X(buffered=False), xmin, xmax, ymin, ymax, **kwargs)
self._not_init = True self._not_init = True
def update(self, ax): def update(self, ax):
@ -111,14 +117,16 @@ class BufferedAxisChangedController(AxisChangedController):
def update_view(self, view, X, xmin, xmax, ymin, ymax): def update_view(self, view, X, xmin, xmax, ymin, ymax):
raise NotImplementedError('update view given in here') raise NotImplementedError('update view given in here')
def get_grid(self): def get_grid(self, buffered=True):
xmin, xmax = self._compute_buffered(*self._x_lim) if buffered: comp = self._compute_buffered
ymin, ymax = self._compute_buffered(*self._y_lim) else: comp = lambda a,b: (a,b)
xmin, xmax = comp(*self._x_lim)
ymin, ymax = comp(*self._y_lim)
x, y = numpy.mgrid[xmin:xmax:1j * self.resolution, ymin:ymax:1j * self.resolution] x, y = numpy.mgrid[xmin:xmax:1j * self.resolution, ymin:ymax:1j * self.resolution]
return numpy.hstack((x.flatten()[:, None], y.flatten()[:, None])) return numpy.hstack((x.flatten()[:, None], y.flatten()[:, None]))
def recompute_X(self): def recompute_X(self, buffered=True):
X = self.plot_function(self.get_grid()) X = self.plot_function(self.get_grid(buffered))
if isinstance(X, (tuple, list)): if isinstance(X, (tuple, list)):
for x in X: for x in X:
x.shape = [self.resolution, self.resolution] x.shape = [self.resolution, self.resolution]

View file

@ -9,7 +9,7 @@ import numpy
class ImshowController(BufferedAxisChangedController): class ImshowController(BufferedAxisChangedController):
def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.5, **kwargs): def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.8, **kwargs):
""" """
:param plot_function: :param plot_function:
function to use for creating image for plotting (return ndarray-like) function to use for creating image for plotting (return ndarray-like)

View file

@ -123,6 +123,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add inducing inputs (if a sparse model is used) #add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"): if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
if isinstance(model,SparseGPCoregionalizedRegression):
Z = Z[Z[:,-1] == Y_metadata['output_index'],:]
Zu = Z[:,free_dims] Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0] z_height = ax.get_ylim()[0]
plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12) plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)

View file

@ -4,6 +4,8 @@ import GPy
import numpy as np import numpy as np
import matplotlib as mpl import matplotlib as mpl
import time import time
from ...util.misc import param_to_array
from GPy.core.parameterization.variational import VariationalPosterior
try: try:
import visual import visual
visual_available = True visual_available = True
@ -72,12 +74,13 @@ class vector_show(matplotlib_show):
""" """
def __init__(self, vals, axes=None): def __init__(self, vals, axes=None):
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals.T)[0] self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)
def modify(self, vals): def modify(self, vals):
self.vals = vals.copy() self.vals = vals.copy()
xdata, ydata = self.handle.get_data() for handle, vals in zip(self.handle, self.vals.T):
self.handle.set_data(xdata, self.vals.T) xdata, ydata = handle.get_data()
handle.set_data(xdata, vals)
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@ -91,8 +94,12 @@ class lvm(matplotlib_show):
:param latent_axes: the axes where the latent visualization should be plotted. :param latent_axes: the axes where the latent visualization should be plotted.
""" """
if vals == None: if vals == None:
vals = model.X[0] if isinstance(model.X, VariationalPosterior):
vals = param_to_array(model.X.mean)
else:
vals = param_to_array(model.X)
vals = param_to_array(vals)
matplotlib_show.__init__(self, vals, axes=latent_axes) matplotlib_show.__init__(self, vals, axes=latent_axes)
if isinstance(latent_axes,mpl.axes.Axes): if isinstance(latent_axes,mpl.axes.Axes):

34
GPy/testing/fitc.py Normal file
View file

@ -0,0 +1,34 @@
# Copyright (c) 2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class FITCtest(unittest.TestCase):
    """Gradient checks for sparse GP regression with FITC inference."""

    def setUp(self):
        """Sample noisy sine data for a 1-d and a 2-d regression problem."""
        N = 20

        # 1 dimensional example: sample inputs and outputs
        self.X1D = np.random.uniform(-3., 3., (N, 1))
        self.Y1D = np.sin(self.X1D) + np.random.randn(N, 1) * 0.05

        # 2 dimensional example: sample inputs and outputs
        self.X2D = np.random.uniform(-3., 3., (N, 2))
        self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(N, 1) * 0.05

    def _check_fitc_gradients(self, X, Y):
        """Build a SparseGPRegression on (X, Y), switch it to FITC and run checkgrad."""
        m = GPy.models.SparseGPRegression(X, Y)
        m.inference_method = GPy.inference.latent_function_inference.FITC()
        self.assertTrue(m.checkgrad())

    def test_fitc_1d(self):
        """checkgrad must pass on the 1-d data set."""
        self._check_fitc_gradients(self.X1D, self.Y1D)

    def test_fitc_2d(self):
        """checkgrad must pass on the 2-d data set."""
        self._check_fitc_gradients(self.X2D, self.Y2D)

View file

@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False): def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
""" """
This function runs on kernels to check the correctness of their This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite implementation. It checks that the covariance function is positive definite
@ -109,19 +109,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
""" """
pass_checks = True pass_checks = True
if X==None: if X is None:
X = np.random.randn(10, kern.input_dim) X = np.random.randn(10, kern.input_dim)
if output_ind is not None: if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None: if X2 is None:
X2 = np.random.randn(20, kern.input_dim) X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None: if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose: if verbose:
print("Checking covariance function is positive definite.") print("Checking covariance function is positive definite.")
#if isinstance(kern, GPy.kern.IndependentOutputs):
#import ipdb; ipdb.set_trace() # XXX BREAKPOINT
result = Kern_check_model(kern, X=X).is_positive_semi_definite() result = Kern_check_model(kern, X=X).is_positive_semi_definite()
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
@ -154,7 +152,12 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose: if verbose:
print("Checking gradients of Kdiag(X) wrt theta.") print("Checking gradients of Kdiag(X) wrt theta.")
try:
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("update_gradients_diag not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
@ -166,7 +169,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose: if verbose:
print("Checking gradients of K(X, X) wrt X.") print("Checking gradients of K(X, X) wrt X.")
try: try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) testmodel = Kern_check_dK_dX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -175,14 +181,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Check passed.") print("Check passed.")
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
pass_checks = False pass_checks = False
return False return False
if verbose: if verbose:
print("Checking gradients of K(X, X2) wrt X.") print("Checking gradients of K(X, X2) wrt X.")
try: try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) testmodel = Kern_check_dK_dX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -190,8 +199,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
pass_checks = False pass_checks = False
return False return False
@ -236,9 +245,22 @@ class KernelGradientTestsContinuous(unittest.TestCase):
def test_Add(self): def test_Add(self):
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize() k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Add_dims(self):
    """Check that a sum kernel rejects an out-of-range active dimension.

    The first kernel uses ``active_dims=[2, self.D]`` and ``k.K`` is
    expected to raise ``AssertionError``; the second uses
    ``active_dims=[2, self.D-1]`` and ``k.K`` is expected to run.
    """
    k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
    k.randomize()
    self.assertRaises(AssertionError, k.K, self.X)
    k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
    k.randomize()
    # assert it runs:
    try:
        k.K(self.X)
    except AssertionError:
        # Call form of raise: valid on Python 2 and 3, same exception type and message.
        raise AssertionError("k.K(X) should run on self.D-1 dimension")
def test_Matern52(self): def test_Matern52(self):
k = GPy.kern.Matern52(self.D) k = GPy.kern.Matern52(self.D)
k.randomize() k.randomize()
@ -302,29 +324,57 @@ class KernelTestsMiscellaneous(unittest.TestCase):
class KernelTestsNonContinuous(unittest.TestCase): class KernelTestsNonContinuous(unittest.TestCase):
def setUp(self): def setUp(self):
N = 100 N0 = 3
N1 = 110 N1 = 9
self.D = 2 N2 = 4
D = self.D N = N0+N1+N2
self.X = np.random.randn(N,D) self.D = 3
self.X2 = np.random.randn(N1,D) self.X = np.random.randn(N, self.D+1)
#self.X_block = np.zeros((N+N1, D+D+1)) indices = np.random.random_integers(0, 2, size=N)
#self.X_block[0:N, 0:D] = self.X self.X[indices==0, -1] = 0
#self.X_block[N:N+N1, D:D+D] = self.X2 self.X[indices==1, -1] = 1
#self.X_block[0:N, -1] = 0 self.X[indices==2, -1] = 2
#self.X_block[N:N+N1, -1] = 1 #self.X = self.X[self.X[:, -1].argsort(), :]
self.X_block = np.zeros((N+N1, D+1)) self.X2 = np.random.randn((N0+N1)*2, self.D+1)
self.X_block[0:N, 0:D] = self.X self.X2[:(N0*2), -1] = 0
self.X_block[N:N+N1, 0:D] = self.X2 self.X2[(N0*2):, -1] = 1
self.X_block[0:N, -1] = 0
self.X_block[N:N+N1, -1] = 1
self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :]
def test_IndependentOutputs(self): def test_IndependentOutputs(self):
k = GPy.kern.RBF(self.D) k = GPy.kern.RBF(self.D)
kern = GPy.kern.IndependentOutputs(k, -1) kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, verbose=verbose)) self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
def test_ODE_UY(self):
    """Run the kernel gradient checks on an ODE_UY kernel.

    Rows of ``self.X`` and ``self.X2`` whose last column equals 2 are
    dropped before the check; the last column is passed as a fixed dimension.
    """
    ode_kernel = GPy.kern.ODE_UY(2, active_dims=[0, self.D])
    keep_rows = self.X[:, -1] != 2
    keep_rows2 = self.X2[:, -1] != 2
    passed = check_kernel_gradient_functions(
        ode_kernel,
        X=self.X[keep_rows],
        X2=self.X2[keep_rows2],
        verbose=verbose,
        fixed_X_dims=-1,
    )
    self.assertTrue(passed)
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
unittest.main() #unittest.main()
np.random.seed(0)
N0 = 3
N1 = 9
N2 = 4
N = N0+N1+N2
D = 3
X = np.random.randn(N, D+1)
indices = np.random.random_integers(0, 2, size=N)
X[indices==0, -1] = 0
X[indices==1, -1] = 1
X[indices==2, -1] = 2
#X = X[X[:, -1].argsort(), :]
X2 = np.random.randn((N0+N1)*2, D+1)
X2[:(N0*2), -1] = 0
X2[(N0*2):, -1] = 1
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
k = GPy.kern.RBF(D)
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))

View file

@ -255,21 +255,21 @@ class TestNoiseModels(object):
"Y": self.binary_Y, "Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again "ep": False # FIXME: Should be True when we have it working again
}, },
#"Exponential_default": { "Exponential_default": {
#"model": GPy.likelihoods.exponential(), "model": GPy.likelihoods.Exponential(),
#"link_f_constraints": [constrain_positive], "link_f_constraints": [constrain_positive],
#"Y": self.positive_Y, "Y": self.positive_Y,
#"laplace": True, "laplace": True,
#}, },
#"Poisson_default": { "Poisson_default": {
#"model": GPy.likelihoods.poisson(), "model": GPy.likelihoods.Poisson(),
#"link_f_constraints": [constrain_positive], "link_f_constraints": [constrain_positive],
#"Y": self.integer_Y, "Y": self.integer_Y,
#"laplace": True, "laplace": True,
#"ep": False #Should work though... "ep": False #Should work though...
#}, }#,
#"Gamma_default": { #GAMMA needs some work!"Gamma_default": {
#"model": GPy.likelihoods.gamma(), #"model": GPy.likelihoods.Gamma(),
#"link_f_constraints": [constrain_positive], #"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y, #"Y": self.positive_Y,
#"laplace": True #"laplace": True
@ -589,7 +589,8 @@ class LaplaceTests(unittest.TestCase):
self.var = np.random.rand(1) self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var) self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var) #TODO: gaussians with on Identity link. self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
#Make a bigger step as lower bound can be quite curved #Make a bigger step as lower bound can be quite curved
self.step = 1e-6 self.step = 1e-6
@ -604,7 +605,6 @@ class LaplaceTests(unittest.TestCase):
def test_gaussian_d2logpdf_df2_2(self): def test_gaussian_d2logpdf_df2_2(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = None self.Y = None
self.gauss = None
self.N = 2 self.N = 2
self.D = 1 self.D = 1
@ -613,7 +613,6 @@ class LaplaceTests(unittest.TestCase):
noise = np.random.randn(*self.X.shape)*self.real_std noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise self.Y = np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)

View file

@ -6,6 +6,60 @@ import unittest
import numpy as np import numpy as np
import GPy import GPy
class MiscTests(unittest.TestCase):
    """Compare ``_raw_predict`` output with posteriors computed by hand."""

    def setUp(self):
        """Sample noisy 1-d sine training data and a separate set of test inputs."""
        self.N = 20
        self.N_new = 50
        self.D = 1
        self.X = np.random.uniform(-3., 3., (self.N, 1))
        self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
        self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))

    def test_raw_predict(self):
        """Check mean, full covariance and variance of GPRegression._raw_predict."""
        k = GPy.kern.RBF(1)
        m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
        m.randomize()
        # Reference posterior built directly from the kernel and the noise variance.
        Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.Gaussian_noise.variance)
        K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
        mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(self.Y)

        mu, covar = m._raw_predict(self.X_new, full_cov=True)
        self.assertEqual(mu.shape, (self.N_new, self.D))
        self.assertEqual(covar.shape, (self.N_new, self.N_new))
        np.testing.assert_almost_equal(K_hat, covar)
        np.testing.assert_almost_equal(mu_hat, mu)

        mu, var = m._raw_predict(self.X_new)
        self.assertEqual(mu.shape, (self.N_new, self.D))
        self.assertEqual(var.shape, (self.N_new, 1))
        np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
        np.testing.assert_almost_equal(mu_hat, mu)

    def test_sparse_raw_predict(self):
        """Check shapes and (co)variance of SparseGPRegression._raw_predict.

        The predictive mean is only shape-checked: no reference mean is
        computed in this test.
        """
        k = GPy.kern.RBF(1)
        m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
        m.randomize()
        Z = m.Z[:]

        #Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
        Kinv = m.posterior.woodbury_inv
        K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))

        mu, covar = m._raw_predict(self.X_new, full_cov=True)
        self.assertEqual(mu.shape, (self.N_new, self.D))
        self.assertEqual(covar.shape, (self.N_new, self.N_new))
        np.testing.assert_almost_equal(K_hat, covar)

        mu, var = m._raw_predict(self.X_new)
        self.assertEqual(mu.shape, (self.N_new, self.D))
        self.assertEqual(var.shape, (self.N_new, 1))
        np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
class GradientTests(unittest.TestCase): class GradientTests(unittest.TestCase):
def setUp(self): def setUp(self):
###################################### ######################################
@ -198,6 +252,7 @@ class GradientTests(unittest.TestCase):
m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k) m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_GP_EP_probit(self): def test_GP_EP_probit(self):
N = 20 N = 20
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
@ -207,6 +262,7 @@ class GradientTests(unittest.TestCase):
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_sparse_EP_DTC_probit(self): def test_sparse_EP_DTC_probit(self):
N = 20 N = 20
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
@ -221,6 +277,7 @@ class GradientTests(unittest.TestCase):
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_generalized_FITC(self): def test_generalized_FITC(self):
N = 20 N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]

View file

@ -21,8 +21,6 @@ class ParameterizedTest(Parameterized):
params_changed_count = _trigger_start params_changed_count = _trigger_start
def parameters_changed(self): def parameters_changed(self):
self.params_changed_count += 1 self.params_changed_count += 1
def _set_params(self, params, trigger_parent=True):
Parameterized._set_params(self, params, trigger_parent=trigger_parent)
class Test(unittest.TestCase): class Test(unittest.TestCase):

View file

@ -7,16 +7,16 @@ import unittest
import GPy import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.array_core import ObservableArray from GPy.core.parameterization.array_core import ObsAr
class ArrayCoreTest(unittest.TestCase): class ArrayCoreTest(unittest.TestCase):
def setUp(self): def setUp(self):
self.X = np.random.normal(1,1, size=(100,10)) self.X = np.random.normal(1,1, size=(100,10))
self.obsX = ObservableArray(self.X) self.obsX = ObsAr(self.X)
def test_init(self): def test_init(self):
X = ObservableArray(self.X) X = ObsAr(self.X)
X2 = ObservableArray(X) X2 = ObsAr(X)
self.assertIs(X, X2, "no new Observable array, when Observable is given") self.assertIs(X, X2, "no new Observable array, when Observable is given")
def test_slice(self): def test_slice(self):
@ -34,9 +34,9 @@ class ParameterizedTest(unittest.TestCase):
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model") self.test1 = GPy.core.Parameterized("test model")
self.test1.add_parameter(self.white) self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.rbf, 0) self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param) self.test1.add_parameter(self.param, 0)
x = np.linspace(-2,6,4)[:,None] x = np.linspace(-2,6,4)[:,None]
y = np.sin(x) y = np.sin(x)
@ -45,22 +45,24 @@ class ParameterizedTest(unittest.TestCase):
def test_add_parameter(self): def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0) self.assertEquals(self.rbf._parent_index_, 0)
self.assertEquals(self.white._parent_index_, 1) self.assertEquals(self.white._parent_index_, 1)
self.assertEquals(self.param._parent_index_, 0)
pass pass
def test_fixes(self): def test_fixes(self):
self.white.fix(warning=False) self.white.fix(warning=False)
self.test1.remove_parameter(self.test1.param) self.test1.remove_parameter(self.param)
self.assertTrue(self.test1._has_fixes()) self.assertTrue(self.test1._has_fixes())
from GPy.core.parameterization.transformations import FIXED, UNFIXED from GPy.core.parameterization.transformations import FIXED, UNFIXED
self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED]) self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])
self.test1.kern.add_parameter(self.white, 0)
self.test1.add_parameter(self.white, 0)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
self.test1.kern.rbf.fix()
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
def test_remove_parameter(self): def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
self.white.fix() self.white.fix()
self.test1.remove_parameter(self.white) self.test1.kern.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None) self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
@ -81,7 +83,12 @@ class ParameterizedTest(unittest.TestCase):
self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1]) self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
def test_remove_parameter_param_array_grad_array(self):
    """After removing ``white`` from the kernel, its parameter array must
    equal the first two entries of the array as it was before the removal."""
    kernel = self.test1.kern
    before = kernel._param_array_.copy()
    kernel.remove_parameter(self.white)
    expected = before[:2].tolist()
    self.assertListEqual(self.test1.kern._param_array_.tolist(), expected)
def test_add_parameter_already_in_hirarchy(self): def test_add_parameter_already_in_hirarchy(self):
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
@ -91,34 +98,51 @@ class ParameterizedTest(unittest.TestCase):
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
from GPy.core.parameterization.transformations import Logexp from GPy.core.parameterization.transformations import Logexp
kern = self.rbf+self.white kern = self.test1.kern
self.test1.remove_parameter(kern)
self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3)) self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3))
def test_constraints(self): def test_constraints(self):
self.rbf.constrain(GPy.transformations.Square(), False) self.rbf.constrain(GPy.transformations.Square(), False)
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2)) self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2]) self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size])
self.test1.remove_parameter(self.rbf) self.test1.kern.remove_parameter(self.rbf)
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), []) self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])
def test_constraints_views(self): def test_constraints_views(self):
self.assertEqual(self.white.constraints._offset, 2) self.assertEqual(self.white.constraints._offset, self.param.size+self.rbf.size)
self.assertEqual(self.rbf.constraints._offset, 0) self.assertEqual(self.rbf.constraints._offset, self.param.size)
self.assertEqual(self.param.constraints._offset, 3) self.assertEqual(self.param.constraints._offset, 0)
def test_fixing_randomize(self): def test_fixing_randomize(self):
self.white.fix(warning=False) self.white.fix(warning=True)
val = float(self.test1.white.variance) val = float(self.white.variance)
self.test1.randomize() self.test1.randomize()
self.assertEqual(val, self.white.variance) self.assertEqual(val, self.white.variance)
def test_randomize(self):
    """Randomizing ``param`` must change at least one of its values."""
    snapshot = self.test1.param.view(np.ndarray).copy()
    self.test1.param.randomize()
    all_unchanged = np.all(snapshot==self.test1.param)
    self.assertFalse(all_unchanged)
def test_fixing_randomize_parameter_handling(self):
    """A fixed ``rbf`` must keep its variance when the parent kernel is randomized."""
    self.rbf.fix(warning=True)
    variance_before = float(self.rbf.variance)
    self.test1.kern.randomize()
    self.assertEqual(variance_before, self.rbf.variance)
def test_fixing_optimize(self): def test_fixing_optimize(self):
self.testmodel.kern.lengthscale.fix() self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale) val = float(self.testmodel.kern.lengthscale)
self.testmodel.randomize() self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale) self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_printing(self):
    """Smoke test: printing the model, a parameter and an indexed view must not raise."""
    # Single-argument call form prints the same on Python 2 and also parses on Python 3.
    print(self.test1)
    print(self.param)
    print(self.test1[''])
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter'] #import sys;sys.argv = ['', 'Test.test_add_parameter']
unittest.main() unittest.main()

View file

@ -56,8 +56,6 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name) K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name)
#K = kernel * GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B')
#K = kernel ** GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa, name= 'B')
K['.*variance'] = 1. K['.*variance'] = 1.
K['.*variance'].fix() K['.*variance'].fix()
return K return K

View file

@ -18,7 +18,7 @@ setup(name = 'GPy',
license = "BSD 3-clause", license = "BSD 3-clause",
keywords = "machine-learning gaussian-processes kernels", keywords = "machine-learning gaussian-processes kernels",
url = "http://sheffieldml.github.com/GPy/", url = "http://sheffieldml.github.com/GPy/",
packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods', 'GPy.testing', 'GPy.util.latent_space_visualizations', 'GPy.util.latent_space_visualizations.controllers', 'GPy.likelihoods.noise_models', 'GPy.kern.parts', 'GPy.mappings'], packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"],
package_dir={'GPy': 'GPy'}, package_dir={'GPy': 'GPy'},
package_data = {'GPy': ['GPy/examples']}, package_data = {'GPy': ['GPy/examples']},
py_modules = ['GPy.__init__'], py_modules = ['GPy.__init__'],
@ -29,6 +29,4 @@ setup(name = 'GPy',
}, },
classifiers=[ classifiers=[
"License :: OSI Approved :: BSD License"], "License :: OSI Approved :: BSD License"],
#ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
# sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
) )