Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-06 02:24:17 +02:00)

Commit 8a83845937: merge changes
65 changed files with 1197 additions and 600 deletions
@@ -7,7 +7,7 @@ import warnings
 from .. import kern
 from ..util.linalg import dtrtrs
 from model import Model
-from parameterization import ObservableArray
+from parameterization import ObsAr
 from .. import likelihoods
 from ..likelihoods.gaussian import Gaussian
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation

@@ -31,21 +31,19 @@ class GP(Model):
         super(GP, self).__init__(name)

         assert X.ndim == 2
-        if isinstance(X, (ObservableArray, VariationalPosterior)):
+        if isinstance(X, (ObsAr, VariationalPosterior)):
             self.X = X
-        else: self.X = ObservableArray(X)
+        else: self.X = ObsAr(X)

         self.num_data, self.input_dim = self.X.shape

         assert Y.ndim == 2
-        self.Y = ObservableArray(Y)
+        self.Y = ObsAr(Y)
         assert Y.shape[0] == self.num_data
         _, self.output_dim = self.Y.shape

-        if Y_metadata is None:
-            self.Y_metadata = {}
-        else:
-            self.Y_metadata = Y_metadata
+        #TODO: check the type of this is okay?
+        self.Y_metadata = Y_metadata

         assert isinstance(kernel, kern.Kern)
         #assert self.input_dim == kernel.input_dim
@@ -76,25 +74,27 @@ class GP(Model):

     def _raw_predict(self, _Xnew, full_cov=False):
         """
-        Internal helper function for making predictions, does not account
-        for normalization or likelihood
+        For making predictions, does not account for normalization or likelihood

         full_cov is a boolean which defines whether the full covariance matrix
         of the prediction is computed. If full_cov is False (default), only the
         diagonal of the covariance is returned.

+        $$
+        p(f*|X*, X, Y) = \int_{-\infty}^{\infty} p(f*|f, X*) p(f|X, Y) df
+                       = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{x*x}(K_{xx} + \Sigma)^{-1}K_{xx*})
+        \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        $$

         """
         Kx = self.kern.K(_Xnew, self.X).T
-        #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
         WiKx = np.dot(self.posterior.woodbury_inv, Kx)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
             Kxx = self.kern.K(_Xnew)
-            #var = Kxx - tdot(LiKx.T)
-            var = np.dot(Kx.T, WiKx)
+            var = Kxx - np.dot(Kx.T, WiKx)
         else:
             Kxx = self.kern.Kdiag(_Xnew)
-            #var = Kxx - np.sum(LiKx*LiKx, 0)
             var = Kxx - np.sum(WiKx*Kx, 0)
             var = var.reshape(-1, 1)
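The docstring above gives the standard GP predictive equations. As a point of reference, here is a minimal NumPy sketch of the same computation, assuming a precomputed Woodbury vector v = (K_xx + Sigma)^{-1} Y and Woodbury inverse W = (K_xx + Sigma)^{-1}; the function and argument names are illustrative, not GPy API:

import numpy as np

def raw_predict_sketch(k_xstar_x, k_xstar_xstar_diag, v, W):
    # predictive mean: K_{x*x} (K_{xx} + Sigma)^{-1} Y
    mu = np.dot(k_xstar_x, v)
    # diagonal of the predictive covariance:
    # K_{x*x*} - K_{x*x} (K_{xx} + Sigma)^{-1} K_{xx*}
    WiKx = np.dot(W, k_xstar_x.T)
    var = k_xstar_xstar_diag - np.sum(WiKx * k_xstar_x.T, 0)
    return mu, var.reshape(-1, 1)

This mirrors the diagonal branch of _raw_predict above; the full_cov branch only differs in keeping the whole matrix rather than its diagonal.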
@@ -10,11 +10,11 @@ class Mapping(Parameterized):
     Base model for shared behavior between models that can act like a mapping.
     """

-    def __init__(self, input_dim, output_dim):
+    def __init__(self, input_dim, output_dim, name='mapping'):
         self.input_dim = input_dim
         self.output_dim = output_dim

-        super(Mapping, self).__init__()
+        super(Mapping, self).__init__(name=name)
         # Model.__init__(self)
         # All leaf nodes should call self._set_params(self._get_params()) at
         # the end

@@ -1,5 +1,5 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from param import Param, ObservableArray
+from param import Param, ObsAr
 from parameterized import Parameterized
@@ -1,25 +1,25 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-__updated__ = '2013-12-16'
+__updated__ = '2014-03-17'

 import numpy as np
 from parameter_core import Observable

-class ObservableArray(np.ndarray, Observable):
+class ObsAr(np.ndarray, Observable):
     """
     An ndarray which reports changes to its observers.
     The observers can add themselves with a callable, which
     will be called every time this array changes. The callable
     takes exactly one argument, which is this array itself.
     """
-    __array_priority__ = -1 # Never give back ObservableArray
+    __array_priority__ = -1 # Never give back ObsAr
     def __new__(cls, input_array, *a, **kw):
-        if not isinstance(input_array, ObservableArray):
+        if not isinstance(input_array, ObsAr):
             obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
         else: obj = input_array
-        cls.__name__ = "ObservableArray\n "
-        super(ObservableArray, obj).__init__(*a, **kw)
+        #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
+        super(ObsAr, obj).__init__(*a, **kw)
         return obj

     def __array_finalize__(self, obj):
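The ObsAr docstring above describes the contract: observers register a callable that receives the array itself whenever the array changes. A self-contained toy version of that pattern (deliberately much simpler than GPy's Observable machinery, and not its API) looks roughly like this:

import numpy as np

class TinyObsArray(np.ndarray):
    """Toy ndarray subclass that calls registered observers on assignment."""
    def __new__(cls, input_array):
        obj = np.atleast_1d(np.asarray(input_array, dtype=np.float64)).view(cls)
        obj.observers = []
        return obj

    def __array_finalize__(self, obj):
        # views and slices inherit the observer list
        self.observers = getattr(obj, 'observers', [])

    def __setitem__(self, s, val):
        super(TinyObsArray, self).__setitem__(s, val)
        for callback in self.observers:
            callback(self)  # each observer takes the array itself, as documented

a = TinyObsArray([1., 2., 3.])
a.observers.append(lambda arr: None)  # register a (no-op) observer
a[1] = 5.                             # assignment triggers the callback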
@@ -30,6 +30,14 @@ class ObservableArray(np.ndarray, Observable):
     def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(np.ndarray)

+    def __reduce__(self):
+        func, args, state = np.ndarray.__reduce__(self)
+        return func, args, (state, Observable._getstate(self))
+
+    def __setstate__(self, state):
+        np.ndarray.__setstate__(self, state[0])
+        Observable._setstate(self, state[1])
+
     def _s_not_empty(self, s):
         # this checks whether there is something picked by this slice.
         return True
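The __reduce__/__setstate__ pair added here is the standard recipe for pickling an ndarray subclass that carries extra state: bundle the extra state into ndarray's own reduce tuple, then restore both halves on load. A minimal standalone sketch of the same recipe, with a hypothetical `label` attribute standing in for the observer state:

import pickle
import numpy as np

class StatefulArray(np.ndarray):
    """ndarray subclass whose extra attribute survives pickling."""
    def __new__(cls, input_array, label=None):
        obj = np.asarray(input_array).view(cls)
        obj.label = label
        return obj

    def __array_finalize__(self, obj):
        self.label = getattr(obj, 'label', None)

    def __reduce__(self):
        func, args, state = np.ndarray.__reduce__(self)
        return func, args, (state, self.label)   # append the extra state

    def __setstate__(self, state):
        np.ndarray.__setstate__(self, state[0])  # restore the array data
        self.label = state[1]                    # restore the extra state

a = StatefulArray([1., 2.], label='lengthscale')
b = pickle.loads(pickle.dumps(a))
assert b.label == 'lengthscale'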
@@ -46,7 +54,7 @@ class ObservableArray(np.ndarray, Observable):

     def __setitem__(self, s, val):
         if self._s_not_empty(s):
-            super(ObservableArray, self).__setitem__(s, val)
+            super(ObsAr, self).__setitem__(s, val)
             self.notify_observers(self[s])

     def __getslice__(self, start, stop):

@@ -56,7 +64,7 @@ class ObservableArray(np.ndarray, Observable):
         return self.__setitem__(slice(start, stop), val)

     def __copy__(self, *args):
-        return ObservableArray(self.view(np.ndarray).copy())
+        return ObsAr(self.view(np.ndarray).copy())

     def copy(self, *args):
         return self.__copy__(*args)
@@ -4,7 +4,7 @@
 import itertools
 import numpy
 from parameter_core import OptimizationHandlable, adjust_name_for_printing
-from array_core import ObservableArray
+from array_core import ObsAr

 ###### printing
 __constraints_name__ = "Constraint"

@@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
 __print_threshold__ = 5
 ######

-class Param(OptimizationHandlable, ObservableArray):
+class Param(OptimizationHandlable, ObsAr):
     """
     Parameter object for GPy models.

@@ -226,7 +226,7 @@ class Param(OptimizationHandlable, ObservableArray):
     # Constrainable
     #===========================================================================
     def _ensure_fixes(self):
-        self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
+        if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)

     #===========================================================================
     # Convenience

@@ -269,6 +269,8 @@ class Param(OptimizationHandlable, ObservableArray):
     @property
     def _ties_str(self):
         return ['']
+    def _ties_for(self, ravi):
+        return [['N/A']]*ravi.size
     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format(
             x=self.hierarchy_name())

@@ -312,7 +314,7 @@ class Param(OptimizationHandlable, ObservableArray):
         ravi = self._raveled_index(filter_)
         if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
         if prirs is None: prirs = self.priors.properties_for(ravi)
-        if ties is None: ties = [['N/A']]*self.size
+        if ties is None: ties = self._ties_for(ravi)
         ties = [' '.join(map(lambda x: x, t)) for t in ties]
         if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
         if lx is None: lx = self._max_len_values()
@@ -16,7 +16,7 @@ Observable Pattern for patameterization
 from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np

-__updated__ = '2014-03-14'
+__updated__ = '2014-03-18'

 class HierarchyError(Exception):
     """

@@ -56,7 +56,7 @@ class InterfacePickleFunctions(object):
         """
         raise NotImplementedError, "To be able to use pickling you need to implement this method"

-class Pickleable(object):
+class Pickleable(InterfacePickleFunctions):
     """
     Make an object pickleable (See python doc 'pickling').

@@ -95,7 +95,7 @@ class Pickleable(object):
     def _has_get_set_state(self):
         return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)

-class Observable(InterfacePickleFunctions):
+class Observable(Pickleable):
     """
     Observable pattern for parameterization.

@@ -155,6 +155,7 @@ class Observable(InterfacePickleFunctions):

     def _getstate(self):
         return [self._observer_callables_]

     def _setstate(self, state):
         self._observer_callables_ = state.pop()

@@ -376,7 +377,7 @@ class Constrainable(Nameable, Indexable):
         # Ensure that the fixes array is set:
         # Parameterized: ones(self.size)
         # Param: ones(self._realsize_
-        self._fixes_ = np.ones(self.size, dtype=bool)
+        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)

     def _set_fixed(self, index):
         self._ensure_fixes()

@@ -397,7 +398,7 @@ class Constrainable(Nameable, Indexable):
         self._fixes_ = None

     def _has_fixes(self):
-        return hasattr(self, "_fixes_") and self._fixes_ is not None
+        return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size

     #===========================================================================
     # Prior Operations

@@ -540,12 +541,12 @@ class Constrainable(Nameable, Indexable):
         print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
         which.add(what, self._raveled_index())

-    def _remove_from_index_operations(self, which, what):
+    def _remove_from_index_operations(self, which, transforms):
         """
         Helper preventing copy code.
         Remove given what (transform prior etc) from which param index ops.
         """
-        if len(what) == 0:
+        if len(transforms) == 0:
             transforms = which.properties()
         removed = np.empty((0,), dtype=int)
         for t in transforms:
@@ -566,24 +567,32 @@ class OptimizationHandlable(Constrainable):
         super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)

     def transform(self):
-        [np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+        [np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]

     def untransform(self):
-        [np.put(self._param_array_, ind, c.f(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+        [np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]

     def _get_params_transformed(self):
         # transformed parameters (apply transformation rules)
         p = self._param_array_.copy()
         [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-        if self._has_fixes():
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            return p[fixes]
+        elif self._has_fixes():
             return p[self._fixes_]
         return p

     def _set_params_transformed(self, p):
         if p is self._param_array_:
             p = p.copy()
-        if self._has_fixes(): self._param_array_[self._fixes_] = p
-        else: self._param_array_[:] = p
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            self._param_array_.flat[fixes] = p
+        elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p
+        else: self._param_array_.flat = p
         self.untransform()
         self._trigger_params_changed()
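_get_params_transformed above does two things: it inverse-transforms the parameter vector into the optimizer's unconstrained space, then strips out entries marked fixed with a boolean mask. A standalone sketch of that masking step, with illustrative names (plain False stands in here for the FIXED constant imported above):

import numpy as np

params = np.array([1.0, 2.5, 0.3, 4.0])   # positive, constrained parameters
free_mask = np.ones(params.size, dtype=bool)
free_mask[[1, 3]] = False                 # indices 1 and 3 are fixed

# inverse softplus (the Logexp.finv rule) maps positive values to the
# optimizer's unconstrained space; fixed entries are then dropped
transformed = np.log(np.exp(params) - 1.)
optimizer_vector = transformed[free_mask]  # only free parameters are exposed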
@@ -661,8 +670,8 @@ class OptimizationHandlable(Constrainable):
         for pi in self._parameters_:
             pislice = slice(pi_old_size, pi_old_size+pi.size)

-            self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat
-            self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat
+            self._param_array_[pislice] = pi._param_array_.flat#, requirements=['C', 'W']).flat
+            self._gradient_array_[pislice] = pi._gradient_array_.flat#, requirements=['C', 'W']).flat

             pi._param_array_.data = parray[pislice].data
             pi._gradient_array_.data = garray[pislice].data

@@ -769,11 +778,11 @@ class Parameterizable(OptimizationHandlable):
         Add all parameters to this param class, you can insert parameters
         at any given index using the :func:`list.insert` syntax
         """
-        # if param.has_parent():
-        #     raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
         if param in self._parameters_ and index is not None:
             self.remove_parameter(param)
             self.add_parameter(param, index)
+        #elif param.has_parent():
+        #    raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
         elif param not in self._parameters_:
             if param.has_parent():
                 parent = param._parent_

@@ -797,13 +806,19 @@ class Parameterizable(OptimizationHandlable):

             param.add_observer(self, self._pass_through_notify_observers, -np.inf)

-            self.size += param.size
-            self._connect_parameters(ignore_added_names=_ignore_added_names)
-            self._notify_parent_change()
-            self._connect_fixes()
+            parent = self
+            while parent is not None:
+                parent.size += param.size
+                parent = parent._parent_
+
+            self._connect_parameters()
+
+            self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
+            self._highest_parent_._notify_parent_change()
+            self._highest_parent_._connect_fixes()
         else:
-            raise RuntimeError, """Parameter exists already added and no copy made"""
+            raise HierarchyError, """Parameter exists already and no copy made"""

     def add_parameters(self, *parameters):

@@ -829,17 +844,18 @@ class Parameterizable(OptimizationHandlable):
         param.remove_observer(self, self._pass_through_notify_observers)
         self.constraints.shift_left(start, param.size)

-        self._connect_fixes()
         self._connect_parameters()
         self._notify_parent_change()

         parent = self._parent_
         while parent is not None:
-            parent._connect_fixes()
-            parent._connect_parameters()
-            parent._notify_parent_change()
+            parent.size -= param.size
             parent = parent._parent_

+        self._highest_parent_._connect_parameters()
+        self._highest_parent_._connect_fixes()
+        self._highest_parent_._notify_parent_change()
+
     def _connect_parameters(self, ignore_added_names=False):
         # connect parameterlist to this parameterized object
         # This just sets up the right connection for the params objects

@@ -862,8 +878,8 @@ class Parameterizable(OptimizationHandlable):
             # first connect all children
             p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
             # then connect children to self
-            self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
-            self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
+            self._param_array_[pslice] = p._param_array_.flat#, requirements=['C', 'W']).ravel(order='C')
+            self._gradient_array_[pslice] = p._gradient_array_.flat#, requirements=['C', 'W']).ravel(order='C')

             if not p._param_array_.flags['C_CONTIGUOUS']:
                 import ipdb;ipdb.set_trace()
@@ -7,10 +7,10 @@ from domains import _POSITIVE,_NEGATIVE, _BOUNDED
 import weakref

 import sys
-#_lim_val = -np.log(sys.float_info.epsilon)

 _exp_lim_val = np.finfo(np.float64).max
-_lim_val = np.log(_exp_lim_val)
+_lim_val = 36.0
+epsilon = np.finfo(np.float64).resolution

 #===============================================================================
 # Fixing constants

@@ -54,19 +54,19 @@ class Transformation(object):
 class Logexp(Transformation):
     domain = _POSITIVE
     def f(self, x):
-        return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val))))
+        return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon
         #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
     def finv(self, f):
         return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
     def gradfactor(self, f):
-        return np.where(f>_lim_val, 1., 1 - np.exp(-f))
+        return np.where(f>_lim_val, 1., 1. - np.exp(-f))
     def initialize(self, f):
         if np.any(f < 0.):
             print "Warning: changing parameters to satisfy constraints"
         return np.abs(f)
     def __str__(self):
         return '+ve'


 class LogexpNeg(Transformation):
     domain = _POSITIVE
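Logexp is the softplus transform f(x) = log(1 + e^x), which maps an unconstrained optimizer value to a positive parameter. The new _lim_val = 36.0 works because beyond roughly 36 the +1 inside the logarithm falls below double precision, so log(1 + e^x) is indistinguishable from x and the exponential can be skipped entirely. A quick standalone check of that cutoff:

import numpy as np

def softplus(x, lim=36.0):
    # log(1 + e^x), with the large-x branch short-circuited to x itself
    x = np.asarray(x, dtype=np.float64)
    return np.where(x > lim, x, np.log1p(np.exp(np.clip(x, -lim, lim))))

print(softplus(50.0))                       # 50.0: no overflow, e^50 never computed
print(abs(np.log1p(np.exp(36.0)) - 36.0))   # ~0: past 36 the +1 is lost in float64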
@@ -98,7 +98,7 @@ class NegativeLogexp(Transformation):
         return -self.logexp.initialize(f) # np.abs(f)
     def __str__(self):
         return '-ve'

 class LogexpClipped(Logexp):
     max_bound = 1e100
     min_bound = 1e-10
@@ -64,8 +64,8 @@ class SparseGP(GP):
             self.kern.gradient += target

             #gradients wrt Z
-            self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z)
-            self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations(
+            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient += self.kern.gradients_Z_expectations(
                 self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
         else:
             #gradients wrt kernel

@@ -76,8 +76,8 @@ class SparseGP(GP):
             self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
             self.kern.gradient += target
             #gradients wrt Z
-            self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
+            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
+            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)

     def _raw_predict(self, Xnew, full_cov=False):
         """

@@ -88,8 +88,9 @@ class SparseGP(GP):
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
             Kxx = self.kern.K(Xnew)
-            #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
-            var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
+            var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
+            #var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
+            var = var.squeeze()
         else:
             Kxx = self.kern.Kdiag(Xnew)
             var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
@@ -6,4 +6,4 @@ import regression
 import dimensionality_reduction
 import tutorials
 import stochastic
 import non_gaussian

@@ -23,13 +23,10 @@ K = Bias.prod(Coreg,name='X')

 #K.coregion.W = 0
 #print K.coregion.W

 #print Bias.K(_X,_X)
 #print K.K(X,X)

 #pb.matshow(K.K(X,X))

 Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
 kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H')
 kern.B.W = 0

@@ -37,16 +34,22 @@ kern.B.kappa = 1.
 #kern.B.W.fix()
 #kern.B.kappa.fix()
 #m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
-m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], kernel=kern)

+Z1 = np.array([1.5,2.5])[:,None]
+
+m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern)
 #m.optimize()
 m.checkgrad(verbose=1)

+"""
 fig = pb.figure()
 ax0 = fig.add_subplot(211)
 ax1 = fig.add_subplot(212)
 slices = GPy.util.multioutput.get_slices([Y1,Y2])
 m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
 #m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
+"""
@@ -160,6 +160,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
     import GPy
     from matplotlib import pyplot as plt
+    from ..util.misc import param_to_array

     _np.random.seed(0)
     data = GPy.util.datasets.oil()

@@ -173,11 +174,11 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
         m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)

     if plot:
-        y = m.Y[0, :]
+        y = m.Y
         fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
         m.plot_latent(ax=latent_axes)
         data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
-        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
+        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable
             m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
         raw_input('Press enter to finish')
         plt.close(fig)
@@ -158,7 +158,7 @@ def boston_example(optimize=True, plot=True):
     #Gaussian GP
     print "Gauss GP"
     mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
-    mgp.constrain_fixed('white', 1e-5)
+    mgp.constrain_fixed('.*white', 1e-5)
     mgp['rbf_len'] = rbf_len
     mgp['noise'] = noise
     print mgp

@@ -176,7 +176,7 @@ def boston_example(optimize=True, plot=True):
     g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
     mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
     mg.constrain_positive('noise_variance')
-    mg.constrain_fixed('white', 1e-5)
+    mg.constrain_fixed('.*white', 1e-5)
     mg['rbf_len'] = rbf_len
     mg['noise'] = noise
     print mg

@@ -194,10 +194,10 @@ def boston_example(optimize=True, plot=True):
     t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
     mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
-    mstu_t.constrain_fixed('white', 1e-5)
-    mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
+    mstu_t.constrain_fixed('.*white', 1e-5)
+    mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000)
     mstu_t['rbf_len'] = rbf_len
-    mstu_t['t_noise'] = noise
+    mstu_t['.*t_noise'] = noise
     print mstu_t
     if optimize:
         mstu_t.optimize(optimizer=optimizer, messages=messages)
@@ -25,80 +25,51 @@ def olympic_marathon_men(optimize=True, plot=True):

     return m

-def coregionalization_toy2(optimize=True, plot=True):
+def coregionalization_toy(optimize=True, plot=True):
     """
     A simple demonstration of coregionalization on two sinusoidal functions.
     """
     #build a design matrix with a column of integers indicating the output
     X1 = np.random.rand(50, 1) * 8
     X2 = np.random.rand(30, 1) * 5
-    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
-    X = np.hstack((np.vstack((X1, X2)), index))

     #build a suitable set of observed variables
     Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
     Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
-    Y = np.vstack((Y1, Y2))

-    #build the kernel
-    k1 = GPy.kern.RBF(1) + GPy.kern.Bias(1)
-    k2 = GPy.kern.Coregionalize(2,1)
-    k = k1**k2
-    m = GPy.models.GPRegression(X, Y, kernel=k)
+    m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])

     if optimize:
         m.optimize('bfgs', max_iters=100)

     if plot:
-        m.plot(fixed_inputs=[(1,0)])
-        m.plot(fixed_inputs=[(1,1)], ax=pb.gca())
+        slices = GPy.util.multioutput.get_slices([X1,X2])
+        m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
+        m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
     return m

-#FIXME: Needs recovering once likelihoods are consolidated
-#def coregionalization_toy(optimize=True, plot=True):
-#    """
-#    A simple demonstration of coregionalization on two sinusoidal functions.
-#    """
-#    X1 = np.random.rand(50, 1) * 8
-#    X2 = np.random.rand(30, 1) * 5
-#    X = np.vstack((X1, X2))
-#    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
-#    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
-#    Y = np.vstack((Y1, Y2))
-#
-#    k1 = GPy.kern.RBF(1)
-#    m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
-#    m.constrain_fixed('.*rbf_var', 1.)
-#    m.optimize(max_iters=100)
-#
-#    fig, axes = pb.subplots(2,1)
-#    m.plot(fixed_inputs=[(1,0)],ax=axes[0])
-#    m.plot(fixed_inputs=[(1,1)],ax=axes[1])
-#    axes[0].set_title('Output 0')
-#    axes[1].set_title('Output 1')
-#    return m

 def coregionalization_sparse(optimize=True, plot=True):
     """
     A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
     """
-    #fetch the data from the non sparse examples
-    m = coregionalization_toy2(optimize=False, plot=False)
-    X, Y = m.X, m.Y
+    #build a design matrix with a column of integers indicating the output
+    X1 = np.random.rand(50, 1) * 8
+    X2 = np.random.rand(30, 1) * 5

-    k = GPy.kern.RBF(1)**GPy.kern.Coregionalize(2)
+    #build a suitable set of observed variables
+    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
+    Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.

-    #construct a model
-    m = GPy.models.SparseGPRegression(X,Y, num_inducing=25, kernel=k)
-    m.Z[:,1].fix() # don't optimize the inducing input indexes
+    m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])

     if optimize:
-        m.optimize('bfgs', max_iters=100, messages=1)
+        m.optimize('bfgs', max_iters=100)

     if plot:
-        m.plot(fixed_inputs=[(1,0)])
-        m.plot(fixed_inputs=[(1,1)], ax=pb.gca())
+        slices = GPy.util.multioutput.get_slices([X1,X2])
+        m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
+        m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
+        pb.ylim(-3,)

     return m
@@ -19,19 +19,15 @@ class DTC(object):
     def __init__(self):
         self.const_jitter = 1e-6

-    def inference(self, kern, X, Z, likelihood, Y):
+    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
         assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."

-        #TODO: MAX! fix this!
-        from ...util.misc import param_to_array
-        Y = param_to_array(Y)

         num_inducing, _ = Z.shape
         num_data, output_dim = Y.shape

         #make sure the noise is not hetero
-        beta = 1./np.squeeze(likelihood.variance)
-        if beta.size <1:
+        beta = 1./likelihood.gaussian_variance(Y_metadata)
+        if beta.size > 1:
             raise NotImplementedError, "no hetero noise with this implementation of DTC"

         Kmm = kern.K(Z)
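Note the guard flip from `beta.size <1` to `beta.size > 1`: a squeezed homoscedastic variance always has size exactly 1, so the old test could never fire, and heteroscedastic noise (one variance per datum) slipped through unrejected. A small illustration:

import numpy as np

homo = 1. / np.squeeze(np.array([[0.1]]))       # scalar noise variance
hetero = 1. / np.squeeze(np.full((5, 1), 0.1))  # per-datum noise variances

assert homo.size == 1    # never caught by the old `size < 1` test
assert hetero.size == 5  # correctly rejected by the new `size > 1` test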
@@ -91,19 +87,15 @@ class vDTC(object):
     def __init__(self):
         self.const_jitter = 1e-6

-    def inference(self, kern, X, X_variance, Z, likelihood, Y):
+    def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
         assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."

-        #TODO: MAX! fix this!
-        from ...util.misc import param_to_array
-        Y = param_to_array(Y)

         num_inducing, _ = Z.shape
         num_data, output_dim = Y.shape

         #make sure the noise is not hetero
-        beta = 1./np.squeeze(likelihood.variance)
-        if beta.size <1:
+        beta = 1./likelihood.gaussian_variance(Y_metadata)
+        if beta.size > 1:
             raise NotImplementedError, "no hetero noise with this implementation of DTC"

         Kmm = kern.K(Z)

@@ -112,7 +104,7 @@ class vDTC(object):
         U = Knm
         Uy = np.dot(U.T,Y)

         #factor Kmm
         Kmmi, L, Li, _ = pdinv(Kmm)

         # Compute A
@@ -3,6 +3,7 @@

 from posterior import Posterior
 from ...util.linalg import pdinv, dpotrs, tdot
+from ...util import diag
 import numpy as np
 log_2_pi = np.log(2*np.pi)

@@ -41,7 +42,9 @@ class ExactGaussianInference(object):

         K = kern.K(X)

-        Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
+        Ky = K.copy()
+        diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
+        Wi, LW, LWi, W_logdet = pdinv(Ky)

         alpha, _ = dpotrs(LW, YYT_factor, lower=1)
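The change above avoids materialising a dense likelihood covariance: with homoscedastic Gaussian noise only the diagonal of K needs the variance added, an O(n) update instead of an O(n^2) matrix sum. Assuming GPy's util.diag.add adds a value along the diagonal in place, the plain NumPy equivalent is:

import numpy as np

K = np.eye(4)    # stand-in for kern.K(X)
sigma2 = 0.1     # stand-in for likelihood.gaussian_variance(Y_metadata)

Ky = K.copy()
Ky.flat[::Ky.shape[0] + 1] += sigma2  # in-place diagonal add, no n x n temporary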
@@ -11,9 +11,9 @@ class EP(object):

         :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
         :type epsilon: float
-        :param eta: Power EP thing TODO: Ricardo: what, exactly?
+        :param eta: parameter for fractional EP updates.
         :type eta: float64
-        :param delta: Power EP thing TODO: Ricardo: what, exactly?
+        :param delta: damping EP updates factor.
         :type delta: float64
         """
         self.epsilon, self.eta, self.delta = epsilon, eta, delta
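For readers unfamiliar with the two knobs documented above: eta is the power used in fractional (power) EP, and delta damps each site update by mixing the proposed value with the old one. A generic damped update, sketched outside GPy's internals, has the form:

def damped_update(old, proposed, delta):
    """Damped EP step: delta=1 is a full update, smaller delta is more cautious."""
    return delta * proposed + (1. - delta) * old

tau_site = damped_update(old=2.0, proposed=3.0, delta=0.5)  # -> 2.5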
@@ -17,14 +17,14 @@ class FITC(object):
     """
     const_jitter = 1e-6

-    def inference(self, kern, X, Z, likelihood, Y):
+    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):

         num_inducing, _ = Z.shape
         num_data, output_dim = Y.shape

         #make sure the noise is not hetero
-        sigma_n = np.squeeze(likelihood.variance)
-        if sigma_n.size <1:
+        sigma_n = likelihood.gaussian_variance(Y_metadata)
+        if sigma_n.size >1:
             raise NotImplementedError, "no hetero noise with this implementation of FITC"

         Kmm = kern.K(Z)
@@ -51,12 +51,11 @@ class Laplace(object):
         Ki_f_init = self._previous_Ki_fhat

         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)

         self.f_hat = f_hat
         self.Ki_fhat = Ki_fhat
         self.K = K.copy()
         #Compute hessian and other variables at mode
-        log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
+        log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

         self._previous_Ki_fhat = Ki_fhat.copy()
         return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}

@@ -86,13 +85,13 @@ class Laplace(object):

         #define the objective function (to be maximised)
         def obj(Ki_f, f):
-            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
+            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata)

         difference = np.inf
         iteration = 0
         while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
-            W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
-            grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
+            W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
+            grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)

             W_f = W*f
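The loop above is the usual Newton iteration for locating the Laplace-approximation mode (Rasmussen and Williams, Algorithm 3.1): with W = -d2 log p(y|f)/df2 and the likelihood gradient, each step solves a linear system built from K and W. A compact dense-solve sketch, where dlogpdf and d2logpdf are hypothetical stand-ins for the likelihood callbacks used above (production code factorises B = I + W^{1/2} K W^{1/2} instead, for numerical stability):

import numpy as np

def laplace_mode(K, dlogpdf, d2logpdf, max_iter=50, tol=1e-9):
    """Newton iterations for f_hat = argmax -0.5 f' K^{-1} f + log p(y|f)."""
    n = K.shape[0]
    f = np.zeros((n, 1))
    for _ in range(max_iter):
        W = -d2logpdf(f)             # (n,1) for a factorising likelihood
        b = W * f + dlogpdf(f)       # Newton right-hand side, diagonal W
        # f_new = (K^{-1} + W)^{-1} b = K (I + W K)^{-1} b, as a dense solve
        f_new = np.dot(K, np.linalg.solve(np.eye(n) + W * K, b))
        if np.sum((f_new - f) ** 2) < tol:
            return f_new
        f = f_new
    return f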
@@ -136,13 +135,12 @@ class Laplace(object):
         At the mode, compute the hessian and effective covariance matrix.

         returns: logZ : approximation to the marginal likelihood
-                 woodbury_vector : variable required for calculating the approximation to the covariance matrix
                  woodbury_inv : variable required for calculating the approximation to the covariance matrix
                  dL_dthetaL : array of derivatives (1 x num_kernel_params)
                  dL_dthetaL : array of derivatives (1 x num_likelihood_params)
         """
         #At this point get the hessian matrix (or vector as W is diagonal)
-        W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
+        W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)

         K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)

@@ -151,11 +149,10 @@ class Laplace(object):
         Ki_W_i = K - C.T.dot(C) #Could this be wrong?

         #compute the log marginal
-        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
+        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))

         #Compute vival matrices for derivatives
-        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
-        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
+        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
         dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
         #BiK, _ = dpotrs(L, K, lower=1)
         #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df

@@ -169,7 +166,7 @@ class Laplace(object):
             explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)

             #Implicit
-            implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
+            implicit_part = np.dot(Ki_f, dL_dfhat.T).dot(I_KW_i)

             dL_dK = explicit_part + implicit_part
         else:

@@ -179,7 +176,7 @@ class Laplace(object):
         #compute dL_dthetaL#
         ####################
         if likelihood.size > 0 and not likelihood.is_fixed:
-            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
+            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata)

             num_params = likelihood.size
             # make space for one derivative for each likelihood parameter

@@ -200,7 +197,7 @@ class Laplace(object):
         else:
             dL_dthetaL = np.zeros(likelihood.size)

-        return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
+        return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

     def _compute_B_statistics(self, K, W, log_concave):
         """
@ -73,20 +73,37 @@ class Posterior(object):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def mean(self):
|
def mean(self):
|
||||||
|
"""
|
||||||
|
Posterior mean
|
||||||
|
$$
|
||||||
|
K_{xx}v
|
||||||
|
v := \texttt{Woodbury vector}
|
||||||
|
$$
|
||||||
|
"""
|
||||||
if self._mean is None:
|
if self._mean is None:
|
||||||
self._mean = np.dot(self._K, self.woodbury_vector)
|
self._mean = np.dot(self._K, self.woodbury_vector)
|
||||||
return self._mean
|
return self._mean
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def covariance(self):
|
def covariance(self):
|
||||||
|
"""
|
||||||
|
Posterior covariance
|
||||||
|
$$
|
||||||
|
K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
|
||||||
|
W_{xx} := \texttt{Woodbury inv}
|
||||||
|
$$
|
||||||
|
"""
|
||||||
if self._covariance is None:
|
if self._covariance is None:
|
||||||
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
|
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
|
||||||
self._covariance = self._K - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
|
self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
|
||||||
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
|
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
|
||||||
return self._covariance.squeeze()
|
return self._covariance
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def precision(self):
|
def precision(self):
|
||||||
|
"""
|
||||||
|
Inverse of posterior covariance
|
||||||
|
"""
|
||||||
if self._precision is None:
|
if self._precision is None:
|
||||||
cov = np.atleast_3d(self.covariance)
|
cov = np.atleast_3d(self.covariance)
|
||||||
self._precision = np.zeros(cov.shape) # if one covariance per dimension
|
self._precision = np.zeros(cov.shape) # if one covariance per dimension
|
||||||
|
|
@ -96,8 +113,15 @@ class Posterior(object):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def woodbury_chol(self):
|
def woodbury_chol(self):
|
||||||
|
"""
|
||||||
|
return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
|
||||||
|
$$
|
||||||
|
L_{W}L_{W}^{\top} = W^{-1}
|
||||||
|
W^{-1} := \texttt{Woodbury inv}
|
||||||
|
$$
|
||||||
|
"""
|
||||||
if self._woodbury_chol is None:
|
if self._woodbury_chol is None:
|
||||||
#compute woodbury chol from
|
#compute woodbury chol from
|
||||||
if self._woodbury_inv is not None:
|
if self._woodbury_inv is not None:
|
||||||
winv = np.atleast_3d(self._woodbury_inv)
|
winv = np.atleast_3d(self._woodbury_inv)
|
||||||
self._woodbury_chol = np.zeros(winv.shape)
|
self._woodbury_chol = np.zeros(winv.shape)
|
||||||
|
|
@@ -121,6 +145,13 @@ class Posterior(object):

    @property
    def woodbury_inv(self):
        """
        The inverse of the Woodbury matrix; in the Gaussian likelihood case it is defined as
        $$
        (K_{xx} + \Sigma_{xx})^{-1}
        \Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
        $$
        """
        if self._woodbury_inv is None:
            self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
            #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
@@ -129,17 +160,22 @@ class Posterior(object):

    @property
    def woodbury_vector(self):
        """
        The Woodbury vector; in the Gaussian likelihood case only, it is defined as
        $$
        (K_{xx} + \Sigma)^{-1}Y
        \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
        $$
        """
        if self._woodbury_vector is None:
            self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
        return self._woodbury_vector

    @property
    def K_chol(self):
        """
        Cholesky of the prior covariance K
        """
        if self._K_chol is None:
            self._K_chol = jitchol(self._K)
        return self._K_chol
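Taken together, the cached quantities satisfy a handful of identities that are easy to check numerically. A minimal NumPy sketch (toy kernel matrix and noise level invented purely for illustration):

    import numpy as np

    X = np.linspace(0, 1, 5)[:, None]
    K = np.exp(-0.5 * (X - X.T)**2 / 0.3**2)       # toy prior covariance K_{xx}
    Y = np.sin(4 * X)
    noise = 0.1                                    # Gaussian likelihood variance

    W_inv = np.linalg.inv(K + noise * np.eye(5))   # woodbury_inv: (K + Sigma)^{-1}
    v = np.dot(W_inv, Y)                           # woodbury_vector: (K + Sigma)^{-1} Y

    mean = np.dot(K, v)                            # posterior mean: K v
    cov = K - K.dot(W_inv).dot(K)                  # posterior covariance: K - K W^{-1} K

    L = np.linalg.cholesky(W_inv)                  # woodbury_chol: L L^T = W^{-1}
    assert np.allclose(L.dot(L.T), W_inv)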
@@ -176,7 +176,6 @@ class VarDTC(object):

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

        return post, log_marginal, grad_dict

class VarDTCMissingData(object):

@@ -365,7 +364,7 @@ class VarDTCMissingData(object):
        return post, log_marginal, grad_dict

def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
    dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
    dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
    dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
    dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
    if het_noise:
GPy/inference/optimization/BayesOpt.py (new file, 63 lines)

@@ -0,0 +1,63 @@
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt


####### Preliminary BO with standard acquisition functions ###############################
# Types of BO
# MM: Maximum (or minimum) mean
# MPI: Maximum posterior improvement
# MUI: Maximum upper interval

def BOacquisition(X, Y, model, type_bo="MPI", type_objective="max", par_mpi=0, z_mui=1.96, plot=True, n_eval=500):

    # Only works in dimension 1
    # Grid where the GP will be evaluated
    X_star = np.linspace(min(X)-10, max(X)+10, n_eval)
    X_star = X_star[:,None]

    # Posterior GP evaluated on the grid
    fest = model.predict(X_star)

    # Calculate the acquisition function
    ## IF Maximize
    if type_objective == "max":
        if type_bo == "MPI": # add others here
            acqu = norm.cdf((fest[0]-(1+par_mpi)*max(fest[0])) / fest[1])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MM":
            acqu = fest[0]/max(fest[0])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MUI":
            acqu = fest[0]+z_mui*np.sqrt(fest[1])
            acqu = acqu/(2*max(acqu))
        optimal_loc = np.argmax(acqu)
        x_new = X_star[optimal_loc]

    ## IF Minimize
    if type_objective == "min":
        if type_bo == "MPI": # add others here
            acqu = 1-norm.cdf((fest[0]-(1+par_mpi)*min(fest[0])) / fest[1])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MM":
            acqu = 1-fest[0]/max(fest[0])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MUI":
            acqu = -fest[0]+z_mui*np.sqrt(fest[1])
            acqu = acqu/(2*max(acqu))
        optimal_loc = np.argmax(acqu)
        x_new = X_star[optimal_loc]

    # Plot GP posterior, collected data and the acquisition function
    if plot:
        plt.plot(X, Y, 'p')
        plt.title('Acquisition function')
        model.plot()
        plt.plot(X_star, acqu, 'r--')

    # Return the point where we should take the new sample
    return x_new
###############################################################
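For orientation, a sketch of how this helper might drive a 1-D loop. The toy objective, the `GPy.models.GPRegression`/`GPy.kern.RBF` constructors and the iteration count are illustrative assumptions, not part of the committed file:

    import numpy as np
    import GPy
    from GPy.inference.optimization.BayesOpt import BOacquisition

    f = lambda x: np.sin(x) + 0.1*x          # invented 1-D objective
    X = np.random.uniform(0, 10, (5, 1))     # initial design
    Y = f(X)

    for _ in range(10):
        m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))  # assumed model API
        m.optimize()
        x_new = BOacquisition(X, Y, m, type_bo="MPI", type_objective="max", plot=False)
        X = np.vstack([X, x_new[None, :]])   # x_new is the selected grid point
        Y = np.vstack([Y, f(x_new)[None, :]])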
@@ -9,4 +9,6 @@ from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical
from _src.coregionalize import Coregionalize
from _src.ssrbf import SSRBF
from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
from _src.ODE_UY import ODE_UY
GPy/kern/_src/ODE_UY.py (new file, 282 lines)

@@ -0,0 +1,282 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices

class ODE_UY(Kern):
    def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
        assert input_dim == 2, "only defined for 2 input dims"
        super(ODE_UY, self).__init__(input_dim, active_dims, name)

        self.variance_Y = Param('variance_Y', variance_Y, Logexp())
        self.variance_U = Param('variance_U', variance_U, Logexp())
        self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
        self.lengthscale_U = Param('lengthscale_U', lengthscale_U, Logexp())

        self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)

    def K(self, X, X2=None):
        # model : a * dy/dt + b * y = U
        # lu=sqrt(3)/theta1  ly=1/theta2  theta2=a/b :thetay  sigma2=1/(2ab) :sigmay

        X, slices = X[:,:-1], index_to_slices(X[:,-1])
        if X2 is None:
            X2, slices2 = X, slices
            K = np.zeros((X.shape[0], X.shape[0]))
        else:
            X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
            K = np.zeros((X.shape[0], X2.shape[0]))

        #rdist = X[:,0][:,None] - X2[:,0][:,None].T
        rdist = X - X2.T
        ly = 1./self.lengthscale_Y
        lu = np.sqrt(3)/self.lengthscale_U
        #iu = self.input_lengthU  # dimension of U
        Vu = self.variance_U
        Vy = self.variance_Y
        #Vy = ly/2

        # kernel for kuu: Matern 3/2
        kuu = lambda dist: Vu * (1 + lu*np.abs(dist)) * np.exp(-lu*np.abs(dist))

        # kernel for kyy
        k1 = lambda dist: np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
        k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly)) / (ly-lu)**2
        k3 = lambda dist: np.exp(-lu*dist) * ((1+lu*dist)/(lu+ly) + lu/(lu+ly)**2)
        kyy = lambda dist: Vu*Vy*(k1(dist) + k2(dist) + k3(dist))

        # cross covariance function
        kyu3 = lambda dist: np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1./(lu+ly)))
        k1cros = lambda dist: np.exp(ly*dist)/(lu-ly) * (1 - np.exp((lu-ly)*dist) + lu*(dist*np.exp((lu-ly)*dist) + (1 - np.exp((lu-ly)*dist))/(lu-ly)))
        k2cros = lambda dist: np.exp(ly*dist)*(1./(lu+ly) + lu/(lu+ly)**2)

        Vyu = np.sqrt(Vy*ly*2)

        # cross covariance kuy
        kuyp = lambda dist: Vu*Vyu*kyu3(dist)                    #t>0 kuy
        kuyn = lambda dist: Vu*Vyu*(k1cros(dist)+k2cros(dist))   #t<0 kuy
        # cross covariance kyu
        kyup = lambda dist: Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
        kyun = lambda dist: Vu*Vyu*kyu3(-dist)                   #t<0 kyu

        for i, s1 in enumerate(slices):
            for j, s2 in enumerate(slices2):
                for ss1 in s1:
                    for ss2 in s2:
                        if i==0 and j==0:
                            K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
                        elif i==0 and j==1:
                            K[ss1,ss2] = np.where(rdist[ss1,ss2]>0, kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2]))
                        elif i==1 and j==1:
                            K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
                        else:
                            K[ss1,ss2] = np.where(rdist[ss1,ss2]>0, kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2]))
        return K

    def Kdiag(self, X):
        """Compute the diagonal of the covariance matrix associated to X."""
        Kdiag = np.zeros(X.shape[0])
        ly = 1./self.lengthscale_Y
        lu = np.sqrt(3)/self.lengthscale_U

        Vu = self.variance_U
        Vy = self.variance_Y

        k1 = (2*lu+ly)/(lu+ly)**2
        k2 = (ly-2*lu + 2*lu-ly) / (ly-lu)**2
        k3 = 1./(lu+ly) + lu/(lu+ly)**2

        slices = index_to_slices(X[:,-1])

        for i, ss1 in enumerate(slices):
            for s1 in ss1:
                if i==0:
                    Kdiag[s1] += self.variance_U
                elif i==1:
                    Kdiag[s1] += Vu*Vy*(k1+k2+k3)
                else:
                    raise ValueError, "invalid input/output index"
        #Kdiag[slices[0][0]] += self.variance_U  # matern32 diag
        #Kdiag[slices[1][0]] += self.variance_U*self.variance_Y*(k1+k2+k3)
        return Kdiag
    def update_gradients_full(self, dL_dK, X, X2=None):
        """derivative of the covariance matrix with respect to the parameters."""
        X, slices = X[:,:-1], index_to_slices(X[:,-1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])

        rdist = X - X2.T
        ly = 1./self.lengthscale_Y
        lu = np.sqrt(3)/self.lengthscale_U

        Vu = self.variance_U
        Vy = self.variance_Y
        Vyu = np.sqrt(Vy*ly*2)
        dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
        dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)

        rd = rdist.shape
        dktheta1 = np.zeros(rd)
        dktheta2 = np.zeros(rd)
        dkUdvar = np.zeros(rd)
        dkYdvar = np.zeros(rd)

        # dk/dtheta for UU
        UUdtheta1 = lambda dist: np.exp(-lu*dist)*dist + (-dist)*np.exp(-lu*dist)*(1+lu*dist)
        UUdtheta2 = lambda dist: 0
        UUdvar = lambda dist: (1 + lu*np.abs(dist)) * np.exp(-lu*np.abs(dist))

        # dk/dtheta for YY
        dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
        dk2theta1 = lambda dist: (1.0)*(
            np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
            +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
            +np.exp(-dist*ly)*2*(ly-lu)**(-2)
            +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
            )
        dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2./(lu+ly))+2+2*lu*dist+ly*dist)

        dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ((-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly))
        dk2theta2 = lambda dist: 1*(
            np.exp(-dist*lu)*(ly-lu)**(-2) * (1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1))
            +np.exp(-dist*ly)*(ly-lu)**(-2) * ((-dist)*(2*lu-ly) - 1 + (2*lu-ly)*(-2)*(ly-lu)**(-1))
            )
        dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3

        # kyy kernel
        k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
        k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly)) / (ly-lu)**2
        k3 = lambda dist: np.exp(-lu*dist) * ((1+lu*dist)/(lu+ly) + lu/(lu+ly)**2)

        # cross covariance function
        kyu3 = lambda dist: np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1./(lu+ly)))
        k1cros = lambda dist: np.exp(ly*dist)/(lu-ly) * (1 - np.exp((lu-ly)*dist) + lu*(dist*np.exp((lu-ly)*dist) + (1 - np.exp((lu-ly)*dist))/(lu-ly)))
        k2cros = lambda dist: np.exp(ly*dist)*(1./(lu+ly) + lu/(lu+ly)**2)
        # cross covariance kuy
        kuyp = lambda dist: kyu3(dist)                   #t>0 kuy
        kuyn = lambda dist: k1cros(dist)+k2cros(dist)    #t<0 kuy
        # cross covariance kyu
        kyup = lambda dist: k1cros(-dist)+k2cros(-dist)  #t>0 kyu
        kyun = lambda dist: kyu3(-dist)                  #t<0 kyu

        # dk/dtheta for UY
        dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ((-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2))
        dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
            (lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )

        dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
        dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
            np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )

        dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
            lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
            lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
            lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )

        dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
            lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
            np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
            np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
            lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )

        dkuypdtheta1 = lambda dist: dkyu3dtheta1(dist)                        #t>0 kuy
        dkuyndtheta1 = lambda dist: dkcros1dtheta1(dist)+dkcros2dtheta1(dist) #t<0 kuy
        # cross covariance kyu
        dkyupdtheta1 = lambda dist: dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist) #t>0 kyu
        dkyundtheta1 = lambda dist: dkyu3dtheta1(-dist)                       #t<0 kyu

        dkuypdtheta2 = lambda dist: dkyu3dtheta2(dist)                        #t>0 kuy
        dkuyndtheta2 = lambda dist: dkcros1dtheta2(dist)+dkcros2dtheta2(dist) #t<0 kuy
        # cross covariance kyu
        dkyupdtheta2 = lambda dist: dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist) #t>0 kyu
        dkyundtheta2 = lambda dist: dkyu3dtheta2(-dist)                       #t<0 kyu

        for i, s1 in enumerate(slices):
            for j, s2 in enumerate(slices2):
                for ss1 in s1:
                    for ss2 in s2:
                        if i==0 and j==0:
                            dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
                            dktheta2[ss1,ss2] = 0
                            dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
                            dkYdvar[ss1,ss2] = 0
                        elif i==0 and j==1:
                            dktheta1[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]), Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]))
                            dkUdvar[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vyu*kuyp(rdist[ss1,ss2]), Vyu*kuyn(rdist[ss1,ss2]))
                            dktheta2[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]), Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]))
                            dkYdvar[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy*kuyn(rdist[ss1,ss2]))
                        elif i==1 and j==1:
                            dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
                            dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) + dk3theta2(np.abs(rdist[ss1,ss2])))
                            dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])))
                            dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])))
                        else:
                            dktheta1[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]), Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]))
                            dkUdvar[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vyu*kyup(rdist[ss1,ss2]), Vyu*kyun(rdist[ss1,ss2]))
                            dktheta2[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]), Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]))
                            dkYdvar[ss1,ss2] = np.where(rdist[ss1,ss2]>0, Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))

        self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
        self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy
        self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
        self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly
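For orientation, the input layout the kernel expects: one time column plus an index column, with index-0 rows for the driving process U and index-1 rows for the output Y, grouped by index. A hedged sketch (data invented; assumes the kernel is reachable as `GPy.kern.ODE_UY` via the import added above):

    import numpy as np
    import GPy

    tU = np.linspace(0, 5, 20)[:, None]     # times where U is observed
    tY = np.linspace(0, 5, 15)[:, None]     # times where Y is observed

    # last column is the output index: 0 -> U, 1 -> Y
    X = np.vstack([np.hstack([tU, np.zeros_like(tU)]),
                   np.hstack([tY, np.ones_like(tY)])])

    k = GPy.kern.ODE_UY(input_dim=2)
    K = k.K(X)   # joint covariance over the stacked [U; Y] observations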
@@ -23,7 +23,7 @@ class Add(CombinationKernel):
        If a list of parts (of this kernel!) `which_parts` is given, only
        the parts of the list are taken to compute the covariance.
        """
        assert X.shape[1] == self.input_dim
        assert X.shape[1] > max(np.r_[self.active_dims])
        if which_parts is None:
            which_parts = self.parts
        elif not isinstance(which_parts, (list, tuple)):

@@ -33,7 +33,7 @@ class Add(CombinationKernel):

    @Cache_this(limit=2, force_kwargs=['which_parts'])
    def Kdiag(self, X, which_parts=None):
        assert X.shape[1] == self.input_dim
        assert X.shape[1] > max(np.r_[self.active_dims])
        if which_parts is None:
            which_parts = self.parts
        elif not isinstance(which_parts, (list, tuple)):

@@ -58,7 +58,12 @@ class Add(CombinationKernel):
        :type X2: np.ndarray (num_inducing x input_dim)"""

        target = np.zeros(X.shape)
        [target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts]
        [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        target = np.zeros(X.shape)
        [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
        return target

    def psi0(self, Z, variational_posterior):

@@ -131,7 +136,7 @@ class Add(CombinationKernel):
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
                else:
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
            target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
            target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
        return target

    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):

@@ -151,8 +156,8 @@ class Add(CombinationKernel):
                else:
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
            a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
            target_mu[:, p1.active_dims] += a
            target_mu += a
            target_S[:, p1.active_dims] += b
            target_S += b
        return target_mu, target_S

    def _getstate(self):

@@ -165,4 +170,11 @@ class Add(CombinationKernel):
    def _setstate(self, state):
        super(Add, self)._setstate(state)

    def add(self, other, name='sum'):
        if isinstance(other, Add):
            other_params = other._parameters_[:]
            for p in other_params:
                other.remove_parameter(p)
            self.add_parameters(*other_params)
        else: self.add_parameter(other)
        return self
@@ -8,7 +8,7 @@ import itertools

def index_to_slices(index):
    """
    take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.

    e.g.
    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
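(The doctest is truncated by the hunk; completing it from the description above, the expected continuation would be along these lines — inferred, not copied from the source:)

    >>> index_to_slices(index)
    [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]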
@@ -39,73 +39,102 @@ class IndependentOutputs(CombinationKernel):

    The index of the functions is given by the last column in the input X
    the rest of the columns of X are passed to the underlying kernel for computation (in blocks).

    Kern is wrapped with a slicer metaclass
    :param kernels: either a kernel, or a list of kernels to work with. If it is a list of kernels,
        the indices in the index_dim index the kernels you gave!
    """
    def __init__(self, kern, index_dim=-1, name='independ'):
    def __init__(self, kernels, index_dim=-1, name='independ'):
        assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indices"
        super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name)
        if not isinstance(kernels, list):
            self.single_kern = True
            self.kern = kernels
            kernels = [kernels]
        else:
            self.single_kern = False
            self.kern = kernels
        super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
        self.index_dim = index_dim
        self.kern = kern
        self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
        #self.add_parameters(self.kern)

    def K(self,X ,X2=None):
        slices = index_to_slices(X[:,self.index_dim])
        if X2 is None:
            target = np.zeros((X.shape[0], X.shape[0]))
            [[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
            [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
        else:
            slices2 = index_to_slices(X2[:,self.index_dim])
            target = np.zeros((X.shape[0], X2.shape[0]))
            [[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
            [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
        return target

    def Kdiag(self,X):
        slices = index_to_slices(X[:,self.index_dim])
        target = np.zeros(X.shape[0])
        [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices]
        [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
        return target

    def update_gradients_full(self,dL_dK,X,X2=None):
        target = np.zeros(self.kern.size)
        def collate_grads(dL, X, X2):
            self.kern.update_gradients_full(dL,X,X2)
            target[:] += self.kern.gradient
        slices = index_to_slices(X[:,self.index_dim])
        if self.single_kern: target = np.zeros(self.kern.size)
        else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
        def collate_grads(kern, i, dL, X, X2):
            kern.update_gradients_full(dL,X,X2)
            if self.single_kern: target[:] += kern.gradient
            else: target[i][:] += kern.gradient
        if X2 is None:
            [[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
            [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
        else:
            slices2 = index_to_slices(X2[:,self.index_dim])
            [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
            [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
        self.kern.gradient = target
        if self.single_kern: kern.gradient = target
        else: [kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]

    def gradients_X(self,dL_dK, X, X2=None):
        target = np.zeros(X.shape)
        slices = index_to_slices(X[:,self.index_dim])
        if X2 is None:
            [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
            # TODO: make use of index_to_slices
            values = np.unique(X[:,self.index_dim])
            slices = [X[:,self.index_dim]==i for i in values]
            [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
                for kern, s in zip(self.kerns, slices)]
        else:
            slices2 = index_to_slices(X2[:,self.index_dim])
            [[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
            # TODO: make work with index_to_slices
            values = np.unique(X[:,self.index_dim])
            slices = [X[:,self.index_dim]==i for i in values]
            slices2 = [X2[:,self.index_dim]==i for i in values]
            [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
                for kern, s, s2 in zip(self.kerns, slices, slices2)]
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        slices = index_to_slices(X[:,self.index_dim])
        target = np.zeros(X.shape)
        [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
        [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
        return target

    def update_gradients_diag(self, dL_dKdiag, X):
        target = np.zeros(self.kern.size)
        def collate_grads(dL, X):
            self.kern.update_gradients_diag(dL,X)
            target[:] += self.kern.gradient
        slices = index_to_slices(X[:,self.index_dim])
        [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
        self.kern.gradient = target
        if self.single_kern: target = np.zeros(self.kern.size)
        else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
        def collate_grads(kern, i, dL, X):
            kern.update_gradients_diag(dL,X)
            if self.single_kern: target[:] += kern.gradient
            else: target[i][:] += kern.gradient
        [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
        if self.single_kern: kern.gradient = target
        else: [kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
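The new list-of-kernels behaviour in use, as a hedged sketch (invented data; kernel constructors assumed from `GPy.kern`):

    import numpy as np
    import GPy

    # one kernel per output: index-0 rows use the RBF, index-1 rows the Matern32
    k = GPy.kern.IndependentOutputs([GPy.kern.RBF(1), GPy.kern.Matern32(1)])

    t = np.linspace(0, 1, 10)[:, None]
    X = np.vstack([np.hstack([t, np.zeros_like(t)]),
                   np.hstack([t, np.ones_like(t)])])
    K = k.K(X)   # block-diagonal: one block per output, zero cross-covariance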
class Hierarchical(Kern):
class Hierarchical(CombinationKernel):
    """
    A kernel which can represent a simple hierarchical model.

@@ -116,7 +145,7 @@ class Hierarchical(Kern):
    The index of the functions is given by additional columns in the input X.

    """
    def __init__(self, kerns, name='hierarchy'):
    def __init__(self, kern, name='hierarchy'):
        assert all([k.input_dim==kerns[0].input_dim for k in kerns])
        super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
        self.kerns = kerns
@@ -140,12 +140,7 @@ class Kern(Parameterized):
        """
        assert isinstance(other, Kern), "only kernels can be added to kernels..."
        from add import Add
        kernels = []
        if isinstance(self, Add): kernels.extend(self._parameters_)
        else: kernels.append(self)
        if isinstance(other, Add): kernels.extend(other._parameters_)
        else: kernels.append(other)
        return Add(kernels, name=name)
        return Add([self, other], name=name)

    def __mul__(self, other):
        """ Here we overload the '*' operator. See self.prod for more information"""
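The simplification means `k1 + k2` now always wraps exactly its two operands in a fresh `Add`; flattening of nested sums is left to the `Add.add` method introduced above. A quick illustration, with kernel names assumed from `GPy.kern`:

    import GPy

    k1 = GPy.kern.RBF(1)
    k2 = GPy.kern.Matern32(1)
    k = k1 + k2    # Kern.__add__ -> Add([k1, k2], name='sum')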
@@ -4,6 +4,7 @@ Created on 11 Mar 2014
@author: maxz
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np

class KernCallsViaSlicerMeta(ParametersChangedMeta):
    def __call__(self, *args, **kw):

@@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
        instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
        instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
        instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
        instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True)
        instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
        instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True)
        instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
        instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
        instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
        instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
        instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
        instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True)
        instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
        instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True)
        instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
        instance.parameters_changed()
        return instance

def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False):
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
    """
    This method wraps the functions in kernel to make sure all kernels always see their respective input dimension.
    The different switches are:

@@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
    """
    if derivative:
        if diag:
            def x_slice_wrapper(dL_dK, X):
            def x_slice_wrapper(dL_dKdiag, X):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced:
                    ret = np.zeros(X.shape)
                X = kern._slice_X(X) if not kern._sliced_X else X
                # if the return value is of shape X.shape, we need to make sure to return the right shape
                kern._sliced_X += 1
                try:
                    ret = operation(dL_dK, X)
                    if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
                    else: ret = operation(dL_dKdiag, X)
                except:
                    raise
                finally:

@@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
                return ret
        elif psi_stat:
            def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced:
                    ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
                Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
                kern._sliced_X += 1
                # if the return value is of shape X.shape, we need to make sure to return the right shape
                try:
                    ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                    if ret_X_not_sliced:
                        ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
                        r2 = ret[:2]
                        ret[0] = ret1
                        ret[1] = ret2
                        ret[0][:, kern.active_dims] = r2[0]
                        ret[1][:, kern.active_dims] = r2[1]
                        del r2
                    else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                except:
                    raise
                finally:

@@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
                return ret
        elif psi_stat_Z:
            def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced: ret = np.zeros(Z.shape)
                Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
                kern._sliced_X += 1
                try:
                    ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                    if ret_X_not_sliced:
                        ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                    else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                except:
                    raise
                finally:

@@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
                return ret
        else:
            def x_slice_wrapper(dL_dK, X, X2=None):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced:
                    ret = np.zeros(X.shape)
                X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
                kern._sliced_X += 1
                try:
                    ret = operation(dL_dK, X, X2)
                    if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
                    else: ret = operation(dL_dK, X, X2)
                except:
                    raise
                finally:
@@ -312,5 +312,4 @@ class Linear(Kern):
        return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!

    def input_sensitivity(self):
        if self.ARD: return self.variances
        else: return self.variances.repeat(self.input_dim)
        return np.ones(self.input_dim) * self.variances
@@ -51,15 +51,15 @@ class Prod(CombinationKernel):
    def gradients_X(self, dL_dK, X, X2=None):
        target = np.zeros(X.shape)
        for k1,k2 in itertools.combinations(self.parts, 2):
            target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
            target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
            target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
            target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        target = np.zeros(X.shape)
        for k1,k2 in itertools.combinations(self.parts, 2):
            target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
            target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
            target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
            target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
        return target
@@ -152,12 +152,7 @@ class Stationary(Kern):
        This term appears in derivatives.
        """
        dist = self._scaled_dist(X, X2).copy()
        if X2 is None:
            nondiag = util.diag.offdiag_view(dist)
            nondiag[:] = 1./nondiag
            return dist
        else:
            return 1./np.where(dist != 0., dist, np.inf)
        return 1./np.where(dist != 0., dist, np.inf)

    def gradients_X(self, dL_dK, X, X2=None):
        """
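The one-liner works because zero entries of the scaled distance are mapped to np.inf before taking the reciprocal, and 1/inf is 0., so coincident points (including the diagonal when X2 is None) simply drop out of the derivative. A quick check:

    import numpy as np

    dist = np.array([[0., 2.],
                     [2., 0.]])
    print 1./np.where(dist != 0., dist, np.inf)
    # [[ 0.   0.5]
    #  [ 0.5  0. ]]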
@@ -95,7 +95,7 @@ class Bernoulli(Likelihood):
        else:
            return np.nan

    def pdf_link(self, link_f, y, extra_data=None):
    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@@ -106,7 +106,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in Bernoulli
        :param Y_metadata: Y_metadata not used in Bernoulli
        :returns: likelihood evaluated for this point
        :rtype: float

@@ -118,7 +118,7 @@ class Bernoulli(Likelihood):
        objective = np.where(y, link_f, 1.-link_f)
        return np.exp(np.sum(np.log(objective)))

    def logpdf_link(self, link_f, y, extra_data=None):
    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log Likelihood function given link(f)

@@ -129,7 +129,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in Bernoulli
        :param Y_metadata: Y_metadata not used in Bernoulli
        :returns: log likelihood evaluated at points link(f)
        :rtype: float
        """

@@ -140,7 +140,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return np.sum(objective)

    def dlogpdf_dlink(self, link_f, y, extra_data=None):
    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the pdf at y, given link(f) w.r.t link(f)

@@ -151,7 +151,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in Bernoulli
        :param Y_metadata: Y_metadata not used in Bernoulli
        :returns: gradient of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """

@@ -162,7 +162,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return grad

    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
        i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)

@@ -175,7 +175,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in Bernoulli
        :param Y_metadata: Y_metadata not used in Bernoulli
        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
        :rtype: Nx1 array

@@ -190,7 +190,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return d2logpdf_dlink2

    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@@ -201,7 +201,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in Bernoulli
        :param Y_metadata: Y_metadata not used in Bernoulli
        :returns: third derivative of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
@ -18,13 +18,12 @@ class Exponential(Likelihood):
|
||||||
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
||||||
$$
|
$$
|
||||||
"""
|
"""
|
||||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
def __init__(self,gp_link=None):
|
||||||
-        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
+        if gp_link is None:
+            gp_link = link_functions.Log()
+        super(Exponential, self).__init__(gp_link, 'ExpLikelihood')

-    def _preprocess_values(self,Y):
-        return Y
-
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)

@@ -35,16 +34,15 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         log_objective = link_f*np.exp(-y*link_f)
         return np.exp(np.sum(np.log(log_objective)))
-        #return np.exp(np.sum(-y/link_f - np.log(link_f) ))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)

@@ -55,17 +53,16 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: likelihood evaluated for this point
         :rtype: float

         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         log_objective = np.log(link_f) - y*link_f
-        #logpdf_link = np.sum(-np.log(link_f) - y/link_f)
         return np.sum(log_objective)

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@@ -76,7 +73,7 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array

@@ -86,7 +83,7 @@ class Exponential(Likelihood):
         #grad = y/(link_f**2) - 1./link_f
         return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)

@@ -99,7 +96,7 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array

@@ -112,7 +109,7 @@ class Exponential(Likelihood):
         #hess = -2*y/(link_f**3) + 1/(link_f**2)
         return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@@ -123,7 +120,7 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """

@@ -132,18 +129,6 @@ class Exponential(Likelihood):
         #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
         return d3lik_dlink3

-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)**2
-
     def samples(self, gp):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
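The exponential likelihood above is in the rate parameterisation, lambda = link(f), so logpdf_link reduces to log(lambda) - y*lambda. A minimal sanity check of that closed form against scipy (the values below are invented for illustration; scipy's expon takes scale = 1/lambda):

import numpy as np
from scipy import stats

rate = np.array([[0.5], [2.0], [3.0]])   # link_f = lambda, Nx1
y = np.array([[1.0], [0.2], [0.7]])      # observations, Nx1

# same expression as logpdf_link above
log_objective = np.log(rate) - y*rate
assert np.allclose(log_objective, stats.expon.logpdf(y, scale=1./rate))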
@@ -1,11 +1,12 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
+# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


 import numpy as np
 from scipy import stats,special
 import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..core.parameterization import Param
 import link_functions
 from likelihood import Likelihood


@@ -18,14 +19,16 @@ class Gamma(Likelihood):
     \\alpha_{i} = \\beta y_{i}

     """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
-        self.beta = beta
-        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
+    def __init__(self,gp_link=None,beta=1.):
+        if gp_link is None:
+            gp_link = link_functions.Log()
+        super(Gamma, self).__init__(gp_link, 'Gamma')

-    def _preprocess_values(self,Y):
-        return Y
+        self.beta = Param('beta', beta)
+        self.add_parameter(self.beta)
+        self.beta.fix()#TODO: gradients!

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)

@@ -37,7 +40,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """

@@ -47,7 +50,7 @@ class Gamma(Likelihood):
         objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
         return np.exp(np.sum(np.log(objective)))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)

@@ -59,7 +62,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float

@@ -71,7 +74,7 @@ class Gamma(Likelihood):
         log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
         return np.sum(log_objective)

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@@ -83,7 +86,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array

@@ -94,7 +97,7 @@ class Gamma(Likelihood):
         #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
         return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)

@@ -108,7 +111,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array

@@ -122,7 +125,7 @@ class Gamma(Likelihood):
         #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
         return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@@ -134,22 +137,10 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
         return d3lik_dlink3

-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)/self.beta
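The Gamma likelihood couples the shape to the link through alpha = beta*link(f), so the mean alpha/beta equals link(f). A hedged check of logpdf_link against scipy's shape/scale parameterisation (numbers invented; special.gammaln stands in for np.log(special.gamma(...)), which it equals here but with better numerical stability):

import numpy as np
from scipy import stats, special

beta = 1.5
link_f = np.array([[0.8], [2.0]])   # the mean of y under this parameterisation
y = np.array([[1.1], [1.9]])
alpha = beta*link_f

log_objective = alpha*np.log(beta) - special.gammaln(alpha) \
                + (alpha - 1)*np.log(y) - beta*y
assert np.allclose(log_objective, stats.gamma.logpdf(y, a=alpha, scale=1./beta))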
@@ -35,12 +35,7 @@ class Gaussian(Likelihood):
         if gp_link is None:
             gp_link = link_functions.Identity()

-        if isinstance(gp_link, link_functions.Identity):
-            analytical_variance = True
-            analytical_mean = True
-        else:
-            analytical_variance = False
-            analytical_mean = False
+        assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"

         super(Gaussian, self).__init__(gp_link, name=name)

@@ -51,14 +46,12 @@ class Gaussian(Likelihood):
         self.log_concave = True

     def betaY(self,Y,Y_metadata=None):
+        #TODO: ~Ricardo this does not live here
         return Y/self.gaussian_variance(Y_metadata)

     def gaussian_variance(self, Y_metadata=None):
         return self.variance

-    def covariance_matrix(self, Y, Y_metadata=None):
-        return np.eye(Y.shape[0]) * self.variance
-
     def update_gradients(self, grad):
         self.variance.gradient = grad

@@ -99,10 +92,10 @@ class Gaussian(Likelihood):
     def predictive_variance(self, mu, sigma, predictive_mean=None):
         return self.variance + sigma**2

-    def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
-        return [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles]
+    def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
+        return [stats.norm.ppf(q/100.)*np.sqrt(var + self.variance) + mu for q in quantiles]
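The predictive_quantiles change is the substantive fix in this hunk: quantiles are for the observation y* = f* + eps, whose variance is the latent variance plus the noise variance, so self.variance must enter under the square root. A small sketch of the effect (values invented for illustration):

import numpy as np
from scipy import stats

mu = np.array([[0.0]])      # latent posterior mean at one input
var = np.array([[0.04]])    # latent posterior variance
noise_var = 0.01            # stands in for self.variance

q_old = stats.norm.ppf(0.975)*np.sqrt(var) + mu              # latent-only band
q_new = stats.norm.ppf(0.975)*np.sqrt(var + noise_var) + mu  # observation band
assert q_new > q_old   # the corrected band is wider, as it must be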
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)

@@ -113,14 +106,14 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         #Assumes no covariance, exp, sum, log for numerical stability
         return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log likelihood function given link(f)

@@ -131,7 +124,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: log likelihood evaluated for this point
         :rtype: float
         """

@@ -141,7 +134,7 @@ class Gaussian(Likelihood):
         return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the pdf at y, given link(f) w.r.t link(f)

@@ -152,7 +145,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: gradient of log likelihood evaluated at points link(f)
         :rtype: Nx1 array
         """

@@ -161,7 +154,7 @@ class Gaussian(Likelihood):
         grad = s2_i*y - s2_i*link_f
         return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link_f, w.r.t link_f.
         i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)

@@ -175,7 +168,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
         :rtype: Nx1 array

@@ -188,7 +181,7 @@ class Gaussian(Likelihood):
         hess = -(1.0/self.variance)*np.ones((N, 1))
         return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@@ -199,7 +192,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: third derivative of log likelihood evaluated at points link(f)
         :rtype: Nx1 array
         """

@@ -208,7 +201,7 @@ class Gaussian(Likelihood):
         d3logpdf_dlink3 = np.zeros((N,1))
         return d3logpdf_dlink3

-    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)

@@ -219,7 +212,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
         :rtype: float
         """

@@ -230,7 +223,7 @@ class Gaussian(Likelihood):
         dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
         return np.sum(dlik_dsigma) # Sure about this sum?

-    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
         """
         Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)

@@ -241,7 +234,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
         :rtype: Nx1 array
         """

@@ -250,7 +243,7 @@ class Gaussian(Likelihood):
         dlik_grad_dsigma = -s_4*y + s_4*link_f
         return dlik_grad_dsigma

-    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)

@@ -261,7 +254,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
         :rtype: Nx1 array
         """

@@ -271,16 +264,16 @@ class Gaussian(Likelihood):
         d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
         return d2logpdf_dlink2_dvar

-    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
         return np.asarray([[dlogpdf_dvar]])

-    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
         return dlogpdf_dlink_dvar

-    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
         return d2logpdf_dlink2_dvar

     def _mean(self, gp):
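dlogpdf_link_dvar above implements d/dvar log N(y|f, var) = -0.5/var + 0.5*(y-f)^2/var^2, summed over data points. A finite-difference sketch confirming that form (numbers invented for illustration):

import numpy as np
from scipy import stats

f = np.array([[0.1], [0.4]])
y = np.array([[0.3], [0.2]])
var, eps = 0.5, 1e-6

def loglik(v):
    # Gaussian log likelihood as a function of the noise variance
    return np.sum(stats.norm.logpdf(y, f, np.sqrt(v)))

analytic = np.sum(-0.5/var + 0.5*(y - f)**2/var**2)
numeric = (loglik(var + eps) - loglik(var - eps))/(2*eps)
assert np.isclose(analytic, numeric, rtol=1e-4)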
@@ -153,6 +153,10 @@ class Likelihood(Parameterized):

         return mean

+    def _conditional_mean(self, f):
+        """Quadrature calculation of the conditional mean: E(Y_star|f)"""
+        raise NotImplementedError, "implement this function to make predictions"
+
     def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
         """
         Numerical approximation to the predictive variance: V(Y_star)

@@ -204,31 +208,31 @@ class Likelihood(Parameterized):
         # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
         return exp_var + var_exp

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
         raise NotImplementedError

-    def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
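The comment in predictive_variance is the law of total variance, V(Y*) = E[V(Y*|f)] + V(E[Y*|f]). A sketch of how such a numerical approximation can be computed with Gauss-Hermite quadrature, using a Poisson likelihood with exp link purely as a stand-in (this is not code from the patch):

import numpy as np

mu, var = 0.2, 0.3                    # posterior over f at one test point
x, w = np.polynomial.hermite_e.hermegauss(50)
f = mu + np.sqrt(var)*x               # quadrature nodes under N(mu, var)
w = w/np.sqrt(2*np.pi)                # normalise the probabilists' weights

cond_mean = np.exp(f)                 # for Poisson, E[y|f] = V[y|f] = exp(f)
exp_var = np.sum(w*cond_mean)                                # E[ V(Y*|f) ]
var_exp = np.sum(w*cond_mean**2) - np.sum(w*cond_mean)**2    # V( E[Y*|f] )
predictive_variance = exp_var + var_exp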
-    def pdf(self, f, y, extra_data=None):
+    def pdf(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the likelihood (pdf) using it

@@ -239,14 +243,14 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         link_f = self.gp_link.transf(f)
-        return self.pdf_link(link_f, y, extra_data=extra_data)
+        return self.pdf_link(link_f, y, Y_metadata=Y_metadata)

-    def logpdf(self, f, y, extra_data=None):
+    def logpdf(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the log likelihood (log pdf) using it

@@ -257,14 +261,14 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: log likelihood evaluated for this point
         :rtype: float
         """
         link_f = self.gp_link.transf(f)
-        return self.logpdf_link(link_f, y, extra_data=extra_data)
+        return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)

-    def dlogpdf_df(self, f, y, extra_data=None):
+    def dlogpdf_df(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the derivative of log likelihood using it
         Uses the Faa di Bruno's formula for the chain rule

@@ -276,16 +280,16 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: derivative of log likelihood evaluated for this point
         :rtype: 1xN array
         """
         link_f = self.gp_link.transf(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
         dlink_df = self.gp_link.dtransf_df(f)
         return chain_1(dlogpdf_dlink, dlink_df)

-    def d2logpdf_df2(self, f, y, extra_data=None):
+    def d2logpdf_df2(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the second derivative of log likelihood using it
         Uses the Faa di Bruno's formula for the chain rule

@@ -297,18 +301,18 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: second derivative of log likelihood evaluated for this point (diagonal only)
         :rtype: 1xN array
         """
         link_f = self.gp_link.transf(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
+        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
         dlink_df = self.gp_link.dtransf_df(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
         d2link_df2 = self.gp_link.d2transf_df2(f)
         return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)

-    def d3logpdf_df3(self, f, y, extra_data=None):
+    def d3logpdf_df3(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the third derivative of log likelihood using it
         Uses the Faa di Bruno's formula for the chain rule

@@ -320,44 +324,44 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: third derivative of log likelihood evaluated for this point
         :rtype: float
         """
         link_f = self.gp_link.transf(f)
-        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
+        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
         dlink_df = self.gp_link.dtransf_df(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
+        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
         d2link_df2 = self.gp_link.d2transf_df2(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
         d3link_df3 = self.gp_link.d3transf_df3(f)
         return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
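dlogpdf_df and d2logpdf_df2 push derivatives through the link with the first two orders of Faa di Bruno's formula. A sketch of what the chain_1 and chain_2 helpers are assumed to compute (their exact GPy implementations are not shown in this diff), checked on f = log, g = exp so that the composition is the identity:

import numpy as np

def chain_1(df_dg, dg_dx):
    # d/dx f(g(x)) = f'(g) g'(x)
    return df_dg*dg_dx

def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
    # d2/dx2 f(g(x)) = f''(g) g'(x)^2 + f'(g) g''(x)
    return d2f_dg2*dg_dx**2 + df_dg*d2g_dx2

x = np.array([0.3])
g = dg = d2g = np.exp(x)                       # g = exp and all its derivatives
assert np.allclose(chain_1(1./g, dg), 1.0)     # d/dx log(exp(x)) = 1
assert np.allclose(chain_2(-1./g**2, dg, 1./g, d2g), 0.0)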
-    def dlogpdf_dtheta(self, f, y, extra_data=None):
+    def dlogpdf_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
             link_f = self.gp_link.transf(f)
-            return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
+            return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
         else:
             #Is no parameters so return an empty array for its derivatives
             return np.zeros([1, 0])

-    def dlogpdf_df_dtheta(self, f, y, extra_data=None):
+    def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
             link_f = self.gp_link.transf(f)
             dlink_df = self.gp_link.dtransf_df(f)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
             return chain_1(dlogpdf_dlink_dtheta, dlink_df)
         else:
             #Is no parameters so return an empty array for its derivatives
             return np.zeros([f.shape[0], 0])

-    def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
+    def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """

@@ -365,17 +369,17 @@ class Likelihood(Parameterized):
             link_f = self.gp_link.transf(f)
             dlink_df = self.gp_link.dtransf_df(f)
             d2link_df2 = self.gp_link.d2transf_df2(f)
-            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
+            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
             return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
         else:
             #Is no parameters so return an empty array for its derivatives
             return np.zeros([f.shape[0], 0])

-    def _laplace_gradients(self, f, y, extra_data=None):
-        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
-        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
-        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
+    def _laplace_gradients(self, f, y, Y_metadata=None):
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
+        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
+        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)

         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
         # ensure we have gradients for every parameter we want to optimize

@@ -390,7 +394,7 @@ class Likelihood(Parameterized):

     def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
         """
-        Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
+        Compute mean, variance of the predictive distibution.

         :param mu: mean of the latent variable, f, of posterior
         :param var: variance of the latent variable, f, of posterior

@@ -407,10 +411,7 @@ class Likelihood(Parameterized):
         #compute the quantiles by sampling!!!
         N_samp = 1000
         s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
-        #ss_f = s.flatten()
-        #ss_y = self.samples(ss_f, Y_metadata)
         ss_y = self.samples(s, Y_metadata)
-        #ss_y = ss_y.reshape(mu.shape[0], N_samp)

         return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
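Because a general likelihood has no closed-form quantiles, the code above falls back to Monte Carlo: draw latent samples per row, push them through samples(), and take per-row percentiles. The same scheme in isolation (a Poisson observation model is assumed below only to make the sketch runnable; it stands in for self.samples):

import numpy as np

mu = np.zeros((5, 1))
var = 0.1*np.ones((5, 1))
quantiles = (2.5, 97.5)

N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu  # latent draws per row
ss_y = np.random.poisson(np.exp(s))      # stand-in for self.samples(s, Y_metadata)
bands = [np.percentile(ss_y, q, axis=1)[:, None] for q in quantiles]
# each element of bands is Nx1, matching the list returned above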
@@ -11,7 +11,7 @@ import itertools

 class MixedNoise(Likelihood):
     def __init__(self, likelihoods_list, name='mixed_noise'):
+        #NOTE at the moment this likelihood only works for using a list of gaussians
         super(Likelihood, self).__init__(name=name)

         self.add_parameters(*likelihoods_list)

@@ -24,10 +24,11 @@ class MixedNoise(Likelihood):
         variance = np.zeros(ind.size)
         for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
             variance[ind==j] = lik.variance
-        return variance[:,None]
+        return variance

     def betaY(self,Y,Y_metadata):
-        return Y/self.gaussian_variance(Y_metadata=Y_metadata)
+        #TODO not here.
+        return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None]

     def update_gradients(self, gradients):
         self.gradient = gradients

@@ -38,34 +39,27 @@ class MixedNoise(Likelihood):
         return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])

     def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
-        if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
-            ind = Y_metadata['output_index'].flatten()
-            _variance = np.array([self.likelihoods_list[j].variance for j in ind ])
-            if full_cov:
-                var += np.eye(var.shape[0])*_variance
-            else:
-                var += _variance
-            return mu, var
-        else:
-            raise NotImplementedError
+        ind = Y_metadata['output_index'].flatten()
+        _variance = np.array([self.likelihoods_list[j].variance for j in ind ])
+        if full_cov:
+            var += np.eye(var.shape[0])*_variance
+        else:
+            var += _variance
+        return mu, var

-    def predictive_variance(self, mu, sigma, **other_shit):
-        if isinstance(noise_index,int):
-            _variance = self.variance[noise_index]
-        else:
-            _variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
+    def predictive_variance(self, mu, sigma, Y_metadata):
+        _variance = self.gaussian_variance(Y_metadata)
         return _variance + sigma**2

-    def covariance_matrix(self, Y, Y_metadata):
-        #assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
-        #ind = Y_metadata['output_index'].flatten()
-        #variance = np.zeros(Y.shape[0])
-        #for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
-        #    variance[ind==j] = lik.variance
-        #return np.diag(variance)
-        return np.diag(self.gaussian_variance(Y_metadata).flatten())
+    def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        Q = np.zeros( (mu.size,len(quantiles)) )
+        for j in outputs:
+            q = self.likelihoods_list[j].predictive_quantiles(mu[ind==j,:],
+                var[ind==j,:],quantiles,Y_metadata=None)
+            Q[ind==j,:] = np.hstack(q)
+        return [q[:,None] for q in Q.T]

     def samples(self, gp, Y_metadata):
         """

@@ -84,4 +78,3 @@ class MixedNoise(Likelihood):
         _ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
         Ysim[flt,:] = _ysim.reshape(n1,N2)
         return Ysim
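The mixed-noise methods above all key on Y_metadata['output_index'], which says which single-output likelihood each row of Y belongs to. The diagonal branch of predictive_values, reduced to plain numpy (a two-output setup invented for illustration):

import numpy as np

ind = np.array([0, 0, 1, 1, 0])    # Y_metadata['output_index'].flatten()
variances = [0.1, 0.5]             # noise variance of each Gaussian in the list

_variance = np.array([variances[j] for j in ind])
var = 0.2*np.ones(5)               # latent predictive variances per row
var += _variance                   # full_cov=False branch above
# rows of output 0 get 0.2+0.1, rows of output 1 get 0.2+0.5
assert np.allclose(var, [0.3, 0.3, 0.7, 0.7, 0.3])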
@@ -25,10 +25,13 @@ class Poisson(Likelihood):

         super(Poisson, self).__init__(gp_link, name='Poisson')

-    def _preprocess_values(self,Y):
-        return Y
+    def _conditional_mean(self, f):
+        """
+        the expected value of y given a value of f
+        """
+        return self.gp_link.transf(gp)

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)

@@ -39,14 +42,14 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return np.prod(stats.poisson.pmf(y,link_f))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)

@@ -57,7 +60,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float

@@ -65,7 +68,7 @@ class Poisson(Likelihood):
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@@ -76,7 +79,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array

@@ -84,7 +87,7 @@ class Poisson(Likelihood):
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return y/link_f - 1

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)

@@ -97,7 +100,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array

@@ -112,7 +115,7 @@ class Poisson(Likelihood):
         #transf = self.gp_link.transf(gp)
         #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@@ -123,7 +126,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
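Poisson's logpdf_link is the sum of -lambda + y*log(lambda) - log(y!), with the gammaln term supplying log(y!). A quick check against scipy (numbers invented for illustration):

import numpy as np
from scipy import stats, special

link_f = np.array([[0.5], [2.0]])   # rate lambda = link(f)
y = np.array([[0.], [3.]])

lp = np.sum(-link_f + y*np.log(link_f) - special.gammaln(y + 1))
assert np.isclose(lp, np.sum(stats.poisson.logpmf(y, link_f)))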
@ -46,7 +46,7 @@ class StudentT(Likelihood):
|
||||||
self.sigma2.gradient = grads[0]
|
self.sigma2.gradient = grads[0]
|
||||||
self.v.gradient = grads[1]
|
self.v.gradient = grads[1]
|
||||||
|
|
||||||
def pdf_link(self, link_f, y, extra_data=None):
|
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Likelihood function given link(f)
|
Likelihood function given link(f)
|
||||||
|
|
||||||
|
|
@ -57,7 +57,7 @@ class StudentT(Likelihood):
|
||||||
:type link_f: Nx1 array
|
:type link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param extra_data: extra_data which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: likelihood evaluated for this point
|
:returns: likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
|
|
@ -70,7 +70,7 @@ class StudentT(Likelihood):
|
||||||
)
|
)
|
||||||
return np.prod(objective)
|
return np.prod(objective)
|
||||||
|
|
||||||
def logpdf_link(self, link_f, y, extra_data=None):
|
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Log Likelihood Function given link(f)
|
Log Likelihood Function given link(f)
|
||||||
|
|
||||||
|
|
@ -81,7 +81,7 @@ class StudentT(Likelihood):
|
||||||
:type link_f: Nx1 array
|
:type link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param extra_data: extra_data which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: likelihood evaluated for this point
|
:returns: likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
|
|
||||||
|
|
@ -99,7 +99,7 @@ class StudentT(Likelihood):
|
||||||
)
|
)
|
||||||
return np.sum(objective)
|
return np.sum(objective)
|
||||||
|
|
||||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
|
@ -110,7 +110,7 @@ class StudentT(Likelihood):
|
||||||
:type link_f: Nx1 array
|
:type link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param extra_data: extra_data which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: gradient of likelihood evaluated at points
|
:returns: gradient of likelihood evaluated at points
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
|
@ -120,7 +120,7 @@ class StudentT(Likelihood):
|
||||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
return grad
|
return grad
|
||||||
|
|
||||||
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Hessian at y, given link(f), w.r.t link(f)
|
Hessian at y, given link(f), w.r.t link(f)
|
||||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||||
|
|
@ -133,7 +133,7 @@ class StudentT(Likelihood):
|
||||||
:type link_f: Nx1 array
|
:type link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param extra_data: extra_data which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
|
@ -146,7 +146,7 @@ class StudentT(Likelihood):
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||||
return hess
|
return hess
|
||||||
|
|
||||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)


@@ -157,7 +157,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
@@ -168,7 +168,7 @@ class StudentT(Likelihood):
         )
         return d3lik_dlink3

-    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)

@@ -179,7 +179,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
         :rtype: float
         """
@@ -188,7 +188,7 @@ class StudentT(Likelihood):
         dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
         return np.sum(dlogpdf_dvar)

-    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
         """
         Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)

@@ -199,7 +199,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
         :rtype: Nx1 array
         """
@@ -208,7 +208,7 @@ class StudentT(Likelihood):
         dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
         return dlogpdf_dlink_dvar

-    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)

@@ -219,7 +219,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
         :rtype: Nx1 array
         """
@@ -230,25 +230,22 @@ class StudentT(Likelihood):
         )
         return d2logpdf_dlink2_dvar

-    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
         dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
         return np.hstack((dlogpdf_dvar, dlogpdf_dv))

-    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
         dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
         return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))

-    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
         d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
         return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

     def predictive_mean(self, mu, sigma, Y_metadata=None):
-        """
-        Compute mean of the prediction
-        """
         return self.gp_link.transf(mu) # only true if the link is monotonic, which it is.

     def predictive_variance(self, mu, variance, predictive_mean=None, Y_metadata=None):
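A quick way to sanity-check the variance gradient kept by this hunk is a central finite difference on the Student-t log-density. The following is a standalone numpy/scipy sketch (not the GPy test harness); it assumes the standard Student-t parameterisation with scale sigma2 and residual e = y - f, which is what the analytic expression above encodes.

import numpy as np
from scipy.special import gammaln

def logpdf(e, v, sigma2):
    # log Student-t density in the residual e = y - f
    return (gammaln((v + 1) * 0.5) - gammaln(v * 0.5)
            - 0.5 * np.log(sigma2 * v * np.pi)
            - 0.5 * (v + 1) * np.log(1. + e**2 / (v * sigma2)))

def dlogpdf_dvar(e, v, sigma2):
    # the analytic gradient from the diff: v*(e**2 - sigma2) / (2*sigma2*(sigma2*v + e**2))
    return v * (e**2 - sigma2) / (2. * sigma2 * (sigma2 * v + e**2))

e, v, sigma2, h = 0.7, 5.0, 0.3, 1e-6
numeric = (logpdf(e, v, sigma2 + h) - logpdf(e, v, sigma2 - h)) / (2 * h)
print(numeric, dlogpdf_dvar(e, v, sigma2))  # the two should agree to ~1e-8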
@@ -3,6 +3,7 @@

 import numpy as np
 from ..core.mapping import Mapping
+from ..core.parameterization import Param

 class Linear(Mapping):
     """
@@ -16,38 +17,22 @@ class Linear(Mapping):
     :type X: ndarray
     :param output_dim: dimension of output.
     :type output_dim: int

     """

-    def __init__(self, input_dim=1, output_dim=1):
-        self.name = 'linear'
-        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
-        self.num_params = self.output_dim*(self.input_dim + 1)
-        self.W = np.array((self.input_dim, self.output_dim))
-        self.bias = np.array(self.output_dim)
-        self.randomize()
-
-    def _get_param_names(self):
-        return sum([['W_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.input_dim)], []) + ['bias_%i' % d for d in range(self.output_dim)]
-
-    def _get_params(self):
-        return np.hstack((self.W.flatten(), self.bias))
-
-    def _set_params(self, x):
-        self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
-        self.bias = x[self.input_dim*self.output_dim:].copy()
-
-    def randomize(self):
-        self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
-        self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
+    def __init__(self, input_dim=1, output_dim=1, name='linear_map'):
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
+        self.W = Param('W', np.array((self.input_dim, self.output_dim)))
+        self.bias = Param('bias', np.array(self.output_dim))
+        self.add_parameters(self.W, self.bias)

     def f(self, X):
         return np.dot(X,self.W) + self.bias

     def df_dtheta(self, dL_df, X):
-        self._df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
-        self._df_dbias = (dL_df.sum(0))
-        return np.hstack((self._df_dW.flatten(), self._df_dbias))
+        df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
+        df_dbias = (dL_df.sum(0))
+        return np.hstack((df_dW.flatten(), df_dbias))

-    def df_dX(self, dL_df, X):
+    def dL_dX(self, dL_df, X):
         return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)
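The gradient expressions this refactor keeps are easy to verify in isolation. Below is a minimal numpy sketch (standalone code, not the GPy Mapping API) showing that df_dtheta and dL_dX above are the usual chain-rule terms X.T.dot(dL_df) and dL_df.dot(W.T) for a linear map; all names here are local to the sketch.

import numpy as np

rng = np.random.RandomState(0)
N, Din, Dout = 5, 3, 2
X = rng.randn(N, Din)
W = rng.randn(Din, Dout) / np.sqrt(Din + 1)
bias = rng.randn(Dout) / np.sqrt(Din + 1)

f = X.dot(W) + bias                                     # Linear.f
dL_df = rng.randn(N, Dout)                              # upstream gradient

dL_dW = (dL_df[:, :, None] * X[:, None, :]).sum(0).T    # df_dtheta, W part
dL_dbias = dL_df.sum(0)                                 # df_dtheta, bias part
dL_dX = (dL_df[:, None, :] * W[None, :, :]).sum(2)      # dL_dX

assert np.allclose(dL_dW, X.T.dot(dL_df))
assert np.allclose(dL_dX, dL_df.dot(W.T))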
@@ -75,15 +75,19 @@ class BayesianGPLVM(SparseGP):
         # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)

-    def plot_latent(self, plot_inducing=True, *args, **kwargs):
-        """
-        See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent
-        """
+    def plot_latent(self, labels=None, which_indices=None,
+                    resolution=50, ax=None, marker='o', s=40,
+                    fignum=None, plot_inducing=True, legend=True,
+                    plot_limits=None,
+                    aspect='auto', updates=False, **kwargs):
         import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import dim_reduction_plots

-        return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
+        return dim_reduction_plots.plot_latent(self, labels, which_indices,
+                                               resolution, ax, marker, s,
+                                               fignum, plot_inducing, legend,
+                                               plot_limits, aspect, updates, **kwargs)

     def do_test_latents(self, Y):
         """
@@ -36,7 +36,7 @@ class GPCoregionalizedRegression(GP):

         #Kernel
         if kernel is None:
-            kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name)
+            kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name)

         #Likelihood
         likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)
@@ -20,14 +20,14 @@ class GPRegression(GP):

     """

-    def __init__(self, X, Y, kernel=None):
+    def __init__(self, X, Y, kernel=None, Y_metadata=None):

         if kernel is None:
             kernel = kern.RBF(X.shape[1])

         likelihood = likelihoods.Gaussian()

-        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression')
+        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)

     def _getstate(self):
         return GP._getstate(self)
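A minimal usage sketch of the widened constructor (using only the signature shown in the hunk; Y_metadata defaults to None and is simply forwarded to the base GP class):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05

# a Gaussian likelihood ignores Y_metadata, so the default None is fine here
m = GPy.models.GPRegression(X, Y, Y_metadata=None)
m.optimize()
print(m)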
@@ -67,12 +67,22 @@ class GPLVM(GP):
         assert self.likelihood.Y.shape[1] == 2
         pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
         Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
-        mu, var, upper, lower = self.predict(Xnew)
+        mu, _ = self.predict(Xnew)
         pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)

-    def plot_latent(self, *args, **kwargs):
+    def plot_latent(self, labels=None, which_indices=None,
+                    resolution=50, ax=None, marker='o', s=40,
+                    fignum=None, legend=True,
+                    plot_limits=None,
+                    aspect='auto', updates=False, **kwargs):
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import dim_reduction_plots

-        return dim_reduction_plots.plot_latent(self, *args, **kwargs)
+        return dim_reduction_plots.plot_latent(self, labels, which_indices,
+                                               resolution, ax, marker, s,
+                                               fignum, False, legend,
+                                               plot_limits, aspect, updates, **kwargs)

     def plot_magnification(self, *args, **kwargs):
         return util.plot_latent.plot_magnification(self, *args, **kwargs)
@@ -43,14 +43,14 @@ class SparseGPCoregionalizedRegression(SparseGP):

         #Kernel
         if kernel is None:
-            kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name)
+            kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name)

         #Likelihood
         likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)

         #Inducing inputs list
         if len(Z_list):
-            assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
+            assert len(Z_list) == Ny, 'Number of outputs do not match length of inducing inputs list.'
         else:
             if isinstance(num_inducing,np.int):
                 num_inducing = [num_inducing] * Ny
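Both coregionalized models now build their default kernel from kern.RBF rather than the removed lowercase GPy.kern.rbf. A hedged construction sketch, with argument names taken from the hunks above (the exact ICM defaults may differ from what is shown here):

from GPy import kern, util

input_dim = 1   # real input columns, excluding the trailing output-index column
icm = util.multioutput.ICM(input_dim=input_dim, num_outputs=2,
                           kernel=kern.RBF(input_dim), W_rank=1)
print(icm)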
@@ -4,7 +4,7 @@
 import unittest
 import numpy as np
 import GPy


 class MappingTests(unittest.TestCase):

@@ -23,12 +23,11 @@ class MappingTests(unittest.TestCase):

     def test_mlpmapping(self):
         verbose = False
         mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2)
         self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
         self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))


 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
@@ -30,7 +30,8 @@ def most_significant_input_dimensions(model, which_indices):
 def plot_latent(model, labels=None, which_indices=None,
                 resolution=50, ax=None, marker='o', s=40,
                 fignum=None, plot_inducing=False, legend=True,
-                aspect='auto', updates=False):
+                plot_limits=None,
+                aspect='auto', updates=False, **kwargs):
     """
     :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
     :param resolution: the resolution of the grid on which to evaluate the predictive variance

@@ -38,6 +39,8 @@ def plot_latent(model, labels=None, which_indices=None,
     if ax is None:
         fig = pb.figure(num=fignum)
         ax = fig.add_subplot(111)
+    else:
+        fig = ax.figure
     Tango.reset()

     if labels is None:

@@ -57,15 +60,28 @@ def plot_latent(model, labels=None, which_indices=None,
     def plot_function(x):
         Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
-        mu, var, low, up = model.predict(Xtest_full)
+        _, var = model.predict(Xtest_full)
         var = var[:, :1]
         return np.log(var)

     #Create an IMshow controller that can re-plot the latent space shading at a good resolution
+    if plot_limits is None:
+        xmin, ymin = X[:, [input_1, input_2]].min(0)
+        xmax, ymax = X[:, [input_1, input_2]].max(0)
+        x_r, y_r = xmax-xmin, ymax-ymin
+        xmin -= .1*x_r
+        xmax += .1*x_r
+        ymin -= .1*y_r
+        ymax += .1*y_r
+    else:
+        try:
+            xmin, xmax, ymin, ymax = plot_limits
+        except (TypeError, ValueError) as e:
+            raise e.__class__, "Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits)
     view = ImshowController(ax, plot_function,
-                            tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)),
+                            (xmin, ymin, xmax, ymax),
                             resolution, aspect=aspect, interpolation='bilinear',
-                            cmap=pb.cm.binary)
+                            cmap=pb.cm.binary, **kwargs)

     # make sure labels are in order of input:
     ulabels = []

@@ -99,18 +115,31 @@ def plot_latent(model, labels=None, which_indices=None,
     if not np.all(labels == 1.) and legend:
         ax.legend(loc=0, numpoints=1)

-    #ax.set_xlim(xmin[0], xmax[0])
-    #ax.set_ylim(xmin[1], xmax[1])
     ax.grid(b=False) # remove the grid if present, it doesn't look good
     ax.set_aspect('auto') # set a nice aspect ratio

     if plot_inducing:
         Z = param_to_array(model.Z)
         ax.plot(Z[:, input_1], Z[:, input_2], '^w')

+    ax.set_xlim((xmin, xmax))
+    ax.set_ylim((ymin, ymax))
+
+    try:
+        fig.canvas.draw()
+        fig.tight_layout()
+        fig.canvas.draw()
+    except Exception as e:
+        print "Could not invoke tight layout: {}".format(e)
+        pass

     if updates:
-        ax.figure.canvas.show()
+        try:
+            ax.figure.canvas.show()
+        except Exception as e:
+            print "Could not invoke show: {}".format(e)
         raw_input('Enter to continue')
+        view.deactivate()
     return ax

 def plot_magnification(model, labels=None, which_indices=None,

@@ -186,7 +215,7 @@ def plot_magnification(model, labels=None, which_indices=None,
     ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')

     if updates:
-        ax.figure.canvas.show()
+        fig.canvas.show()
         raw_input('Enter to continue')

     pb.title('Magnification Factor')
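The new plot_limits handling reduces to a small pure function: derive limits from the data with a 10% margin, or unpack an explicit 4-tuple. A standalone sketch mirroring it (the helper name is hypothetical, and the error is raised Python-3 style here rather than with the Python-2 raise statement used in the diff):

import numpy as np

def resolve_plot_limits(X, input_1, input_2, plot_limits=None):
    if plot_limits is None:
        # data range on the two plotted dimensions, padded by 10% on each side
        xmin, ymin = X[:, [input_1, input_2]].min(0)
        xmax, ymax = X[:, [input_1, input_2]].max(0)
        x_r, y_r = xmax - xmin, ymax - ymin
        xmin, xmax = xmin - .1 * x_r, xmax + .1 * x_r
        ymin, ymax = ymin - .1 * y_r, ymax + .1 * y_r
    else:
        try:
            xmin, xmax, ymin, ymax = plot_limits
        except (TypeError, ValueError):
            raise ValueError("need (xmin, xmax, ymin, ymax), got {}".format(plot_limits))
    return xmin, xmax, ymin, ymax

X = np.random.randn(50, 2)
print(resolve_plot_limits(X, 0, 1))
print(resolve_plot_limits(X, 0, 1, plot_limits=(-2, 2, -2, 2)))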
@@ -33,7 +33,7 @@ class AxisChangedController(AxisEventController):
        Constructor
        '''
        super(AxisChangedController, self).__init__(ax)
-       self._lim_ratio_threshold = update_lim or .8
+       self._lim_ratio_threshold = update_lim or .95
        self._x_lim = self.ax.get_xlim()
        self._y_lim = self.ax.get_ylim()

@@ -80,6 +80,10 @@ class AxisChangedController(AxisEventController):
 class BufferedAxisChangedController(AxisChangedController):
     def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs):
         """
+        Buffered axis changed controller. Controls the buffer and handles update events for when the axes changed.
+
+        Updated plotting will be after first reload (first time will be within plot limits, after that the limits will be buffered)
+
         :param plot_function:
             function to use for creating image for plotting (return ndarray-like)
             plot_function gets called with (2D!) Xtest grid if replotting required

@@ -91,11 +95,13 @@ class BufferedAxisChangedController(AxisChangedController):
         """
         super(BufferedAxisChangedController, self).__init__(ax, update_lim=update_lim)
         self.plot_function = plot_function
-        xmin, xmax = self._x_lim # self._compute_buffered(*self._x_lim)
-        ymin, ymax = self._y_lim # self._compute_buffered(*self._y_lim)
+        xmin, ymin, xmax, ymax = plot_limits #self._x_lim # self._compute_buffered(*self._x_lim)
+        # imshow acts on the limits of the plot, this is why we need to override the limits here, to make sure the right plot limits are used:
+        self._x_lim = xmin, xmax
+        self._y_lim = ymin, ymax
         self.resolution = resolution
         self._not_init = False
-        self.view = self._init_view(self.ax, self.recompute_X(), xmin, xmax, ymin, ymax, **kwargs)
+        self.view = self._init_view(self.ax, self.recompute_X(buffered=False), xmin, xmax, ymin, ymax, **kwargs)
         self._not_init = True

     def update(self, ax):

@@ -111,14 +117,16 @@ class BufferedAxisChangedController(AxisChangedController):
     def update_view(self, view, X, xmin, xmax, ymin, ymax):
         raise NotImplementedError('update view given in here')

-    def get_grid(self):
-        xmin, xmax = self._compute_buffered(*self._x_lim)
-        ymin, ymax = self._compute_buffered(*self._y_lim)
+    def get_grid(self, buffered=True):
+        if buffered: comp = self._compute_buffered
+        else: comp = lambda a,b: (a,b)
+        xmin, xmax = comp(*self._x_lim)
+        ymin, ymax = comp(*self._y_lim)
         x, y = numpy.mgrid[xmin:xmax:1j * self.resolution, ymin:ymax:1j * self.resolution]
         return numpy.hstack((x.flatten()[:, None], y.flatten()[:, None]))

-    def recompute_X(self):
-        X = self.plot_function(self.get_grid())
+    def recompute_X(self, buffered=True):
+        X = self.plot_function(self.get_grid(buffered))
         if isinstance(X, (tuple, list)):
             for x in X:
                 x.shape = [self.resolution, self.resolution]
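What get_grid(buffered=...) computes: a resolution-by-resolution lattice over the (optionally buffer-expanded) axis limits, flattened to an (R*R, 2) array for the plot function. A standalone sketch; the fractional buffer rule here is an assumption for illustration, the real one lives in _compute_buffered:

import numpy as np

def get_grid(x_lim, y_lim, resolution=50, buffered=True, buffer_frac=0.25):
    def expand(lo, hi):
        if not buffered:
            return lo, hi
        r = (hi - lo) * buffer_frac    # hypothetical buffer rule
        return lo - r, hi + r
    xmin, xmax = expand(*x_lim)
    ymin, ymax = expand(*y_lim)
    # complex step 1j*resolution makes mgrid produce exactly `resolution` points per axis
    x, y = np.mgrid[xmin:xmax:1j*resolution, ymin:ymax:1j*resolution]
    return np.hstack((x.flatten()[:, None], y.flatten()[:, None]))

grid = get_grid((-1., 1.), (-1., 1.), resolution=10, buffered=False)
print(grid.shape)  # (100, 2)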
@@ -9,7 +9,7 @@ import numpy


 class ImshowController(BufferedAxisChangedController):
-    def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.5, **kwargs):
+    def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.8, **kwargs):
         """
         :param plot_function:
             function to use for creating image for plotting (return ndarray-like)
@@ -123,6 +123,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     #add inducing inputs (if a sparse model is used)
     if hasattr(model,"Z"):
         #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
+        if isinstance(model,SparseGPCoregionalizedRegression):
+            Z = Z[Z[:,-1] == Y_metadata['output_index'],:]
         Zu = Z[:,free_dims]
         z_height = ax.get_ylim()[0]
         plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
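The coregionalized filtering added above is a plain boolean row-mask: keep only inducing points whose trailing output-index column matches the output being plotted. A numpy-only sketch of the same operation:

import numpy as np

Z = np.random.randn(12, 3)
Z[:, -1] = np.random.randint(0, 2, size=12)   # trailing column is the output index
output_index = 1
Z_out = Z[Z[:, -1] == output_index, :]        # rows belonging to this output only
print(Z_out.shape)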
@@ -4,6 +4,8 @@ import GPy
 import numpy as np
 import matplotlib as mpl
 import time
+from ...util.misc import param_to_array
+from GPy.core.parameterization.variational import VariationalPosterior
 try:
     import visual
     visual_available = True

@@ -72,12 +74,13 @@ class vector_show(matplotlib_show):
     """
     def __init__(self, vals, axes=None):
         matplotlib_show.__init__(self, vals, axes)
-        self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals.T)[0]
+        self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)

     def modify(self, vals):
         self.vals = vals.copy()
-        xdata, ydata = self.handle.get_data()
-        self.handle.set_data(xdata, self.vals.T)
+        for handle, vals in zip(self.handle, self.vals.T):
+            xdata, ydata = handle.get_data()
+            handle.set_data(xdata, vals)
         self.axes.figure.canvas.draw()


@@ -91,8 +94,12 @@ class lvm(matplotlib_show):
     :param latent_axes: the axes where the latent visualization should be plotted.
     """
     if vals == None:
-        vals = model.X[0]
+        if isinstance(model.X, VariationalPosterior):
+            vals = param_to_array(model.X.mean)
+        else:
+            vals = param_to_array(model.X)

+    vals = param_to_array(vals)
     matplotlib_show.__init__(self, vals, axes=latent_axes)

     if isinstance(latent_axes,mpl.axes.Axes):
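The vector_show fix above hinges on a matplotlib detail: plot() with a 2-D y returns one Line2D per column, so modify() must update each handle rather than assume a single line. A standalone sketch of the corrected pattern:

import numpy as np
import matplotlib.pyplot as plt

vals = np.random.randn(10, 3)
fig, ax = plt.subplots()
handles = ax.plot(np.arange(len(vals))[:, None], vals)   # one Line2D per column

new_vals = np.random.randn(10, 3)
for handle, col in zip(handles, new_vals.T):
    xdata, _ = handle.get_data()
    handle.set_data(xdata, col)    # update each line in place
fig.canvas.draw()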
GPy/testing/fitc.py (new file, 34 lines)
@@ -0,0 +1,34 @@
+# Copyright (c) 2014, James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+class FITCtest(unittest.TestCase):
+    def setUp(self):
+        ######################################
+        # # 1 dimensional example
+
+        N = 20
+        # sample inputs and outputs
+        self.X1D = np.random.uniform(-3., 3., (N, 1))
+        self.Y1D = np.sin(self.X1D) + np.random.randn(N, 1) * 0.05
+
+        ######################################
+        # # 2 dimensional example
+
+        # sample inputs and outputs
+        self.X2D = np.random.uniform(-3., 3., (N, 2))
+        self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(N, 1) * 0.05
+
+    def test_fitc_1d(self):
+        m = GPy.models.SparseGPRegression(self.X1D, self.Y1D)
+        m.inference_method = GPy.inference.latent_function_inference.FITC()
+        self.assertTrue(m.checkgrad())
+
+    def test_fitc_2d(self):
+        m = GPy.models.SparseGPRegression(self.X2D, self.Y2D)
+        m.inference_method = GPy.inference.latent_function_inference.FITC()
+        self.assertTrue(m.checkgrad())
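A sketch of what the new test file exercises, usable interactively: build a sparse GP regression model, swap its inference method to FITC, and verify the gradients numerically. This mirrors the test code above line for line.

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05

m = GPy.models.SparseGPRegression(X, Y)
m.inference_method = GPy.inference.latent_function_inference.FITC()
print(m.checkgrad())   # True if analytic and numerical gradients agree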
@@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):



-def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False):
+def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
     """
     This function runs on kernels to check the correctness of their
     implementation. It checks that the covariance function is positive definite

@@ -109,19 +109,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb

     """
     pass_checks = True
-    if X==None:
+    if X is None:
         X = np.random.randn(10, kern.input_dim)
         if output_ind is not None:
             X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
-    if X2==None:
+    if X2 is None:
         X2 = np.random.randn(20, kern.input_dim)
         if output_ind is not None:
             X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])

     if verbose:
         print("Checking covariance function is positive definite.")
-    #if isinstance(kern, GPy.kern.IndependentOutputs):
-        #import ipdb; ipdb.set_trace() # XXX BREAKPOINT
     result = Kern_check_model(kern, X=X).is_positive_semi_definite()
     if result and verbose:
         print("Check passed.")

@@ -154,7 +152,12 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb

     if verbose:
         print("Checking gradients of Kdiag(X) wrt theta.")
-    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    try:
+        result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result = True
+        if verbose:
+            print("update_gradients_diag not implemented for " + kern.name)
     if result and verbose:
         print("Check passed.")
     if not result:

@@ -166,7 +169,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
     if verbose:
         print("Checking gradients of K(X, X) wrt X.")
     try:
-        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+        testmodel = Kern_check_dK_dX(kern, X=X, X2=None)
+        if fixed_X_dims is not None:
+            testmodel.X[:,fixed_X_dims].fix()
+        result = testmodel.checkgrad(verbose=verbose)
     except NotImplementedError:
         result = True
         if verbose:

@@ -175,14 +181,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
         print("Check passed.")
     if not result:
         print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
-        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        testmodel.checkgrad(verbose=True)
         pass_checks = False
         return False

     if verbose:
         print("Checking gradients of K(X, X2) wrt X.")
     try:
-        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+        testmodel = Kern_check_dK_dX(kern, X=X, X2=X2)
+        if fixed_X_dims is not None:
+            testmodel.X[:,fixed_X_dims].fix()
+        result = testmodel.checkgrad(verbose=verbose)
     except NotImplementedError:
         result = True
         if verbose:

@@ -190,8 +199,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
     if result and verbose:
         print("Check passed.")
     if not result:
-        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
-        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        testmodel.checkgrad(verbose=True)
         pass_checks = False
         return False


@@ -236,9 +245,22 @@ class KernelGradientTestsContinuous(unittest.TestCase):

     def test_Add(self):
         k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
+        k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
         k.randomize()
         self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))

+    def test_Add_dims(self):
+        k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
+        k.randomize()
+        self.assertRaises(AssertionError, k.K, self.X)
+        k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
+        k.randomize()
+        # assert it runs:
+        try:
+            k.K(self.X)
+        except AssertionError:
+            raise AssertionError, "k.K(X) should run on self.D-1 dimension"
+
     def test_Matern52(self):
         k = GPy.kern.Matern52(self.D)
         k.randomize()

@@ -302,29 +324,57 @@ class KernelTestsMiscellaneous(unittest.TestCase):

 class KernelTestsNonContinuous(unittest.TestCase):
     def setUp(self):
-        N = 100
-        N1 = 110
-        self.D = 2
-        D = self.D
-        self.X = np.random.randn(N,D)
-        self.X2 = np.random.randn(N1,D)
-        #self.X_block = np.zeros((N+N1, D+D+1))
-        #self.X_block[0:N, 0:D] = self.X
-        #self.X_block[N:N+N1, D:D+D] = self.X2
-        #self.X_block[0:N, -1] = 0
-        #self.X_block[N:N+N1, -1] = 1
-        self.X_block = np.zeros((N+N1, D+1))
-        self.X_block[0:N, 0:D] = self.X
-        self.X_block[N:N+N1, 0:D] = self.X2
-        self.X_block[0:N, -1] = 0
-        self.X_block[N:N+N1, -1] = 1
-        self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :]
+        N0 = 3
+        N1 = 9
+        N2 = 4
+        N = N0+N1+N2
+        self.D = 3
+        self.X = np.random.randn(N, self.D+1)
+        indices = np.random.random_integers(0, 2, size=N)
+        self.X[indices==0, -1] = 0
+        self.X[indices==1, -1] = 1
+        self.X[indices==2, -1] = 2
+        #self.X = self.X[self.X[:, -1].argsort(), :]
+        self.X2 = np.random.randn((N0+N1)*2, self.D+1)
+        self.X2[:(N0*2), -1] = 0
+        self.X2[(N0*2):, -1] = 1

     def test_IndependentOutputs(self):
         k = GPy.kern.RBF(self.D)
-        kern = GPy.kern.IndependentOutputs(k, -1)
-        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, verbose=verbose))
+        kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
+        k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
+        kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
+
+    def test_ODE_UY(self):
+        kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D])
+        X = self.X[self.X[:,-1]!=2]
+        X2 = self.X2[self.X2[:,-1]!=2]
+        self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))


 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
-    unittest.main()
+    #unittest.main()
+    np.random.seed(0)
+    N0 = 3
+    N1 = 9
+    N2 = 4
+    N = N0+N1+N2
+    D = 3
+    X = np.random.randn(N, D+1)
+    indices = np.random.random_integers(0, 2, size=N)
+    X[indices==0, -1] = 0
+    X[indices==1, -1] = 1
+    X[indices==2, -1] = 2
+    #X = X[X[:, -1].argsort(), :]
+    X2 = np.random.randn((N0+N1)*2, D+1)
+    X2[:(N0*2), -1] = 0
+    X2[(N0*2):, -1] = 1
+    k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
+    kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
+    assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
+    k = GPy.kern.RBF(D)
+    kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
+    assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
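The multi-output layout the rewritten tests rely on is worth seeing in isolation: the last input column is an integer output index, and IndependentOutputs routes rows to sub-kernels so rows with different indices get zero covariance. A short sketch using the same constructor call as the tests above:

import numpy as np
import GPy

N, D = 16, 3
X = np.random.randn(N, D + 1)
X[:, -1] = np.random.randint(0, 3, size=N)   # trailing output index in {0, 1, 2}

kern = GPy.kern.IndependentOutputs(GPy.kern.RBF(D), -1, 'ind_single')
K = kern.K(X)
print(K.shape)   # (16, 16); zero blocks between rows with different indices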
@@ -255,21 +255,21 @@ class TestNoiseModels(object):
                 "Y": self.binary_Y,
                 "ep": False # FIXME: Should be True when we have it working again
             },
-            #"Exponential_default": {
-                #"model": GPy.likelihoods.exponential(),
-                #"link_f_constraints": [constrain_positive],
-                #"Y": self.positive_Y,
-                #"laplace": True,
-            #},
-            #"Poisson_default": {
-                #"model": GPy.likelihoods.poisson(),
-                #"link_f_constraints": [constrain_positive],
-                #"Y": self.integer_Y,
-                #"laplace": True,
-                #"ep": False #Should work though...
-            #},
-            #"Gamma_default": {
-                #"model": GPy.likelihoods.gamma(),
+            "Exponential_default": {
+                "model": GPy.likelihoods.Exponential(),
+                "link_f_constraints": [constrain_positive],
+                "Y": self.positive_Y,
+                "laplace": True,
+            },
+            "Poisson_default": {
+                "model": GPy.likelihoods.Poisson(),
+                "link_f_constraints": [constrain_positive],
+                "Y": self.integer_Y,
+                "laplace": True,
+                "ep": False #Should work though...
+            }#,
+            #GAMMA needs some work! "Gamma_default": {
+                #"model": GPy.likelihoods.Gamma(),
                 #"link_f_constraints": [constrain_positive],
                 #"Y": self.positive_Y,
                 #"laplace": True

@@ -589,7 +589,8 @@ class LaplaceTests(unittest.TestCase):

         self.var = np.random.rand(1)
         self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
-        self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
+        #TODO: gaussians with an Identity link. self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
+        self.gauss = GPy.likelihoods.Gaussian(variance=self.var)

         #Make a bigger step as lower bound can be quite curved
         self.step = 1e-6

@@ -604,7 +605,6 @@ class LaplaceTests(unittest.TestCase):
     def test_gaussian_d2logpdf_df2_2(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.Y = None
-        self.gauss = None

         self.N = 2
         self.D = 1

@@ -613,7 +613,6 @@ class LaplaceTests(unittest.TestCase):
         noise = np.random.randn(*self.X.shape)*self.real_std
         self.Y = np.sin(self.X*2*np.pi) + noise
         self.f = np.random.rand(self.N, 1)
-        self.gauss = GPy.likelihoods.Gaussian(variance=self.var)

         dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
         d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
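The updated LaplaceTests setUp pairs a Student-t likelihood with a plain identity-link Gaussian sharing the same variance; both constructor calls appear verbatim in the hunk above, so this mirrors the setup directly:

import numpy as np
import GPy

var = np.random.rand(1)
stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=var)
gauss = GPy.likelihoods.Gaussian(variance=var)   # identity link by default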
@@ -6,6 +6,60 @@ import unittest
 import numpy as np
 import GPy

+class MiscTests(unittest.TestCase):
+    def setUp(self):
+        self.N = 20
+        self.N_new = 50
+        self.D = 1
+        self.X = np.random.uniform(-3., 3., (self.N, 1))
+        self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
+        self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
+
+    def test_raw_predict(self):
+        k = GPy.kern.RBF(1)
+        m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
+        m.randomize()
+        Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.Gaussian_noise.variance)
+        K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
+        mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(self.Y)
+
+        mu, covar = m._raw_predict(self.X_new, full_cov=True)
+        self.assertEquals(mu.shape, (self.N_new, self.D))
+        self.assertEquals(covar.shape, (self.N_new, self.N_new))
+        np.testing.assert_almost_equal(K_hat, covar)
+        np.testing.assert_almost_equal(mu_hat, mu)
+
+        mu, var = m._raw_predict(self.X_new)
+        self.assertEquals(mu.shape, (self.N_new, self.D))
+        self.assertEquals(var.shape, (self.N_new, 1))
+        np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
+        np.testing.assert_almost_equal(mu_hat, mu)
+
+    def test_sparse_raw_predict(self):
+        k = GPy.kern.RBF(1)
+        m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
+        m.randomize()
+        Z = m.Z[:]
+        X = self.X[:]
+
+        #Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
+        Kinv = m.posterior.woodbury_inv
+        K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
+
+        mu, covar = m._raw_predict(self.X_new, full_cov=True)
+        self.assertEquals(mu.shape, (self.N_new, self.D))
+        self.assertEquals(covar.shape, (self.N_new, self.N_new))
+        np.testing.assert_almost_equal(K_hat, covar)
+        #np.testing.assert_almost_equal(mu_hat, mu)
+
+        mu, var = m._raw_predict(self.X_new)
+        self.assertEquals(mu.shape, (self.N_new, self.D))
+        self.assertEquals(var.shape, (self.N_new, 1))
+        np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
+        #np.testing.assert_almost_equal(mu_hat, mu)
+
+
 class GradientTests(unittest.TestCase):
     def setUp(self):
         ######################################

@@ -198,6 +252,7 @@ class GradientTests(unittest.TestCase):
         m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
         self.assertTrue(m.checkgrad())

+    @unittest.expectedFailure
     def test_GP_EP_probit(self):
         N = 20
         X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]

@@ -207,6 +262,7 @@ class GradientTests(unittest.TestCase):
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())

+    @unittest.expectedFailure
     def test_sparse_EP_DTC_probit(self):
         N = 20
         X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]

@@ -221,6 +277,7 @@ class GradientTests(unittest.TestCase):
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())

+    @unittest.expectedFailure
     def test_generalized_FITC(self):
         N = 20
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
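The closed form that the new test_raw_predict verifies is the standard GP regression posterior: with Kinv = (K(X,X) + s2*I)^-1, the predictive mean is K(X*,X) Kinv Y and the predictive covariance is K(X*,X*) - K(X*,X) Kinv K(X,X*). The following standalone check mirrors the test body above:

import numpy as np
import GPy

N, Nnew = 20, 50
X = np.random.uniform(-3., 3., (N, 1))
Y = np.sin(X) + np.random.randn(N, 1) * 0.05
Xnew = np.random.uniform(-3., 3., (Nnew, 1))

k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(X, Y, kernel=k)

# brute-force posterior via a dense (pseudo)inverse
Kinv = np.linalg.pinv(k.K(X) + np.eye(N) * m.Gaussian_noise.variance)
mu_hat = k.K(Xnew, X).dot(Kinv).dot(Y)
K_hat = k.K(Xnew) - k.K(Xnew, X).dot(Kinv).dot(k.K(X, Xnew))

mu, covar = m._raw_predict(Xnew, full_cov=True)
print(np.allclose(mu, mu_hat), np.allclose(covar, K_hat))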
@@ -8,7 +8,7 @@ from GPy.core.parameterization.parameterized import Parameterized
 from GPy.core.parameterization.param import Param
 import numpy

 # One trigger in init
 _trigger_start = -1

 class ParamTestParent(Parameterized):

@@ -21,11 +21,9 @@ class ParameterizedTest(Parameterized):
     params_changed_count = _trigger_start
     def parameters_changed(self):
         self.params_changed_count += 1
-    def _set_params(self, params, trigger_parent=True):
-        Parameterized._set_params(self, params, trigger_parent=trigger_parent)

 class Test(unittest.TestCase):

     def setUp(self):
         self.parent = ParamTestParent('test parent')
         self.par = ParameterizedTest('test model')

@@ -41,12 +39,12 @@ class Test(unittest.TestCase):

         self.parent.add_parameter(self.par)
         self.parent.add_parameter(self.par2)

         self._observer_triggered = None
         self._trigger_count = 0
         self._first = None
         self._second = None

     def _trigger(self, which):
         self._observer_triggered = float(which)
         self._trigger_count += 1

@@ -54,18 +52,18 @@ class Test(unittest.TestCase):
             self._second = self._trigger
         else:
             self._first = self._trigger

     def _trigger_priority(self, which):
         if self._first is not None:
             self._second = self._trigger_priority
         else:
             self._first = self._trigger_priority

     def test_observable(self):
         self.par.add_observer(self, self._trigger, -1)
         self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
         self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')

         self.p[0,1] = 3 # trigger observers
         self.assertEqual(self._observer_triggered, 3, 'observer should have triggered')
         self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')

@@ -78,14 +76,14 @@ class Test(unittest.TestCase):
         self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
         self.assertEqual(self.par.params_changed_count, 2, 'params changed second')
         self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')

         self.par.add_observer(self, self._trigger, -1)
         self.p[2,1] = 4
         self.assertEqual(self._observer_triggered, 4, 'observer should have triggered')
         self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
         self.assertEqual(self.par.params_changed_count, 3, 'params changed second')
         self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')

         self.par.remove_observer(self, self._trigger)
         self.p[0,1] = 3
         self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered')

@@ -99,7 +97,7 @@ class Test(unittest.TestCase):
         self.par._trigger_params_changed()
         self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
         self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)

         self.par._param_array_[:] = 2
         self.par._trigger_params_changed()
         self.assertEqual(self.par.params_changed_count, 2, 'now params changed')

@@ -125,13 +123,13 @@ class Test(unittest.TestCase):

         self.par.remove_observer(self)
         self._first = self._second = None

         self.par.add_observer(self, self._trigger, 1)
         self.par.add_observer(self, self._trigger_priority, 0)
         self.par.notify_observers(0)
         self.assertEqual(self._first, self._trigger, 'priority should be second')
         self.assertEqual(self._second, self._trigger_priority, 'priority should be second')


 if __name__ == "__main__":
     #import sys;sys.argv = ['', 'Test.testName']
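The behaviour these tests assert, stripped of the GPy machinery: observers registered with a higher priority value are notified first. A minimal plain-Python sketch of that contract (hypothetical class, not the GPy Observable implementation):

class Observable(object):
    def __init__(self):
        self.observers = []   # list of (priority, callback) pairs
    def add_observer(self, callback, priority=0):
        self.observers.append((priority, callback))
        self.observers.sort(key=lambda t: -t[0])   # higher priority notified first
    def notify_observers(self, which):
        for _, callback in self.observers:
            callback(which)

obs = Observable()
order = []
obs.add_observer(lambda w: order.append('late'), priority=0)
obs.add_observer(lambda w: order.append('early'), priority=1)
obs.notify_observers(None)
print(order)   # ['early', 'late']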
@ -7,16 +7,16 @@ import unittest
|
||||||
import GPy
|
import GPy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPy.core.parameterization.parameter_core import HierarchyError
|
from GPy.core.parameterization.parameter_core import HierarchyError
|
||||||
from GPy.core.parameterization.array_core import ObservableArray
|
from GPy.core.parameterization.array_core import ObsAr
|
||||||
|
|
||||||
class ArrayCoreTest(unittest.TestCase):
|
class ArrayCoreTest(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.X = np.random.normal(1,1, size=(100,10))
|
self.X = np.random.normal(1,1, size=(100,10))
|
||||||
self.obsX = ObservableArray(self.X)
|
self.obsX = ObsAr(self.X)
|
||||||
|
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
X = ObservableArray(self.X)
|
X = ObsAr(self.X)
|
||||||
X2 = ObservableArray(X)
|
X2 = ObsAr(X)
|
||||||
self.assertIs(X, X2, "no new Observable array, when Observable is given")
|
self.assertIs(X, X2, "no new Observable array, when Observable is given")
|
||||||
|
|
||||||
def test_slice(self):
|
def test_slice(self):
|
||||||
|
|
@ -34,9 +34,9 @@ class ParameterizedTest(unittest.TestCase):
|
||||||
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
||||||
|
|
||||||
self.test1 = GPy.core.Parameterized("test model")
|
self.test1 = GPy.core.Parameterized("test model")
|
||||||
self.test1.add_parameter(self.white)
|
self.test1.kern = self.rbf+self.white
|
||||||
self.test1.add_parameter(self.rbf, 0)
|
self.test1.add_parameter(self.test1.kern)
|
||||||
self.test1.add_parameter(self.param)
|
self.test1.add_parameter(self.param, 0)
|
||||||
|
|
||||||
x = np.linspace(-2,6,4)[:,None]
|
x = np.linspace(-2,6,4)[:,None]
|
||||||
y = np.sin(x)
|
y = np.sin(x)
|
||||||
|
|
@@ -45,22 +45,24 @@ class ParameterizedTest(unittest.TestCase):
     def test_add_parameter(self):
         self.assertEquals(self.rbf._parent_index_, 0)
         self.assertEquals(self.white._parent_index_, 1)
+        self.assertEquals(self.param._parent_index_, 0)
         pass

     def test_fixes(self):
         self.white.fix(warning=False)
-        self.test1.remove_parameter(self.test1.param)
+        self.test1.remove_parameter(self.param)
         self.assertTrue(self.test1._has_fixes())
         from GPy.core.parameterization.transformations import FIXED, UNFIXED
         self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])
-        self.test1.add_parameter(self.white, 0)
+        self.test1.kern.add_parameter(self.white, 0)
         self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
+        self.test1.kern.rbf.fix()
+        self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)

     def test_remove_parameter(self):
         from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
         self.white.fix()
-        self.test1.remove_parameter(self.white)
+        self.test1.kern.remove_parameter(self.white)
         self.assertIs(self.test1._fixes_,None)

         self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
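Note: `_fixes_` behaves as a boolean mask over the flattened parameter vector, and FIXED entries are hidden from the optimiser. A rough numpy sketch of the filtering this test relies on (toy encoding, assuming truthy means free):

import numpy as np

UNFIXED, FIXED = True, False            # assumed encoding: truthy == free
params = np.array([0.1, 0.5, 1.2])      # white.variance, rbf.variance, rbf.lengthscale
fixes = np.array([FIXED, UNFIXED, UNFIXED])

free = params[fixes]                    # only unfixed values reach the optimiser
assert free.tolist() == [0.5, 1.2]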
@@ -81,7 +83,12 @@ class ParameterizedTest(unittest.TestCase):
         self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
-        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
+        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))

+    def test_remove_parameter_param_array_grad_array(self):
+        val = self.test1.kern._param_array_.copy()
+        self.test1.kern.remove_parameter(self.white)
+        self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist())
+
     def test_add_parameter_already_in_hirarchy(self):
         self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
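Note: the new test_remove_parameter_param_array_grad_array pins down that removing a child parameter also shrinks the parent's flattened `_param_array_`. In toy numpy terms, with the white variance stored last after the two rbf values (sizes assumed as above):

import numpy as np

kern_params = np.array([0.5, 1.2, 0.1])  # rbf.variance, rbf.lengthscale, white.variance
after_remove = kern_params[:2]           # dropping white keeps the leading rbf block
assert after_remove.tolist() == [0.5, 1.2]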
@@ -91,34 +98,51 @@ class ParameterizedTest(unittest.TestCase):
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
         self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
         from GPy.core.parameterization.transformations import Logexp
-        kern = self.rbf+self.white
+        kern = self.test1.kern
+        self.test1.remove_parameter(kern)
         self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3))

     def test_constraints(self):
         self.rbf.constrain(GPy.transformations.Square(), False)
-        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2))
-        self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2])
+        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
+        self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size])

-        self.test1.remove_parameter(self.rbf)
+        self.test1.kern.remove_parameter(self.rbf)
         self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])

     def test_constraints_views(self):
-        self.assertEqual(self.white.constraints._offset, 2)
-        self.assertEqual(self.rbf.constraints._offset, 0)
-        self.assertEqual(self.param.constraints._offset, 3)
+        self.assertEqual(self.white.constraints._offset, self.param.size+self.rbf.size)
+        self.assertEqual(self.rbf.constraints._offset, self.param.size)
+        self.assertEqual(self.param.constraints._offset, 0)

     def test_fixing_randomize(self):
-        self.white.fix(warning=False)
-        val = float(self.test1.white.variance)
+        self.white.fix(warning=True)
+        val = float(self.white.variance)
         self.test1.randomize()
         self.assertEqual(val, self.white.variance)

+    def test_randomize(self):
+        ps = self.test1.param.view(np.ndarray).copy()
+        self.test1.param.randomize()
+        self.assertFalse(np.all(ps==self.test1.param))
+
+    def test_fixing_randomize_parameter_handling(self):
+        self.rbf.fix(warning=True)
+        val = float(self.rbf.variance)
+        self.test1.kern.randomize()
+        self.assertEqual(val, self.rbf.variance)
+
     def test_fixing_optimize(self):
         self.testmodel.kern.lengthscale.fix()
         val = float(self.testmodel.kern.lengthscale)
         self.testmodel.randomize()
         self.assertEqual(val, self.testmodel.kern.lengthscale)

+    def test_printing(self):
+        print self.test1
+        print self.param
+        print self.test1['']
+
 if __name__ == "__main__":
     #import sys;sys.argv = ['', 'Test.test_add_parameter']
     unittest.main()
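Note: test_fixing_randomize and the new test_fixing_randomize_parameter_handling encode the same rule at two levels of the hierarchy: randomize() may only redraw unfixed values. A minimal sketch of that rule (toy function, not GPy's randomize):

import numpy as np

def toy_randomize(params, free):
    # Redraw only the free entries; fixed ones keep their value.
    out = params.copy()
    out[free] = np.random.randn(int(free.sum()))
    return out

params = np.array([0.5, 1.2, 0.1])
free = np.array([False, True, True])  # first entry is fixed
assert toy_randomize(params, free)[0] == 0.5  # fixed value survives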
@@ -56,8 +56,6 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
        warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name)
-    #K = kernel * GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B')
-    #K = kernel ** GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa, name= 'B')
    K['.*variance'] = 1.
    K['.*variance'].fix()
    return K
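Note: the ICM helper above builds the intrinsic coregionalization model: a base kernel k(x, x') shared across outputs, scaled per output pair by B = W W^T + diag(kappa). A self-contained numpy sketch of the resulting covariance for two outputs, assuming an RBF base kernel and output-major ordering:

import numpy as np

def rbf(X, X2, variance=1.0, lengthscale=1.0):
    # Squared-exponential Gram matrix.
    d2 = ((X[:, None, :] - X2[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * d2 / lengthscale ** 2)

num_outputs, W_rank = 2, 1
W = np.random.randn(num_outputs, W_rank)
kappa = np.ones(num_outputs)
B = np.dot(W, W.T) + np.diag(kappa)  # coregionalization matrix

X = np.random.randn(5, 1)
K = np.kron(B, rbf(X, X))            # covariance over (output, input) pairs
assert K.shape == (10, 10)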
4  setup.py
@@ -18,7 +18,7 @@ setup(name = 'GPy',
      license = "BSD 3-clause",
      keywords = "machine-learning gaussian-processes kernels",
      url = "http://sheffieldml.github.com/GPy/",
-      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods', 'GPy.testing', 'GPy.util.latent_space_visualizations', 'GPy.util.latent_space_visualizations.controllers', 'GPy.likelihoods.noise_models', 'GPy.kern.parts', 'GPy.mappings'],
+      packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"],
      package_dir={'GPy': 'GPy'},
      package_data = {'GPy': ['GPy/examples']},
      py_modules = ['GPy.__init__'],
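Note: this hand-maintained packages list has to be edited every time a module moves, which is exactly what this hunk does. If the project ever wanted to avoid that, one alternative is setuptools' find_packages (a sketch only, not what setup.py currently does):

# Sketch only; GPy's setup.py pins the package list by hand.
from setuptools import setup, find_packages

setup(name = 'GPy',
      packages = find_packages())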
@@ -29,6 +29,4 @@ setup(name = 'GPy',
      },
      classifiers=[
      "License :: OSI Approved :: BSD License"],
-      #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
-      #                         sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
 )