Merge branch 'params' of github.com:SheffieldML/GPy into params

Alan Saul 2014-02-24 11:35:45 +00:00
commit 632a702532
78 changed files with 2892 additions and 3760 deletions

View file

@@ -2,7 +2,9 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 from model import *
-from parameterization.parameterized import *
+from parameterization.parameterized import adjust_name_for_printing, Parameterizable
+from parameterization.param import Param, ParamConcatenation
 from gp import GP
 from sparse_gp import SparseGP
 from svigp import SVIGP

View file

@@ -30,7 +30,10 @@ class GP(Model):
         super(GP, self).__init__(name)
         assert X.ndim == 2
-        self.X = ObservableArray(X)
+        if isinstance(X, ObservableArray):
+            self.X = X
+        else: self.X = ObservableArray(X)
         self.num_data, self.input_dim = self.X.shape
         assert Y.ndim == 2
@@ -43,7 +46,8 @@ class GP(Model):
         else:
             self.Y_metadata = None
-        assert isinstance(kernel, kern.kern)
+        assert isinstance(kernel, kern.Kern)
+        assert self.input_dim == kernel.input_dim
         self.kern = kernel
         assert isinstance(likelihood, likelihoods.Likelihood)
@@ -70,7 +74,7 @@ class GP(Model):
     def log_likelihood(self):
         return self._log_marginal_likelihood
-    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
+    def _raw_predict(self, _Xnew, full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalization or likelihood
@@ -80,29 +84,27 @@ class GP(Model):
        diagonal of the covariance is returned.
        """
-        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
+        Kx = self.kern.K(_Xnew, self.X).T
         #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
         WiKx = np.dot(self.posterior.woodbury_inv, Kx)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
-            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.K(_Xnew)
             #var = Kxx - tdot(LiKx.T)
             var = np.dot(Kx.T, WiKx)
         else:
-            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.Kdiag(_Xnew)
             #var = Kxx - np.sum(LiKx*LiKx, 0)
             var = Kxx - np.sum(WiKx*Kx, 0)
             var = var.reshape(-1, 1)
         return mu, var
-    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
+    def predict(self, Xnew, full_cov=False, **likelihood_args):
         """
         Predict the function(s) at the new point(s) Xnew.
         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param which_parts: specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
         :param full_cov: whether to return the full covariance matrix, or just
            the diagonal
         :type full_cov: bool
@@ -118,13 +120,13 @@ class GP(Model):
         """
         #predict the latent function values
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov)
         # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
         return mean, var, _025pm, _975pm
-    def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
+    def posterior_samples_f(self,X,size=10, full_cov=True):
         """
         Samples the posterior GP at the points X.
@@ -132,13 +134,11 @@ class GP(Model):
         :type X: np.ndarray, Nnew x self.input_dim.
         :param size: the number of a posteriori samples.
         :type size: int.
-        :param which_parts: which of the kernel functions to use (additively).
-        :type which_parts: 'all', or list of bools.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
+        m, v = self._raw_predict(X, full_cov=full_cov)
         v = v.reshape(m.size,-1) if len(v.shape)==3 else v
         if not full_cov:
             Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
@@ -147,7 +147,7 @@ class GP(Model):
         return Ysim
-    def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
+    def posterior_samples(self,X,size=10, full_cov=True,noise_model=None):
         """
         Samples the posterior GP at the points X.
@@ -155,15 +155,13 @@ class GP(Model):
         :type X: np.ndarray, Nnew x self.input_dim.
         :param size: the number of a posteriori samples.
         :type size: int.
-        :param which_parts: which of the kernel functions to use (additively).
-        :type which_parts: 'all', or list of bools.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
         :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
         :type noise_model: integer.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
+        Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
         if isinstance(self.likelihood, Gaussian):
             noise_std = np.sqrt(self.likelihood._get_params())
             Ysim += np.random.normal(0,noise_std,Ysim.shape)
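Note: `_raw_predict` above applies the usual GP posterior identities using the cached Woodbury quantities, mu = Kx^T * woodbury_vector and var = Kxx - Kx^T * woodbury_inv * Kx. The following standalone numpy sketch reproduces those equations outside GPy; the RBF helper, the explicit matrix inverse and all names are illustrative stand-ins, not GPy API.

import numpy as np

def rbf(A, B, lengthscale=1.0, variance=1.0):
    # simple RBF kernel, a stand-in for kern.K in the code above
    sq = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2 * A.dot(B.T)
    return variance * np.exp(-0.5 * sq / lengthscale**2)

def raw_predict_sketch(X, Y, Xnew, noise_var=0.1):
    """Posterior mean/variance from Woodbury quantities: for a Gaussian likelihood,
    woodbury_inv = (K + noise*I)^-1 and woodbury_vector = (K + noise*I)^-1 Y."""
    K = rbf(X, X) + noise_var * np.eye(X.shape[0])
    woodbury_inv = np.linalg.inv(K)          # explicit inverse for clarity only
    woodbury_vector = woodbury_inv.dot(Y)
    Kx = rbf(X, Xnew)                        # plays the role of kern.K(_Xnew, self.X).T
    mu = Kx.T.dot(woodbury_vector)           # posterior mean
    WiKx = woodbury_inv.dot(Kx)
    var = (np.diag(rbf(Xnew, Xnew)) - np.sum(WiKx * Kx, 0)).reshape(-1, 1)
    return mu, var

X = np.random.randn(20, 1); Y = np.sin(X)
mu, var = raw_predict_sketch(X, Y, np.linspace(-2, 2, 5)[:, None])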

View file

@@ -4,12 +4,8 @@
 from .. import likelihoods
 from ..inference import optimization
-from ..util.linalg import jitchol
 from ..util.misc import opt_wrapper
 from parameterization import Parameterized
-from parameterization.parameterized import UNFIXED
-from parameterization.domains import _POSITIVE, _REAL
-from parameterization.index_operations import ParameterIndexOperations
 import multiprocessing as mp
 import numpy as np
 from numpy.linalg.linalg import LinAlgError
@@ -240,7 +236,7 @@ class Model(Parameterized):
         constrained positive.
         """
         raise DeprecationWarning, 'parameters now have default constraints'
-        positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
+        #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
 #         param_names = self._get_param_names()
 #         for s in positive_strings:
@@ -489,20 +485,17 @@ class Model(Parameterized):
         if not hasattr(self, 'kern'):
             raise ValueError, "this model has no kernel"
-        k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
-        if (not len(k) == 1):
-            raise ValueError, "cannot determine sensitivity for this kernel"
-        k = k[0]
-        from ..kern.parts.rbf import RBF
-        from ..kern.parts.rbf_inv import RBFInv
-        from ..kern.parts.linear import Linear
+        k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
+        from ..kern import RBF, Linear#, RBFInv
         if isinstance(k, RBF):
             return 1. / k.lengthscale
-        elif isinstance(k, RBFInv):
-            return k.inv_lengthscale
+        #elif isinstance(k, RBFInv):
+        #    return k.inv_lengthscale
         elif isinstance(k, Linear):
             return k.variances
-        else:
-            raise ValueError, "cannot determine sensitivity for this kernel"
     def pseudo_EM(self, stop_crit=.1, **kwargs):
         """

View file

@@ -28,14 +28,20 @@ class ObservableArray(np.ndarray, Observable):
     """
     __array_priority__ = -1 # Never give back ObservableArray
     def __new__(cls, input_array):
-        obj = np.atleast_1d(input_array).view(cls)
+        if not isinstance(input_array, ObservableArray):
+            obj = np.atleast_1d(input_array).view(cls)
+        else: obj = input_array
         cls.__name__ = "ObservableArray\n "
-        obj._observers_ = {}
         return obj
+    def __init__(self, *a, **kw):
+        super(ObservableArray, self).__init__(*a, **kw)
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return
-        self._observers_ = getattr(obj, '_observers_', None)
+        self._observer_callables_ = getattr(obj, '_observer_callables_', None)
     def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(np.ndarray)
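Note: both `ObservableArray` here and the reworked `Observable` base later in this commit replace the single `_observers_` dict with an `_observer_callables_` mapping holding a list of callbacks per observer. A minimal standalone sketch of that pattern (not the GPy classes; removal uses `list.remove` for clarity):

from collections import defaultdict

class ObservableSketch(object):
    """Minimal observer registry mirroring the add/remove/notify pattern above."""
    def __init__(self):
        self._observer_callables_ = defaultdict(list)
    def add_observer(self, observer, callble):
        self._observer_callables_[observer].append(callble)
    def remove_observer(self, observer, callble):
        self._observer_callables_[observer].remove(callble)
    def _notify_observers(self):
        for callables in self._observer_callables_.values():
            for callble in callables:
                callble(self)   # every registered callback sees the changed object

obs = ObservableSketch()
obs.add_observer("model", lambda who: None)  # callback fired on every change
obs._notify_observers()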

View file

@@ -83,12 +83,22 @@ class ParameterIndexOperations(object):
     def iterproperties(self):
         return self._properties.iterkeys()
-    def shift(self, start, size):
+    def shift_right(self, start, size):
         for ind in self.iterindices():
             toshift = ind>=start
+            if toshift.size > 0:
                 ind[toshift] += size
+    def shift_left(self, start, size):
+        for v, ind in self.items():
+            todelete = (ind>=start) * (ind<start+size)
+            if todelete.size != 0:
+                ind = ind[~todelete]
+            toshift = ind>=start
+            if toshift.size != 0:
+                ind[toshift] -= size
+            if ind.size != 0: self._properties[v] = ind
+            else: del self._properties[v]
     def clear(self):
         self._properties.clear()
@@ -183,7 +193,7 @@ class ParameterIndexOperationsView(object):
             yield i
-    def shift(self, start, size):
+    def shift_right(self, start, size):
         raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations'
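Note: splitting `shift` into `shift_right` and `shift_left` mirrors what happens to the raveled parameter indices when a block of `size` entries is inserted at, or removed from, position `start`. A small numpy sketch of the same index arithmetic (function names are illustrative, not the GPy API):

import numpy as np

def shift_right(ind, start, size):
    # indices at or after `start` move up by `size` (a parameter block was inserted)
    ind = ind.copy()
    ind[ind >= start] += size
    return ind

def shift_left(ind, start, size):
    # indices inside [start, start+size) disappear, the rest move down by `size`
    ind = ind[~((ind >= start) & (ind < start + size))].copy()
    ind[ind >= start] -= size
    return ind

ind = np.array([0, 3, 4, 7])
print(shift_right(ind, 3, 2))  # [0 5 6 9]
print(shift_left(ind, 3, 2))   # [0 5]: indices 3 and 4 removed, 7 becomes 5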

View file

@@ -3,7 +3,7 @@
 import itertools
 import numpy
-from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing
+from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing
 from array_core import ObservableArray, ParamList
 ###### printing
@@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
 __print_threshold__ = 5
 ######
-class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable):
+class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
     """
     Parameter object for GPy models.
@@ -54,11 +54,11 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         obj._tied_to_me_ = SetDict()
         obj._tied_to_ = []
         obj._original_ = True
-        obj.gradient = None
+        obj._gradient_ = None
         return obj
-    def __init__(self, name, input_array, default_constraint=None):
-        super(Param, self).__init__(name=name, default_constraint=default_constraint)
+    def __init__(self, name, input_array, default_constraint=None, *a, **kw):
+        super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
@@ -76,10 +76,20 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         self._updated_ = getattr(obj, '_updated_', None)
         self._original_ = getattr(obj, '_original_', None)
         self._name = getattr(obj, 'name', None)
-        self.gradient = getattr(obj, 'gradient', None)
+        self._gradient_ = getattr(obj, '_gradient_', None)
         self.constraints = getattr(obj, 'constraints', None)
         self.priors = getattr(obj, 'priors', None)
+    @property
+    def gradient(self):
+        if self._gradient_ is None:
+            self._gradient_ = numpy.zeros(self._realshape_)
+        return self._gradient_[self._current_slice_]
+    @gradient.setter
+    def gradient(self, val):
+        self.gradient[:] = val
     #===========================================================================
     # Pickling operations
     #===========================================================================
@@ -115,6 +125,13 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         self._direct_parent_ = state.pop()
         self.name = state.pop()
+    def copy(self, *args):
+        constr = self.constraints.copy()
+        priors = self.priors.copy()
+        p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_)
+        p.constraints = constr
+        p.priors = priors
+        return p
     #===========================================================================
     # get/set parameters
     #===========================================================================
@@ -127,7 +144,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         return self.flat
     def _collect_gradient(self, target):
-        target[:] = self.gradient.flat
+        target += self.gradient.flat
+    def _set_gradient(self, g):
+        self.gradient = g.reshape(self._realshape_)
     #===========================================================================
     # Array operations -> done
@@ -214,7 +234,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
     def _description_str(self):
         if self.size <= 1: return ["%f" % self]
         else: return [str(self.shape)]
-    def parameter_names(self, add_name=False):
+    def parameter_names(self, add_self=False, adjust_for_printing=False):
+        if adjust_for_printing:
+            return [adjust_name_for_printing(self.name)]
         return [self.name]
     @property
     def flattened_parameters(self):
@@ -231,14 +253,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
     @property
     def _ties_str(self):
         return [t._short() for t in self._tied_to_] or ['']
-    @property
-    def name_hirarchical(self):
-        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
-        return adjust_name_for_printing(self.name)
     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format(
-            x=self.name_hirarchical)
+            x=self.hirarchy_name())
         return name + super(Param, self).__repr__(*args, **kwargs)
     def _ties_for(self, rav_index):
         # size = sum(p.size for p in self._tied_to_)
@@ -272,12 +289,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         gen = map(lambda x: " ".join(map(str, x)), gen)
         return reduce(lambda a, b:max(a, len(b)), gen, len(header))
     def _max_len_values(self):
-        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical))
+        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name()))
     def _max_len_index(self, ind):
         return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
     def _short(self):
         # short string to print
-        name = self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
+        name = self.hirarchy_name()
         if self._realsize_ < 2:
             return name
         ind = self._indices()
@@ -300,8 +317,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         if lp is None: lp = self._max_len_names(prirs, __tie_name__)
         sep = '-'
         header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}"
-        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
-        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
+        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
+        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
         if not ties: ties = itertools.cycle([''])
         return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
         # except: return super(Param, self).__str__()
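Note: the new `gradient` property above allocates the backing `_gradient_` buffer lazily and the setter assigns in place, apparently so that views of a parameter share one gradient buffer. A standalone sketch of that lazy, in-place pattern (plain numpy, illustrative class, not the GPy Param):

import numpy as np

class LazyGradient(object):
    """Sketch of a lazily allocated, in-place updated gradient buffer."""
    def __init__(self, shape):
        self._realshape_ = shape
        self._gradient_ = None
    @property
    def gradient(self):
        if self._gradient_ is None:
            self._gradient_ = np.zeros(self._realshape_)  # created on first access
        return self._gradient_
    @gradient.setter
    def gradient(self, val):
        self.gradient[:] = val  # broadcasted in-place write keeps existing views valid

p = LazyGradient((2, 3))
g = p.gradient          # buffer created here
p.gradient = 1.0        # fills the same buffer; g now also holds ones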

View file

@@ -11,14 +11,19 @@ def adjust_name_for_printing(name):
     return ''
 class Observable(object):
-    _observers_ = {}
+    def __init__(self, *args, **kwargs):
+        from collections import defaultdict
+        self._observer_callables_ = defaultdict(list)
     def add_observer(self, observer, callble):
-        self._observers_[observer] = callble
-    def remove_observer(self, observer):
-        del self._observers_[observer]
+        self._observer_callables_[observer].append(callble)
+        #callble(self)
+    def remove_observer(self, observer, callble):
+        del self._observer_callables_[observer][callble]
     def _notify_observers(self):
-        [callble(self) for callble in self._observers_.itervalues()]
+        [[callble(self) for callble in callables]
+         for callables in self._observer_callables_.itervalues()]
 class Pickleable(object):
     def _getstate(self):
@@ -47,10 +52,8 @@ class Pickleable(object):
 #===============================================================================
 class Parentable(object):
-    def __init__(self, direct_parent=None, parent_index=None):
-        super(Parentable,self).__init__()
-        self._direct_parent_ = direct_parent
-        self._parent_index_ = parent_index
+    _direct_parent_ = None
+    _parent_index_ = None
     def has_parent(self):
         return self._direct_parent_ is not None
@@ -68,10 +71,13 @@
             return self
         return self._direct_parent_._highest_parent_
+    def _notify_parameters_changed(self):
+        if self.has_parent():
+            self._direct_parent_._notify_parameters_changed()
 class Nameable(Parentable):
-    _name = None
-    def __init__(self, name, direct_parent=None, parent_index=None):
-        super(Nameable,self).__init__(direct_parent, parent_index)
+    def __init__(self, name, *a, **kw):
+        super(Nameable, self).__init__(*a, **kw)
         self._name = name or self.__class__.__name__
     @property
@@ -80,58 +86,21 @@ class Nameable(Parentable):
     @name.setter
     def name(self, name):
         from_name = self.name
+        assert isinstance(name, str)
         self._name = name
         if self.has_parent():
             self._direct_parent_._name_changed(self, from_name)
-
-class Parameterizable(Parentable):
-    def __init__(self, *args, **kwargs):
-        super(Parameterizable, self).__init__(*args, **kwargs)
-        from GPy.core.parameterization.array_core import ParamList
-        _parameters_ = ParamList()
-    def parameter_names(self, add_name=False):
-        if add_name:
-            return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
-        return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
-    def _collect_gradient(self, target):
-        import itertools
-        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-    def _get_params(self):
-        import numpy as np
-        # don't overwrite this anymore!
-        if not self.size:
-            return np.empty(shape=(0,), dtype=np.float64)
-        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
-    def _set_params(self, params, update=True):
-        # don't overwrite this anymore!
-        import itertools
-        [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-        self.parameters_changed()
-    def parameters_changed(self):
-        """
-        This method gets called when parameters have changed.
-        Another way of listening to param changes is to
-        add self as a listener to the param, such that
-        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
-        """
-        pass
-    def _notify_parameters_changed(self):
-        self.parameters_changed()
-        if self.has_parent():
-            self._direct_parent_._notify_parameters_changed()
+    def hirarchy_name(self, adjust_for_printing=True):
+        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
+        else: adjust = lambda x: x
+        if self.has_parent():
+            return self._direct_parent_.hirarchy_name() + "." + adjust(self.name)
+        return adjust(self.name)
 class Gradcheckable(Parentable):
-    #===========================================================================
-    # Gradchecking
-    #===========================================================================
+    def __init__(self, *a, **kw):
+        super(Gradcheckable, self).__init__(*a, **kw)
     def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
         if self.has_parent():
             return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
@@ -139,6 +108,7 @@ class Gradcheckable(Parentable):
     def _checkgrad(self, param):
         raise NotImplementedError, "Need log likelihood to check gradient against"
 class Indexable(object):
     def _raveled_index(self):
         raise NotImplementedError, "Need to be able to get the raveled Index"
@@ -157,9 +127,10 @@ class Indexable(object):
         """
         raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
-class Constrainable(Nameable, Indexable, Parameterizable):
-    def __init__(self, name, default_constraint=None):
-        super(Constrainable,self).__init__(name)
+class Constrainable(Nameable, Indexable):
+    def __init__(self, name, default_constraint=None, *a, **kw):
+        super(Constrainable, self).__init__(name=name, *a, **kw)
         self._default_constraint_ = default_constraint
         from index_operations import ParameterIndexOperations
         self.constraints = ParameterIndexOperations()
@@ -167,6 +138,16 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         if self._default_constraint_ is not None:
             self.constrain(self._default_constraint_)
+    def _disconnect_parent(self, constr=None):
+        if constr is None:
+            constr = self.constraints.copy()
+        self.constraints.clear()
+        self.constraints = constr
+        self._direct_parent_ = None
+        self._parent_index_ = None
+        self._connect_fixes()
+        self._notify_parent_change()
     #===========================================================================
     # Fixing Parameters:
     #===========================================================================
@@ -344,5 +325,108 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         return removed
+class Parameterizable(Constrainable):
+    def __init__(self, *args, **kwargs):
+        super(Parameterizable, self).__init__(*args, **kwargs)
+        from GPy.core.parameterization.array_core import ParamList
+        _parameters_ = ParamList()
+        self._added_names_ = set()
+    def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
+        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
+        else: adjust = lambda x: x
+        if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
+        else: names = [adjust(x.name) for x in self._parameters_]
+        if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
+        return names
+    def _add_parameter_name(self, param):
+        pname = adjust_name_for_printing(param.name)
+        # and makes sure to not delete programmatically added parameters
+        if pname in self.__dict__:
+            if not (param is self.__dict__[pname]):
+                if pname in self._added_names_:
+                    del self.__dict__[pname]
+                    self._add_parameter_name(param)
+        else:
+            self.__dict__[pname] = param
+            self._added_names_.add(pname)
+    def _remove_parameter_name(self, param=None, pname=None):
+        assert param is None or pname is None, "can only delete either param by name, or the name of a param"
+        pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name)
+        if pname in self._added_names_:
+            del self.__dict__[pname]
+            self._added_names_.remove(pname)
+        self._connect_parameters()
+    def _name_changed(self, param, old_name):
+        self._remove_parameter_name(None, old_name)
+        self._add_parameter_name(param)
+    def _collect_gradient(self, target):
+        import itertools
+        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+    def _set_gradient(self, g):
+        import itertools
+        [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+    def _get_params(self):
+        import numpy as np
+        # don't overwrite this anymore!
+        if not self.size:
+            return np.empty(shape=(0,), dtype=np.float64)
+        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
+    def _set_params(self, params, update=True):
+        # don't overwrite this anymore!
+        import itertools
+        [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+        self.parameters_changed()
+    def copy(self):
+        """Returns a (deep) copy of the current model"""
+        import copy
+        from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
+        from .array_core import ParamList
+        dc = dict()
+        for k, v in self.__dict__.iteritems():
+            if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names():
+                if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
+                    dc[k] = v.copy()
+                else:
+                    dc[k] = copy.deepcopy(v)
+            if k == '_parameters_':
+                params = [p.copy() for p in v]
+        dc['_direct_parent_'] = None
+        dc['_parent_index_'] = None
+        dc['_parameters_'] = ParamList()
+        dc['constraints'].clear()
+        dc['priors'].clear()
+        dc['size'] = 0
+        s = self.__new__(self.__class__)
+        s.__dict__ = dc
+        for p in params:
+            s.add_parameter(p)
+        return s
+    def _notify_parameters_changed(self):
+        self.parameters_changed()
+        if self.has_parent():
+            self._direct_parent_._notify_parameters_changed()
+    def parameters_changed(self):
+        """
+        This method gets called when parameters have changed.
+        Another way of listening to param changes is to
+        add self as a listener to the param, such that
+        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
+        """
+        pass
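Note: `hirarchy_name` now lives on `Nameable` and builds the dotted path of a node by recursing over `_direct_parent_`, while `Parameterizable.parameter_names` performs the matching downward recursion over `_parameters_`. A toy sketch of the upward recursion (illustrative class and names, not GPy):

class Node(object):
    """Toy node with the parent-pointer naming recursion used above."""
    def __init__(self, name, parent=None):
        self.name, self._direct_parent_ = name, parent
    def has_parent(self):
        return self._direct_parent_ is not None
    def hirarchy_name(self):
        if self.has_parent():
            return self._direct_parent_.hirarchy_name() + "." + self.name
        return self.name

root = Node("gp_regression")
kern = Node("rbf", parent=root)
leaf = Node("lengthscale", parent=kern)
print(leaf.hirarchy_name())  # gp_regression.rbf.lengthscale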

View file

@@ -3,16 +3,15 @@
 import numpy; np = numpy
-import copy
 import cPickle
 import itertools
 from re import compile, _pattern_type
-from param import ParamConcatenation, Param
-from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
-from transformations import __fixed__, FIXED, UNFIXED
+from param import ParamConcatenation
+from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable
+from transformations import __fixed__
 from array_core import ParamList
-class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
+class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
     """
     Parameterized class
@@ -54,8 +53,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
     If you want to operate on all parameters use m[''] to wildcard select all paramters
     and concatenate them. Printing m[''] will result in printing of all parameters in detail.
     """
-    def __init__(self, name=None):
-        super(Parameterized, self).__init__(name=name)
+    def __init__(self, name=None, *a, **kw):
+        super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
         self._in_init_ = True
         self._parameters_ = ParamList()
         self.size = sum(p.size for p in self._parameters_)
@@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         self._fixes_ = None
         self._param_slices_ = []
         self._connect_parameters()
-        self._added_names_ = set()
         del self._in_init_
     def add_parameter(self, param, index=None):
@@ -89,8 +87,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
             self._parameters_.append(param)
         else:
             start = sum(p.size for p in self._parameters_[:index])
-            self.constraints.shift(start, param.size)
-            self.priors.shift(start, param.size)
+            self.constraints.shift_right(start, param.size)
+            self.priors.shift_right(start, param.size)
             self.constraints.update(param.constraints, start)
             self.priors.update(param.priors, start)
             self._parameters_.insert(index, param)
@@ -115,21 +113,18 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         """
         if not param in self._parameters_:
             raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
-        del self._parameters_[param._parent_index_]
+        start = sum([p.size for p in self._parameters_[:param._parent_index_]])
+        self._remove_parameter_name(param)
         self.size -= param.size
-        constr = param.constraints.copy()
-        param.constraints.clear()
-        param.constraints = constr
-        param._direct_parent_ = None
-        param._parent_index_ = None
-        param._connect_fixes()
-        param._notify_parent_change()
-        pname = adjust_name_for_printing(param.name)
-        if pname in self._added_names_:
-            del self.__dict__[pname]
-        self._connect_parameters()
-        #self._notify_parent_change()
+        del self._parameters_[param._parent_index_]
+        param._disconnect_parent()
+        self.constraints.shift_left(start, param.size)
         self._connect_fixes()
+        self._connect_parameters()
+        self._notify_parent_change()
     def _connect_parameters(self):
         # connect parameterlist to this parameterized object
@@ -145,19 +140,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         for i, p in enumerate(self._parameters_):
             p._direct_parent_ = self
             p._parent_index_ = i
-            not_unique = []
             sizes.append(p.size + sizes[-1])
             self._param_slices_.append(slice(sizes[-2], sizes[-1]))
-            pname = adjust_name_for_printing(p.name)
-            # and makes sure to not delete programmatically added parameters
-            if pname in self.__dict__:
-                if isinstance(self.__dict__[pname], (Parameterized, Param)):
-                    if not p is self.__dict__[pname]:
-                        not_unique.append(pname)
-                        del self.__dict__[pname]
-            elif not (pname in not_unique):
-                self.__dict__[pname] = p
-                self._added_names_.add(pname)
+            self._add_parameter_name(p)
     #===========================================================================
     # Pickling operations
@@ -174,19 +159,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
             cPickle.dump(self, f, protocol)
         else:
             cPickle.dump(self, f, protocol)
-    def copy(self):
-        """Returns a (deep) copy of the current model """
-        # dc = dict()
-        # for k, v in self.__dict__.iteritems():
-        #     if k not in ['_highest_parent_', '_direct_parent_']:
-        #         dc[k] = copy.deepcopy(v)
-        # dc = copy.deepcopy(self.__dict__)
-        # dc['_highest_parent_'] = None
-        # dc['_direct_parent_'] = None
-        # s = self.__class__.new()
-        # s.__dict__ = dc
-        return copy.deepcopy(self)
     def __getstate__(self):
         if self._has_get_set_state():
             return self._getstate()
@@ -243,7 +216,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
     # Optimization handles:
     #===========================================================================
     def _get_param_names(self):
-        n = numpy.array([p.name_hirarchical + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
+        n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
         return n
     def _get_param_names_transformed(self):
         n = self._get_param_names()
@@ -265,14 +238,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
         [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
         return p
-    def _name_changed(self, param, old_name):
-        if hasattr(self, old_name) and old_name in self._added_names_:
-            delattr(self, old_name)
-            self._added_names_.remove(old_name)
-        pname = adjust_name_for_printing(param.name)
-        if pname not in self.__dict__:
-            self._added_names_.add(pname)
-            self.__dict__[pname] = param
     #===========================================================================
     # Indexable Handling
     #===========================================================================
@@ -335,10 +300,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         # you can retrieve the original param through this method, by passing
         # the copy here
         return self._parameters_[param._parent_index_]
-    def hirarchy_name(self):
-        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "."
-        return ''
     #===========================================================================
     # Get/set parameters:
     #===========================================================================
@@ -348,13 +309,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         """
         if not isinstance(regexp, _pattern_type): regexp = compile(regexp)
         found_params = []
-        for p in self._parameters_:
-            if regexp.match(p.name) is not None:
+        for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters):
+            if regexp.match(n) is not None:
                 found_params.append(p)
-            if isinstance(p, Parameterized):
-                found_params.extend(p.grep_param_names(regexp))
         return found_params
-        return [param for param in self._parameters_ if regexp.match(param.name) is not None]
     def __getitem__(self, name, paramlist=None):
         if paramlist is None:
             paramlist = self.grep_param_names(name)
@@ -366,36 +325,22 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
                 return ParamConcatenation(paramlist)
             return paramlist[-1]
         return ParamConcatenation(paramlist)
     def __setitem__(self, name, value, paramlist=None):
         try: param = self.__getitem__(name, paramlist)
         except AttributeError as a: raise a
         param[:] = value
-#     def __getattr__(self, name):
-#         return self.__getitem__(name)
-#     def __getattribute__(self, name):
-#         #try:
-#             return object.__getattribute__(self, name)
-#         except AttributeError:
-#             _, a, tb = sys.exc_info()
-#             try:
-#                 return self.__getitem__(name)
-#             except AttributeError:
-#                 raise AttributeError, a.message, tb
     def __setattr__(self, name, val):
         # override the default behaviour, if setting a param, so broadcasting can by used
-        if hasattr(self, "_parameters_"):
-            paramlist = self.grep_param_names(name)
-            if len(paramlist) == 1: self.__setitem__(name, val, paramlist); return
+        if hasattr(self, '_parameters_'):
+            pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
+            if name in pnames: self._parameters_[pnames.index(name)][:] = val; return
         object.__setattr__(self, name, val);
     #===========================================================================
     # Printing:
     #===========================================================================
     def _short(self):
-        # short string to print
-        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
-        else:
-            return adjust_name_for_printing(self.name)
+        return self.hirarchy_name()
     @property
     def flattened_parameters(self):
         return [xi for x in self._parameters_ for xi in x.flattened_parameters]
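Note: `grep_param_names` now matches the regular expression against the flat, recursively built parameter names instead of walking the children itself, and `__setattr__` only broadcasts into direct child parameters found by name. A hedged sketch of the flat-name lookup (stand-in data, not the GPy API):

import re

def grep_param_names_sketch(names, params, pattern):
    """Return the parameters whose flat hierarchical name matches `pattern`.
    `names` and `params` are assumed to be aligned, as produced by
    parameter_names() and flattened_parameters in the code above."""
    regexp = re.compile(pattern)
    return [p for n, p in zip(names, params) if regexp.match(n) is not None]

names = ["rbf.variance", "rbf.lengthscale", "noise.variance"]
params = [1.0, 2.0, 0.1]  # stand-ins for Param objects
print(grep_param_names_sketch(names, params, ".*variance"))  # [1.0, 0.1]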

View file

@@ -3,21 +3,77 @@ Created on 6 Nov 2013
 @author: maxz
 '''
+import numpy as np
 from parameterized import Parameterized
 from param import Param
 from transformations import Logexp
-class Normal(Parameterized):
+class VariationalPrior(object):
+    def KL_divergence(self, variational_posterior):
+        raise NotImplementedError, "override this for variational inference of latent space"
+    def update_gradients_KL(self, variational_posterior):
+        """
+        updates the gradients for mean and variance **in place**
+        """
+        raise NotImplementedError, "override this for variational inference of latent space"
+class NormalPrior(VariationalPrior):
+    def KL_divergence(self, variational_posterior):
+        var_mean = np.square(variational_posterior.mean).sum()
+        var_S = (variational_posterior.variance - np.log(variational_posterior.variance)).sum()
+        return 0.5 * (var_mean + var_S) - 0.5 * variational_posterior.input_dim * variational_posterior.num_data
+    def update_gradients_KL(self, variational_posterior):
+        # dL:
+        variational_posterior.mean.gradient -= variational_posterior.mean
+        variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
+class VariationalPosterior(Parameterized):
+    def __init__(self, means=None, variances=None, name=None, **kw):
+        super(VariationalPosterior, self).__init__(name=name, **kw)
+        self.mean = Param("mean", means)
+        self.variance = Param("variance", variances, Logexp())
+        self.add_parameters(self.mean, self.variance)
+        self.num_data, self.input_dim = self.mean.shape
+        if self.has_uncertain_inputs():
+            assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
+    def has_uncertain_inputs(self):
+        return not self.variance is None
+class NormalPosterior(VariationalPosterior):
     '''
-    Normal distribution for variational approximations.
+    NormalPosterior distribution for variational approximations.
     holds the means and variances for a factorizing multivariate normal distribution
     '''
-    def __init__(self, means, variances, name='latent space'):
-        Parameterized.__init__(self, name=name)
-        self.mean = Param("mean", means)
-        self.variance = Param('variance', variances, Logexp())
-        self.add_parameters(self.mean, self.variance)
+    def plot(self, *args):
+        """
+        Plot latent space X in 1D:
+        See GPy.plotting.matplot_dep.variational_plots
+        """
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ...plotting.matplot_dep import variational_plots
+        return variational_plots.plot(self,*args)
+class SpikeAndSlabPosterior(VariationalPosterior):
+    '''
+    The SpikeAndSlab distribution for variational approximations.
+    '''
+    def __init__(self, means, variances, binary_prob, name='latent space'):
+        """
+        binary_prob : the probability of the distribution on the slab part.
+        """
+        super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
+        self.gamma = Param("binary_prob",binary_prob,)
+        self.add_parameter(self.gamma)
     def plot(self, *args):
         """

View file

@@ -5,8 +5,9 @@ import numpy as np
 from ..util.linalg import mdot
 from gp import GP
 from parameterization.param import Param
-from GPy.inference.latent_function_inference import var_dtc
+from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
+from parameterization.variational import NormalPosterior
 class SparseGP(GP):
     """
@@ -45,45 +46,44 @@ class SparseGP(GP):
         self.Z = Param('inducing inputs', Z)
         self.num_inducing = Z.shape[0]
-        if not (X_variance is None):
-            assert X_variance.shape == X.shape
-        self.X_variance = X_variance
-        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
+        self.q = NormalPosterior(X, X_variance)
+        GP.__init__(self, self.q.mean, Y, kernel, likelihood, inference_method=inference_method, name=name)
         self.add_parameter(self.Z, index=0)
         self.parameters_changed()
-    def _update_gradients_Z(self, add=False):
-        #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
-        if not self.Z.is_fixed:
-            if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-        if self.X_variance is None:
-            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
-        else:
-            self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-            self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
+    def has_uncertain_inputs(self):
+        return self.q.has_uncertain_inputs()
     def parameters_changed(self):
+        if self.has_uncertain_inputs():
+            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference_latent(self.kern, self.q, self.Z, self.likelihood, self.Y)
+        else:
             self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._update_gradients_Z(add=False)
+        self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
+        if self.has_uncertain_inputs():
+            self.kern.update_gradients_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
+            self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
+        else:
+            self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
+            self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
-    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
         Make a prediction for the latent function values
         """
         if X_variance_new is None:
-            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
+            Kx = self.kern.K(self.Z, Xnew)
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
             if full_cov:
-                Kxx = self.kern.K(Xnew, which_parts=which_parts)
-                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
+                Kxx = self.kern.K(Xnew)
+                #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
+                var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
             else:
-                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
-                var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
+                Kxx = self.kern.Kdiag(Xnew)
+                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
         else:
-            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
-            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
+            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
             mu = np.dot(Kx, self.Cpsi1V)
             if full_cov:
                 raise NotImplementedError, "TODO"
@@ -91,7 +91,7 @@ class SparseGP(GP):
             Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
             psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
             var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
-        return mu, var[:,None]
+        return mu, var
     def _getstate(self):
@@ -101,12 +101,10 @@ class SparseGP(GP):
         """
         return GP._getstate(self) + [self.Z,
                 self.num_inducing,
-                self.has_uncertain_inputs,
                 self.X_variance]
     def _setstate(self, state):
         self.X_variance = state.pop()
-        self.has_uncertain_inputs = state.pop()
         self.num_inducing = state.pop()
         self.Z = state.pop()
         GP._setstate(self, state)
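Note: the rewritten diagonal branch of `SparseGP._raw_predict` broadcasts over a possible third (per-output) dimension of `woodbury_inv` via `np.atleast_3d(...)`; the one-liner is just Kdiag(Xnew) minus the per-output quadratic form diag(Kx^T Wi Kx). A numpy sketch checking the broadcast against an explicit loop (shapes and names are illustrative):

import numpy as np

M, Nnew, P = 4, 3, 2                 # inducing points, test points, outputs
Kx = np.random.randn(M, Nnew)        # plays the role of K(Z, Xnew)
Kxx = np.random.rand(Nnew)           # plays the role of Kdiag(Xnew)
Wi = np.random.randn(M, M, P)        # one Woodbury inverse per output

# broadcast form used above: (P, M, Nnew) * (1, M, Nnew), summed over the M axis
var_broadcast = (Kxx - np.sum(np.dot(np.atleast_3d(Wi).T, Kx) * Kx[None, :, :], 1)).T

# explicit per-output loop computing diag(Kx.T Wi[:,:,p] Kx)
var_loop = np.column_stack([Kxx - np.einsum('in,ij,jn->n', Kx, Wi[:, :, p], Kx)
                            for p in range(P)])

print(np.allclose(var_broadcast, var_loop))  # True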

View file

@ -1,9 +1,9 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np import numpy as _np
default_seed = _np.random.seed(123344) #default_seed = _np.random.seed(123344)
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False): def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
""" """
model for testing purposes. Samples from a GP with rbf kernel and learns model for testing purposes. Samples from a GP with rbf kernel and learns
the samples with a new kernel. Normally not for optimization, just model cheking the samples with a new kernel. Normally not for optimization, just model cheking
@ -21,19 +21,20 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
# generate GPLVM-like data # generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim) X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim) #lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) #k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
#+ GPy.kern.white(input_dim, 0.01) ##+ GPy.kern.white(input_dim, 0.01)
) #)
k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
p = .3 p = .3
@ -41,14 +42,14 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
if nan: if nan:
m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData() m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
m.Y[_np.random.binomial(1,p,size=(Y.shape))] = _np.nan m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan
m.parameters_changed() m.parameters_changed()
#=========================================================================== #===========================================================================
# randomly obstruct data with percentage p # randomly obstruct data with percentage p
#=========================================================================== #===========================================================================
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales #m.lengthscales = lengthscales
if plot: if plot:
import matplotlib.pyplot as pb import matplotlib.pyplot as pb
@ -73,7 +74,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
data = GPy.util.datasets.oil_100() data = GPy.util.datasets.oil_100()
Y = data['X'] Y = data['X']
# create simple GP model # create simple GP model
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
m = GPy.models.GPLVM(Y, 6, kernel=kernel) m = GPy.models.GPLVM(Y, 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1) m.data_labels = data['Y'].argmax(axis=1)
if optimize: m.optimize('scg', messages=verbose) if optimize: m.optimize('scg', messages=verbose)
@ -88,7 +89,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
Y = Y - Y.mean(0) Y = Y - Y.mean(0)
Y /= Y.std(0) Y /= Y.std(0)
# Create the model # Create the model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q)
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
@ -138,7 +139,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
(1 - var))) + .001 (1 - var))) + .001
Z = _np.random.permutation(X)[:num_inducing] Z = _np.random.permutation(X)[:num_inducing]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c m.data_colors = c
@ -158,46 +159,51 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy import GPy
from GPy.likelihoods import Gaussian
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, 1., [.1] * Q, ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N] Y = data['X'][:N]
Yn = Gaussian(Y, normalize=True) m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
m['noise'] = Yn.Y.var() / 100. m['.*noise.var'] = Y.var() / 100.
if optimize: if optimize:
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
if plot: if plot:
y = m.likelihood.Y[0, :] y = m.Y[0, :]
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes) m.plot_latent(ax=latent_axes)
data_show = GPy.util.visualize.vector_show(y) data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close(fig) plt.close(fig)
return m return m
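Note the initialisation `m['.*noise.var'] = Y.var() / 100.` above: in the params branch, parameters are addressed by a regular expression on their printed names, and starting the Gaussian noise at roughly 1% of the data variance is a common heuristic so the optimiser does not explain everything as noise from the start. A rough sketch of the pattern (the default kernel and the exact parameter name are assumptions, not taken from this diff):

import numpy as np
import GPy

Y = np.random.randn(50, 12)
m = GPy.models.BayesianGPLVM(Y, 5, num_inducing=10)

# address parameters by regex on their names (params-branch convention)
m['.*noise.var'] = Y.var() / 100.   # start the noise at ~1% of the data variance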
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
_np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None] x = _np.linspace(0, 4 * _np.pi, N)[:, None]
s1 = _np.vectorize(lambda x: _np.sin(x)) s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x)))
s2 = _np.vectorize(lambda x: _np.cos(x)) s2 = _np.vectorize(lambda x: _np.cos(x)**2)
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
sS = _np.vectorize(lambda x: _np.sin(2 * x)) sS = _np.vectorize(lambda x: x*_np.sin(x))
s1 = s1(x) s1 = s1(x)
s2 = s2(x) s2 = s2(x)
s3 = s3(x) s3 = s3(x)
sS = sS(x) sS = sS(x)
S1 = _np.hstack([s1, sS]) s1 -= s1.mean(); s1 /= s1.std(0)
s2 -= s2.mean(); s2 /= s2.std(0)
s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0)
S1 = _np.hstack([s1, s2, sS])
S2 = _np.hstack([s2, s3, sS]) S2 = _np.hstack([s2, s3, sS])
S3 = _np.hstack([s3, sS]) S3 = _np.hstack([s3, sS])
@ -268,7 +274,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
if optimize: if optimize:
@ -288,16 +294,18 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .3, size=Y.shape) inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
m.inference_method = VarDTCMissingData() m.inference_method = VarDTCMissingData()
m.Y[inan] = _np.nan m.Y[inan] = _np.nan
m.q.variance *= .1
m.parameters_changed() m.parameters_changed()
m.Yreal = Y
if optimize: if optimize:
print "Optimizing model:" print "Optimizing model:"
@ -318,7 +326,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist] likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2))
m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -345,15 +353,15 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
m = GPy.models.GPLVM(Yn, Q) m = GPy.models.GPLVM(Yn, Q)
# optimize # optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped())
if optimize: m.optimize('scg', messages=verbose, max_iters=1000) if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
if plot: if plot:
ax = m.plot_latent(which_indices=(0, 1)) ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -372,8 +380,8 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
if plot: if plot:
ax = m.plot_latent(which_indices=(0, 1)) ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -388,8 +396,8 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru
Y = data['Y'][range[0]:range[1], :].copy() Y = data['Y'][range[0]:range[1], :].copy()
if plot: if plot:
y = Y[0, :] y = Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.data_play(Y, data_show, frame_rate) GPy.plotting.matplot_dep.visualize.data_play(Y, data_show, frame_rate)
return Y return Y
def stick(kernel=None, optimize=True, verbose=True, plot=True): def stick(kernel=None, optimize=True, verbose=True, plot=True):
@ -400,12 +408,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
# optimize # optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot and GPy.util.visualize.visual_available: if plot and GPy.plotting.matplot_dep.visualize.visual_available:
plt.clf plt.clf
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -419,12 +427,12 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2) mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot and GPy.util.visualize.visual_available: if plot and GPy.plotting.matplot_dep.visualize.visual_available:
plt.clf plt.clf
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -435,16 +443,16 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1() data = GPy.util.datasets.osu_run1()
# optimize # optimize
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot and GPy.util.visualize.visual_available: if plot and GPy.plotting.matplot_dep.visualize.visual_available:
plt.clf plt.clf
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
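The two BC-GPLVM examples above differ only in the back-constraint mapping: a linear map of the data versus a kernel map, so that the latent coordinates are forced to be a smooth function of the observed data. A stand-alone numpy sketch of the kernel back-constraint idea (the RBF gram matrix and the weights A are illustrative, not this diff's API):

import numpy as np

def rbf_gram(Y, lengthscale=5.):
    # squared distances between observed data points
    d2 = np.sum((Y[:, None, :] - Y[None, :, :])**2, axis=-1)
    return np.exp(-0.5 * d2 / lengthscale**2)

N, D, Q = 40, 10, 2
Y = np.random.randn(N, D)
A = np.random.randn(N, Q) * 0.1        # back-constraint weights (learned in practice)

# latent coordinates constrained to be a smooth function of Y
X = rbf_gram(Y).dot(A)                 # shape (N, Q)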
@ -470,7 +478,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1() data = GPy.util.datasets.osu_run1()
Q = 6 Q = 6
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
@ -481,8 +489,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent()
y = m.likelihood.Y[0, :].copy() y = m.likelihood.Y[0, :].copy()
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -501,8 +509,8 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
if plot: if plot:
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
lvm_visualizer.close() lvm_visualizer.close()
View file
@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True):
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
#build the kernel #build the kernel
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalize(2,1) k2 = GPy.kern.coregionalize(2,1)
k = k1**k2 k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k) m = GPy.models.GPRegression(X, Y, kernel=k)
@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True):
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 # Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
# Y = np.vstack((Y1, Y2)) # Y = np.vstack((Y1, Y2))
# #
# k1 = GPy.kern.rbf(1) # k1 = GPy.kern.RBF(1)
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) # m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
# m.constrain_fixed('.*rbf_var', 1.) # m.constrain_fixed('.*rbf_var', 1.)
# m.optimize(max_iters=100) # m.optimize(max_iters=100)
@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
np.random.randint(0, 4, num_inducing)[:, None])) np.random.randint(0, 4, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.RBF(1)
k2 = GPy.kern.coregionalize(output_dim=5, rank=5) k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
k = k1**k2 k = k1**k2
@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
data['Y'] = data['Y'] - np.mean(data['Y']) data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF)
if plot: if plot:
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
ax = pb.gca() ax = pb.gca()
@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
optim_point_y = np.empty(2) optim_point_y = np.empty(2)
np.random.seed(seed=seed) np.random.seed(seed=seed)
for i in range(0, model_restarts): for i in range(0, model_restarts):
# kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
m['noise_variance'] = np.random.uniform(1e-3, 1) m['noise_variance'] = np.random.uniform(1e-3, 1)
@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
ax.set_ylim(ylim) ax.set_ylim(ylim)
return m # (models, lls) return m # (models, lls)
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
""" """
Evaluate the GP objective function for a given data set for a range of Evaluate the GP objective function for a given data set for a range of
signal to noise ratios and a range of lengthscales. signal to noise ratios and a range of lengthscales.
@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
optimizer='scg' optimizer='scg'
x_len = 30 x_len = 30
X = np.linspace(0, 10, x_len)[:, None] X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
kern = GPy.kern.rbf(1) kern = GPy.kern.RBF(1)
poisson_lik = GPy.likelihoods.Poisson() poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
if kernel_type == 'linear': if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1) kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv': elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else: else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1) kernel = GPy.kern.RBF(X.shape[1], ARD=1)
kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
m = GPy.models.GPRegression(X, Y, kernel) m = GPy.models.GPRegression(X, Y, kernel)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior) # m.set_prior('.*lengthscale',len_prior)
@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
if kernel_type == 'linear': if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1) kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv': elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else: else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1) kernel = GPy.kern.RBF(X.shape[1], ARD=1)
#kernel += GPy.kern.bias(X.shape[1]) #kernel += GPy.kern.bias(X.shape[1])
X_variance = np.ones(X.shape) * 0.5 X_variance = np.ones(X.shape) * 0.5
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance) m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
X = np.random.uniform(-3., 3., (num_samples, 1)) X = np.random.uniform(-3., 3., (num_samples, 1))
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05 Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
# construct kernel # construct kernel
rbf = GPy.kern.rbf(1) rbf = GPy.kern.RBF(1)
# create simple GP Model # create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
m.checkgrad(verbose=1) m.checkgrad(verbose=1)
@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
Y[inan] = np.nan Y[inan] = np.nan
# construct kernel # construct kernel
rbf = GPy.kern.rbf(2) rbf = GPy.kern.RBF(2)
# create simple GP Model # create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
# likelihood = GPy.likelihoods.Gaussian(Y) # likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3., 3., (7, 1)) Z = np.random.uniform(-3., 3., (7, 1))
k = GPy.kern.rbf(1) k = GPy.kern.RBF(1)
# create simple GP Model - no input uncertainty on this one # create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z) m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z)
if optimize: if optimize:
m.optimize('scg', messages=1, max_iters=max_iters) m.optimize('scg', messages=1, max_iters=max_iters)
@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
print m print m
# the same Model with uncertainty # the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S) m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
if optimize: if optimize:
m.optimize('scg', messages=1, max_iters=max_iters) m.optimize('scg', messages=1, max_iters=max_iters)
if plot: if plot:
View file
@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference
is not tractable. We then resort to a Laplace approximation (laplace.py) or is not tractable. We then resort to a Laplace approximation (laplace.py) or
expectation propagation (ep.py). expectation propagation (ep.py).
The inference methods return a "Posterior" instance, which is a simple The inference methods return a
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
instance, which is a simple
structure which contains a summary of the posterior. The model classes can then structure which contains a summary of the posterior. The model classes can then
use this posterior object for making predictions, optimizing hyper-parameters, use this posterior object for making predictions, optimizing hyper-parameters,
etc. etc.
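For the Gaussian case, the "summary of the posterior" amounts to the usual Woodbury quantities plus the log marginal likelihood. A minimal plain-numpy sketch of what an exact-inference object computes for a single output column (illustrative only, not this module's actual implementation):

import numpy as np

def exact_gaussian_inference(K, y, noise_var):
    """Sketch of exact inference for a Gaussian likelihood: returns the
    Woodbury quantities summarising the posterior and the log marginal
    likelihood (y is a 1-D vector here)."""
    N = K.shape[0]
    Ky = K + noise_var * np.eye(N)
    L = np.linalg.cholesky(Ky)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))           # woodbury vector
    Ky_inv = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(N)))  # woodbury inverse
    log_marginal = (-0.5 * y.dot(alpha)
                    - np.log(np.diag(L)).sum()
                    - 0.5 * N * np.log(2 * np.pi))
    return alpha, Ky_inv, log_marginal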
@ -29,3 +31,15 @@ expectation_propagation = 'foo' # TODO
from GPy.inference.latent_function_inference.var_dtc import VarDTC from GPy.inference.latent_function_inference.var_dtc import VarDTC
from dtc import DTC from dtc import DTC
from fitc import FITC from fitc import FITC
# class FullLatentFunctionData(object):
#
#
# class LatentFunctionInference(object):
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"
View file
@ -32,7 +32,7 @@ class DTC(object):
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance) beta = 1./np.squeeze(likelihood.variance)
if beta.size > 1: if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementatino of DTC" raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z) Kmm = kern.K(Z)
Knn = kern.Kdiag(X) Knn = kern.Kdiag(X)
@ -89,4 +89,85 @@ class DTC(object):
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
class vDTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance)
if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
U = Knm
Uy = np.dot(U.T,Y)
#factor Kmm
Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
A_ = tdot(LiUTbeta)
trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_))
A = A_ + np.eye(num_inducing)
# factor A
LA = jitchol(A)
# back substitute to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T)
#compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \
-0.5*beta*np.sum(np.square(Y)) + \
0.5*np.sum(np.square(b)) + \
trace_term
# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P
LAL = Li.T.dot(A).dot(Li)
dL_dK = Kmmi - 0.5*(vvT_P + LAL)
# Compute dL_dU
vY = np.dot(v.reshape(-1,1),Y.T)
#dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
dL_dU *= beta
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
dL_dR -=beta*trace_term/num_data
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
return post, log_marginal, grad_dict
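The `trace_term` above appears to be the variational correction -beta/2 * tr(Kff - Qff), where Qff = Kfu Kuu^-1 Kuf is the Nystroem approximation built from the inducing points; it penalises inducing-point placements that summarise the data badly. A small numpy sketch of that quantity (the RBF kernel here is illustrative):

import numpy as np

def rbf(A, B, lengthscale=1.0, variance=1.0):
    d2 = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2 * A.dot(B.T)
    return variance * np.exp(-0.5 * d2 / lengthscale**2)

X = np.random.uniform(-3, 3, (100, 1))    # data inputs
Z = np.random.uniform(-3, 3, (10, 1))     # inducing inputs

Knn_diag = np.full(X.shape[0], 1.0)       # RBF diagonal is just the variance
Kmm = rbf(Z, Z) + 1e-6 * np.eye(len(Z))   # jitter, as in the code above
Knm = rbf(X, Z)

# tr(Kff - Qff): how much signal the inducing points fail to capture
Qnn_diag = np.sum(Knm.dot(np.linalg.inv(Kmm)) * Knm, axis=1)
trace_gap = Knn_diag.sum() - Qnn_diag.sum()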
View file
@ -3,390 +3,91 @@ from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood from likelihood import likelihood
class EP(likelihood): class EP(object):
def __init__(self,data,noise_model): def __init__(self, epsilon=1e-6, eta=1., delta=1.):
"""
Expectation Propagation
:param data: data to model
:type data: numpy array
:param noise_model: noise distribution
:type noise_model: A GPy noise model
"""
self.noise_model = noise_model
self.data = data
self.num_data, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.num_params = 0
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.num_data)
self.v_tilde = np.zeros(self.num_data)
#initial values for the GP variables
self.Y = np.zeros((self.num_data,1))
self.covariance_matrix = np.eye(self.num_data)
self.precision = np.ones(self.num_data)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
super(EP, self).__init__()
def restart(self):
self.tau_tilde = np.zeros(self.num_data)
self.v_tilde = np.zeros(self.num_data)
self.Y = np.zeros((self.num_data,1))
self.covariance_matrix = np.eye(self.num_data)
self.precision = np.ones(self.num_data)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def predictive_values(self,mu,var,full_cov,**noise_args):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model.predictive_values(mu,var,**noise_args)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
def _get_params(self):
#return np.zeros(0)
return self.noise_model._get_params()
def _get_param_names(self):
#return []
return self.noise_model._get_param_names()
def _set_params(self,p):
#pass # TODO: the EP likelihood might want to take some parameters...
self.noise_model._set_params(p)
def _gradients(self,partial):
#return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
return self.noise_model._gradients(partial)
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Z += 0.5*self.num_data*np.log(2*np.pi)
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT)
def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006. For nomenclature see Rasmussen & Williams 2006.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float :type epsilon: float
:param power_ep: Power EP parameters :param eta: Power EP thing TODO: Ricardo: what, exactly?
:type power_ep: list of floats :type eta: float64
:param delta: Power EP thing TODO: Ricardo: what, exactly?
:type delta: float64
""" """
self.epsilon = epsilon self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.eta, self.delta = power_ep self.reset()
def reset(self):
self.old_mutilde, self.old_vtilde = None, None
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
K = kern.K(X)
mu, Sigma, mu_tilde, tau_tilde = self.expectation_propagation(K, Y, Y_metadata, likelihood)
def expectation_propagation(self, K, Y, Y_metadata, likelihood):
num_data, data_dim = Y.shape
assert data_dim == 1, "This EP method only works for 1D outputs"
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.num_data) mu = np.zeros(num_data)
Sigma = K.copy() Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments #Initial values - Marginal moments
z = np.empty(self.num_data,dtype=float) Z_hat = np.empty(num_data,dtype=np.float64)
self.Z_hat = np.empty(self.num_data,dtype=float) mu_hat = np.empty(num_data,dtype=np.float64)
phi = np.empty(self.num_data,dtype=float) sigma2_hat = np.empty(num_data,dtype=np.float64)
mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.num_data,dtype=float) #initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else:
assert self.old_mutilde.size == num_data, "data size mismatch: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
#Approximation #Approximation
epsilon_np1 = self.epsilon + 1. epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1. epsilon_np2 = self.epsilon + 1.
self.iterations = 0 iterations = 0; tau_tilde_old, v_tilde_old = tau_tilde.copy(), v_tilde.copy()  # init convergence monitors
self.np1 = [self.tau_tilde.copy()] while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon):
self.np2 = [self.v_tilde.copy()] update_order = np.random.permutation(num_data)
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.num_data)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] += Delta_tau tau_tilde[i] += delta_tau
self.v_tilde[i] += Delta_v v_tilde[i] += delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
mu = np.dot(Sigma,self.v_tilde) mu = np.dot(Sigma, v_tilde)
self.iterations += 1 iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K #(re) compute Sigma and mu using full Cholesky decomposition
B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K tau_tilde_root = np.sqrt(tau_tilde)
Sroot_tilde_K = tau_tilde_root[:,None] * K
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B) L = jitchol(B)
V,info = dtrtrs(L,Sroot_tilde_K,lower=1) V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V) Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde) mu = np.dot(Sigma,v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables() #monitor convergence
epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old))
epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]): return mu, Sigma, mu_tilde, tau_tilde
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
"""
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
mu = np.zeros(self.num_data)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.num_data)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
"""
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0]
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
self.w = np.zeros(self.num_data)
self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.num_data)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.num_data)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,num_inducing)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.Gamma)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
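The core of the EP loop above is the site update: form the cavity by removing a fraction of one site from the current marginal, moment-match the tilted distribution (via `likelihood.moments_match` in the new code), and fold the change back into the site and the joint posterior. A self-contained sketch of one such update for a Gaussian likelihood, where the moments are available in closed form (illustrative only, not the class's API):

import numpy as np

def ep_site_update(i, mu, Sigma, tau_tilde, v_tilde, y, noise_var, eta=1.0, delta=1.0):
    """One power-EP site update for a Gaussian likelihood."""
    # cavity: remove a fraction eta of site i from the current marginal
    tau_cav = 1. / Sigma[i, i] - eta * tau_tilde[i]
    v_cav = mu[i] / Sigma[i, i] - eta * v_tilde[i]
    m_cav, s_cav = v_cav / tau_cav, 1. / tau_cav

    # moment-match the tilted distribution N(y|f, noise_var) * N(f|m_cav, s_cav)
    s_hat = 1. / (1. / s_cav + 1. / noise_var)
    m_hat = s_hat * (m_cav / s_cav + y / noise_var)

    # fold the change back into the natural parameters of site i
    delta_tau = delta / eta * (1. / s_hat - 1. / Sigma[i, i])
    delta_v = delta / eta * (m_hat / s_hat - mu[i] / Sigma[i, i])
    tau_tilde[i] += delta_tau
    v_tilde[i] += delta_v

    # rank-one update of the joint posterior (Sigma, mu)
    si = Sigma[:, i].copy()
    Sigma -= (delta_tau / (1. + delta_tau * Sigma[i, i])) * np.outer(si, si)
    mu[:] = Sigma.dot(v_tilde)
    return tau_tilde, v_tilde, mu, Sigma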
View file
@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify, jitchol, dtrtri from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
class Posterior(object): class Posterior(object):
""" """
@ -81,13 +81,17 @@ class Posterior(object):
def covariance(self): def covariance(self):
if self._covariance is None: if self._covariance is None:
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
return self._covariance #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
return self._covariance.squeeze()
@property @property
def precision(self): def precision(self):
if self._precision is None: if self._precision is None:
self._precision, _, _, _ = pdinv(self.covariance) cov = np.atleast_3d(self.covariance)
self._precision = np.zeros(cov.shape) # if one covariance per dimension
for p in xrange(cov.shape[-1]):
self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
return self._precision return self._precision
@property @property
@ -95,7 +99,10 @@ class Posterior(object):
if self._woodbury_chol is None: if self._woodbury_chol is None:
#compute woodbury chol from #compute woodbury chol from
if self._woodbury_inv is not None: if self._woodbury_inv is not None:
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv) winv = np.atleast_3d(self._woodbury_inv)
self._woodbury_chol = np.zeros(winv.shape)
for p in xrange(winv.shape[-1]):
self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
#Li = jitchol(self._woodbury_inv) #Li = jitchol(self._woodbury_inv)
#self._woodbury_chol, _ = dtrtri(Li) #self._woodbury_chol, _ = dtrtri(Li)
#W, _, _, _, = pdinv(self._woodbury_inv) #W, _, _, _, = pdinv(self._woodbury_inv)
@ -129,7 +136,7 @@ class Posterior(object):
@property @property
def K_chol(self): def K_chol(self):
if self._K_chol is None: if self._K_chol is None:
self._K_chol = dportf(self._K) self._K_chol = jitchol(self._K)
return self._K_chol return self._K_chol
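The params-branch Posterior allows one Woodbury inverse per output dimension (a 3-D array with the output index last), which is why `precision` and `woodbury_chol` above loop over the trailing axis. The single-output formula K - K*W*K shown in the commented-out covariance line generalises per output dimension; a small numpy sketch of that convention (shapes are hypothetical):

import numpy as np

N, D = 6, 3                      # data points, output dimensions
K = np.eye(N)                    # stand-in prior covariance
W = np.dstack([np.eye(N) * (d + 1) for d in range(D)])   # (N, N, D): one Woodbury inverse per output

# posterior covariance per output dimension: K - K W_d K
cov = np.empty((N, N, D))
for d in range(D):
    cov[:, :, d] = K - K.dot(W[:, :, d]).dot(K)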
View file
@ -43,9 +43,20 @@ class VarDTC(object):
return Y * prec # TODO: cache this, and make it efficient return Y * prec # TODO: cache this, and make it efficient
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, X_variance, Z, likelihood, Y):
"""Inference for normal sparseGP"""
uncertain_inputs = False
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
"""Inference for GPLVM with uncertain inputs"""
uncertain_inputs = True
psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def _inference(self, kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs):
#see whether we're using variational uncertain inputs #see whether we're using variational uncertain inputs
uncertain_inputs = not (X_variance is None)
_, output_dim = Y.shape _, output_dim = Y.shape
@ -60,20 +71,87 @@ class VarDTC(object):
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
# do the inference: # do the inference:
dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \ het_noise = beta.size > 1
psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = _do_inference_on( num_inducing = Z.shape[0]
kern, X, X_variance, Z, likelihood, num_data = Y.shape[0]
uncertain_inputs, output_dim, # kernel computations, using BGPLVM notation
beta, VVT_factor, trYYT) Kmm = kern.K(Z)
likelihood.update_gradients(partial_for_likelihood) Lm = jitchol(Kmm)
# The rather complex computations of A
if uncertain_inputs:
if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
#if 0:
# evals, evecs = linalg.eigh(psi2_beta)
# clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
# if not np.array_equal(evals, clipped_evals):
# pass # print evals
# tmp = evecs * np.sqrt(clipped_evals)
# tmp = tmp.T
# no backsubstitution because of bound explosion on tr(A) if not...
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substitute C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit)
#put the gradients in the right places
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT)
#likelihood.update_gradients(partial_for_likelihood)
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2} grad_dict = {'dL_dKmm': dL_dKmm,
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) 'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'partial_for_likelihood':partial_for_likelihood}
else: else:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1} grad_dict = {'dL_dKmm': dL_dKmm,
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) 'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'partial_for_likelihood':partial_for_likelihood}
#get sufficient things for posterior prediction #get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop? #TODO: do we really want to do this in the loop?
@ -125,21 +203,33 @@ class VarDTCMissingData(object):
return [Y], [(Y**2).sum()] return [Y], [(Y**2).sum()]
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, X_variance, Z, likelihood, Y):
"""Inference for normal sparseGP"""
uncertain_inputs = False
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
"""Inference for GPLVM with uncertain inputs"""
uncertain_inputs = True
psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def _inference(self, kern, psi0_all, psi1_all, psi2_all, Z, likelihood, Y, uncertain_inputs):
Ys, traces = self._Y(Y) Ys, traces = self._Y(Y)
beta_all = 1./likelihood.variance beta_all = 1./likelihood.variance
uncertain_inputs = not (X_variance is None)
het_noise = beta_all.size != 1 het_noise = beta_all.size != 1
import itertools import itertools
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
dL_dpsi0_all = np.zeros(X.shape[0]) dL_dpsi0_all = np.zeros(Y.shape[0])
dL_dpsi1_all = np.zeros((X.shape[0], num_inducing)) dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing)) dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
partial_for_likelihood = 0 partial_for_likelihood = 0
LB_all = Cpsi1Vf_all = 0 woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
dL_dKmm = 0 dL_dKmm = 0
log_marginal = 0 log_marginal = 0
@ -148,11 +238,10 @@ class VarDTCMissingData(object):
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm) if uncertain_inputs: LmInv = dtrtri(Lm)
# kernel computations, using BGPLVM notation
psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
VVT_factor_all = np.empty(Y.shape) VVT_factor_all = np.empty(Y.shape)
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
if not full_VVT_factor:
psi1V = np.dot(Y.T*beta_all, psi1_all).T
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
if het_noise: beta = beta_all[ind] if het_noise: beta = beta_all[ind]
@ -183,10 +272,10 @@ class VarDTCMissingData(object):
LB = jitchol(B) LB = jitchol(B)
psi1Vf = psi1.T.dot(VVT_factor) psi1Vf = psi1.T.dot(VVT_factor)
_LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
LB_all += LB Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm # data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf) delit = tdot(_LBi_Lmi_psi1Vf)
@ -219,92 +308,67 @@ class VarDTCMissingData(object):
psi0, psi1, beta, psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT) data_fit, num_data, output_dim, trYYT)
# gradients: if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
likelihood.update_gradients(partial_for_likelihood)
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all}
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
else: else:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all} print 'foobar'
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#import ipdb;ipdb.set_trace()
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
from ...util import diag
diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
# gradients:
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0_all,
'dL_dpsi1':dL_dpsi1_all,
'dL_dpsi2':dL_dpsi2_all,
'partial_for_likelihood':partial_for_likelihood}
else:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0_all,
'dL_dKnm':dL_dpsi1_all,
'partial_for_likelihood':partial_for_likelihood}
#get sufficient things for posterior prediction #get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop? #TODO: do we really want to do this in the loop?
if full_VVT_factor: #if not full_VVT_factor:
woodbury_vector = Cpsi1Vf_all # == Cpsi1V # print 'foobar'
else: # psi1V = np.dot(Y.T*beta_all, psi1_all).T
print 'foobar' # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
psi1V = np.dot(Y.T*beta_all, psi1_all).T # tmp, _ = dpotrs(LB_all, tmp, lower=1)
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
tmp, _ = dpotrs(LB_all, tmp, lower=1) #import ipdb;ipdb.set_trace()
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) #Bi, _ = dpotri(LB_all, lower=1)
#symmetrify(Bi)
#Bi = -dpotri(LB_all, lower=1)[0]
#from ...util import diag
#diag.add(Bi, 1)
Bi, _ = dpotri(LB_all, lower=1) #woodbury_inv = backsub_both_sides(Lm, Bi)
symmetrify(Bi)
Bi = -dpotri(LB_all, lower=1)[0]
from ...util import diag
diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi) post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm): def _compute_psi(kern, X, X_variance, Z):
# The rather complex computations of A
if uncertain_inputs:
if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
#if 0:
# evals, evecs = linalg.eigh(psi2_beta)
# clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
# if not np.array_equal(evals, clipped_evals):
# pass # print evals
# tmp = evecs * np.sqrt(clipped_evals)
# tmp = tmp.T
# no backsubstitution because of bound explosion on tr(A) if not...
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
return A
def _compute_psi(kern, X, X_variance, Z, uncertain_inputs):
if uncertain_inputs:
psi0 = kern.psi0(Z, X, X_variance)
psi1 = kern.psi1(Z, X, X_variance)
psi2 = kern.psi2(Z, X, X_variance)
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
psi2 = None
return psi0, psi1, psi2
def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs):
Kmm = kern.K(Z)
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
return Kmm, psi0, psi1, psi2
def _compute_psi_latent(kern, posterior_variational, Z):
psi0 = kern.psi0(Z, posterior_variational)
psi1 = kern.psi1(Z, posterior_variational)
psi2 = kern.psi2(Z, posterior_variational)
return psi0, psi1, psi2
def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf):
# Compute dL_dKmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
dL_dKmm = backsub_both_sides(Lm, delit)
return DBi_plus_BiPBi, data_fit, dL_dKmm
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
@ -329,15 +393,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_psi1Vf(Lm, LB, psi1Vf):
# back substitute C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
return _LBi_Lmi_psi1Vf, Cpsi1Vf
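# Equivalently, Cpsi1Vf = (Lm B Lm^T)^{-1} psi1^T V; in the certain-input case this
# is (Kmm + psi1^T diag(beta) psi1)^{-1} psi1Vf, obtained here with four triangular
# solves rather than an explicit inverse.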
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
# the partial derivative vector for the likelihood
if likelihood.size == 0:
@ -379,35 +434,3 @@ def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
return log_marginal
def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT):
het_noise = beta.size > 1 # more than one noise precision means heteroscedastic noise
num_inducing = Z.shape[0]
num_data = X.shape[0]
# kernel computations, using BGPLVM notation
Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs)
#factor Kmm # TODO: cache?
Lm = jitchol(Kmm)
A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm)
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
_LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)
# data fit and derivative of L w.r.t. Kmm
DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim,
Lm, B, LB, _LBi_Lmi_psi1Vf)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit)
#put the gradients in the right places
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT)
return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood

View file

@ -1,9 +1,34 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import *
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:
pass
from kern import *
from _src.rbf import RBF
from _src.white import White
from _src.kern import Kern
from _src.linear import Linear
from _src.bias import Bias
from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
#import coregionalize
#import exponential
#import eq_ode1
#import finite_dimensional
#import fixed
#import gibbs
#import hetero
#import hierarchical
#import independent_outputs
#import linear
#import Matern32
#import Matern52
#import mlp
#import ODE_1
#import periodic_exponential
#import periodic_Matern32
#import periodic_Matern52
#import poly
#import prod_orthogonal
#import prod
#import rational_quadratic
#import rbfcos
#import rbf
#import rbf_inv
#import spline
#import symmetric
#import white
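# A minimal sketch of how the re-exported kernels above might be used
# (illustration only; assumes RBF and Matern32 are importable from GPy.kern
# after this change):
#   from GPy.kern import RBF, Matern32
#   k = RBF(1) + Matern32(1)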

View file

215
GPy/kern/_src/add.py Normal file
View file

@ -0,0 +1,215 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from linear import Linear
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
from kern import Kern
class Add(Kern):
def __init__(self, subkerns, tensor):
assert all([isinstance(k, Kern) for k in subkerns])
if tensor:
input_dim = sum([k.input_dim for k in subkerns])
self.input_slices = []
n = 0
for k in subkerns:
self.input_slices.append(slice(n, n+k.input_dim))
n += k.input_dim
else:
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
input_dim = subkerns[0].input_dim
self.input_slices = [slice(None) for k in subkerns]
super(Add, self).__init__(input_dim, 'add')
self.add_parameters(*subkerns)
def K(self, X, X2=None):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
"""
assert X.shape[1] == self.input_dim
if X2 is None:
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
else:
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X):
assert X.shape[1] == self.input_dim
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def psi0(self, Z, mu, S):
return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0)
def psi1(self, Z, mu, S):
return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
def psi2(self, Z, mu, S):
psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
# compute the "cross" terms
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
from bias import Bias
from linear import Linear
#from fixed import Fixed
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
from bias import Bias
from linear import Linear
#from fixed import Fixed
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
from bias import Bias
from linear import Linear
#from fixed import Fixed
target = np.zeros(Z.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
return target
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
from bias import Bias
from linear import Linear
#from fixed import Fixed
target_mu = np.zeros(mu.shape)
target_S = np.zeros(S.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
target_mu += a
target_S += b
return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
Parameterized._setstate(self, state)
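# A minimal usage sketch for Add (illustration only; assumes RBF and Bias are
# re-exported from GPy.kern as in this changeset). The '+' operator on Kern
# builds an Add over the two parts and sums their covariances.
if __name__ == '__main__':
    import numpy as np
    from GPy.kern import RBF, Bias
    k = RBF(2) + Bias(2)            # same input space, so tensor=False
    X = np.random.randn(10, 2)
    print k.K(X).shape              # (10, 10): RBF part plus constant bias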

62
GPy/kern/_src/bias.py Normal file
View file

@ -0,0 +1,62 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class Bias(Kern):
def __init__(self,input_dim,variance=1.,name=None):
super(Bias, self).__init__(input_dim, name)
self.variance = Param("variance", variance, Logexp())
self.add_parameter(self.variance)
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
def Kdiag(self,X):
ret = np.empty((X.shape[0],), dtype=np.float64)
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()
def gradients_X(self, dL_dK, X, X2=None):
return np.zeros(X.shape)
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S):
return self.Kdiag(mu)
def psi1(self, Z, mu, S):
return self.K(mu, Z)
def psi2(self, Z, mu, S):
ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance**2
return ret
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
return np.zeros(Z.shape)
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
return np.zeros(mu.shape), np.zeros(S.shape)
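# A small sketch of the Bias kernel's behaviour (illustration only): the
# covariance is the constant `variance` for every pair of inputs.
if __name__ == '__main__':
    import numpy as np
    k = Bias(3, variance=2.)
    X = np.random.randn(5, 3)
    print k.K(X)        # 5 x 5 matrix filled with 2.0
    print k.Kdiag(X)    # length-5 vector filled with 2.0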

50
GPy/kern/_src/brownian.py Normal file
View file

@ -0,0 +1,50 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class Brownian(Kern):
"""
Brownian motion in 1D only.
Negative times are treated as a separate (backwards!) Brownian motion.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self, input_dim=1, variance=1., name='Brownian'):
assert input_dim==1, "Brownian motion in 1D only"
super(Brownian, self).__init__(input_dim, name)
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
def K(self,X,X2=None):
if X2 is None:
X2 = X
return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)
def Kdiag(self,X):
return self.variance*np.abs(X.flatten())
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.))
#def update_gradients_diag(self, dL_dKdiag, X):
#self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag)
#def gradients_X(self, dL_dK, X, X2=None):
#if X2 is None:
#return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None]
#else:
#return np.sum(np.where(np.logical_and(np.abs(X)<np.abs(X2.T), np.sign(X)==np.sign(X2)), self.variance*dL_dK,0.),1)[:,None]
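# Sketch of the Brownian covariance (illustration only): for two inputs with
# the same sign it is variance * min(|x|, |x'|), and zero otherwise.
if __name__ == '__main__':
    import numpy as np
    k = Brownian(1, variance=1.)
    X = np.array([[0.5], [1.0], [-2.0]])
    print k.K(X)        # K[0, 1] == 0.5; K[0, 2] == 0. (opposite signs)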

View file

@ -1,12 +1,13 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart from kern import Kern
import numpy as np
from scipy import weave
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Coregionalize(Kernpart): class Coregionalize(Kern):
""" """
Covariance function for intrinsic/linear coregionalization models Covariance function for intrinsic/linear coregionalization models
@ -37,7 +38,7 @@ class Coregionalize(Kernpart):
super(Coregionalize, self).__init__(input_dim=1, name=name)
self.output_dim = output_dim
self.rank = rank
if self.rank>output_dim-1: if self.rank>output_dim:
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
if W is None:
W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
@ -48,7 +49,7 @@ class Coregionalize(Kernpart):
kappa = 0.5*np.ones(self.output_dim)
else:
assert kappa.shape==(self.output_dim, )
self.kappa = Param('kappa', kappa) self.kappa = Param('kappa', kappa, Logexp())
self.add_parameters(self.W, self.kappa)
self.parameters_changed()
@ -56,8 +57,8 @@ class Coregionalize(Kernpart):
def parameters_changed(self):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
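# (B is the output_dim x output_dim coregionalization matrix W W^T + diag(kappa);
#  K below simply looks up B[index_i, index_j] for the integer output indices stored in X.)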
def K(self,index,index2,target): def K(self, X, X2=None):
index = np.asarray(index,dtype=np.int) index = np.asarray(X, dtype=np.int)
#here's the old code (numpy)
#if index2 is None:
@ -69,41 +70,45 @@ class Coregionalize(Kernpart):
#ii, jj = ii.T, jj.T
#false_target += self.B[ii, jj]
if index2 is None:
if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code=""" code="""
for(int i=0;i<N; i++){ for(int i=0;i<N; i++){
target[i+i*N] += B[index[i]+output_dim*index[i]]; target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){ for(int j=0; j<i; j++){
target[j+i*N] += B[index[i]+output_dim*index[j]]; target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] += target[j+i*N]; target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(index2,dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code=""" code="""
for(int i=0;i<num_inducing; i++){ for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){ for(int j=0; j<N; j++){
target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]]; target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
def Kdiag(self,index,target): def Kdiag(self, X):
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()]
def update_gradients_full(self,dL_dK, index, index2=None): def update_gradients_full(self, dL_dK, X, X2=None):
index = np.asarray(index,dtype=np.int) index = np.asarray(X, dtype=np.int)
dL_dK_small = np.zeros_like(self.B)
if index2 is None: if X2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
code="""
for(int i=0; i<num_inducing; i++){
@ -122,17 +127,15 @@ class Coregionalize(Kernpart):
self.W.gradient = dW
self.kappa.gradient = dkappa
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError, "some code below" index = np.asarray(X, dtype=np.int).flatten()
#def dKdiag_dtheta(self,dL_dKdiag,index,target): dL_dKdiag_small = np.array([dL_dKdiag[index==i] for i in xrange(output_dim)])
#index = np.asarray(index,dtype=np.int).flatten() self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
#dL_dKdiag_small = np.zeros(self.output_dim) self.kappa.gradient = dL_dKdiag_small
#for i in range(self.output_dim):
#dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i]) def gradients_X(self, dL_dK, X, X2=None):
#dW = 2.*self.W*dL_dKdiag_small[:,None] return np.zeros(X.shape)
#dkappa = dL_dKdiag_small
#target += np.hstack([dW.flatten(),dkappa]) def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_X(self,dL_dK,X,X2,target):
#NOTE In this case, pass is equivalent to returning zero.
pass

328
GPy/kern/_src/kern.py Normal file
View file

@ -0,0 +1,328 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
class Kern(Parameterized):
def __init__(self, input_dim, name, *a, **kw):
"""
The base class for a kernel: a positive definite function
which forms a covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
super(Kern, self).__init__(name=name, *a, **kw)
self.input_dim = input_dim
def K(self, X, X2):
raise NotImplementedError
def Kdiag(self, Xa):
raise NotImplementedError
def psi0(self,Z,posterior_variational):
raise NotImplementedError
def psi1(self,Z,posterior_variational):
raise NotImplementedError
def psi2(self,Z,posterior_variational):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_X_diag(self, dL_dK, X):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
target = np.zeros(self.size)
self.update_gradients_diag(dL_dKdiag, X)
self._collect_gradient(target)
self.update_gradients_full(dL_dKnm, X, Z)
self._collect_gradient(target)
self.update_gradients_full(dL_dKmm, Z, None)
self._collect_gradient(target)
self._set_gradient(target)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
grad = self.gradients_X(dL_dKmm, Z)
grad += self.gradients_X(dL_dKnm.T, Z, X)
return grad
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
raise NotImplementedError
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
raise NotImplementedError
def plot_ARD(self, *args):
"""If an ARD kernel is present, plot a bar representation using matplotlib
See GPy.plotting.matplot_dep.plot_ARD
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args)
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one.
If tensor is False, both kernels are defined on the same _space_. Then
the created kernel will have the same number of inputs as self and
other (which must be the same).
If tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim inputs.
:param other: the other kernel to be added
:type other: GPy.kern
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
return Add([self, other], tensor)
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other, tensor=False):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be multiplied
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
assert isinstance(other, Kern), "only kernels can be multiplied by kernels..."
from prod import Prod
return Prod(self, other, tensor)
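# A short sketch of the composition rules described above (illustration only;
# assumes the RBF and Linear kernels shipped with this package):
if __name__ == '__main__':
    from GPy.kern import RBF, Linear
    k_sum = RBF(2) + Linear(2)                       # shared 2-D input space
    k_tensor = RBF(2).add(Linear(3), tensor=True)    # stacked 5-D input space
    print k_sum.input_dim, k_tensor.input_dim        # 2 5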
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
from GPy.kern import RBF
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = RBF(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kern
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks
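# Example of how the checks above might be driven (illustrative sketch):
if __name__ == '__main__':
    from GPy.kern import RBF
    ok = kern_test(RBF(2), verbose=True)   # positive-definiteness plus gradient checks
    print 'all checks passed:', ok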

254
GPy/kern/_src/linear.py Normal file
View file

@ -0,0 +1,254 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import cache_this
class Linear(Kern):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name)
self.ARD = ARD
if ARD == False:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances)
self.variances.add_observer(self, self._on_changed)
def _on_changed(self, obj):
#TODO: move this to base class? isn't it just for the caching?
self._notify_observers()
#@cache_this(limit=3, reset_on_self=True)
def K(self, X, X2=None):
if self.ARD:
if X2 is None:
return tdot(X*np.sqrt(self.variances))
else:
rv = np.sqrt(self.variances)
return np.dot(X*rv, (X2*rv).T)
else:
return self._dot_product(X, X2) * self.variances
#@cache_this(limit=3, reset_on_self=False)
def _dot_product(self, X, X2=None):
if X2 is None:
return tdot(X)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return np.sum(self.variances * np.square(X), -1)
def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD:
if X2 is None:
self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
else:
product = X[:, None, :] * X2[None, :, :]
self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = np.atleast_1d(tmp.sum())
def gradients_X(self, dL_dK, X, X2=None):
if X2 is None:
return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
#---------------------------------------#
# PSI statistics #
# variational #
#---------------------------------------#
def psi0(self, Z, posterior_variational):
return np.sum(self.variances * self._mu2S(posterior_variational), 1)
def psi1(self, Z, posterior_variational):
return self.K(posterior_variational.mean, Z) #the variance, it does nothing
def psi2(self, Z, posterior_variational):
ZA = Z * self.variances
ZAinner = self._ZAinner(posterior_variational, Z)
return np.dot(ZAinner, ZA.T)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
mu, S = posterior_variational.mean, posterior_variational.variance
# psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(posterior_variational)
if self.ARD: grad = tmp.sum(0)
else: grad = np.atleast_1d(tmp.sum())
#psi1
self.update_gradients_full(dL_dpsi1, mu, Z)
grad += self.variances.gradient
#psi2
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(posterior_variational, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
else: grad += tmp.sum()
#from Kmm
self.update_gradients_full(dL_dKmm, Z, None)
self.variances.gradient += grad
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
# Kmm
grad = self.gradients_X(dL_dKmm, Z, None)
#psi1
grad += self.gradients_X(dL_dpsi1.T, Z, posterior_variational.mean)
#psi2
self._weave_dpsi2_dZ(dL_dpsi2, Z, posterior_variational, grad)
return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
grad_mu, grad_S = np.zeros(posterior_variational.mean.shape), np.zeros(posterior_variational.mean.shape)
# psi0
grad_mu += dL_dpsi0[:, None] * (2.0 * posterior_variational.mean * self.variances)
grad_S += dL_dpsi0[:, None] * self.variances
# psi1
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
# psi2
self._weave_dpsi2_dmuS(dL_dpsi2, Z, posterior_variational, grad_mu, grad_S)
return grad_mu, grad_S
#--------------------------------------------------#
# Helpers for psi statistics #
#--------------------------------------------------#
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
#Using weave, we can exploit the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<input_dim;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<input_dim;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
mu = pv.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target):
AZA = self.variances*self._ZAinner(pv, Z)
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){
target(m,q) += 2*dL_dpsi2(n,m,mm)*AZA(n,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1]
mu = param_to_array(pv.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def _mu2S(self, pv):
return np.square(pv.mean) + pv.variance
def _ZAinner(self, pv, Z):
ZA = Z*self.variances
inner = (pv.mean[:, None, :] * pv.mean[:, :, None])
diag_indices = np.diag_indices(pv.mean.shape[1], 2)
inner[:, diag_indices[0], diag_indices[1]] += pv.variance
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
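# Minimal sketch (illustration only): with ARD=False the linear kernel is just
# variance * X X^T, and its diagonal is variance * sum_i x_i^2.
if __name__ == '__main__':
    import numpy as np
    k = Linear(2, variances=3., ARD=False)
    X = np.random.randn(4, 2)
    print np.allclose(k.K(X), 3. * np.dot(X, X.T))           # True
    print np.allclose(k.Kdiag(X), 3. * np.sum(X ** 2, -1))   # True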

65
GPy/kern/_src/prod.py Normal file
View file

@ -0,0 +1,65 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
class Prod(Kern):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kern
:param tensor: The kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self, k1, k2, tensor=False):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0, self.input_dim)
self.slice2 = slice(0, self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
def K(self, X, X2=None):
if X2 is None:
return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
else:
return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])
def Kdiag(self, X):
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
def update_gradients_full(self, dL_dK, X):
self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
if X2 is None:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None)
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None)
else:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
return target
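# Minimal sketch (illustration only): a same-space Prod multiplies the two part
# covariances elementwise.
if __name__ == '__main__':
    import numpy as np
    from GPy.kern import RBF, Linear
    k1, k2 = RBF(2), Linear(2)
    k = k1 * k2
    X = np.random.randn(6, 2)
    print np.allclose(k.K(X), k1.K(X) * k2.K(X))   # True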

View file

@ -4,13 +4,13 @@
import numpy as np
from scipy import weave
from kernpart import Kernpart from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class RBF(Kernpart): class RBF(Kern):
""" """
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
@ -60,22 +60,8 @@ class RBF(Kernpart):
self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache
#self.update_inv_lengthscale(self.lengthscale)
#self.parameters_changed()
# initialize cache
#self._Z, self._mu, self._S = np.empty(shape=(3, 1))
#self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
# self.weave_options = {'headers' : ['<omp.h>'],
# 'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
# 'extra_link_args' : ['-lgomp']}
self.weave_options = {} self.weave_options = {}
def on_input_change(self, X):
#self._K_computations(X, None)
pass
def update_lengthscale(self, l):
self.lengthscale2 = np.square(self.lengthscale)
@ -84,23 +70,32 @@ class RBF(Kernpart):
self._X, self._X2 = np.empty(shape=(2, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def K(self, X, X2, target): def K(self, X, X2=None):
self._K_computations(X, X2) self._K_computations(X, X2)
target += self.variance * self._K_dvar return self.variance * self._K_dvar
def Kdiag(self, X, target): def Kdiag(self, X):
np.add(target, self.variance, target) ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def psi0(self, Z, mu, S, target): def psi0(self, Z, posterior_variational):
target += self.variance mu = posterior_variational.mean
ret = np.empty(mu.shape[0], dtype=np.float64)
ret[:] = self.variance
return ret
def psi1(self, Z, mu, S, target): def psi1(self, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
target += self._psi1 return self._psi1
def psi2(self, Z, mu, S, target): def psi2(self, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
target += self._psi2 return self._psi2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
@ -131,7 +126,9 @@ class RBF(Kernpart):
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
#contributions from psi0:
@ -165,7 +162,43 @@ class RBF(Kernpart):
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def gradients_X(self, dL_dK, X, X2, target): def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
#psi1
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
#psi2
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
grad += self.gradients_X(dL_dKmm, Z, None)
return grad
def update_gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
#psi1
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
#psi2
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
posterior_variational.mean.gradient = grad_mu
posterior_variational.variance.gradient = grad_S
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:
@ -173,44 +206,15 @@ class RBF(Kernpart):
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) return np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X):
pass return np.zeros(X.shape[0])
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
@ -373,6 +377,7 @@ class RBF(Kernpart):
#include <omp.h>
#include <math.h>
"""
mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)

211
GPy/kern/_src/stationary.py Normal file
View file

@ -0,0 +1,211 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ... import util
import numpy as np
from scipy import integrate
class Stationary(Kern):
def __init__(self, input_dim, variance, lengthscale, ARD, name):
super(Stationary, self).__init__(input_dim, name)
self.ARD = ARD
if not ARD:
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel"
else:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, input_dim], "Bad lengthscales"
if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale
else:
lengthscale = np.ones(self.input_dim)
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1
self.add_parameters(self.variance, self.lengthscale)
def _dist(self, X, X2):
if X2 is None:
X2 = X
return X[:, None, :] - X2[None, :, :]
def _scaled_dist(self, X, X2=None):
return np.sqrt(np.sum(np.square(self._dist(X, X2) / self.lengthscale), -1))
def Kdiag(self, X):
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
K = self.K(X, X2)
self.variance.gradient = np.sum(K * dL_dK)/self.variance
rinv = self._inv_dist(X, X2)
dL_dr = self.dK_dr(X, X2) * dL_dK
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
if self.ARD:
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)
else:
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()
def _inv_dist(self, X, X2=None):
dist = self._scaled_dist(X, X2)
if X2 is None:
nondiag = util.diag.offdiag_view(dist)
nondiag[:] = 1./nondiag
return dist
else:
return 1./np.where(dist != 0., dist, np.inf)
def gradients_X(self, dL_dK, X, X2=None):
dL_dr = self.dK_dr(X, X2) * dL_dK
invdist = self._inv_dist(X, X2)
ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
if X2 is None:
ret *= 2.
return ret
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None):
dist = self._scaled_dist(X, X2)
return self.variance * np.exp(-0.5 * dist)
def dK_dr(self, X, X2):
return -0.5*self.K(X, X2)
class Matern32(Stationary):
"""
Matern 3/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None):
dist = self._scaled_dist(X, X2)
return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
def dK_dr(self, X, X2):
dist = self._scaled_dist(X, X2)
return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist)
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the
RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
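# A hedged usage sketch (the names `kernel`, F, F1 and F2 below are illustrative,
# not part of this file); only valid for input_dim == 1:
#     F  = np.array([np.sin]); F1 = np.array([np.cos]); F2 = np.array([lambda x: -np.sin(x)])
#     G  = kernel.Gram_matrix(F, F1, F2, lower=0., upper=1.)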
class Matern52(Stationary):
"""
Matern 5/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac{5}{3} r^2) \\exp(-\\sqrt{5} r) \\qquad \\text{where } r = \\sqrt{\\sum_{i=1}^{\\text{input\\_dim}} \\frac{(x_i - y_i)^2}{\\ell_i^2}}
"""
def K(self, X, X2=None):
r = self._scaled_dist(X, X2)
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
def dK_dr(self, X, X2):
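# Differentiating K(r) = variance * (1 + sqrt(5) r + 5/3 r^2) * exp(-sqrt(5) r) gives
# dK/dr = variance * (10/3 r - 5 r - 5 sqrt(5)/3 r^2) * exp(-sqrt(5) r).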
r = self._scaled_dist(X, X2)
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self,F,F1,F2,F3,lower,upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))
class ExpQuad(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None):
r = self._scaled_dist(X, X2)
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, X, X2):
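# With K(r) = variance * exp(-r^2 / 2), dK/dr = -r * K.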
dist = self._scaled_dist(X, X2)
return -dist*self.K(X, X2)
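# A minimal usage sketch (X and dL_dK below are illustrative, not defined in this file):
# a subclass only provides K() and dK_dr(); the Stationary base class turns dK/dr into
# parameter and input gradients.
#     k = ExpQuad(input_dim=2, variance=1., lengthscale=[1., 2.], ARD=True)
#     K = k.K(X)                           # (N, N) covariance matrix
#     k.update_gradients_full(dL_dK, X)    # fills k.variance.gradient and k.lengthscale.gradient
#     dX = k.gradients_X(dL_dK, X)         # (N, 2) gradient wrt the inputs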

563
GPy/kern/_src/sympykern.py Normal file
View file

@ -0,0 +1,563 @@
# Check Matthew Rocklin's blog post.
try:
import sympy as sp
sympy_available=True
except ImportError:
sympy_available=False
exit()
from sympy.core.cache import clear_cache
from sympy.utilities.codegen import codegen
try:
from scipy import weave
weave_available = True
except ImportError:
weave_available = False
import os
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import sys
import numpy as np
import re
import tempfile
import pdb
import ast
from kernpart import Kernpart
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
# TODO: have this set up in a setup file!
user_code_storage = tempfile.gettempdir()
class spkern(Kernpart):
"""
A kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, name them x_0, z_0, x_1, z_1, etc.
- to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
if name is None:
name='sympykern'
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
super(spkern, self).__init__(input_dim, name)
self._sp_k = k
# pull the variable names out of the symbolic covariance function.
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z)
x_dim=len(self._sp_x)
# If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim
# Check input dim is number of xs + 1 if output_dim is >1
assert self.input_dim == x_dim + int(output_dim > 1)
self.output_dim = output_dim
# extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sp_theta_i)==len(self._sp_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
# Extract names of shared parameters (those without a subscript)
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
self.num_split_params = len(self._sp_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
for theta in self._split_theta_names:
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
self.add_parameters(getattr(self, theta))
#setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta)
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else:
self.num_split_params = 0
self._split_theta_names = []
self._sp_theta = thetas
self.num_shared_params = len(self._sp_theta)
#self.num_params = self.num_shared_params
# Add parameters to the model.
for theta in self._sp_theta:
val = 1.0
if param is not None:
if param.has_key(theta):
val = param[theta]
#setattr(self, theta.name, val)
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))
#deal with param
#self._set_params(self._get_params())
# Differentiate with respect to parameters.
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
if self.output_dim > 1:
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
# differentiate with respect to input variables.
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
# psi_stats aren't yet implemented.
if False:
self.compute_psi_stats()
self._code = {}
# generate the code for the covariance functions
self._gen_code()
if weave_available:
if False:
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
else:
extra_compile_args = []
self.weave_kwargs = {
'support_code': None, #self._function_code,
'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
'extra_compile_args':extra_compile_args,
'extra_link_args':['-lgomp'],
'verbose':True}
self.parameters_changed() # initializes caches
def __add__(self,other):
return spkern(self._sp_k+other._sp_k)
def _gen_code(self):
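# Uses sympy's codegen to turn the symbolic covariance and its derivatives into C
# (or plain Python when weave is unavailable), writes the generated sources to
# user_code_storage, and assembles the looping code strings stored in self._code,
# keyed by 'K', 'Kdiag', 'dK_d<param>', 'dK_dX' and so on.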
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
code_list = [('k',self._sp_k)]
# gradients with respect to covariance input
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
# gradient with respect to parameters
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
# gradient with respect to multiple output parameters
if self.output_dim > 1:
argument_sequence += self._sp_theta_i + self._sp_theta_j
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
# generate c functions from sympy objects
if weave_available:
code_type = "C"
else:
code_type = "PYTHON"
# Need to add the sympy_helpers header in here.
(foo_c,self._function_code), (foo_h,self._function_header) = \
codegen(code_list,
code_type,
self.name,
argument_sequence=argument_sequence)
# Use weave to compute the underlying functions.
if weave_available:
# put the header file where we can find it
f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
f.write(self._function_header)
f.close()
if weave_available:
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
# put the cpp file in user code storage (defaults to temp file location)
f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
else:
# put the python file in user code storage
f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
f.write(self._function_code)
f.close()
if weave_available:
# arg_list will store the arguments required for the C code.
input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
# for multiple outputs reverse argument list is also required
if self.output_dim>1:
reverse_input_arg_list = list(input_arg_list)
reverse_input_arg_list.reverse()
# This gives the parameters for the arg list.
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
arg_list = input_arg_list + param_arg_list
precompute_list=[]
if self.output_dim > 1:
reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
# For multiple outputs, also need the split parameters.
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list
# Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list)
# Now we use the arguments in code that computes the separate parts.
# Any precomputations will be done here eventually.
self._precompute = \
"""
// Precompute code would go here. It will be called when parameters are updated.
"""
# Here's the code to do the looping for K
self._code['K'] =\
"""
// _K_code
// Code for computing the covariance function.
int i;
int j;
int n = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
//target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
}
}
%s
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
# adding a string representation of the function in the
# comment forces recompile when needed
self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
# Code to compute diagonal of covariance.
diag_arg_string = re.sub('Z','X',arg_string)
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
diag_arg_string = re.sub('j','i',diag_arg_string)
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
diag_precompute_string = re.sub('j','i',diag_precompute_string)
# Code to do the looping for Kdiag
self._code['Kdiag'] =\
"""
// _code['Kdiag']
// Code for computing diagonal of covariance function.
int i;
int n = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<n;i++){
%s
//target[i] =
TARGET1(i)=k(%s);
}
%s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
if self.output_dim>1:
for i, theta in enumerate(self._sp_theta_i):
grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
grad_func_list = c_define_output_indices+grad_func_list
grad_func_string = '\n'.join(grad_func_list)
self._code['dK_d' + theta.name] =\
"""
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
self._code['dKdiag_d' + theta.name] =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<n;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
for i, theta in enumerate(self._sp_theta):
grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
grad_func_string = '\n'.join(grad_func_list)
self._code['dK_d' + theta.name] =\
"""
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
self._code['dKdiag_d' + theta.name] =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<n;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._code['dK_dX'] = \
"""
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X
self._code['dKdiag_dX'] = \
"""
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<n; i++){
%s
}
%s
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that.
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
#TODO: similar functions when cython available.
#TODO: similar functions when only python available.
def _get_arg_names(self, target=None, Z=None, partial=None):
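# Builds the argument-name list handed to weave.inline: always 'X', then the optional
# 'target' buffer, the shared parameter names, the optional 'Z' and 'partial' arrays,
# and for multi-output kernels the per-output parameter names plus 'output_dim'.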
arg_names = ['X']
if target is not None:
arg_names += ['target']
for shared_params in self._sp_theta:
arg_names += [shared_params.name]
if Z is not None:
arg_names += ['Z']
if partial is not None:
arg_names += ['partial']
if self.output_dim>1:
arg_names += self._split_theta_names
arg_names += ['output_dim']
return arg_names
def _generate_inline(self, code, X, target=None, Z=None, partial=None):
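# Copies the current parameter values into local variables (weave.inline looks its
# arguments up by name in the calling frame) before running the generated code.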
output_dim = self.output_dim
# Need to extract parameters to local variables first
for shared_params in self._sp_theta:
locals()[shared_params.name] = getattr(self, shared_params.name)
for split_params in self._split_theta_names:
locals()[split_params] = np.asarray(getattr(self, split_params))
arg_names = self._get_arg_names(target, Z, partial)
if weave_available:
return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
else:
raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
def K(self,X,Z,target):
if Z is None:
self._generate_inline(self._code['K_X'], X, target)
else:
self._generate_inline(self._code['K'], X, target, Z)
def Kdiag(self,X,target):
self._generate_inline(self._code['Kdiag'], X, target)
def _param_grad_helper(self,partial,X,Z,target):
if Z is None:
self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
else:
self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
def dKdiag_dtheta(self,partial,X,target):
self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial)
def gradients_X(self,partial,X,Z,target):
if Z is None:
self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
else:
self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
def dKdiag_dX(self,partial,X,target):
self._generate_inline(self._code['dKdiag_dX'], X, target, Z=None, partial=partial)
def compute_psi_stats(self):
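# Symbolically integrates the covariance against Gaussian densities over the inputs to
# obtain the psi statistics; currently disabled in __init__ (see the `if False:` guard).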
#define some normal distributions
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
#self._sp_psi0 = ??
self._sp_psi1 = self._sp_k
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi1 *= normals[i]
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi1 = self._sp_psi1.simplify()
#and here's psi2 (eek!)
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi2 *= normals[i]
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi2 = self._sp_psi2.simplify()
def parameters_changed(self):
# Reset the caches
self._cache, self._cache2 = np.empty(shape=(2, 1))
self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
def update_gradients_full(self, dL_dK, X):
# Need to extract parameters to local variables first
self._K_computations(X, None)
for shared_params in self._sp_theta:
parameter = getattr(self, shared_params.name)
code = self._code['dK_d' + shared_params.name]
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
for split_params in self._split_theta_names:
parameter = getattr(self, split_params)
code = self._code['dK_d' + split_params + '_i']
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
# def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
# #contributions from Kdiag
# self.variance.gradient = np.sum(dL_dKdiag)
# #from Knm
# self._K_computations(X, Z)
# self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
# if self.ARD:
# self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
# else:
# self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
# #from Kmm
# self._K_computations(Z, None)
# self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
# if self.ARD:
# self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
# else:
# self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, Z):
if Z is None:
self._generate_inline(self._precompute, X)
else:
self._generate_inline(self._precompute, X, Z=Z)

View file

@ -1,12 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart from kern import Kern
import numpy as np import numpy as np
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
class White(Kernpart): class White(Kern):
""" """
White noise kernel. White noise kernel.
@ -20,14 +20,17 @@ class White(Kernpart):
self.input_dim = input_dim self.input_dim = input_dim
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance) self.add_parameters(self.variance)
self._psi1 = 0 # TODO: more elegance here
def K(self,X,X2,target): def K(self, X, X2=None):
if X2 is None: if X2 is None:
target += np.eye(X.shape[0])*self.variance return np.eye(X.shape[0])*self.variance
else:
return np.zeros((X.shape[0], X2.shape[0]))
def Kdiag(self,X,target): def Kdiag(self,X):
target += self.variance ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X): def update_gradients_full(self, dL_dK, X):
self.variance.gradient = np.trace(dL_dK) self.variance.gradient = np.trace(dL_dK)
@ -38,14 +41,8 @@ class White(Kernpart):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
raise NotImplementedError raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target): def gradients_X(self,dL_dK,X,X2):
target += np.sum(dL_dKdiag) return np.zeros_like(X)
def gradients_X(self,dL_dK,X,X2,target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def psi0(self,Z,mu,S,target): def psi0(self,Z,mu,S,target):
pass # target += self.variance pass # target += self.variance

View file

@ -1,680 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from parts.prod import Prod as prod
from parts.linear import Linear
from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple
(additive) kernel functions, and keeps track of various things
like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
computed additively. For multiplication, special _prod_ parts
are used.
:param input_dim: The dimensionality of the kernel's input space
:type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of Kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools
"""
super(kern, self).__init__('kern')
self.add_parameters(*parts)
self.input_dim = input_dim
if input_slices is None:
self.input_slices = [slice(None) for p in self._parameters_]
else:
assert len(input_slices) == len(self._parameters_)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self._parameters_:
assert isinstance(p, Kernpart), "bad kernel part"
def parameters_changed(self):
[p.parameters_changed() for p in self._parameters_]
def connect_input(self, Xparam):
[p.connect_input(Xparam) for p in self._parameters_]
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
#self.num_params,
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
#self.num_params = state.pop()
#self._parameters_ = state.pop()
Parameterized._setstate(self, state)
def plot_ARD(self, *args):
"""If an ARD kernel is present, plot a bar representation using matplotlib
See GPy.plotting.matplot_dep.plot_ARD
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args)
# def _transform_gradients(self, g):
# """
# Apply the transformations of the kernel so that the returned vector
# represents the gradient in the transformed space (i.e. that given by
# get_params_transformed())
#
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
# """
# x = self._get_params()
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__]
# # for constraint, index in self.constraints.iteritems():
# # if constraint != __fixed__:
# # g[index] = g[index] * constraint.gradfactor(x[index])
# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]]
# # if len(self.tied_indices) or len(self.fixed_indices):
# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
# # return np.delete(g, to_remove)
# # else:
# if self._fixes_ is not None: return g[self._fixes_]
# return g
# x = self._get_params()
# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
# if len(self.tied_indices) or len(self.fixed_indices):
# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
# return np.delete(g, to_remove)
# else:
# return g
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one.
If tensor is False, both kernels are defined on the same _space_; then
the created kernel will have the same number of inputs as self and
other (which must be the same).
If tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim
:param other: the other kernel to be added
:type other: GPy.kern
"""
if tensor:
D = self.input_dim + other.input_dim
self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)
# transfer constraints:
# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
# newkern.constraints = self.constraints + other.constraints
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.constraints = self.constraints + other.constraints
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
else:
assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
# transfer constraints:
# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
# newkern.constraints = self.constraints + other.constraints
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
[newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
[newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
return newkern
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other, tensor=False):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
K1 = self
K2 = other
#K1 = self.copy()
#K2 = other.copy()
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]
if tensor:
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
else:
newkern = kern(K1.input_dim, newkernparts, slices)
#newkern._follow_constrains(K1, K2)
return newkern
# def _follow_constrains(self, K1, K2):
#
# # Build the array that allows to go from the initial indices of the param to the new ones
# K1_param = []
# n = 0
# for k1 in K1.parts:
# K1_param += [range(n, n + k1.num_params)]
# n += k1.num_params
# n = 0
# K2_param = []
# for k2 in K2.parts:
# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
# n += k2.num_params
# index_param = []
# for p1 in K1_param:
# for p2 in K2_param:
# index_param += p1 + p2
# index_param = np.array(index_param)
#
# # Get the ties and constrains of the kernels before the multiplication
# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
#
# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
# prev_constr = K1.constraints + K2.constraints
#
# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values
#
# # follow the previous ties
# for arr in prev_ties:
# for j in arr:
# index_param[np.where(index_param == j)[0]] = arr[0]
#
# # ties and constrains
# for i in range(K1.num_params + K2.num_params):
# index = np.where(index_param == i)[0]
# if index.size > 1:
# self.tie_params(index)
# for i, t in zip(prev_constr_ind, prev_constr):
# self.constrain(np.where(index_param == i)[0], t)
#
# def _get_params(self):
# return np.hstack(self._parameters_)
# return np.hstack([p._get_params() for p in self._parameters_])
# def _set_params(self, x):
# import ipdb;ipdb.set_trace()
# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)]
# def _get_param_names(self):
# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count
# part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
#
# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
def K(self, X, X2=None, which_parts='all'):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
:param which_parts: a list of booleans detailing whether to include
each of the part functions. By default, 'all'
indicates all parts
"""
if which_parts == 'all':
which_parts = [True] * self.size
assert X.shape[1] == self.input_dim
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
return target
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X) for p in self._parameters_]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def _param_grad_helper(self, dL_dK, X, X2=None):
"""
Compute the gradient of the covariance function with respect to the parameters.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: Np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)
returns: dL_dtheta
"""
assert X.shape[1] == self.input_dim
target = np.zeros(self.size)
if X2 is None:
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
else:
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X, which_parts='all'):
"""Compute the diagonal of the covariance function for inputs X."""
if which_parts == 'all':
which_parts = [True] * self.size
assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
"""Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.size)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.size)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.size))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are num_samples,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S):
"""
Compute the psi2 statistics for the covariance function.
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: input_slices needed
crossterms = 0
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
target += crossterms
return target
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""
target = np.zeros(self.size)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
p1, p2 = self._parameters_[i1], self._parameters_[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# target *= 2
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
return target * 2
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks

View file

@ -1,65 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
def theta(x):
"""Heavisdie step function"""
return np.where(x>=0.,1.,0.)
class Brownian(Kernpart):
"""
Brownian Motion kernel.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim
assert self.input_dim==1, "Brownian motion in 1D only"
self.num_params = 1
self.name = 'Brownian'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):
if X2 is None:
X2 = X
target += self.variance*np.fmin(X,X2.T)
def Kdiag(self,X,target):
target += self.variance*X.flatten()
def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None:
X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.dot(X.flatten(), dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError, "TODO"
#target += self.variance
#target -= self.variance*theta(X-X2.T)
#if X.shape==X2.shape:
#if np.all(X==X2):
#np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0])
def dKdiag_dX(self,dL_dKdiag,X,target):
target += self.variance*dL_dKdiag[:,None]

View file

@ -1,139 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import integrate
class Matern32(Kernpart):
"""
Matern 3/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(-\\sqrt{3} r) \\qquad \\text{where } r = \\sqrt{\\sum_{i=1}^{\\text{input\\_dim}} \\frac{(x_i - y_i)^2}{\\ell_i^2}}
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.num_params = 2
self.name = 'Mat32'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'Mat32'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance', 'lengthscale']
else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
invdist = 1. / np.where(dist != 0., dist, np.inf)
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
# dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar * dL_dK)
if self.ARD == True:
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis]
# dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
else:
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist
# dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
# print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
# return(G)
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
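# Illustrative sketch only (not part of the original file): the Matern 3/2 covariance
# written directly in numpy, following the same "accumulate into a preallocated target"
# convention as K above. The helper name matern32_cov is hypothetical.
import numpy as np

def matern32_cov(X, X2, variance, lengthscale, target):
    """Add variance * (1 + sqrt(3) r) * exp(-sqrt(3) r) into target, in place."""
    if X2 is None:
        X2 = X
    r = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / lengthscale), -1))
    target += variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

X_demo = np.random.randn(5, 2)
K_demo = np.zeros((5, 5))
matern32_cov(X_demo, None, 1., np.ones(2), K_demo)
assert np.allclose(K_demo, K_demo.T) and np.allclose(np.diag(K_demo), 1.)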

View file

@ -1,145 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
from scipy import integrate
class Matern52(Kernpart):
"""
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac{5}{3} r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input\_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.num_params = 2
self.name = 'Mat52'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'Mat52'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance,lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*dL_dK)
if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def Gram_matrix(self,F,F1,F2,F3,lower,upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))
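# Illustrative sketch only: the Matern 5/2 profile from the docstring,
# k(r) = sigma^2 (1 + sqrt(5) r + 5/3 r^2) exp(-sqrt(5) r), evaluated on a grid of r
# values as a quick sanity check of the formula (isotropic case assumed).
import numpy as np

def matern52_profile(r, variance=1.):
    return variance * (1. + np.sqrt(5.) * r + 5. / 3. * r ** 2) * np.exp(-np.sqrt(5.) * r)

r_demo = np.linspace(0., 3., 13)
k_demo = matern52_profile(r_demo)
assert np.allclose(k_demo[0], 1.) and np.all(np.diff(k_demo) < 0.)  # k(0) = sigma^2, then decays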

View file

@ -1,29 +0,0 @@
import bias
import Brownian
import coregionalize
import exponential
import eq_ode1
import finite_dimensional
import fixed
import gibbs
import hetero
import hierarchical
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import ODE_1
import periodic_exponential
import periodic_Matern32
import periodic_Matern52
import poly
import prod_orthogonal
import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white

View file

@ -1,81 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from ...core.parameterization import Param
class Bias(Kernpart):
def __init__(self,input_dim,variance=1.,name=None):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
super(Bias, self).__init__(input_dim, name)
from ...core.parameterization.transformations import Logexp
self.variance = Param("variance", variance, Logexp())
self.add_parameter(self.variance)
def K(self,X,X2,target):
target += self.variance
def Kdiag(self,X,target):
target += self.variance
#def dK_dtheta(self,dL_dKdiag,X,X2,target):
#target += dL_dKdiag.sum()
def update_gradients_full(self, dL_dK, X):
self.variance.gradient = dL_dK.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()
def gradients_X(self, dL_dK,X, X2, target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
target += self.variance
def psi1(self, Z, mu, S, target):
self._psi1 = self.variance
target += self._psi1
def psi2(self, Z, mu, S, target):
target += self.variance**2
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target += dL_dpsi0.sum()
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
target += dL_dpsi1.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
target += 2.*self.variance*dL_dpsi2.sum()
def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
pass
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
pass
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
pass
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
pass
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
pass
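# Illustrative check only (standalone, not GPy code): for the bias kernel K = variance * ones,
# dL/dvariance = sum_ij dL_dK_ij, which is exactly what update_gradients_full sets above.
# Verified here with a central finite difference on a linearised objective.
import numpy as np

N_demo = 4
dL_dK_demo = np.random.randn(N_demo, N_demo)
objective = lambda v: np.sum(dL_dK_demo * (v * np.ones((N_demo, N_demo))))
eps = 1e-6
numerical = (objective(0.7 + eps) - objective(0.7 - eps)) / (2. * eps)
assert np.allclose(numerical, dL_dK_demo.sum())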

View file

@ -1,129 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import integrate
class Exponential(Kernpart):
"""
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
.. math::
k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input\_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:param name: the name of the kernel
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='exp'):
self.input_dim = input_dim
self.ARD = ARD
self.variance = variance
self.name = name
if ARD == False:
self.num_params = 2
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
#self._set_params(np.hstack((variance, lengthscale.flatten())))
self.lengthscale = lengthscale
self.set_as_parameter('variance', 'lengthscale')
# def _get_params(self):
# """return the value of the parameters."""
# return np.hstack((self.variance, self.lengthscale))
#
# def _set_params(self, x):
# """set the value of the parameters."""
# assert x.size == self.num_params
# self.variance = x[0]
# self.lengthscale = x[1:]
#
# def _get_param_names(self):
# """return parameter names."""
# if self.num_params == 2:
# return ['variance', 'lengthscale']
# else:
# return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
np.add(self.variance * np.exp(-dist), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
invdist = 1. / np.where(dist != 0., dist, np.inf)
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
dvar = np.exp(-dist)
target[0] += np.sum(dvar * dL_dK)
if self.ARD:
dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None]
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
else:
dl = self.variance * dvar * dist2M.sum(-1) * invdist
target[1] += np.sum(dl * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
# NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag)
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def Gram_matrix(self, F, F1, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(1. / self.lengthscale * F[i](x) + F1[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
return(self.lengthscale / 2. / self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T))

View file

@ -1,176 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#from ...core.parameterized.Parameterized import set_as_parameter
from ...core.parameterization import Parameterized
import numpy as np
class Kernpart(Parameterized):
def __init__(self,input_dim,name):
"""
The base class for a kernpart: a positive definite function
which forms part of a covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
super(Kernpart, self).__init__(name)
# the input dimensionality for the covariance
self.input_dim = input_dim
# the number of optimisable parameters
# the name of the covariance function.
# link to parameterized objects
#self._X = None
def connect_input(self, X):
X.add_observer(self, self.on_input_change)
#self._X = X
def on_input_change(self, X):
"""
During optimization this function will be called when
the inputs X changed. Use this to update caches dependent
on the inputs X.
"""
# overwrite this to update kernel when inputs X change
pass
# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs):
# """
# :param names: name of parameter to set as parameter
# :param gradient: gradient method to get the gradient of this parameter
# :param index: index of where to place parameter in printing
# :param args, kwargs: additional arguments to gradient
#
# Convenience method to connect Kernpart parameters:
# parameter with name (attribute of this Kernpart) will be set as parameter with following name:
#
# kernel_name + _ + parameter_name
#
# To add the kernels name to the parameter name use this method to
# add parameters.
# """
# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs)
# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs):
# """
# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter`
#
# Note: this method adds the kernels name in front of the parameter.
# """
# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs)
# if index is None:
# self._parameters_.append(p)
# else:
# self._parameters_.insert(index, p)
# self.__dict__[name] = p
#set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable
# def _get_params(self):
# raise NotImplementedError
# def _set_params(self,x):
# raise NotImplementedError
# def _get_param_names(self):
# raise NotImplementedError
def K(self,X,X2,target):
raise NotImplementedError
def Kdiag(self,X,target):
raise NotImplementedError
def _param_grad_helper(self,dL_dK,X,X2,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling _param_grad_helper. Need to
# override for stationary covariances (for example) to save
# time.
for i in range(X.shape[0]):
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi1(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dtheta(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi2(self,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params = 2
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
self.lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
self.lengthscale = np.ones(self.input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
def _set_params(self, x):
self.lengthscale = x
self.lengthscale2 = np.square(self.lengthscale)
# reset cached results
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def dKdiag_dtheta(self, dL_dKdiag, X, target):
# For stationary covariances, derivative of diagonal elements
# wrt lengthscale is 0.
target[0] += np.sum(dL_dKdiag)
def dKdiag_dX(self, dL_dK, X, target):
pass # true for all stationary kernels
class Kernpart_inner(Kernpart):
def __init__(self, input_dim, name):
"""
The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
Kernpart.__init__(self, input_dim, name)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
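# Minimal standalone sketch of the Kernpart contract documented above: K and Kdiag add
# their contribution into a caller-supplied target, and update_gradients_full stores the
# parameter gradient. ToyWhite is a hypothetical example, not one of GPy's kernparts.
class ToyWhite(object):
    """White-noise covariance part: K = variance * I, zero cross-covariance."""
    def __init__(self, input_dim, variance=1.):
        self.input_dim = input_dim
        self.variance = variance
        self.variance_gradient = 0.

    def K(self, X, X2, target):
        if X2 is None:
            target += self.variance * np.eye(X.shape[0])

    def Kdiag(self, X, target):
        target += self.variance

    def update_gradients_full(self, dL_dK, X):
        self.variance_gradient = np.trace(dL_dK)

k_demo = ToyWhite(2, variance=0.5)
K_demo = np.zeros((3, 3))
k_demo.K(np.random.randn(3, 2), None, K_demo)
assert np.allclose(K_demo, 0.5 * np.eye(3))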

View file

@ -1,306 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Linear(Kernpart):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^{\\text{input\_dim}} \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name)
self.ARD = ARD
if ARD == False:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2 = np.empty(shape=(2, 1))
self._variances = np.empty(shape=(1,)) # cached copy of variances for _psi_computations
def update_variance(self, v):
self.variances2 = np.square(self.variances)
def on_input_change(self, X):
self._K_computations(X, None)
def update_gradients_full(self, dL_dK, X):
self.variances.gradient[:] = 0
self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = tmp.sum()
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
# psi0:
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
else: self.variances.gradient[:] = tmp.sum()
#psi1
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
#psi2
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum()
#from Kmm
self._K_computations(Z, None)
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
def K(self, X, X2, target):
if self.ARD:
XX = X * np.sqrt(self.variances)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2 * np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
if X is not self._X or X2 is not None:
self._K_computations(X, X2)
target += self.variances * self._dot_product
def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target)
def _param_grad_helper(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
else:
product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
if X is not self._X or X2 is not None:
self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK)
def gradients_X(self, dL_dK, X, X2, target):
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target):
target += 2.*self.variances*dL_dKdiag[:,None]*X
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances
def psi1(self, Z, mu, S, target):
"""the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1"""
self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.gradients_X(dL_dpsi1.T, Z, mu, target)
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def psi2_new(self,Z,mu,S,target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1)
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
if self.ARD:
target += result
else:
target += result.sum()
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
Zs = Z*self.variances
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
#muAZZA = np.tensordot(mu,AZZA,(-1,0))
#target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
#Using weave, we can exploiut the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<input_dim;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<input_dim;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
#psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
#dummy_target = np.zeros_like(target)
#dummy_target += psi2_dZ.sum(0).sum(0)
AZA = self.variances*self.ZAinner
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){
target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
mu, AZA, target, dL_dpsi2 = param_to_array(mu, AZA, target, dL_dpsi2)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
self._X = X.copy()
if X2 is None:
self._dot_product = tdot(param_to_array(X))
self._X2 = None
else:
self._X2 = X2.copy()
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Zv_changed = not (fast_array_equal(Z, self._Z) and fast_array_equal(self.variances, self._variances))
muS_changed = not (fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S))
if Zv_changed:
# Z has changed, compute Z specific stuff
# self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim
# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self.ZA = Z * self.variances
self._Z = Z.copy()
self._variances = self.variances.copy()
if muS_changed:
self.mu2_S = np.square(mu) + S
self.inner = (mu[:, None, :] * mu[:, :, None])
diag_indices = np.diag_indices(mu.shape[1], 2)
self.inner[:, diag_indices[0], diag_indices[1]] += S
self._mu, self._S = mu.copy(), S.copy()
if Zv_changed or muS_changed:
self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
self._psi2 = np.dot(self.ZAinner, self.ZA.T)
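# Illustrative sketch only: the psi statistics that the cached computations above produce
# for the linear kernel, written out directly. For q(x_n) = N(mu_n, diag(S_n)) and inducing
# inputs Z, psi0 has shape (N,) and psi1 has shape (N, M).
N_d, M_d, Q_d = 4, 3, 2
variances_d = 0.5 * np.ones(Q_d)
mu_d = np.random.randn(N_d, Q_d)
S_d = np.random.rand(N_d, Q_d)
Z_d = np.random.randn(M_d, Q_d)

psi0_d = np.sum(variances_d * (np.square(mu_d) + S_d), 1)  # E[k(x_n, x_n)]
psi1_d = np.dot(mu_d * variances_d, Z_d.T)                 # E[k(x_n, z_m)]
assert psi0_d.shape == (N_d,) and psi1_d.shape == (N_d, M_d)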

View file

@ -1,125 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from coregionalize import Coregionalize
import numpy as np
import hashlib
class Prod(Kernpart):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self,k1,k2,tensor=False):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
#initialize cache
self._X, self._X2 = np.empty(shape=(2,1))
self._params = None
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self._K1 * self._K2
def K1(self,X, X2):
"""Compute the part of the kernel associated with k1."""
self._K_computations(X, X2)
return self._K1
def K2(self, X, X2):
"""Compute the part of the kernel associated with k2."""
self._K_computations(X, X2)
return self._K2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def _param_grad_helper(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else:
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else:
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.dKdiag_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
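# Standalone sketch of the product rule used by update_gradients_full above: with
# K = K1 * K2 (elementwise), the gradient signal forwarded to k1 is dL_dK * K2 and the
# one forwarded to k2 is dL_dK * K1. The rbf_cov helper below is purely illustrative.
import numpy as np

def rbf_cov(X, lengthscale):
    d2 = np.sum(np.square((X[:, None, :] - X[None, :, :]) / lengthscale), -1)
    return np.exp(-0.5 * d2)

X_demo = np.random.randn(5, 2)
K1_demo, K2_demo = rbf_cov(X_demo, 1.0), rbf_cov(X_demo, 2.0)
K_demo = K1_demo * K2_demo
dL_dK_demo = np.random.randn(5, 5)
dL_dK1_demo = dL_dK_demo * K2_demo  # what would be passed down to k1
dL_dK2_demo = dL_dK_demo * K1_demo  # what would be passed down to k2
assert np.allclose(K_demo, K_demo.T)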

View file

@ -1,352 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
class SS_RBF(Kernpart):
"""
The RBF kernel for Spike-and-Slab GPLVM
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'):
super(SS_RBF, self).__init__(input_dim, name)
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale)
self.add_parameters(self.variance, self.lengthscale)
self.ARD = True # assumed: one lengthscale per input dimension
self.weave_options = {} # extra keyword arguments passed on to weave.inline
self.parameters_changed() # initializes cache
def on_input_change(self, X):
#self._K_computations(X, None)
pass
def update_lengthscale(self, l):
self.lengthscale2 = np.square(self.lengthscale)
def parameters_changed(self):
# reset cached results
self._X, self._X2 = np.empty(shape=(2, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def K(self, X, X2, target):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def psi0(self, Z, mu, S, target):
target += self.variance
def psi1(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi1
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.variance.gradient = np.sum(self._K_dvar * dL_dK)
if self.ARD:
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
#contributions from Kdiag
self.variance.gradient = np.sum(dL_dKdiag)
#from Knm
self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD:
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
self.lengthscale.gradient = dpsi1_dlength.sum()
else:
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
#from psi2
d_var = 2.*self._psi2 / self.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi2_dlength.sum()
else:
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def gradients_X(self, dL_dK, X, X2, target):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
#params = self._get_params()
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)):
#self._X = X.copy()
#self._params_save = params.copy()
if X2 is None:
self._X2 = None
X = X / self.lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X / self.lengthscale
X2 = X2 / self.lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
"""
A helper function for update_gradients_* methods
Computes the derivative of the objective L wrt the lengthscales via
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)
assumes self._K_computations has just been called.
This is only valid if self.ARD=True
"""
target = np.zeros(self.input_dim)
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
X, dvardLdK = param_to_array(X, dvardLdK)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
X, X2, dvardLdK = param_to_array(X, X2, dvardLdK)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
return target
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Z_changed = not fast_array_equal(Z, self._Z)
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2
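# Illustrative sketch only: the scaled squared-distance identity that _K_computations
# exploits above, -2 (X/l)(X/l)^T plus broadcast row/column squared norms, checked
# against the direct pairwise computation.
import numpy as np

X_demo = np.random.randn(6, 3)
l_demo = np.array([0.5, 1.0, 2.0])
Xs = X_demo / l_demo
Xsq = np.sum(np.square(Xs), 1)
dist2_fast = -2. * np.dot(Xs, Xs.T) + Xsq[:, None] + Xsq[None, :]
dist2_direct = np.sum(np.square(Xs[:, None, :] - Xs[None, :, :]), -1)
assert np.allclose(dist2_fast, dist2_direct)
K_demo = np.exp(-0.5 * dist2_fast)  # unit-variance RBF covariance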

View file

@ -1,423 +0,0 @@
import numpy as np
import sympy as sp
from sympy.utilities.codegen import codegen
from sympy.core.cache import clear_cache
from scipy import weave
import re
import os
import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile
import pdb
import ast
from kernpart import Kernpart
class spkern(Kernpart):
"""
A kernel object, where all the hard work in done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
if name is None:
self.name='sympykern'
else:
self.name = name
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
self._sp_k = k
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z)
self.input_dim = len(self._sp_x)
self._real_input_dim = self.input_dim
if output_dim > 1:
self.input_dim += 1
assert self.input_dim == input_dim
self.output_dim = output_dim
# extract parameter names
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index.
if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sp_theta_i)==len(self._sp_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
# Extract names of shared parameters
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
self.num_split_params = len(self._sp_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
for theta in self._split_theta_names:
setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta)
self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else:
self.num_split_params = 0
self._split_theta_names = []
self._sp_theta = thetas
self.num_shared_params = len(self._sp_theta)
self.num_params = self.num_shared_params
for theta in self._sp_theta:
val = 1.0
if param is not None:
if param.has_key(theta):
val = param[theta]
setattr(self, theta.name, val)
#deal with param
self._set_params(self._get_params())
#Differentiate!
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
if self.output_dim > 1:
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
if False:
self.compute_psi_stats()
self._gen_code()
if False:
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
else:
extra_compile_args = []
self.weave_kwargs = {
'support_code':self._function_code,
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
'headers':['"sympy_helpers.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
'extra_compile_args':extra_compile_args,
'extra_link_args':['-lgomp'],
'verbose':True}
def __add__(self,other):
return spkern(self.input_dim, self._sp_k + other._sp_k)
def _gen_code(self):
#generate c functions from sympy objects
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
code_list = [('k',self._sp_k)]
# gradients with respect to covariance input
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
# gradient with respect to parameters
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
# gradient with respect to multiple output parameters
if self.output_dim > 1:
argument_sequence += self._sp_theta_i + self._sp_theta_j
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
(foo_c,self._function_code), (foo_h,self._function_header) = \
codegen(code_list, "C",'foobar',argument_sequence=argument_sequence)
#put the header file where we can find it
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
f.write(self._function_header)
f.close()
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
# This is the basic argument construction for the C code.
#arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x]
# + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z])
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
if self.output_dim>1:
reverse_arg_list = list(arg_list)
reverse_arg_list.reverse()
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
arg_list += param_arg_list
precompute_list=[]
if self.output_dim > 1:
reverse_arg_list+=list(param_arg_list)
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list
# Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list)
# Here's the code to do the looping for K
self._K_code =\
"""
// _K_code
// Code for computing the covariance function.
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
//target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
}
}
%s
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute diagonal of covariance.
diag_arg_string = re.sub('Z','X',arg_string)
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
diag_arg_string = re.sub('j','i',diag_arg_string)
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
diag_precompute_string = re.sub('j','i',diag_precompute_string)
# Code to do the looping for Kdiag
self._Kdiag_code =\
"""
// _Kdiag_code
// Code for computing diagonal of covariance function.
int i;
int N = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<N;i++){
%s
//target[i] =
TARGET1(i)=k(%s);
}
%s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
grad_func_list = []
if self.output_dim>1:
grad_func_list += c_define_output_indices
grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
grad_func_string = '\n'.join(grad_func_list)
self._dK_dtheta_code =\
"""
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_grad_func_string)
self._dKdiag_dtheta_code =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<N;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._dK_dX_code = \
"""
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradX_func_string = re.sub(r'partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \
"""
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<N; i++){
%s
}
%s
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that.
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
# Code to use when only X is provided.
# Chain the replacements for both spellings of the Z array so the later
# assignments do not clobber the earlier ones; the dK_dX contribution is
# doubled because X then appears in both arguments of the kernel.
self._K_code_X = self._K_code.replace('Z[', 'X[').replace('Z2(', 'X2(')
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[').replace('Z2(', 'X2(')
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('Z2(', 'X2(').replace('+= partial[', '+= 2*partial[')
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
def _get_arg_names(self, Z=None, partial=None):
arg_names = ['target','X']
for shared_params in self._sp_theta:
arg_names += [shared_params.name]
if Z is not None:
arg_names += ['Z']
if partial is not None:
arg_names += ['partial']
if self.output_dim>1:
arg_names += self._split_theta_names
arg_names += ['output_dim']
return arg_names
def _weave_inline(self, code, X, target, Z=None, partial=None):
output_dim = self.output_dim
# weave.inline resolves the names listed in arg_names from the local scope,
# so pull the current parameter values in as local variables first.
for shared_params in self._sp_theta:
locals()[shared_params.name] = getattr(self, shared_params.name)
for split_params in self._split_theta_names:
locals()[split_params] = getattr(self, split_params)
arg_names = self._get_arg_names(Z, partial)
weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
def K(self,X,Z,target):
if Z is None:
self._weave_inline(self._K_code_X, X, target)
else:
self._weave_inline(self._K_code, X, target, Z)
def Kdiag(self,X,target):
self._weave_inline(self._Kdiag_code, X, target)
def _param_grad_helper(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
else:
self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
def gradients_X(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else:
self._weave_inline(self._dK_dX_code, X, target, Z, partial)
def dKdiag_dX(self,partial,X,target):
self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
def compute_psi_stats(self):
#define some normal distributions
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
#self._sp_psi0 = ??
self._sp_psi1 = self._sp_k
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi1 *= normals[i]
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi1 = self._sp_psi1.simplify()
#and here's psi2 (eek!)
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi2 *= normals[i]
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi2 = self._sp_psi2.simplify()
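# In other words, the two loops above evaluate, one input dimension at a
# time, the expectations of the kernel under the factorised Gaussian q(x)
# defined by mus and Ss:
#     psi1(z)     = \int k(x, z) \prod_i N(x_i | mu_i, S_i) dx
#     psi2(z, z') = \int k(x, z) k(x, z') \prod_i N(x_i | mu_i, S_i) dx
# (psi0 is left unimplemented above: self._sp_psi0 = ??)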
def _set_params(self,param):
assert param.size == (self.num_params)
for i, shared_params in enumerate(self._sp_theta):
setattr(self, shared_params.name, param[i])
if self.output_dim>1:
for i, split_params in enumerate(self._split_theta_names):
start = self.num_shared_params + i*self.output_dim
end = self.num_shared_params + (i+1)*self.output_dim
setattr(self, split_params, param[start:end])
def _get_params(self):
params = np.zeros(0)
for shared_params in self._sp_theta:
params = np.hstack((params, getattr(self, shared_params.name)))
if self.output_dim>1:
for split_params in self._split_theta_names:
params = np.hstack((params, getattr(self, split_params).flatten()))
return params
def _get_param_names(self):
if self.output_dim>1:
return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)]
else:
return [x.name for x in self._sp_theta]
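# A hedged usage sketch of the covariance code generated above (the names
# `spkern`, `X`, `Z` and `dL_dK` are placeholders, since the constructor is
# not shown in this listing): every generated snippet writes into a
# preallocated target array supplied by the caller, e.g.
#
#     K = np.zeros((X.shape[0], Z.shape[0]))
#     spkern.K(X, Z, K)            # TARGET2(i, j) += k(...), so K must start at zero
#     Kdiag = np.zeros(X.shape[0])
#     spkern.Kdiag(X, Kdiag)       # TARGET1(i) = k(...)
#     grad = np.zeros(spkern.num_params)
#     spkern._param_grad_helper(dL_dK, X, Z, grad)   # accumulates dL/dtheta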

View file

@ -8,7 +8,7 @@ from ..core import SparseGP
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from ..inference.optimization import SCG from ..inference.optimization import SCG
from ..util import linalg from ..util import linalg
from ..core.parameterization.variational import Normal from ..core.parameterization.variational import NormalPosterior, NormalPrior
class BayesianGPLVM(SparseGP, GPLVM): class BayesianGPLVM(SparseGP, GPLVM):
""" """
@ -29,18 +29,20 @@ class BayesianGPLVM(SparseGP, GPLVM):
self.init = init self.init = init
if X_variance is None: if X_variance is None:
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) X_variance = np.random.uniform(0,.1,X.shape)
if Z is None: if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim) kernel = kern.RBF(input_dim) # + kern.white(input_dim)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()
self.q = Normal(X, X_variance) self.q = NormalPosterior(X, X_variance)
self.variational_prior = NormalPrior()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
self.add_parameter(self.q, index=0) self.add_parameter(self.q, index=0)
#self.ensure_default_constraints() #self.ensure_default_constraints()
@ -57,34 +59,15 @@ class BayesianGPLVM(SparseGP, GPLVM):
self.init = state.pop() self.init = state.pop()
SparseGP._setstate(self, state) SparseGP._setstate(self, state)
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return dL_dmu, dL_dS
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) super(BayesianGPLVM, self).parameters_changed()
self._update_gradients_Z(add=False) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q)
self._log_marginal_likelihood -= self.KL_divergence() self.kern.update_gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
dL_dmu, dL_dS = self.dL_dmuS()
# dL: # update for the KL divergence
self.q.mean.gradient = dL_dmu self.variational_prior.update_gradients_KL(self.q)
self.q.variance.gradient = dL_dS
# dKL:
self.q.mean.gradient -= self.X
self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
def plot_latent(self, plot_inducing=True, *args, **kwargs): def plot_latent(self, plot_inducing=True, *args, **kwargs):
""" """
@ -157,6 +140,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
""" """
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map
""" """
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots

View file

@ -23,7 +23,7 @@ class GPRegression(GP):
def __init__(self, X, Y, kernel=None): def __init__(self, X, Y, kernel=None):
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()

View file

@ -7,9 +7,25 @@ from GPy.util.linalg import PCA
import numpy import numpy
import itertools import itertools
import pylab import pylab
from GPy.kern.kern import kern from GPy.kern import Kern
from GPy.models.bayesian_gplvm import BayesianGPLVM from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD2(Model):
"""
Apply MRD to all given datasets Y in Ylist.
Y_i in [n x p_i]
The samples n in the datasets need
to match up, whereas the dimensionality p_d can differ.
:param [array-like] Ylist: List of datasets to apply MRD on
:param array-like q_mean: mean of starting latent space q in [n x q]
:param array-like q_variance: variance of starting latent space q in [n x q]
:param :class:`~GPy.inference.latent_function_inference
"""
class MRD(Model): class MRD(Model):
""" """
Do MRD on given Datasets in Ylist. Do MRD on given Datasets in Ylist.
@ -48,11 +64,11 @@ class MRD(Model):
# sort out the kernels # sort out the kernels
if kernels is None: if kernels is None:
kernels = [None] * len(likelihood_or_Y_list) kernels = [None] * len(likelihood_or_Y_list)
elif isinstance(kernels, kern): elif isinstance(kernels, Kern):
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))] kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
else: else:
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output" assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!" assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
assert not ('kernel' in kw), "pass kernels through `kernels` argument" assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim self.input_dim = input_dim

View file

@ -1,8 +1,8 @@
import pylab as pb import pylab as pb
import numpy as np import numpy as np
from ... import util
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
from GPy.util.misc import param_to_array from ...util.misc import param_to_array
from .base_plots import x_frame2D
import itertools import itertools
import Tango import Tango
from matplotlib.cm import get_cmap from matplotlib.cm import get_cmap
@ -37,7 +37,7 @@ def plot_latent(model, labels=None, which_indices=None,
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
util.plot.Tango.reset() Tango.reset()
if labels is None: if labels is None:
labels = np.ones(model.num_data) labels = np.ones(model.num_data)
@ -46,7 +46,7 @@ def plot_latent(model, labels=None, which_indices=None,
X = param_to_array(model.X) X = param_to_array(model.X)
# first, plot the output variance as a function of the latent space # first, plot the output variance as a function of the latent space
Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(X[:, [input_1, input_2]], resolution=resolution) Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
def plot_function(x): def plot_function(x):
@ -87,7 +87,7 @@ def plot_latent(model, labels=None, which_indices=None,
else: else:
x = X[index, input_1] x = X[index, input_1]
y = X[index, input_2] y = X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1) ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2) ax.set_ylabel('latent dimension %i' % input_2)
@ -120,7 +120,7 @@ def plot_magnification(model, labels=None, which_indices=None,
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
util.plot.Tango.reset() Tango.reset()
if labels is None: if labels is None:
labels = np.ones(model.num_data) labels = np.ones(model.num_data)
@ -128,7 +128,7 @@ def plot_magnification(model, labels=None, which_indices=None,
input_1, input_2 = most_significant_input_dimensions(model, which_indices) input_1, input_2 = most_significant_input_dimensions(model, which_indices)
# first, plot the output variance as a function of the latent space # first, plot the output variance as a function of the latent space
Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
def plot_function(x): def plot_function(x):
@ -165,7 +165,7 @@ def plot_magnification(model, labels=None, which_indices=None,
else: else:
x = model.X[index, input_1] x = model.X[index, input_1]
y = model.X[index, input_2] y = model.X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1) ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2) ax.set_ylabel('latent dimension %i' % input_2)
@ -205,7 +205,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
return dmu_dX[indices, argmax], np.array(labels)[argmax] return dmu_dX[indices, argmax], np.array(labels)[argmax]
if ax is None: if ax is None:
fig = pyplot.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
if data_labels is None: if data_labels is None:
@ -241,7 +241,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
ax.legend() ax.legend()
ax.figure.tight_layout() ax.figure.tight_layout()
if updates: if updates:
pyplot.show() pb.show()
clear = raw_input('Enter to continue') clear = raw_input('Enter to continue')
if clear.lower() in 'yes' or clear == '': if clear.lower() in 'yes' or clear == '':
controller.deactivate() controller.deactivate()

View file

@ -1,13 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np import numpy as np
import pylab as pb import pylab as pb
import Tango import Tango
from matplotlib.textpath import TextPath from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy from matplotlib.transforms import offset_copy
from ...kern.parts.linear import Linear from ...kern import Linear
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
@ -29,7 +28,8 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
xticklabels = [] xticklabels = []
bars = [] bars = []
x0 = 0 x0 = 0
for p in kernel._parameters_: #for p in kernel._parameters_:
p = kernel
c = Tango.nextMedium() c = Tango.nextMedium()
if hasattr(p, 'ARD') and p.ARD: if hasattr(p, 'ARD') and p.ARD:
if title is None: if title is None:
@ -40,9 +40,9 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
ard_params = p.variances ard_params = p.variances
else: else:
ard_params = 1. / p.lengthscale ard_params = 1. / p.lengthscale
x = np.arange(x0, x0 + len(ard_params)) x = np.arange(x0, x0 + len(ard_params))
bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) from ...util.misc import param_to_array
bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," ")))
xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
x0 += len(ard_params) x0 += len(ard_params)
x = np.arange(x0) x = np.arange(x0)

View file

@ -9,7 +9,7 @@ from ...util.misc import param_to_array
def plot_fit(model, plot_limits=None, which_data_rows='all', def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None, levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False, plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed. - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols and which_parts using which_data_rowsm which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array :type plot_limits: np.array
@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these :param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers :type which_data_rows: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples :type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
@ -59,6 +57,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
X, Y, Z = param_to_array(model.X, model.Y, model.Z)
if model.has_uncertain_inputs(): X_variance = param_to_array(model.q.variance)
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs]) fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
@ -68,7 +69,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#define the frame on which to plot #define the frame on which to plot
resolution = resolution or 200 resolution = resolution or 200
Xnew, xmin, xmax = x_frame1D(model.X[:,free_dims], plot_limits=plot_limits) Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],model.input_dim)) Xgrid = np.empty((Xnew.shape[0],model.input_dim))
Xgrid[:,free_dims] = Xnew Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs: for i,v in fixed_inputs:
@ -76,30 +77,30 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#make a prediction on the frame and plot it #make a prediction on the frame and plot it
if plot_raw: if plot_raw:
m, v = model._raw_predict(Xgrid, which_parts=which_parts) m, v = model._raw_predict(Xgrid)
lower = m - 2*np.sqrt(v) lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v) upper = m + 2*np.sqrt(v)
Y = model.Y Y = Y
else: else:
m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts) m, v, lower, upper = model.predict(Xgrid)
Y = model.Y Y = Y
for d in which_data_ycols: for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(model.X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples #optionally plot some samples
if samples: #NOTE not tested with fixed_inputs if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts) Ysim = model.posterior_samples(Xgrid, samples)
for yi in Ysim.T: for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled) #add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs"): #if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
ax.errorbar(model.X[which_data, free_dims], model.likelihood.data[which_data, 0], # ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(model.X_variance[which_data, free_dims]), # xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5) # ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values
@ -111,7 +112,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add inducing inputs (if a sparse model is used) #add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"): if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = param_to_array(model.Z[:,free_dims]) Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0] z_height = ax.get_ylim()[0]
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12) ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
@ -122,7 +123,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#define the frame for plotting on #define the frame for plotting on
resolution = resolution or 50 resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(model.X[:,free_dims], plot_limits, resolution) Xnew, _, _, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
Xgrid = np.empty((Xnew.shape[0],model.input_dim)) Xgrid = np.empty((Xnew.shape[0],model.input_dim))
Xgrid[:,free_dims] = Xnew Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs: for i,v in fixed_inputs:
@ -131,15 +132,15 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#predict on the frame and plot #predict on the frame and plot
if plot_raw: if plot_raw:
m, _ = model._raw_predict(Xgrid, which_parts=which_parts) m, _ = model._raw_predict(Xgrid)
Y = model.Y Y = Y
else: else:
m, _, _, _ = model.predict(Xgrid, which_parts=which_parts) m, _, _, _ = model.predict(Xgrid)
Y = model.data Y = Y
for d in which_data_ycols: for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T m_d = m[:,d].reshape(resolution, resolution).T
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.scatter(model.X[which_data_rows, free_dims[0]], model.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
@ -151,7 +152,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add inducing inputs (if a sparse model is used) #add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"): if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = model.Z[:,free_dims] Zu = Z[:,free_dims]
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo') ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
else: else:

View file

@ -24,6 +24,18 @@ class Test(unittest.TestCase):
self.param_index.remove(one, [1]) self.param_index.remove(one, [1])
self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index[one].tolist(), [3])
def test_shift_left(self):
self.param_index.shift_left(1, 2)
self.assertListEqual(self.param_index[three].tolist(), [2,5])
self.assertListEqual(self.param_index[two].tolist(), [0,3])
self.assertListEqual(self.param_index[one].tolist(), [1])
def test_shift_right(self):
self.param_index.shift_right(5, 2)
self.assertListEqual(self.param_index[three].tolist(), [2,4,9])
self.assertListEqual(self.param_index[two].tolist(), [0,7])
self.assertListEqual(self.param_index[one].tolist(), [3])
def test_index_view(self): def test_index_view(self):
#======================================================================= #=======================================================================
# 0 1 2 3 4 5 6 7 8 9 # 0 1 2 3 4 5 6 7 8 9

View file

@ -10,8 +10,8 @@ import numpy as np
class Test(unittest.TestCase): class Test(unittest.TestCase):
def setUp(self): def setUp(self):
self.rbf = GPy.kern.rbf(1) self.rbf = GPy.kern.RBF(1)
self.white = GPy.kern.white(1) self.white = GPy.kern.White(1)
from GPy.core.parameterization import Param from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logistic from GPy.core.parameterization.transformations import Logistic
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
@ -39,14 +39,13 @@ class Test(unittest.TestCase):
def test_remove_parameter(self): def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__ from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
self.white.fix() self.white.fix()
self.test1.remove_parameter(self.white) self.test1.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None) self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops) self.assertEquals(self.white.constraints._offset, 0)
self.assertEquals(self.white.white.constraints._offset, 0)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
@ -57,18 +56,19 @@ class Test(unittest.TestCase):
self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0]) self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0])
self.assertIs(self.white._fixes_,None) self.assertIs(self.white._fixes_,None)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)
self.test1.remove_parameter(self.white) self.test1.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None) self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
def test_add_parameter_already_in_hirarchy(self): def test_add_parameter_already_in_hirarchy(self):
self.test1.add_parameter(self.white._parameters_[0]) self.test1.add_parameter(self.white._parameters_[0])
def test_default_constraints(self): def test_default_constraints(self):
self.assertIs(self.rbf.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
from GPy.core.parameterization.transformations import Logexp from GPy.core.parameterization.transformations import Logexp

View file

@ -12,6 +12,7 @@ import decorators
import classification import classification
import subarray_and_sorting import subarray_and_sorting
import caching import caching
import diag
try: try:
import sympy import sympy

View file

@ -1,44 +1,86 @@
from ..core.parameterization.array_core import ObservableArray, ParamList from ..core.parameterization.parameter_core import Observable
class Cacher(object): class Cacher(object):
def __init__(self, operation, limit=5): def __init__(self, operation, limit=5, reset_on_first=False):
self.limit = int(limit) self.limit = int(limit)
self._reset_on_first = reset_on_first
self.operation=operation self.operation=operation
self.cached_inputs = ParamList([]) self.cached_inputs = []
self.cached_outputs = [] self.cached_outputs = []
self.inputs_changed = [] self.inputs_changed = []
def __call__(self, X): def __call__(self, *args):
assert isinstance(X, ObservableArray) if self._reset_on_first:
if X in self.cached_inputs: assert isinstance(args[0], Observable)
i = self.cached_inputs.index(X) args[0].add_observer(self, self.reset)
cached_args = args
else:
cached_args = args[1:]
if not all([isinstance(arg, Observable) for arg in cached_args]):
return self.operation(*args)
if cached_args in self.cached_inputs:
i = self.cached_inputs.index(cached_args)
if self.inputs_changed[i]: if self.inputs_changed[i]:
self.cached_outputs[i] = self.operation(X) self.cached_outputs[i] = self.operation(*args)
self.inputs_changed[i] = False self.inputs_changed[i] = False
return self.cached_outputs[i] return self.cached_outputs[i]
else: else:
if len(self.cached_inputs) == self.limit: if len(self.cached_inputs) == self.limit:
X_ = self.cached_inputs.pop(0) args_ = self.cached_inputs.pop(0)
X_.remove_observer(self) [a.remove_observer(self, self.on_cache_changed) for a in args_]
self.inputs_changed.pop(0) self.inputs_changed.pop(0)
self.cached_outputs.pop(0) self.cached_outputs.pop(0)
self.cached_inputs.append(X) self.cached_inputs.append(cached_args)
self.cached_outputs.append(self.operation(X)) self.cached_outputs.append(self.operation(*args))
self.inputs_changed.append(False) self.inputs_changed.append(False)
X.add_observer(self, self.on_cache_changed) [a.add_observer(self, self.on_cache_changed) for a in args]
return self.cached_outputs[-1] return self.cached_outputs[-1]
def on_cache_changed(self, X): def on_cache_changed(self, arg):
#print id(X) self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
Xbase = X
while Xbase is not None: def reset(self, obj):
try: [[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs]
i = self.cached_inputs.index(X) self.cached_inputs = []
break self.cached_outputs = []
except ValueError: self.inputs_changed = []
Xbase = X.base
continue
self.inputs_changed[i] = True
def cache_this(limit=5, reset_on_self=False):
def limited_cache(f):
c = Cacher(f, limit, reset_on_first=reset_on_self)
def f_wrap(*args):
return c(*args)
f_wrap._cacher = c
return f_wrap
return limited_cache
#Xbase = X
#while Xbase is not None:
#try:
#i = self.cached_inputs.index(X)
#break
#except ValueError:
#Xbase = X.base
#continue
#self.inputs_changed[i] = True
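A short usage sketch of the cache_this decorator added above (the import path GPy.util.caching is an assumption; results are only cached when the cached arguments are Observable, otherwise the call falls straight through to the wrapped function):

import numpy as np
from GPy.util.caching import cache_this  # assumed location of the module above

class ToyKern(object):
    @cache_this(limit=3)
    def K(self, X):
        # pretend this is expensive; it is only re-run when an Observable X changes
        return np.dot(X, X.T)

k = ToyKern()
X = np.random.randn(10, 2)
K1 = k.K(X)  # plain ndarrays are not Observable, so this call is computed directly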

View file

@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500):
num_in = 1 num_in = 1
X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in)) X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in))
X.sort(axis=0) X.sort(axis=0)
rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,))) rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.white(num_in, variance=1e-2) white = GPy.kern.White(num_in, variance=1e-2)
kernel = rbf + white kernel = rbf + white
K = kernel.K(X) K = kernel.K(X)
y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1)) y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))

View file

@ -44,6 +44,12 @@ def view(A, offset=0):
else: else:
return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, )) return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, ))
def offdiag_view(A, offset=0):
from numpy.lib.stride_tricks import as_strided
assert A.ndim == 2, "only implemented for 2 dimensions"
Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,))
return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1]))
def _diag_ufunc(A,b,offset,func): def _diag_ufunc(A,b,offset,func):
dA = view(A, offset); func(dA,b,dA) dA = view(A, offset); func(dA,b,dA)
return A return A
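A small self-contained check of the offdiag_view helper added above (its body is repeated here so the snippet runs on its own):

import numpy as np
from numpy.lib.stride_tricks import as_strided

def offdiag_view(A, offset=0):
    # writeable strided view containing every off-diagonal entry of A
    assert A.ndim == 2, "only implemented for 2 dimensions"
    Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,))
    return as_strided(Af[(1 + offset):], shape=(A.shape[0] - 1, A.shape[1]),
                      strides=(A.strides[0] + A.itemsize, A.strides[1]))

A = np.arange(9.).reshape(3, 3)
print offdiag_view(A)     # [[ 1.  2.  3.] [ 5.  6.  7.]] -- everything except the diagonal
offdiag_view(A)[:] = 0.   # the view writes through to A, zeroing the off-diagonal entries
print np.diag(A)          # [ 0.  4.  8.] -- the diagonal is untouched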

View file

@ -3,8 +3,6 @@
import numpy as np import numpy as np
import scipy as sp
import pylab as plt
class WarpingFunction(object): class WarpingFunction(object):
""" """
@ -39,6 +37,7 @@ class WarpingFunction(object):
def plot(self, psi, xmin, xmax): def plot(self, psi, xmin, xmax):
y = np.arange(xmin, xmax, 0.01) y = np.arange(xmin, xmax, 0.01)
f_y = self.f(y, psi) f_y = self.f(y, psi)
from matplotlib import pyplot as plt
plt.figure() plt.figure()
plt.plot(y, f_y) plt.plot(y, f_y)
plt.xlabel('y') plt.xlabel('y')