Merge branch 'params' of https://github.com/SheffieldML/GPy into params

This commit is contained in:
Neil Lawrence 2014-02-23 11:02:28 -05:00
commit 3968d48ba5
76 changed files with 2158 additions and 2908 deletions

View file

@ -2,7 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from model import *
from parameterization.parameterized import *
from parameterization.parameterized import adjust_name_for_printing, Parameterizable
from parameterization.param import Param, ParamConcatenation
from gp import GP
from sparse_gp import SparseGP
from svigp import SVIGP

View file

@ -43,7 +43,8 @@ class GP(Model):
else:
self.Y_metadata = None
assert isinstance(kernel, kern.kern)
assert isinstance(kernel, kern.Kern)
assert self.input_dim == kernel.input_dim
self.kern = kernel
assert isinstance(likelihood, likelihoods.Likelihood)
@ -70,7 +71,7 @@ class GP(Model):
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
def _raw_predict(self, _Xnew, full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
@ -80,29 +81,27 @@ class GP(Model):
diagonal of the covariance is returned.
"""
Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
Kx = self.kern.K(_Xnew, self.X).T
#LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
Kxx = self.kern.K(_Xnew)
#var = Kxx - tdot(LiKx.T)
var = np.dot(Kx.T, WiKx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
Kxx = self.kern.Kdiag(_Xnew)
#var = Kxx - np.sum(LiKx*LiKx, 0)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
return mu, var
def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
def predict(self, Xnew, full_cov=False, **likelihood_args):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
:type full_cov: bool
@ -118,13 +117,13 @@ class GP(Model):
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
def posterior_samples_f(self,X,size=10, full_cov=True):
"""
Samples the posterior GP at the points X.
@ -132,13 +131,11 @@ class GP(Model):
:type X: np.ndarray, Nnew x self.input_dim.
:param size: the number of a posteriori samples.
:type size: int.
:param which_parts: which of the kernel functions to use (additively).
:type which_parts: 'all', or list of bools.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
m, v = self._raw_predict(X, full_cov=full_cov)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
if not full_cov:
Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
@ -147,7 +144,7 @@ class GP(Model):
return Ysim
def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
def posterior_samples(self,X,size=10, full_cov=True,noise_model=None):
"""
Samples the posterior GP at the points X.
@ -155,15 +152,13 @@ class GP(Model):
:type X: np.ndarray, Nnew x self.input_dim.
:param size: the number of a posteriori samples.
:type size: int.
:param which_parts: which of the kernel functions to use (additively).
:type which_parts: 'all', or list of bools.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
if isinstance(self.likelihood, Gaussian):
noise_std = np.sqrt(self.likelihood._get_params())
Ysim += np.random.normal(0,noise_std,Ysim.shape)

View file

@ -4,12 +4,8 @@
from .. import likelihoods
from ..inference import optimization
from ..util.linalg import jitchol
from ..util.misc import opt_wrapper
from parameterization import Parameterized
from parameterization.parameterized import UNFIXED
from parameterization.domains import _POSITIVE, _REAL
from parameterization.index_operations import ParameterIndexOperations
import multiprocessing as mp
import numpy as np
from numpy.linalg.linalg import LinAlgError
@ -240,7 +236,7 @@ class Model(Parameterized):
constrained positive.
"""
raise DeprecationWarning, 'parameters now have default constraints'
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
#positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
# param_names = self._get_param_names()
# for s in positive_strings:
@ -489,20 +485,17 @@ class Model(Parameterized):
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
if (not len(k) == 1):
raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0]
from ..kern.parts.rbf import RBF
from ..kern.parts.rbf_inv import RBFInv
from ..kern.parts.linear import Linear
k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
from ..kern import RBF, Linear#, RBFInv
if isinstance(k, RBF):
return 1. / k.lengthscale
elif isinstance(k, RBFInv):
return k.inv_lengthscale
#elif isinstance(k, RBFInv):
# return k.inv_lengthscale
elif isinstance(k, Linear):
return k.variances
else:
raise ValueError, "cannot determine sensitivity for this kernel"
def pseudo_EM(self, stop_crit=.1, **kwargs):
"""

View file

@ -30,12 +30,16 @@ class ObservableArray(np.ndarray, Observable):
def __new__(cls, input_array):
obj = np.atleast_1d(input_array).view(cls)
cls.__name__ = "ObservableArray\n "
obj._observers_ = {}
return obj
def __init__(self, *a, **kw):
super(ObservableArray, self).__init__(*a, **kw)
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
self._observers_ = getattr(obj, '_observers_', None)
self._observer_callables_ = getattr(obj, '_observer_callables_', None)
def __array_wrap__(self, out_arr, context=None):
return out_arr.view(np.ndarray)

View file

@ -83,11 +83,21 @@ class ParameterIndexOperations(object):
def iterproperties(self):
return self._properties.iterkeys()
def shift(self, start, size):
def shift_right(self, start, size):
for ind in self.iterindices():
toshift = ind>=start
if toshift.size > 0:
ind[toshift] += size
ind[toshift] += size
def shift_left(self, start, size):
for v, ind in self.items():
todelete = (ind>=start) * (ind<start+size)
if todelete.size != 0:
ind = ind[~todelete]
toshift = ind>=start
if toshift.size != 0:
ind[toshift] -= size
if ind.size != 0: self._properties[v] = ind
else: del self._properties[v]
def clear(self):
self._properties.clear()
@ -183,7 +193,7 @@ class ParameterIndexOperationsView(object):
yield i
def shift(self, start, size):
def shift_right(self, start, size):
raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations'

View file

@ -3,7 +3,7 @@
import itertools
import numpy
from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing
from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing
from array_core import ObservableArray, ParamList
###### printing
@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5
######
class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable):
class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
"""
Parameter object for GPy models.
@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
:param input_array: array which this parameter handles
:type input_array: numpy.ndarray
:param default_constraint: The default constraint for this parameter
:type default_constraint:
:type default_constraint:
You can add/remove constraints by calling constrain on the parameter itself, e.g:
@ -54,12 +54,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
obj._tied_to_me_ = SetDict()
obj._tied_to_ = []
obj._original_ = True
obj.gradient = None
obj._gradient_ = None
return obj
def __init__(self, name, input_array, default_constraint=None):
super(Param, self).__init__(name=name, default_constraint=default_constraint)
def __init__(self, name, input_array, default_constraint=None, *a, **kw):
super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
@ -76,10 +76,20 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
self._updated_ = getattr(obj, '_updated_', None)
self._original_ = getattr(obj, '_original_', None)
self._name = getattr(obj, 'name', None)
self.gradient = getattr(obj, 'gradient', None)
self._gradient_ = getattr(obj, '_gradient_', None)
self.constraints = getattr(obj, 'constraints', None)
self.priors = getattr(obj, 'priors', None)
@property
def gradient(self):
if self._gradient_ is None:
self._gradient_ = numpy.zeros(self._realshape_)
return self._gradient_[self._current_slice_]
@gradient.setter
def gradient(self, val):
self.gradient[:] = val
#===========================================================================
# Pickling operations
#===========================================================================
@ -114,7 +124,14 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
self._parent_index_ = state.pop()
self._direct_parent_ = state.pop()
self.name = state.pop()
def copy(self, *args):
constr = self.constraints.copy()
priors = self.priors.copy()
p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_)
p.constraints = constr
p.priors = priors
return p
#===========================================================================
# get/set parameters
#===========================================================================
@ -127,7 +144,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
return self.flat
def _collect_gradient(self, target):
target[:] = self.gradient.flat
target += self.gradient.flat
def _set_gradient(self, g):
self.gradient = g.reshape(self._realshape_)
#===========================================================================
# Array operations -> done
@ -192,7 +212,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
return numpy.r_[a]
return numpy.r_[:b]
return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
#===========================================================================
# Convenience
#===========================================================================
@ -214,7 +234,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
def _description_str(self):
if self.size <= 1: return ["%f" % self]
else: return [str(self.shape)]
def parameter_names(self, add_name=False):
def parameter_names(self, add_self=False, adjust_for_printing=False):
if adjust_for_printing:
return [adjust_name_for_printing(self.name)]
return [self.name]
@property
def flattened_parameters(self):
@ -231,14 +253,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
@property
def _ties_str(self):
return [t._short() for t in self._tied_to_] or ['']
@property
def name_hirarchical(self):
if self.has_parent():
return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
return adjust_name_for_printing(self.name)
def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.name_hirarchical)
x=self.hirarchy_name())
return name + super(Param, self).__repr__(*args, **kwargs)
def _ties_for(self, rav_index):
# size = sum(p.size for p in self._tied_to_)
@ -260,7 +277,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
for i in range(self._realndim_-len(clean_curr_slice)):
i+=len(clean_curr_slice)
clean_curr_slice += range(self._realshape_[i])
clean_curr_slice += range(self._realshape_[i])
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
and len(set(map(len, clean_curr_slice))) <= 1):
return numpy.fromiter(itertools.izip(*clean_curr_slice),
@ -272,12 +289,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
gen = map(lambda x: " ".join(map(str, x)), gen)
return reduce(lambda a, b:max(a, len(b)), gen, len(header))
def _max_len_values(self):
return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical))
return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name()))
def _max_len_index(self, ind):
return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
def _short(self):
# short string to print
name = self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
name = self.hirarchy_name()
if self._realsize_ < 2:
return name
ind = self._indices()
@ -300,8 +317,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
if lp is None: lp = self._max_len_names(prirs, __tie_name__)
sep = '-'
header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}"
if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle([''])
return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
# except: return super(Param, self).__str__()
@ -426,4 +443,4 @@ class ParamConcatenation(object):
start = False
return "\n".join(strings)
def __repr__(self):
return "\n".join(map(repr,self.params))
return "\n".join(map(repr,self.params))

View file

@ -7,19 +7,24 @@ __updated__ = '2013-12-16'
def adjust_name_for_printing(name):
if name is not None:
return name.replace(" ", "_").replace(".", "_").replace("-","").replace("+","").replace("!","").replace("*","").replace("/","")
return name.replace(" ", "_").replace(".", "_").replace("-", "").replace("+", "").replace("!", "").replace("*", "").replace("/", "")
return ''
class Observable(object):
_observers_ = {}
def __init__(self, *args, **kwargs):
from collections import defaultdict
self._observer_callables_ = defaultdict(list)
def add_observer(self, observer, callble):
self._observers_[observer] = callble
#callble(self)
def remove_observer(self, observer):
del self._observers_[observer]
self._observer_callables_[observer].append(callble)
def remove_observer(self, observer, callble):
del self._observer_callables_[observer][callble]
def _notify_observers(self):
[callble(self) for callble in self._observers_.itervalues()]
[[callble(self) for callble in callables]
for callables in self._observer_callables_.itervalues()]
class Pickleable(object):
def _getstate(self):
"""
@ -47,10 +52,8 @@ class Pickleable(object):
#===============================================================================
class Parentable(object):
def __init__(self, direct_parent=None, parent_index=None):
super(Parentable,self).__init__()
self._direct_parent_ = direct_parent
self._parent_index_ = parent_index
_direct_parent_ = None
_parent_index_ = None
def has_parent(self):
return self._direct_parent_ is not None
@ -68,10 +71,13 @@ class Parentable(object):
return self
return self._direct_parent_._highest_parent_
def _notify_parameters_changed(self):
if self.has_parent():
self._direct_parent_._notify_parameters_changed()
class Nameable(Parentable):
_name = None
def __init__(self, name, direct_parent=None, parent_index=None):
super(Nameable,self).__init__(direct_parent, parent_index)
def __init__(self, name, *a, **kw):
super(Nameable, self).__init__(*a, **kw)
self._name = name or self.__class__.__name__
@property
@ -80,58 +86,21 @@ class Nameable(Parentable):
@name.setter
def name(self, name):
from_name = self.name
assert isinstance(name, str)
self._name = name
if self.has_parent():
self._direct_parent_._name_changed(self, from_name)
class Parameterizable(Parentable):
def __init__(self, *args, **kwargs):
super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.array_core import ParamList
_parameters_ = ParamList()
def parameter_names(self, add_name=False):
if add_name:
return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
def _collect_gradient(self, target):
import itertools
[p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
def _get_params(self):
import numpy as np
# don't overwrite this anymore!
if not self.size:
return np.empty(shape=(0,), dtype=np.float64)
return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
def _set_params(self, params, update=True):
# don't overwrite this anymore!
import itertools
[p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
self.parameters_changed()
def parameters_changed(self):
"""
This method gets called when parameters have changed.
Another way of listening to param changes is to
add self as a listener to the param, such that
updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
"""
pass
def _notify_parameters_changed(self):
self.parameters_changed()
self._direct_parent_._name_changed(self, from_name)
def hirarchy_name(self, adjust_for_printing=True):
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x
if self.has_parent():
self._direct_parent_._notify_parameters_changed()
return self._direct_parent_.hirarchy_name() + "." + adjust(self.name)
return adjust(self.name)
class Gradcheckable(Parentable):
#===========================================================================
# Gradchecking
#===========================================================================
def __init__(self, *a, **kw):
super(Gradcheckable, self).__init__(*a, **kw)
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
@ -139,6 +108,7 @@ class Gradcheckable(Parentable):
def _checkgrad(self, param):
raise NotImplementedError, "Need log likelihood to check gradient against"
class Indexable(object):
def _raveled_index(self):
raise NotImplementedError, "Need to be able to get the raveled Index"
@ -157,9 +127,10 @@ class Indexable(object):
"""
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
class Constrainable(Nameable, Indexable, Parameterizable):
def __init__(self, name, default_constraint=None):
super(Constrainable,self).__init__(name)
class Constrainable(Nameable, Indexable):
def __init__(self, name, default_constraint=None, *a, **kw):
super(Constrainable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations()
@ -167,6 +138,16 @@ class Constrainable(Nameable, Indexable, Parameterizable):
if self._default_constraint_ is not None:
self.constrain(self._default_constraint_)
def _disconnect_parent(self, constr=None):
if constr is None:
constr = self.constraints.copy()
self.constraints.clear()
self.constraints = constr
self._direct_parent_ = None
self._parent_index_ = None
self._connect_fixes()
self._notify_parent_change()
#===========================================================================
# Fixing Parameters:
#===========================================================================
@ -200,7 +181,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
def _set_unfixed(self, index):
import numpy as np
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
#rav_i = self._raveled_index_for(param)[index]
# rav_i = self._raveled_index_for(param)[index]
self._fixes_[index] = UNFIXED
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
@ -230,7 +211,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
"""evaluate the prior"""
if self.priors.size > 0:
x = self._get_params()
return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
return reduce(lambda a, b: a + b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
return 0.
def _log_prior_gradients(self):
@ -334,7 +315,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
if len(transforms) == 0:
transforms = which.properties()
import numpy as np
removed = np.empty((0, ), dtype=int)
removed = np.empty((0,), dtype=int)
for t in transforms:
unconstrained = which.remove(t, self._raveled_index())
removed = np.union1d(removed, unconstrained)
@ -344,5 +325,108 @@ class Constrainable(Nameable, Indexable, Parameterizable):
return removed
class Parameterizable(Constrainable):
def __init__(self, *args, **kwargs):
super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.array_core import ParamList
_parameters_ = ParamList()
self._added_names_ = set()
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
else: names = [adjust(x.name) for x in self._parameters_]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names
def _add_parameter_name(self, param):
pname = adjust_name_for_printing(param.name)
# and makes sure to not delete programmatically added parameters
if pname in self.__dict__:
if not (param is self.__dict__[pname]):
if pname in self._added_names_:
del self.__dict__[pname]
self._add_parameter_name(param)
else:
self.__dict__[pname] = param
self._added_names_.add(pname)
def _remove_parameter_name(self, param=None, pname=None):
assert param is None or pname is None, "can only delete either param by name, or the name of a param"
pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name)
if pname in self._added_names_:
del self.__dict__[pname]
self._added_names_.remove(pname)
self._connect_parameters()
def _name_changed(self, param, old_name):
self._remove_parameter_name(None, old_name)
self._add_parameter_name(param)
def _collect_gradient(self, target):
import itertools
[p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
def _set_gradient(self, g):
import itertools
[p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
def _get_params(self):
import numpy as np
# don't overwrite this anymore!
if not self.size:
return np.empty(shape=(0,), dtype=np.float64)
return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
def _set_params(self, params, update=True):
# don't overwrite this anymore!
import itertools
[p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
self.parameters_changed()
def copy(self):
"""Returns a (deep) copy of the current model"""
import copy
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
from .array_core import ParamList
dc = dict()
for k, v in self.__dict__.iteritems():
if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names():
if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
dc[k] = v.copy()
else:
dc[k] = copy.deepcopy(v)
if k == '_parameters_':
params = [p.copy() for p in v]
dc['_direct_parent_'] = None
dc['_parent_index_'] = None
dc['_parameters_'] = ParamList()
dc['constraints'].clear()
dc['priors'].clear()
dc['size'] = 0
s = self.__new__(self.__class__)
s.__dict__ = dc
for p in params:
s.add_parameter(p)
return s
def _notify_parameters_changed(self):
self.parameters_changed()
if self.has_parent():
self._direct_parent_._notify_parameters_changed()
def parameters_changed(self):
"""
This method gets called when parameters have changed.
Another way of listening to param changes is to
add self as a listener to the param, such that
updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
"""
pass

View file

@ -3,16 +3,15 @@
import numpy; np = numpy
import copy
import cPickle
import itertools
from re import compile, _pattern_type
from param import ParamConcatenation, Param
from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
from transformations import __fixed__, FIXED, UNFIXED
from param import ParamConcatenation
from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable
from transformations import __fixed__
from array_core import ParamList
class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
"""
Parameterized class
@ -54,8 +53,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
If you want to operate on all parameters use m[''] to wildcard select all paramters
and concatenate them. Printing m[''] will result in printing of all parameters in detail.
"""
def __init__(self, name=None):
super(Parameterized, self).__init__(name=name)
def __init__(self, name=None, *a, **kw):
super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
self._in_init_ = True
self._parameters_ = ParamList()
self.size = sum(p.size for p in self._parameters_)
@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
self._fixes_ = None
self._param_slices_ = []
self._connect_parameters()
self._added_names_ = set()
del self._in_init_
def add_parameter(self, param, index=None):
@ -89,8 +87,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
self._parameters_.append(param)
else:
start = sum(p.size for p in self._parameters_[:index])
self.constraints.shift(start, param.size)
self.priors.shift(start, param.size)
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
self._parameters_.insert(index, param)
@ -115,22 +113,19 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
"""
if not param in self._parameters_:
raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
del self._parameters_[param._parent_index_]
start = sum([p.size for p in self._parameters_[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
constr = param.constraints.copy()
param.constraints.clear()
param.constraints = constr
param._direct_parent_ = None
param._parent_index_ = None
param._connect_fixes()
param._notify_parent_change()
pname = adjust_name_for_printing(param.name)
if pname in self._added_names_:
del self.__dict__[pname]
self._connect_parameters()
#self._notify_parent_change()
del self._parameters_[param._parent_index_]
param._disconnect_parent()
self.constraints.shift_left(start, param.size)
self._connect_fixes()
self._connect_parameters()
self._notify_parent_change()
def _connect_parameters(self):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
@ -145,19 +140,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
for i, p in enumerate(self._parameters_):
p._direct_parent_ = self
p._parent_index_ = i
not_unique = []
sizes.append(p.size + sizes[-1])
self._param_slices_.append(slice(sizes[-2], sizes[-1]))
pname = adjust_name_for_printing(p.name)
# and makes sure to not delete programmatically added parameters
if pname in self.__dict__:
if isinstance(self.__dict__[pname], (Parameterized, Param)):
if not p is self.__dict__[pname]:
not_unique.append(pname)
del self.__dict__[pname]
elif not (pname in not_unique):
self.__dict__[pname] = p
self._added_names_.add(pname)
self._add_parameter_name(p)
#===========================================================================
# Pickling operations
@ -174,19 +159,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
cPickle.dump(self, f, protocol)
else:
cPickle.dump(self, f, protocol)
def copy(self):
"""Returns a (deep) copy of the current model """
# dc = dict()
# for k, v in self.__dict__.iteritems():
# if k not in ['_highest_parent_', '_direct_parent_']:
# dc[k] = copy.deepcopy(v)
# dc = copy.deepcopy(self.__dict__)
# dc['_highest_parent_'] = None
# dc['_direct_parent_'] = None
# s = self.__class__.new()
# s.__dict__ = dc
return copy.deepcopy(self)
def __getstate__(self):
if self._has_get_set_state():
return self._getstate()
@ -243,7 +216,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
# Optimization handles:
#===========================================================================
def _get_param_names(self):
n = numpy.array([p.name_hirarchical + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
@ -265,14 +238,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
[numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
return p
def _name_changed(self, param, old_name):
if hasattr(self, old_name) and old_name in self._added_names_:
delattr(self, old_name)
self._added_names_.remove(old_name)
pname = adjust_name_for_printing(param.name)
if pname not in self.__dict__:
self._added_names_.add(pname)
self.__dict__[pname] = param
#===========================================================================
# Indexable Handling
#===========================================================================
@ -335,10 +300,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
# you can retrieve the original param through this method, by passing
# the copy here
return self._parameters_[param._parent_index_]
def hirarchy_name(self):
if self.has_parent():
return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "."
return ''
#===========================================================================
# Get/set parameters:
#===========================================================================
@ -348,13 +309,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
"""
if not isinstance(regexp, _pattern_type): regexp = compile(regexp)
found_params = []
for p in self._parameters_:
if regexp.match(p.name) is not None:
for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters):
if regexp.match(n) is not None:
found_params.append(p)
if isinstance(p, Parameterized):
found_params.extend(p.grep_param_names(regexp))
return found_params
return [param for param in self._parameters_ if regexp.match(param.name) is not None]
def __getitem__(self, name, paramlist=None):
if paramlist is None:
paramlist = self.grep_param_names(name)
@ -366,36 +325,22 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
return ParamConcatenation(paramlist)
return paramlist[-1]
return ParamConcatenation(paramlist)
def __setitem__(self, name, value, paramlist=None):
try: param = self.__getitem__(name, paramlist)
except AttributeError as a: raise a
param[:] = value
# def __getattr__(self, name):
# return self.__getitem__(name)
# def __getattribute__(self, name):
# #try:
# return object.__getattribute__(self, name)
# except AttributeError:
# _, a, tb = sys.exc_info()
# try:
# return self.__getitem__(name)
# except AttributeError:
# raise AttributeError, a.message, tb
def __setattr__(self, name, val):
# override the default behaviour, if setting a param, so broadcasting can by used
if hasattr(self, "_parameters_"):
paramlist = self.grep_param_names(name)
if len(paramlist) == 1: self.__setitem__(name, val, paramlist); return
# override the default behaviour, if setting a param, so broadcasting can by used
if hasattr(self, '_parameters_'):
pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
if name in pnames: self._parameters_[pnames.index(name)][:] = val; return
object.__setattr__(self, name, val);
#===========================================================================
# Printing:
#===========================================================================
def _short(self):
# short string to print
if self.has_parent():
return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
else:
return adjust_name_for_printing(self.name)
return self.hirarchy_name()
@property
def flattened_parameters(self):
return [xi for x in self._parameters_ for xi in x.flattened_parameters]

View file

@ -29,3 +29,29 @@ class Normal(Parameterized):
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots
return variational_plots.plot(self,*args)
class SpikeAndSlab(Parameterized):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
Parameterized.__init__(self, name=name)
self.mean = Param("mean", means)
self.variance = Param('variance', variances, Logexp())
self.gamma = Param("binary_prob",binary_prob,)
self.add_parameters(self.mean, self.variance, self.gamma)
def plot(self, *args):
"""
Plot latent space X in 1D:
See GPy.plotting.matplot_dep.variational_plots
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots
return variational_plots.plot(self,*args)

View file

@ -44,46 +44,46 @@ class SparseGP(GP):
self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0]
if not (X_variance is None):
assert X_variance.shape == X.shape
self.X_variance = X_variance
if self.has_uncertain_inputs():
assert X_variance.shape == X.shape
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
self.add_parameter(self.Z, index=0)
self.parameters_changed()
def _update_gradients_Z(self, add=False):
#The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
if not self.Z.is_fixed:
if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
if self.X_variance is None:
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
else:
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
def has_uncertain_inputs(self):
return not (self.X_variance is None)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self._update_gradients_Z(add=False)
if self.has_uncertain_inputs():
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference_latent(self.kern, self.q, self.Z, self.likelihood, self.Y)
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
if self.has_uncertain_inputs():
self.kern.update_gradients_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
else:
self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
"""
Make a prediction for the latent function values
"""
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
Kx = self.kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
else:
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
@ -101,12 +101,10 @@ class SparseGP(GP):
"""
return GP._getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
def _setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GP._setstate(self, state)

View file

@ -1,9 +1,9 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np
default_seed = _np.random.seed(123344)
#default_seed = _np.random.seed(123344)
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
"""
model for testing purposes. Samples from a GP with rbf kernel and learns
the samples with a new kernel. Normally not for optimization, just model cheking
@ -21,19 +21,20 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
# generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
#+ GPy.kern.white(input_dim, 0.01)
)
#lengthscales = _np.random.rand(input_dim)
#k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
##+ GPy.kern.white(input_dim, 0.01)
#)
k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
#k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
p = .3
@ -41,14 +42,14 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
if nan:
m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
m.Y[_np.random.binomial(1,p,size=(Y.shape))] = _np.nan
m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan
m.parameters_changed()
#===========================================================================
# randomly obstruct data with percentage p
#===========================================================================
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales
#m.lengthscales = lengthscales
if plot:
import matplotlib.pyplot as pb
@ -73,7 +74,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
data = GPy.util.datasets.oil_100()
Y = data['X']
# create simple GP model
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
m = GPy.models.GPLVM(Y, 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1)
if optimize: m.optimize('scg', messages=verbose)
@ -88,7 +89,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
Y = Y - Y.mean(0)
Y /= Y.std(0)
# Create the model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q)
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q)
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
m.data_labels = data['Y'][:N].argmax(axis=1)
@ -138,7 +139,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
(1 - var))) + .001
Z = _np.random.permutation(X)[:num_inducing]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c
@ -164,7 +165,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
_np.random.seed(0)
data = GPy.util.datasets.oil()
kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
Y = data['X'][:N]
Yn = Gaussian(Y, normalize=True)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
@ -186,18 +187,25 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
return m
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
_np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
s1 = _np.vectorize(lambda x: _np.sin(x))
s1 = _np.vectorize(lambda x: -_np.sin(x))
s2 = _np.vectorize(lambda x: _np.cos(x))
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
sS = _np.vectorize(lambda x: _np.sin(2 * x))
sS = _np.vectorize(lambda x: x*_np.sin(x))
s1 = s1(x)
s2 = s2(x)
s3 = s3(x)
sS = sS(x)
S1 = _np.hstack([s1, sS])
s1 -= s1.mean(); s1 /= s1.std(0)
s2 -= s2.mean(); s2 /= s2.std(0)
s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0)
S1 = _np.hstack([s1, s2, sS])
S2 = _np.hstack([s2, s3, sS])
S3 = _np.hstack([s3, sS])
@ -268,7 +276,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
if optimize:
@ -288,16 +296,18 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .3, size=Y.shape)
m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k)
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
m.inference_method = VarDTCMissingData()
m.Y[inan] = _np.nan
m.q.variance *= .1
m.parameters_changed()
m.Yreal = Y
if optimize:
print "Optimizing model:"
@ -435,7 +445,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1()
# optimize
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@ -470,7 +480,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1()
Q = 6
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
# optimize
m.ensure_default_constraints()

View file

@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True):
Y = np.vstack((Y1, Y2))
#build the kernel
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalize(2,1)
k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k)
@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True):
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
# Y = np.vstack((Y1, Y2))
#
# k1 = GPy.kern.rbf(1)
# k1 = GPy.kern.RBF(1)
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
# m.constrain_fixed('.*rbf_var', 1.)
# m.optimize(max_iters=100)
@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
np.random.randint(0, 4, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1)
k1 = GPy.kern.RBF(1)
k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
k = k1**k2
@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF)
if plot:
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
ax = pb.gca()
@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
optim_point_y = np.empty(2)
np.random.seed(seed=seed)
for i in range(0, model_restarts):
# kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
# kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
m['noise_variance'] = np.random.uniform(1e-3, 1)
@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
ax.set_ylim(ylim)
return m # (models, lls)
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
"""
Evaluate the GP objective function for a given data set for a range of
signal to noise ratios and a range of lengthscales.
@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
optimizer='scg'
x_len = 30
X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
kern = GPy.kern.rbf(1)
kern = GPy.kern.RBF(1)
poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
kernel = GPy.kern.RBF(X.shape[1], ARD=1)
kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
m = GPy.models.GPRegression(X, Y, kernel)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior)
@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
kernel = GPy.kern.RBF(X.shape[1], ARD=1)
#kernel += GPy.kern.bias(X.shape[1])
X_variance = np.ones(X.shape) * 0.5
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
X = np.random.uniform(-3., 3., (num_samples, 1))
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
# construct kernel
rbf = GPy.kern.rbf(1)
rbf = GPy.kern.RBF(1)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
m.checkgrad(verbose=1)
@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
Y[inan] = np.nan
# construct kernel
rbf = GPy.kern.rbf(2)
rbf = GPy.kern.RBF(2)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
# likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3., 3., (7, 1))
k = GPy.kern.rbf(1)
k = GPy.kern.RBF(1)
# create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z)
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
print m
# the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
if plot:

View file

@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference
is not tractable. We then resort to a Laplace approximation (laplace.py) or
expectation propagation (ep.py).
The inference methods return a "Posterior" instance, which is a simple
The inference methods return a
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
instance, which is a simple
structure which contains a summary of the posterior. The model classes can then
use this posterior object for making predictions, optimizing hyper-parameters,
etc.
@ -29,3 +31,15 @@ expectation_propagation = 'foo' # TODO
from GPy.inference.latent_function_inference.var_dtc import VarDTC
from dtc import DTC
from fitc import FITC
# class FullLatentFunctionData(object):
#
#
# class LatentFunctionInference(object):
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"

View file

@ -32,7 +32,7 @@ class DTC(object):
#make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance)
if beta.size <1:
raise NotImplementedError, "no hetero noise with this implementatino of DTC"
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
@ -89,4 +89,85 @@ class DTC(object):
return post, log_marginal, grad_dict
class vDTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance)
if beta.size <1:
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
U = Knm
Uy = np.dot(U.T,Y)
#factor Kmm
Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
A_ = tdot(LiUTbeta)
trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_))
A = A_ + np.eye(num_inducing)
# factor A
LA = jitchol(A)
# back substutue to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T)
#compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \
-0.5*beta*np.sum(np.square(Y)) + \
0.5*np.sum(np.square(b)) + \
trace_term
# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P
LAL = Li.T.dot(A).dot(Li)
dL_dK = Kmmi - 0.5*(vvT_P + LAL)
# Compute dL_dU
vY = np.dot(v.reshape(-1,1),Y.T)
#dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
dL_dU *= beta
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
dL_dR -=beta*trace_term/num_data
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
return post, log_marginal, grad_dict

View file

@ -81,13 +81,16 @@ class Posterior(object):
def covariance(self):
if self._covariance is None:
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
return self._covariance
@property
def precision(self):
if self._precision is None:
self._precision, _, _, _ = pdinv(self.covariance)
self._precision = np.zeros(np.atleast_3d(self.covariance).shape) # if one covariance per dimension
for p in xrange(self.covariance.shape[-1]):
self._precision[:,:,p] = pdinv(self.covariance[:,:,p])[0]
return self._precision
@property

View file

@ -43,9 +43,20 @@ class VarDTC(object):
return Y * prec # TODO chache this, and make it effective
def inference(self, kern, X, X_variance, Z, likelihood, Y):
"""Inference for normal sparseGP"""
uncertain_inputs = False
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
"""Inference for GPLVM with uncertain inputs"""
uncertain_inputs = True
psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def _inference(self, kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs):
#see whether we're using variational uncertain inputs
uncertain_inputs = not (X_variance is None)
_, output_dim = Y.shape
@ -60,20 +71,87 @@ class VarDTC(object):
trYYT = self.get_trYYT(Y)
# do the inference:
dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \
psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = _do_inference_on(
kern, X, X_variance, Z, likelihood,
uncertain_inputs, output_dim,
beta, VVT_factor, trYYT)
het_noise = beta.size < 1
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z)
Lm = jitchol(Kmm)
# The rather complex computations of A
if uncertain_inputs:
if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
#if 0:
# evals, evecs = linalg.eigh(psi2_beta)
# clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
# if not np.array_equal(evals, clipped_evals):
# pass # print evals
# tmp = evecs * np.sqrt(clipped_evals)
# tmp = tmp.T
# no backsubstitution because of bound explosion on tr(A) if not...
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
likelihood.update_gradients(partial_for_likelihood)
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substutue C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit)
#put the gradients in the right places
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT)
#likelihood.update_gradients(partial_for_likelihood)
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'partial_for_likelihood':partial_for_likelihood}
else:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1}
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'partial_for_likelihood':partial_for_likelihood}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
@ -123,23 +201,35 @@ class VarDTCMissingData(object):
else:
self._subarray_indices = [[slice(None),slice(None)]]
return [Y], [(Y**2).sum()]
def inference(self, kern, X, X_variance, Z, likelihood, Y):
"""Inference for normal sparseGP"""
uncertain_inputs = False
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
"""Inference for GPLVM with uncertain inputs"""
uncertain_inputs = True
psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
def _inference(self, kern, psi0_all, psi1_all, psi2_all, Z, likelihood, Y, uncertain_inputs):
Ys, traces = self._Y(Y)
beta_all = 1./likelihood.variance
uncertain_inputs = not (X_variance is None)
het_noise = beta_all.size != 1
import itertools
num_inducing = Z.shape[0]
dL_dpsi0_all = np.zeros(X.shape[0])
dL_dpsi1_all = np.zeros((X.shape[0], num_inducing))
dL_dpsi0_all = np.zeros(Y.shape[0])
dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
if uncertain_inputs:
dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing))
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
partial_for_likelihood = 0
LB_all = Cpsi1Vf_all = 0
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
dL_dKmm = 0
log_marginal = 0
@ -148,11 +238,10 @@ class VarDTCMissingData(object):
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm)
# kernel computations, using BGPLVM notation
psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
VVT_factor_all = np.empty(Y.shape)
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
if not full_VVT_factor:
psi1V = np.dot(Y.T*beta_all, psi1_all).T
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
if het_noise: beta = beta_all[ind]
@ -183,10 +272,10 @@ class VarDTCMissingData(object):
LB = jitchol(B)
psi1Vf = psi1.T.dot(VVT_factor)
_LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)
if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf
LB_all += LB
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
@ -219,92 +308,67 @@ class VarDTCMissingData(object):
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT)
# gradients:
likelihood.update_gradients(partial_for_likelihood)
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
else:
print 'foobar'
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#import ipdb;ipdb.set_trace()
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
from ...util import diag
diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
# gradients:
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all}
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0_all,
'dL_dpsi1':dL_dpsi1_all,
'dL_dpsi2':dL_dpsi2_all,
'partial_for_likelihood':partial_for_likelihood}
else:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all}
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0_all,
'dL_dKnm':dL_dpsi1_all,
'partial_for_likelihood':partial_for_likelihood}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
if full_VVT_factor:
woodbury_vector = Cpsi1Vf_all # == Cpsi1V
else:
print 'foobar'
psi1V = np.dot(Y.T*beta_all, psi1_all).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB_all, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
Bi, _ = dpotri(LB_all, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB_all, lower=1)[0]
from ...util import diag
diag.add(Bi, 1)
#if not full_VVT_factor:
# print 'foobar'
# psi1V = np.dot(Y.T*beta_all, psi1_all).T
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB_all, tmp, lower=1)
# woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
#import ipdb;ipdb.set_trace()
#Bi, _ = dpotri(LB_all, lower=1)
#symmetrify(Bi)
#Bi = -dpotri(LB_all, lower=1)[0]
#from ...util import diag
#diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi)
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
#woodbury_inv = backsub_both_sides(Lm, Bi)
post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict
def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm):
# The rather complex computations of A
if uncertain_inputs:
if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
#if 0:
# evals, evecs = linalg.eigh(psi2_beta)
# clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
# if not np.array_equal(evals, clipped_evals):
# pass # print evals
# tmp = evecs * np.sqrt(clipped_evals)
# tmp = tmp.T
# no backsubstitution because of bound explosion on tr(A) if not...
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
return A
def _compute_psi(kern, X, X_variance, Z, uncertain_inputs):
if uncertain_inputs:
psi0 = kern.psi0(Z, X, X_variance)
psi1 = kern.psi1(Z, X, X_variance)
psi2 = kern.psi2(Z, X, X_variance)
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
psi2 = None
def _compute_psi(kern, X, X_variance, Z):
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
psi2 = None
return psi0, psi1, psi2
def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs):
Kmm = kern.K(Z)
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
return Kmm, psi0, psi1, psi2
def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf):
# Compute dL_dKmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
dL_dKmm = backsub_both_sides(Lm, delit)
return DBi_plus_BiPBi, data_fit, dL_dKmm
def _compute_psi_latent(kern, posterior_variational, Z):
psi0 = kern.psi0(Z, posterior_variational)
psi1 = kern.psi1(Z, posterior_variational)
psi2 = kern.psi2(Z, posterior_variational)
return psi0, psi1, psi2
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
@ -329,15 +393,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_psi1Vf(Lm, LB, psi1Vf):
# back substutue C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
return _LBi_Lmi_psi1Vf, Cpsi1Vf
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
# the partial derivative vector for the likelihood
if likelihood.size == 0:
@ -379,35 +434,3 @@ def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
return log_marginal
def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT):
het_noise = beta.size < 1
num_inducing = Z.shape[0]
num_data = X.shape[0]
# kernel computations, using BGPLVM notation
Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs)
#factor Kmm # TODO: cache?
Lm = jitchol(Kmm)
A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm)
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
_LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)
# data fit and derivative of L w.r.t. Kmm
DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim,
Lm, B, LB, _LBi_Lmi_psi1Vf)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit)
#put the gradients in the right places
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT)
return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood

View file

@ -1,9 +1,34 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import *
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:
pass
from kern import *
from _src.rbf import RBF
from _src.white import White
from _src.kern import Kern
from _src.linear import Linear
from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
#from _src.bias import Bias
#import coregionalize
#import exponential
#import eq_ode1
#import finite_dimensional
#import fixed
#import gibbs
#import hetero
#import hierarchical
#import independent_outputs
#import linear
#import Matern32
#import Matern52
#import mlp
#import ODE_1
#import periodic_exponential
#import periodic_Matern32
#import periodic_Matern52
#import poly
#import prod_orthogonal
#import prod
#import rational_quadratic
#import rbfcos
#import rbf
#import rbf_inv
#import spline
#import symmetric
#import white

View file

215
GPy/kern/_src/add.py Normal file
View file

@ -0,0 +1,215 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from linear import Linear
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
from kern import Kern
class Add(Kern):
def __init__(self, subkerns, tensor):
assert all([isinstance(k, Kern) for k in subkerns])
if tensor:
input_dim = sum([k.input_dim for k in subkerns])
self.input_slices = []
n = 0
for k in subkerns:
self.input_slices.append(slice(n, n+k.input_dim))
n += k.input_dim
else:
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
input_dim = subkerns[0].input_dim
self.input_slices = [slice(None) for k in subkerns]
super(Add, self).__init__(input_dim, 'add')
self.add_parameters(*subkerns)
def K(self, X, X2=None):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which
handLes this as X2 == X.
"""
assert X.shape[1] == self.input_dim
if X2 is None:
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
else:
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X):
assert X.shape[1] == self.input_dim
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def psi0(self, Z, mu, S):
return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0)
def psi1(self, Z, mu, S):
return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
def psi2(self, Z, mu, S):
psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
# compute the "cross" terms
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#ffrom fixed import Fixed
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#ffrom fixed import Fixed
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import white
from rbf import rbf
#from rbf_inv import rbfinv
#from bias import bias
from linear import linear
#ffrom fixed import fixed
target = np.zeros(Z.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, white):
continue
elif isinstance(p2, bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
return target
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import white
from rbf import rbf
#from rbf_inv import rbfinv
#from bias import bias
from linear import linear
#ffrom fixed import fixed
target_mu = np.zeros(mu.shape)
target_S = np.zeros(S.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, white):
continue
elif isinstance(p2, bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
target_mu += a
target_S += b
return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
Parameterized._setstate(self, state)

61
GPy/kern/_src/bias.py Normal file
View file

@ -0,0 +1,61 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Bias(Kern):
def __init__(self,input_dim,variance=1.,name=None):
super(Bias, self).__init__(input_dim, name)
self.variance = Param("variance", variance, Logexp())
self.add_parameter(self.variance)
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
def Kdiag(self,X):
ret = np.empty((X.shape[0],), dtype=np.float64)
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dK.sum()
def gradients_X(self, dL_dK,X, X2, target):
return np.zeros(X.shape)
def gradients_X_diag(self,dL_dKdiag,X,target):
return np.zeros(X.shape)
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S):
return self.Kdiag(mu)
def psi1(self, Z, mu, S, target):
return self.K(mu, S)
def psi2(self, Z, mu, S, target):
ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance**2
return ret
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
return np.zeros(Z.shape)
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
return np.zeros(mu.shape), np.zeros(S.shape)

50
GPy/kern/_src/brownian.py Normal file
View file

@ -0,0 +1,50 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class Brownian(Kern):
"""
Brownian motion in 1D only.
Negative times are treated as a separate (backwards!) Brownian motion.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self, input_dim=1, variance=1., name='Brownian'):
assert input_dim==1, "Brownian motion in 1D only"
super(Brownian, self).__init__(input_dim, name)
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
def K(self,X,X2=None):
if X2 is None:
X2 = X
return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)
def Kdiag(self,X):
return self.variance*np.abs(X.flatten())
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.))
#def update_gradients_diag(self, dL_dKdiag, X):
#self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag)
#def gradients_X(self, dL_dK, X, X2=None):
#if X2 is None:
#return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None]
#else:
#return np.sum(np.where(np.logical_and(np.abs(X)<np.abs(X2.T), np.sign(X)==np.sign(X2)), self.variance*dL_dK,0.),1)[:,None]

View file

@ -1,12 +1,13 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from kern import Kern
import numpy as np
from scipy import weave
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Coregionalize(Kernpart):
class Coregionalize(Kern):
"""
Covariance function for intrinsic/linear coregionalization models
@ -20,7 +21,7 @@ class Coregionalize(Kernpart):
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function
k(x,y) and B.
k(x, y) and B.
:param output_dim: number of outputs to coregionalize
:type output_dim: int
@ -29,7 +30,7 @@ class Coregionalize(Kernpart):
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: numpy array of dimensionality (num_outpus, W_columns)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (output_dim,)
:type kappa: numpy array of dimensionality (output_dim, )
.. note: see coregionalization examples in GPy.examples.regression for some usage.
"""
@ -37,18 +38,18 @@ class Coregionalize(Kernpart):
super(Coregionalize, self).__init__(input_dim=1, name=name)
self.output_dim = output_dim
self.rank = rank
if self.rank>output_dim-1:
if self.rank>output_dim:
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
if W is None:
W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim,self.rank)
self.W = Param('W',W)
assert W.shape==(self.output_dim, self.rank)
self.W = Param('W', W)
if kappa is None:
kappa = 0.5*np.ones(self.output_dim)
else:
assert kappa.shape==(self.output_dim,)
self.kappa = Param('kappa', kappa)
assert kappa.shape==(self.output_dim, )
self.kappa = Param('kappa', kappa, Logexp())
self.add_parameters(self.W, self.kappa)
self.parameters_changed()
@ -56,54 +57,58 @@ class Coregionalize(Kernpart):
def parameters_changed(self):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self,index,index2,target):
index = np.asarray(index,dtype=np.int)
def K(self, X, X2=None):
index = np.asarray(X, dtype=np.int)
#here's the old code (numpy)
#if index2 is None:
#index2 = index
#else:
#index2 = np.asarray(index2,dtype=np.int)
#index2 = np.asarray(index2, dtype=np.int)
#false_target = target.copy()
#ii,jj = np.meshgrid(index,index2)
#ii,jj = ii.T, jj.T
#false_target += self.B[ii,jj]
#ii, jj = np.meshgrid(index, index2)
#ii, jj = ii.T, jj.T
#false_target += self.B[ii, jj]
if index2 is None:
if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<N; i++){
target[i+i*N] += B[index[i]+output_dim*index[i]];
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] += B[index[i]+output_dim*index[j]];
target[i+j*N] += target[j+i*N];
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N,B,output_dim = index.size, self.B, self.output_dim
weave.inline(code,['target','index','N','B','output_dim'])
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(index2,dtype=np.int)
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N,num_inducing,B,output_dim = index.size,index2.size, self.B, self.output_dim
weave.inline(code,['target','index','index2','N','num_inducing','B','output_dim'])
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
def Kdiag(self,index,target):
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
def Kdiag(self, X):
return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()]
def update_gradients_full(self,dL_dK, index, index2=None):
index = np.asarray(index,dtype=np.int)
def update_gradients_full(self, dL_dK, X, X2=None):
index = np.asarray(X, dtype=np.int)
dL_dK_small = np.zeros_like(self.B)
if index2 is None:
if X2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int)
index2 = np.asarray(X2, dtype=np.int)
code="""
for(int i=0; i<num_inducing; i++){
@ -113,26 +118,24 @@ class Coregionalize(Kernpart):
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N','num_inducing','output_dim','dL_dK','dL_dK_small','index','index2'])
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
self.W.gradient = dW
self.kappa.gradient = dkappa
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
raise NotImplementedError, "some code below"
#def dKdiag_dtheta(self,dL_dKdiag,index,target):
#index = np.asarray(index,dtype=np.int).flatten()
#dL_dKdiag_small = np.zeros(self.output_dim)
#for i in range(self.output_dim):
#dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
#dW = 2.*self.W*dL_dKdiag_small[:,None]
#dkappa = dL_dKdiag_small
#target += np.hstack([dW.flatten(),dkappa])
def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i] for i in xrange(output_dim)])
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
self.kappa.gradient = dL_dKdiag_small
def gradients_X(self, dL_dK, X, X2=None):
return np.zeros(X.shape)
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_X(self,dL_dK,X,X2,target):
#NOTE In this case, pass is equivalent to returning zero.
pass

327
GPy/kern/_src/kern.py Normal file
View file

@ -0,0 +1,327 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
class Kern(Parameterized):
def __init__(self, input_dim, name, *a, **kw):
"""
The base class for a kernel: a positive definite function
which forms of a covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
super(Kern, self).__init__(name=name, *a, **kw)
self.input_dim = input_dim
def K(self, X, X2):
raise NotImplementedError
def Kdiag(self, Xa):
raise NotImplementedError
def psi0(self,Z,posterior_variational):
raise NotImplementedError
def psi1(self,Z,posterior_variational):
raise NotImplementedError
def psi2(self,Z,posterior_variational):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_X_diag(self, dL_dK, X):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
target = np.zeros(self.size)
self.update_gradients_diag(dL_dKdiag, X)
self._collect_gradient(target)
self.update_gradients_full(dL_dKnm, X, Z)
self._collect_gradient(target)
self.update_gradients_full(dL_dKmm, Z, None)
self._collect_gradient(target)
self._set_gradient(target)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
grad = self.gradients_X(dL_dKmm, Z)
grad += self.gradients_X(dL_dKnm.T, Z, X)
return grad
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
raise NotImplementedError
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
raise NotImplementedError
def plot_ARD(self, *args):
"""If an ARD kernel is present, plot a bar representation using matplotlib
See GPy.plotting.matplot_dep.plot_ARD
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args)
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one.
If Tensor is False, both kernels are defined on the same _space_. then
the created kernel will have the same number of inputs as self and
other (which must be the same).
If Tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim
:param other: the other kernel to be added
:type other: GPy.kern
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
return Add([self, other], tensor)
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other, tensor=False):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod
return Prod(self, other, tensor)
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks

252
GPy/kern/_src/linear.py Normal file
View file

@ -0,0 +1,252 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import cache_this
class Linear(Kern):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name)
self.ARD = ARD
if ARD == False:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances)
self.variances.add_observer(self, self._on_changed)
def _on_changed(self, obj):
#TODO: move this to base class? isnt it jst for the caching?
self._notify_observers()
#@cache_this(limit=3, reset_on_self=True)
def K(self, X, X2=None):
if self.ARD:
if X2 is None:
return tdot(X*np.sqrt(self.variances))
else:
rv = np.sqrt(self.variances)
return np.dot(X*rv, (X2*rv).T)
else:
return self._dot_product(X, X2) * self.variances
#@cache_this(limit=3, reset_on_self=False)
def _dot_product(self, X, X2=None):
if X2 is None:
return tdot(X)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return np.sum(self.variances * np.square(X), -1)
def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD:
if X2 is None:
self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
else:
product = X[:, None, :] * X2[None, :, :]
self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = np.atleast_1d(tmp.sum())
def gradients_X(self, dL_dK, X, X2=None):
if X2 is None:
return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
#---------------------------------------#
# PSI statistics #
# variational #
#---------------------------------------#
def psi0(self, Z, mu, S):
return np.sum(self.variances * self._mu2S(mu, S), 1)
def psi1(self, Z, mu, S):
return self.K(mu, Z) #the variance, it does nothing
def psi2(self, Z, mu, S):
ZA = Z * self.variances
ZAinner = self._ZAinner(mu, S, Z)
return np.dot(ZAinner, ZA.T)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
# psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(mu, S)
if self.ARD: grad = tmp.sum(0)
else: grad = np.atleast_1d(tmp.sum())
#psi1
self.update_gradients_full(dL_dpsi1, mu, Z)
grad += self.variances.gradient
#psi2
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(mu, S, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
else: grad += tmp.sum()
#from Kmm
self.update_gradients_full(dL_dKmm, Z, None)
self.variances.gradient += grad
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
# Kmm
grad = self.gradients_X(dL_dKmm, Z, None)
#psi1
grad += self.gradients_X(dL_dpsi1.T, Z, mu)
#psi2
self._weave_dpsi2_dZ(dL_dpsi2, Z, mu, S, grad)
return grad
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
grad_mu, grad_S = np.zeros(mu.shape), np.zeros(mu.shape)
# psi0
grad_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
grad_S += dL_dpsi0[:, None] * self.variances
# psi1
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
# psi2
self._weave_dpsi2_dmuS(dL_dpsi2, Z, mu, S, grad_mu, grad_S)
return grad_mu, grad_S
#--------------------------------------------------#
# Helpers for psi statistics #
#--------------------------------------------------#
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
#Using weave, we can exploit the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<input_dim;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<input_dim;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
AZA = self.variances*self._ZAinner(mu, S, Z)
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){
target(m,q) += 2*dL_dpsi2(n,m,mm)*AZA(n,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def _mu2S(self, mu, S):
return np.square(mu) + S
def _ZAinner(self, mu, S, Z):
ZA = Z*self.variances
inner = (mu[:, None, :] * mu[:, :, None])
diag_indices = np.diag_indices(mu.shape[1], 2)
inner[:, diag_indices[0], diag_indices[1]] += S
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!

65
GPy/kern/_src/prod.py Normal file
View file

@ -0,0 +1,65 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
class Prod(Kern):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kern
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self, k1, k2, tensor=False):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0, self.input_dim)
self.slice2 = slice(0, self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
def K(self, X, X2=None):
if X2 is None:
return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
else:
return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])
def Kdiag(self, X):
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
def update_gradients_full(self, dL_dK, X):
self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
if X2 is None:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None)
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None)
else:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
return target

View file

@ -4,13 +4,13 @@
import numpy as np
from scipy import weave
from kernpart import Kernpart
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class RBF(Kernpart):
class RBF(Kern):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
@ -52,30 +52,16 @@ class RBF(Kernpart):
lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance, Logexp())
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.lengthscale.add_observer(self, self.update_lengthscale)
self.update_lengthscale(self.lengthscale)
self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache
#self.update_inv_lengthscale(self.lengthscale)
#self.parameters_changed()
# initialize cache
#self._Z, self._mu, self._S = np.empty(shape=(3, 1))
#self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
# self.weave_options = {'headers' : ['<omp.h>'],
# 'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
# 'extra_link_args' : ['-lgomp']}
self.weave_options = {}
def on_input_change(self, X):
#self._K_computations(X, None)
pass
def update_lengthscale(self, l):
self.lengthscale2 = np.square(self.lengthscale)
@ -84,23 +70,32 @@ class RBF(Kernpart):
self._X, self._X2 = np.empty(shape=(2, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def K(self, X, X2, target):
def K(self, X, X2=None):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
return self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def Kdiag(self, X):
ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def psi0(self, Z, mu, S, target):
target += self.variance
def psi0(self, Z, posterior_variational):
mu = posterior_variational.mean
ret = np.empty(mu.shape[0], dtype=np.float64)
ret[:] = self.variance
return ret
def psi1(self, Z, mu, S, target):
def psi1(self, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
target += self._psi1
return self._psi1
def psi2(self, Z, mu, S, target):
def psi2(self, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
target += self._psi2
return self._psi2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
@ -131,7 +126,9 @@ class RBF(Kernpart):
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
#contributions from psi0:
@ -165,7 +162,42 @@ class RBF(Kernpart):
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def gradients_X(self, dL_dK, X, X2, target):
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
#psi1
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
#psi2
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
grad += self.gradients_X(dL_dKmm, Z, None)
return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
mu = posterior_variational.mean
S = posterior_variational.variance
self._psi_computations(Z, mu, S)
#psi1
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
#psi2
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
return grad_mu, grad_S
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:
@ -173,44 +205,15 @@ class RBF(Kernpart):
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
return np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def dKdiag_dX(self, dL_dKdiag, X):
return np.zeros(X.shape[0])
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
@ -373,6 +376,7 @@ class RBF(Kernpart):
#include <omp.h>
#include <math.h>
"""
mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)

211
GPy/kern/_src/stationary.py Normal file
View file

@ -0,0 +1,211 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ... import util
import numpy as np
from scipy import integrate
class Stationary(Kern):
def __init__(self, input_dim, variance, lengthscale, ARD, name):
super(Stationary, self).__init__(input_dim, name)
self.ARD = ARD
if not ARD:
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel"
else:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, input_dim], "Bad lengthscales"
if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale
else:
lengthscale = np.ones(self.input_dim)
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1
self.add_parameters(self.variance, self.lengthscale)
def _dist(self, X, X2):
if X2 is None:
X2 = X
return X[:, None, :] - X2[None, :, :]
def _scaled_dist(self, X, X2=None):
return np.sqrt(np.sum(np.square(self._dist(X, X2) / self.lengthscale), -1))
def Kdiag(self, X):
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
K = self.K(X, X2)
self.variance.gradient = np.sum(K * dL_dK)/self.variance
rinv = self._inv_dist(X, X2)
dL_dr = self.dK_dr(X, X2) * dL_dK
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
if self.ARD:
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)
else:
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()
def _inv_dist(self, X, X2=None):
dist = self._scaled_dist(X, X2)
if X2 is None:
nondiag = util.diag.offdiag_view(dist)
nondiag[:] = 1./nondiag
return dist
else:
return 1./np.where(dist != 0., dist, np.inf)
def gradients_X(self, dL_dK, X, X2=None):
dL_dr = self.dK_dr(X, X2) * dL_dK
invdist = self._inv_dist(X, X2)
ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
if X2 is None:
ret *= 2.
return ret
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None):
dist = self._scaled_dist(X, X2)
return self.variance * np.exp(-0.5 * dist)
def dK_dr(self, X, X2):
return -0.5*self.K(X, X2)
class Matern32(Stationary):
"""
Matern 3/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None):
dist = self._scaled_dist(X, X2)
return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
def dK_dr(self, X, X2):
dist = self._scaled_dist(X, X2)
return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist)
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the
RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
class Matern52(Stationary):
"""
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def K(self, X, X2=None):
r = self._scaled_dist(X, X2)
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
def dK_dr(self, X, X2):
r = self._scaled_dist(X, X2)
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self,F,F1,F2,F3,lower,upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))
class ExpQuad(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None):
r = self._scaled_dist(X, X2)
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, X, X2):
dist = self._scaled_dist(X, X2)
return -dist*self.K(X, X2)

View file

@ -1,12 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from kern import Kern
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class White(Kernpart):
class White(Kern):
"""
White noise kernel.
@ -20,14 +20,17 @@ class White(Kernpart):
self.input_dim = input_dim
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
self._psi1 = 0 # TODO: more elegance here
def K(self,X,X2,target):
def K(self, X, X2=None):
if X2 is None:
target += np.eye(X.shape[0])*self.variance
return np.eye(X.shape[0])*self.variance
else:
return np.zeros((X.shape[0], X2.shape[0]))
def Kdiag(self,X,target):
target += self.variance
def Kdiag(self,X):
ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X):
self.variance.gradient = np.trace(dL_dK)
@ -38,14 +41,8 @@ class White(Kernpart):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def gradients_X(self,dL_dK,X,X2):
return np.zeros_like(X)
def psi0(self,Z,mu,S,target):
pass # target += self.variance

View file

@ -1,680 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from parts.prod import Prod as prod
from parts.linear import Linear
from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple
(additive) kernel functions, and keeps track of various things
like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
computed additively. For multiplication, special _prod_ parts
are used.
:param input_dim: The dimensionality of the kernel's input space
:type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of Kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools
"""
super(kern, self).__init__('kern')
self.add_parameters(*parts)
self.input_dim = input_dim
if input_slices is None:
self.input_slices = [slice(None) for p in self._parameters_]
else:
assert len(input_slices) == len(self._parameters_)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self._parameters_:
assert isinstance(p, Kernpart), "bad kernel part"
def parameters_changed(self):
[p.parameters_changed() for p in self._parameters_]
def connect_input(self, Xparam):
[p.connect_input(Xparam) for p in self._parameters_]
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
#self.num_params,
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
#self.num_params = state.pop()
#self._parameters_ = state.pop()
Parameterized._setstate(self, state)
def plot_ARD(self, *args):
"""If an ARD kernel is present, plot a bar representation using matplotlib
See GPy.plotting.matplot_dep.plot_ARD
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args)
# def _transform_gradients(self, g):
# """
# Apply the transformations of the kernel so that the returned vector
# represents the gradient in the transformed space (i.e. that given by
# get_params_transformed())
#
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
# """
# x = self._get_params()
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__]
# # for constraint, index in self.constraints.iteritems():
# # if constraint != __fixed__:
# # g[index] = g[index] * constraint.gradfactor(x[index])
# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]]
# # if len(self.tied_indices) or len(self.fixed_indices):
# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
# # return np.delete(g, to_remove)
# # else:
# if self._fixes_ is not None: return g[self._fixes_]
# return g
# x = self._get_params()
# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
# if len(self.tied_indices) or len(self.fixed_indices):
# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
# return np.delete(g, to_remove)
# else:
# return g
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one.
If Tensor is False, both kernels are defined on the same _space_. then
the created kernel will have the same number of inputs as self and
other (which must be the same).
If Tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim
:param other: the other kernel to be added
:type other: GPy.kern
"""
if tensor:
D = self.input_dim + other.input_dim
self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)
# transfer constraints:
# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
# newkern.constraints = self.constraints + other.constraints
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.constraints = self.constraints + other.constraints
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
else:
assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
# transfer constraints:
# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
# newkern.constraints = self.constraints + other.constraints
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
[newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
[newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
return newkern
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other, tensor=False):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
K1 = self
K2 = other
#K1 = self.copy()
#K2 = other.copy()
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]
if tensor:
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
else:
newkern = kern(K1.input_dim, newkernparts, slices)
#newkern._follow_constrains(K1, K2)
return newkern
# def _follow_constrains(self, K1, K2):
#
# # Build the array that allows to go from the initial indices of the param to the new ones
# K1_param = []
# n = 0
# for k1 in K1.parts:
# K1_param += [range(n, n + k1.num_params)]
# n += k1.num_params
# n = 0
# K2_param = []
# for k2 in K2.parts:
# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
# n += k2.num_params
# index_param = []
# for p1 in K1_param:
# for p2 in K2_param:
# index_param += p1 + p2
# index_param = np.array(index_param)
#
# # Get the ties and constrains of the kernels before the multiplication
# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
#
# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
# prev_constr = K1.constraints + K2.constraints
#
# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values
#
# # follow the previous ties
# for arr in prev_ties:
# for j in arr:
# index_param[np.where(index_param == j)[0]] = arr[0]
#
# # ties and constrains
# for i in range(K1.num_params + K2.num_params):
# index = np.where(index_param == i)[0]
# if index.size > 1:
# self.tie_params(index)
# for i, t in zip(prev_constr_ind, prev_constr):
# self.constrain(np.where(index_param == i)[0], t)
#
# def _get_params(self):
# return np.hstack(self._parameters_)
# return np.hstack([p._get_params() for p in self._parameters_])
# def _set_params(self, x):
# import ipdb;ipdb.set_trace()
# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)]
# def _get_param_names(self):
# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count
# part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
#
# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
def K(self, X, X2=None, which_parts='all'):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which
handles this as X2 == X.
:param which_parts: a list of booleans detailing whether to include
each of the part functions. By default, 'all'
indicates all parts
"""
if which_parts == 'all':
which_parts = [True] * self.size
assert X.shape[1] == self.input_dim
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
return target
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X) for p in self._parameters_]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def _param_grad_helper(self, dL_dK, X, X2=None):
"""
Compute the gradient of the covariance function with respect to the parameters.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: Np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)
returns: dL_dtheta
"""
assert X.shape[1] == self.input_dim
target = np.zeros(self.size)
if X2 is None:
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
else:
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X, which_parts='all'):
"""Compute the diagonal of the covariance function for inputs X."""
if which_parts == 'all':
which_parts = [True] * self.size
assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
"""Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.size)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.size)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.size))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are num_samples,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S):
"""
Computer the psi2 statistics for the covariance function.
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: input_slices needed
crossterms = 0
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
target += crossterms
return target
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""
target = np.zeros(self.size)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
p1, p2 = self._parameters_[i1], self._parameters_[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# target *= 2
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
return target * 2
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks

View file

@ -1,65 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
def theta(x):
"""Heavisdie step function"""
return np.where(x>=0.,1.,0.)
class Brownian(Kernpart):
"""
Brownian Motion kernel.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim
assert self.input_dim==1, "Brownian motion in 1D only"
self.num_params = 1
self.name = 'Brownian'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):
if X2 is None:
X2 = X
target += self.variance*np.fmin(X,X2.T)
def Kdiag(self,X,target):
target += self.variance*X.flatten()
def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None:
X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.dot(X.flatten(), dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError, "TODO"
#target += self.variance
#target -= self.variance*theta(X-X2.T)
#if X.shape==X2.shape:
#if np.all(X==X2):
#np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0])
def dKdiag_dX(self,dL_dKdiag,X,target):
target += self.variance*dL_dKdiag[:,None]

View file

@ -1,139 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import integrate
class Matern32(Kernpart):
"""
Matern 3/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.num_params = 2
self.name = 'Mat32'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'Mat32'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance', 'lengthscale']
else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
invdist = 1. / np.where(dist != 0., dist, np.inf)
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
# dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar * dL_dK)
if self.ARD == True:
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis]
# dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
else:
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist
# dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
# print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
# return(G)
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))

View file

@ -1,145 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
from scipy import integrate
class Matern52(Kernpart):
"""
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.num_params = 2
self.name = 'Mat52'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'Mat52'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance,lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*dL_dK)
if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def Gram_matrix(self,F,F1,F2,F3,lower,upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))

View file

@ -1,29 +0,0 @@
import bias
import Brownian
import coregionalize
import exponential
import eq_ode1
import finite_dimensional
import fixed
import gibbs
import hetero
import hierarchical
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import ODE_1
import periodic_exponential
import periodic_Matern32
import periodic_Matern52
import poly
import prod_orthogonal
import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white

View file

@ -1,81 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from ...core.parameterization import Param
class Bias(Kernpart):
def __init__(self,input_dim,variance=1.,name=None):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
super(Bias, self).__init__(input_dim, name)
from ...core.parameterization.transformations import Logexp
self.variance = Param("variance", variance, Logexp())
self.add_parameter(self.variance)
def K(self,X,X2,target):
target += self.variance
def Kdiag(self,X,target):
target += self.variance
#def dK_dtheta(self,dL_dKdiag,X,X2,target):
#target += dL_dKdiag.sum()
def update_gradients_full(self, dL_dK, X):
self.variance.gradient = dL_dK.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()
def gradients_X(self, dL_dK,X, X2, target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
target += self.variance
def psi1(self, Z, mu, S, target):
self._psi1 = self.variance
target += self._psi1
def psi2(self, Z, mu, S, target):
target += self.variance**2
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target += dL_dpsi0.sum()
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
target += dL_dpsi1.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
target += 2.*self.variance*dL_dpsi2.sum()
def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
pass
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
pass
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
pass
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
pass
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
pass

View file

@ -1,129 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import integrate
class Exponential(Kernpart):
"""
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
.. math::
k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:param name: the name of the kernel
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='exp'):
self.input_dim = input_dim
self.ARD = ARD
self.variance = variance
self.name = name
if ARD == False:
self.num_params = 2
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
#self._set_params(np.hstack((variance, lengthscale.flatten())))
self.set_as_parameter('variance', 'lengthscale')
# def _get_params(self):
# """return the value of the parameters."""
# return np.hstack((self.variance, self.lengthscale))
#
# def _set_params(self, x):
# """set the value of the parameters."""
# assert x.size == self.num_params
# self.variance = x[0]
# self.lengthscale = x[1:]
#
# def _get_param_names(self):
# """return parameter names."""
# if self.num_params == 2:
# return ['variance', 'lengthscale']
# else:
# return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
np.add(self.variance * np.exp(-dist), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
invdist = 1. / np.where(dist != 0., dist, np.inf)
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
dvar = np.exp(-dist)
target[0] += np.sum(dvar * dL_dK)
if self.ARD == True:
dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None]
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
else:
dl = self.variance * dvar * dist2M.sum(-1) * invdist
target[1] += np.sum(dl * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
# NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag)
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def Gram_matrix(self, F, F1, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(1. / self.lengthscale * F[i](x) + F1[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
return(self.lengthscale / 2. / self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T))

View file

@ -1,176 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#from ...core.parameterized.Parameterized import set_as_parameter
from ...core.parameterization import Parameterized
class Kernpart(Parameterized):
def __init__(self,input_dim,name):
"""
The base class for a kernpart: a positive definite function
which forms part of a covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
super(Kernpart, self).__init__(name)
# the input dimensionality for the covariance
self.input_dim = input_dim
# the number of optimisable parameters
# the name of the covariance function.
# link to parameterized objects
#self._X = None
def connect_input(self, X):
X.add_observer(self, self.on_input_change)
#self._X = X
def on_input_change(self, X):
"""
During optimization this function will be called when
the inputs X changed. Use this to update caches dependent
on the inputs X.
"""
# overwrite this to update kernel when inputs X change
pass
# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs):
# """
# :param names: name of parameter to set as parameter
# :param gradient: gradient method to get the gradient of this parameter
# :param index: index of where to place parameter in printing
# :param args, kwargs: additional arguments to gradient
#
# Convenience method to connect Kernpart parameters:
# parameter with name (attribute of this Kernpart) will be set as parameter with following name:
#
# kernel_name + _ + parameter_name
#
# To add the kernels name to the parameter name use this method to
# add parameters.
# """
# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs)
# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs):
# """
# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter`
#
# Note: this method adds the kernels name in front of the parameter.
# """
# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs)
# if index is None:
# self._parameters_.append(p)
# else:
# self._parameters_.insert(index, p)
# self.__dict__[name] = p
#set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable
# def _get_params(self):
# raise NotImplementedError
# def _set_params(self,x):
# raise NotImplementedError
# def _get_param_names(self):
# raise NotImplementedError
def K(self,X,X2,target):
raise NotImplementedError
def Kdiag(self,X,target):
raise NotImplementedError
def _param_grad_helper(self,dL_dK,X,X2,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling _param_grad_helper. Need to
# override for stationary covariances (for example) to save
# time.
for i in range(X.shape[0]):
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi1(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dtheta(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi2(self,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params = 2
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
self.lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
self.lengthscale = np.ones(self.input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
def _set_params(self, x):
self.lengthscale = x
self.lengthscale2 = np.square(self.lengthscale)
# reset cached results
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def dKdiag_dtheta(self, dL_dKdiag, X, target):
# For stationary covariances, derivative of diagonal elements
# wrt lengthscale is 0.
target[0] += np.sum(dL_dKdiag)
def dKdiag_dX(self, dL_dK, X, target):
pass # true for all stationary kernels
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):
"""
The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
Kernpart.__init__(self, input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))

View file

@ -1,306 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Linear(Kernpart):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name)
self.ARD = ARD
if ARD == False:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2 = np.empty(shape=(2, 1))
def update_variance(self, v):
self.variances2 = np.square(self.variances)
def on_input_change(self, X):
self._K_computations(X, None)
def update_gradients_full(self, dL_dK, X):
self.variances.gradient[:] = 0
self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = tmp.sum()
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
# psi0:
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
else: self.variances.gradient[:] = tmp.sum()
#psi1
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
#psi2
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum()
#from Kmm
self._K_computations(Z, None)
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
def K(self, X, X2, target):
if self.ARD:
XX = X * np.sqrt(self.variances)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2 * np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
if X is not self._X or X2 is not None:
self._K_computations(X, X2)
target += self.variances * self._dot_product
def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target)
def _param_grad_helper(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
else:
product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
if X is not self._X or X2 is not None:
self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK)
def gradients_X(self, dL_dK, X, X2, target):
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target):
target += 2.*self.variances*dL_dKdiag[:,None]*X
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances
def psi1(self, Z, mu, S, target):
"""the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1"""
self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.gradients_X(dL_dpsi1.T, Z, mu, target)
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def psi2_new(self,Z,mu,S,target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1)
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
if self.ARD:
target += result.sum(0).sum(0).sum(0)
else:
target += result.sum()
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
Zs = Z*self.variances
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
#muAZZA = np.tensordot(mu,AZZA,(-1,0))
#target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
#Using weave, we can exploiut the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<input_dim;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<input_dim;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
#psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
#dummy_target = np.zeros_like(target)
#dummy_target += psi2_dZ.sum(0).sum(0)
AZA = self.variances*self.ZAinner
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){
target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
mu, AZA, target, dL_dpsi2 = param_to_array(mu, AZA, target, dL_dpsi2)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
self._X = X.copy()
if X2 is None:
self._dot_product = tdot(param_to_array(X))
self._X2 = None
else:
self._X2 = X2.copy()
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Zv_changed = not (fast_array_equal(Z, self._Z) and fast_array_equal(self.variances, self._variances))
muS_changed = not (fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S))
if Zv_changed:
# Z has changed, compute Z specific stuff
# self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim
# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self.ZA = Z * self.variances
self._Z = Z.copy()
self._variances = self.variances.copy()
if muS_changed:
self.mu2_S = np.square(mu) + S
self.inner = (mu[:, None, :] * mu[:, :, None])
diag_indices = np.diag_indices(mu.shape[1], 2)
self.inner[:, diag_indices[0], diag_indices[1]] += S
self._mu, self._S = mu.copy(), S.copy()
if Zv_changed or muS_changed:
self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
self._psi2 = np.dot(self.ZAinner, self.ZA.T)

View file

@ -1,125 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from coregionalize import Coregionalize
import numpy as np
import hashlib
class Prod(Kernpart):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self,k1,k2,tensor=False):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
#initialize cache
self._X, self._X2 = np.empty(shape=(2,1))
self._params = None
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self._K1 * self._K2
def K1(self,X, X2):
"""Compute the part of the kernel associated with k1."""
self._K_computations(X, X2)
return self._K1
def K2(self, X, X2):
"""Compute the part of the kernel associated with k2."""
self._K_computations(X, X2)
return self._K2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def _param_grad_helper(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else:
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else:
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params == self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)

View file

@ -1,352 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
class SS_RBF(Kernpart):
"""
The RBF kernel for Spike-and-Slab GPLVM
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'):
super(RBF, self).__init__(input_dim, name)
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale)
self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache
def on_input_change(self, X):
#self._K_computations(X, None)
pass
def update_lengthscale(self, l):
self.lengthscale2 = np.square(self.lengthscale)
def parameters_changed(self):
# reset cached results
self._X, self._X2 = np.empty(shape=(2, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def K(self, X, X2, target):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def psi0(self, Z, mu, S, target):
target += self.variance
def psi1(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi1
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.variance.gradient = np.sum(self._K_dvar * dL_dK)
if self.ARD:
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
#contributions from Kdiag
self.variance.gradient = np.sum(dL_dKdiag)
#from Knm
self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
self.lengthscale.gradeint = dpsi1_dlength.sum()
else:
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
#from psi2
d_var = 2.*self._psi2 / self.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi2_dlength.sum()
else:
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def gradients_X(self, dL_dK, X, X2, target):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
#params = self._get_params()
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)):
#self._X = X.copy()
#self._params_save = params.copy()
if X2 is None:
self._X2 = None
X = X / self.lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X / self.lengthscale
X2 = X2 / self.lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
"""
A helper function for update_gradients_* methods
Computes the derivative of the objective L wrt the lengthscales via
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)
assumes self._K_computations has just been called.
This is only valid if self.ARD=True
"""
target = np.zeros(self.input_dim)
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
X, dvardLdK = param_to_array(X, dvardLdK)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
X, X2, dvardLdK = param_to_array(X, X2, dvardLdK)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
return target
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Z_changed = not fast_array_equal(Z, self._Z)
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2

View file

@ -36,7 +36,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
assert Z.shape[1] == X.shape[1]
if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim)
kernel = kern.RBF(input_dim) # + kern.white(input_dim)
if likelihood is None:
likelihood = Gaussian()
@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM):
self.init = state.pop()
SparseGP._setstate(self, state)
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return dL_dmu, dL_dS
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self._update_gradients_Z(add=False)
super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.KL_divergence()
dL_dmu, dL_dS = self.dL_dmuS()
dL_dmu, dL_dS = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
# dL:
self.q.mean.gradient = dL_dmu

View file

@ -23,7 +23,7 @@ class GPRegression(GP):
def __init__(self, X, Y, kernel=None):
if kernel is None:
kernel = kern.rbf(X.shape[1])
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian()

View file

@ -7,9 +7,25 @@ from GPy.util.linalg import PCA
import numpy
import itertools
import pylab
from GPy.kern.kern import kern
from GPy.kern import Kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD2(Model):
"""
Apply MRD to all given datasets Y in Ylist.
Y_i in [n x p_i]
The samples n in the datasets need
to match up, whereas the dimensionality p_d can differ.
:param [array-like] Ylist: List of datasets to apply MRD on
:param array-like q_mean: mean of starting latent space q in [n x q]
:param array-like q_variance: variance of starting latent space q in [n x q]
:param :class:`~GPy.inference.latent_function_inference
"""
class MRD(Model):
"""
Do MRD on given Datasets in Ylist.
@ -48,11 +64,11 @@ class MRD(Model):
# sort out the kernels
if kernels is None:
kernels = [None] * len(likelihood_or_Y_list)
elif isinstance(kernels, kern):
elif isinstance(kernels, Kern):
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
else:
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim

View file

@ -1,8 +1,8 @@
import pylab as pb
import numpy as np
from ... import util
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
from GPy.util.misc import param_to_array
from ...util.misc import param_to_array
from .base_plots import x_frame2D
import itertools
import Tango
from matplotlib.cm import get_cmap
@ -37,7 +37,7 @@ def plot_latent(model, labels=None, which_indices=None,
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
util.plot.Tango.reset()
Tango.reset()
if labels is None:
labels = np.ones(model.num_data)
@ -46,7 +46,7 @@ def plot_latent(model, labels=None, which_indices=None,
X = param_to_array(model.X)
# first, plot the output variance as a function of the latent space
Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(X[:, [input_1, input_2]], resolution=resolution)
Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
def plot_function(x):
@ -87,7 +87,7 @@ def plot_latent(model, labels=None, which_indices=None,
else:
x = X[index, input_1]
y = X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2)
@ -120,7 +120,7 @@ def plot_magnification(model, labels=None, which_indices=None,
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
util.plot.Tango.reset()
Tango.reset()
if labels is None:
labels = np.ones(model.num_data)
@ -128,7 +128,7 @@ def plot_magnification(model, labels=None, which_indices=None,
input_1, input_2 = most_significant_input_dimensions(model, which_indices)
# first, plot the output variance as a function of the latent space
Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
def plot_function(x):
@ -165,7 +165,7 @@ def plot_magnification(model, labels=None, which_indices=None,
else:
x = model.X[index, input_1]
y = model.X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2)
@ -205,7 +205,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
return dmu_dX[indices, argmax], np.array(labels)[argmax]
if ax is None:
fig = pyplot.figure(num=fignum)
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if data_labels is None:
@ -241,7 +241,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
ax.legend()
ax.figure.tight_layout()
if updates:
pyplot.show()
pb.show()
clear = raw_input('Enter to continue')
if clear.lower() in 'yes' or clear == '':
controller.deactivate()

View file

@ -1,13 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import pylab as pb
import Tango
from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy
from ...kern.parts.linear import Linear
from ...kern import Linear
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
@ -29,22 +28,23 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
xticklabels = []
bars = []
x0 = 0
for p in kernel._parameters_:
c = Tango.nextMedium()
if hasattr(p, 'ARD') and p.ARD:
if title is None:
ax.set_title('ARD parameters, %s kernel' % p.name)
else:
ax.set_title(title)
if isinstance(p, Linear):
ard_params = p.variances
else:
ard_params = 1. / p.lengthscale
x = np.arange(x0, x0 + len(ard_params))
bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," ")))
xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
x0 += len(ard_params)
#for p in kernel._parameters_:
p = kernel
c = Tango.nextMedium()
if hasattr(p, 'ARD') and p.ARD:
if title is None:
ax.set_title('ARD parameters, %s kernel' % p.name)
else:
ax.set_title(title)
if isinstance(p, Linear):
ard_params = p.variances
else:
ard_params = 1. / p.lengthscale
x = np.arange(x0, x0 + len(ard_params))
from ...util.misc import param_to_array
bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," ")))
xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
x0 += len(ard_params)
x = np.arange(x0)
transOffset = offset_copy(ax.transData, fig=fig,
x=0., y= -2., units='points')

View file

@ -9,7 +9,7 @@ from ...util.misc import param_to_array
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[],
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols and which_parts
using which_data_rowsm which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
@ -58,7 +56,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
X, Y = param_to_array(model.X, model.Y)
if model.has_uncertain_inputs(): X_variance = model.X_variance
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
@ -68,7 +69,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#define the frame on which to plot
resolution = resolution or 200
Xnew, xmin, xmax = x_frame1D(model.X[:,free_dims], plot_limits=plot_limits)
Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
@ -76,29 +77,29 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#make a prediction on the frame and plot it
if plot_raw:
m, v = model._raw_predict(Xgrid, which_parts=which_parts)
m, v = model._raw_predict(Xgrid)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = model.Y
Y = Y
else:
m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts)
Y = model.Y
m, v, lower, upper = model.predict(Xgrid)
Y = Y
for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(model.X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts)
Ysim = model.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs"):
ax.errorbar(model.X[which_data, free_dims], model.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(model.X_variance[which_data, free_dims]),
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
@ -122,7 +123,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#define the frame for plotting on
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(model.X[:,free_dims], plot_limits, resolution)
Xnew, _, _, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
@ -131,15 +132,15 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#predict on the frame and plot
if plot_raw:
m, _ = model._raw_predict(Xgrid, which_parts=which_parts)
Y = model.Y
m, _ = model._raw_predict(Xgrid)
Y = Y
else:
m, _, _, _ = model.predict(Xgrid, which_parts=which_parts)
m, _, _, _ = model.predict(Xgrid)
Y = model.data
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.scatter(model.X[which_data_rows, free_dims[0]], model.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])

View file

@ -24,6 +24,13 @@ class Test(unittest.TestCase):
self.param_index.remove(one, [1])
self.assertListEqual(self.param_index[one].tolist(), [3])
def test_shift_left(self):
self.param_index.shift_left(1, 2)
self.assertListEqual(self.param_index[three].tolist(), [2,5])
self.assertListEqual(self.param_index[two].tolist(), [0,3])
self.assertListEqual(self.param_index[one].tolist(), [1])
def test_index_view(self):
#=======================================================================
# 0 1 2 3 4 5 6 7 8 9

View file

@ -10,8 +10,8 @@ import numpy as np
class Test(unittest.TestCase):
def setUp(self):
self.rbf = GPy.kern.rbf(1)
self.white = GPy.kern.white(1)
self.rbf = GPy.kern.RBF(1)
self.white = GPy.kern.White(1)
from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logistic
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
@ -39,14 +39,13 @@ class Test(unittest.TestCase):
def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
self.white.fix()
self.test1.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
self.assertEquals(self.white.white.constraints._offset, 0)
self.assertEquals(self.white.constraints._offset, 0)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
@ -57,18 +56,19 @@ class Test(unittest.TestCase):
self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0])
self.assertIs(self.white._fixes_,None)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)
self.test1.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
def test_add_parameter_already_in_hirarchy(self):
self.test1.add_parameter(self.white._parameters_[0])
def test_default_constraints(self):
self.assertIs(self.rbf.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
from GPy.core.parameterization.transformations import Logexp

View file

@ -12,6 +12,7 @@ import decorators
import classification
import subarray_and_sorting
import caching
import diag
try:
import sympy

View file

@ -1,46 +1,88 @@
from ..core.parameterization.array_core import ObservableArray, ParamList
from ..core.parameterization.parameter_core import Observable
class Cacher(object):
def __init__(self, operation, limit=5):
def __init__(self, operation, limit=5, reset_on_first=False):
self.limit = int(limit)
self._reset_on_first = reset_on_first
self.operation=operation
self.cached_inputs = ParamList([])
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
def __call__(self, X):
assert isinstance(X, ObservableArray)
if X in self.cached_inputs:
i = self.cached_inputs.index(X)
def __call__(self, *args):
if self._reset_on_first:
assert isinstance(args[0], Observable)
args[0].add_observer(self, self.reset)
cached_args = args
else:
cached_args = args[1:]
if not all([isinstance(arg, Observable) for arg in cached_args]):
return self.operation(*args)
if cached_args in self.cached_inputs:
i = self.cached_inputs.index(cached_args)
if self.inputs_changed[i]:
self.cached_outputs[i] = self.operation(X)
self.cached_outputs[i] = self.operation(*args)
self.inputs_changed[i] = False
return self.cached_outputs[i]
else:
if len(self.cached_inputs) == self.limit:
X_ = self.cached_inputs.pop(0)
X_.remove_observer(self)
args_ = self.cached_inputs.pop(0)
[a.remove_observer(self, self.on_cache_changed) for a in args_]
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
self.cached_inputs.append(X)
self.cached_outputs.append(self.operation(X))
self.cached_inputs.append(cached_args)
self.cached_outputs.append(self.operation(*args))
self.inputs_changed.append(False)
X.add_observer(self, self.on_cache_changed)
[a.add_observer(self, self.on_cache_changed) for a in args]
return self.cached_outputs[-1]
def on_cache_changed(self, X):
#print id(X)
Xbase = X
while Xbase is not None:
try:
i = self.cached_inputs.index(X)
break
except ValueError:
Xbase = X.base
continue
self.inputs_changed[i] = True
def on_cache_changed(self, arg):
self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
def reset(self, obj):
[[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs]
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
def cache_this(limit=5, reset_on_self=False):
def limited_cache(f):
c = Cacher(f, limit, reset_on_first=reset_on_self)
def f_wrap(*args):
return c(*args)
f_wrap._cacher = c
return f_wrap
return limited_cache
#Xbase = X
#while Xbase is not None:
#try:
#i = self.cached_inputs.index(X)
#break
#except ValueError:
#Xbase = X.base
#continue
#self.inputs_changed[i] = True

View file

@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500):
num_in = 1
X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in))
X.sort(axis=0)
rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.white(num_in, variance=1e-2)
rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.White(num_in, variance=1e-2)
kernel = rbf + white
K = kernel.K(X)
y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))

View file

@ -11,14 +11,14 @@ import numpy as np
def view(A, offset=0):
"""
Get a view on the diagonal elements of a 2D array.
This is actually a view (!) on the diagonal of the array, so you can
This is actually a view (!) on the diagonal of the array, so you can
in-place adjust the view.
:param :class:`ndarray` A: 2 dimensional numpy array
:param int offset: view offset to give back (negative entries allowed)
:rtype: :class:`ndarray` view of diag(A)
>>> import numpy as np
>>> X = np.arange(9).reshape(3,3)
>>> view(X)
@ -36,7 +36,7 @@ def view(A, offset=0):
"""
from numpy.lib.stride_tricks import as_strided
assert A.ndim == 2, "only implemented for 2 dimensions"
assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!"
assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!"
if offset > 0:
return as_strided(A[0, offset:], shape=(A.shape[0] - offset, ), strides=((A.shape[0]+1)*A.itemsize, ))
elif offset < 0:
@ -44,6 +44,12 @@ def view(A, offset=0):
else:
return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, ))
def offdiag_view(A, offset=0):
from numpy.lib.stride_tricks import as_strided
assert A.ndim == 2, "only implemented for 2 dimensions"
Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,))
return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1]))
def _diag_ufunc(A,b,offset,func):
dA = view(A, offset); func(dA,b,dA)
return A
@ -51,11 +57,11 @@ def _diag_ufunc(A,b,offset,func):
def times(A, b, offset=0):
"""
Times the view of A with b in place (!).
Returns modified A
Returns modified A
Broadcasting is allowed, thus b can be scalar.
if offset is not zero, make sure b is of right shape!
:param ndarray A: 2 dimensional array
:param ndarray-like b: either one dimensional or scalar
:param int offset: same as in view.
@ -67,11 +73,11 @@ multiply = times
def divide(A, b, offset=0):
"""
Divide the view of A by b in place (!).
Returns modified A
Returns modified A
Broadcasting is allowed, thus b can be scalar.
if offset is not zero, make sure b is of right shape!
:param ndarray A: 2 dimensional array
:param ndarray-like b: either one dimensional or scalar
:param int offset: same as in view.
@ -84,9 +90,9 @@ def add(A, b, offset=0):
Add b to the view of A in place (!).
Returns modified A.
Broadcasting is allowed, thus b can be scalar.
if offset is not zero, make sure b is of right shape!
:param ndarray A: 2 dimensional array
:param ndarray-like b: either one dimensional or scalar
:param int offset: same as in view.
@ -99,16 +105,16 @@ def subtract(A, b, offset=0):
Subtract b from the view of A in place (!).
Returns modified A.
Broadcasting is allowed, thus b can be scalar.
if offset is not zero, make sure b is of right shape!
:param ndarray A: 2 dimensional array
:param ndarray-like b: either one dimensional or scalar
:param int offset: same as in view.
:rtype: view of A, which is adjusted inplace
"""
return _diag_ufunc(A, b, offset, np.subtract)
if __name__ == '__main__':
import doctest
doctest.testmod()
doctest.testmod()

View file

@ -3,8 +3,6 @@
import numpy as np
import scipy as sp
import pylab as plt
class WarpingFunction(object):
"""
@ -39,6 +37,7 @@ class WarpingFunction(object):
def plot(self, psi, xmin, xmax):
y = np.arange(xmin, xmax, 0.01)
f_y = self.f(y, psi)
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y, f_y)
plt.xlabel('y')