Mirror of https://github.com/SheffieldML/GPy.git
Synced 2026-04-28 22:36:24 +02:00

Commit 632a702532: Merge branch 'params' of github.com:SheffieldML/GPy into params
78 changed files with 2892 additions and 3760 deletions
@@ -2,7 +2,9 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from model import *
-from parameterization.parameterized import *
+from parameterization.parameterized import adjust_name_for_printing, Parameterizable
+from parameterization.param import Param, ParamConcatenation

 from gp import GP
 from sparse_gp import SparseGP
 from svigp import SVIGP
@@ -30,7 +30,10 @@ class GP(Model):
         super(GP, self).__init__(name)

         assert X.ndim == 2
-        self.X = ObservableArray(X)
+        if isinstance(X, ObservableArray):
+            self.X = X
+        else: self.X = ObservableArray(X)

         self.num_data, self.input_dim = self.X.shape

         assert Y.ndim == 2
@@ -43,7 +46,8 @@ class GP(Model):
         else:
             self.Y_metadata = None

-        assert isinstance(kernel, kern.kern)
+        assert isinstance(kernel, kern.Kern)
+        assert self.input_dim == kernel.input_dim
         self.kern = kernel

         assert isinstance(likelihood, likelihoods.Likelihood)

@@ -70,7 +74,7 @@ class GP(Model):
     def log_likelihood(self):
         return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
+    def _raw_predict(self, _Xnew, full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalization or likelihood
@@ -80,29 +84,27 @@ class GP(Model):
         diagonal of the covariance is returned.

         """
-        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
+        Kx = self.kern.K(_Xnew, self.X).T
         #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
         WiKx = np.dot(self.posterior.woodbury_inv, Kx)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
-            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.K(_Xnew)
             #var = Kxx - tdot(LiKx.T)
             var = np.dot(Kx.T, WiKx)
         else:
-            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.Kdiag(_Xnew)
             #var = Kxx - np.sum(LiKx*LiKx, 0)
             var = Kxx - np.sum(WiKx*Kx, 0)
             var = var.reshape(-1, 1)
         return mu, var

-    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
+    def predict(self, Xnew, full_cov=False, **likelihood_args):
         """
         Predict the function(s) at the new point(s) Xnew.

         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param which_parts: specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
         :param full_cov: whether to return the full covariance matrix, or just
                 the diagonal
         :type full_cov: bool
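Note: _raw_predict above applies the standard GP posterior identities through the precomputed Woodbury terms held on self.posterior. A minimal, self-contained NumPy sketch of the same computation (standalone arrays with hypothetical names, not the GPy API):

    import numpy as np

    def raw_predict_sketch(K_xstar_x, K_starstar_diag, woodbury_inv, woodbury_vector):
        # posterior mean:      mu  = K_*x w                    (w = Woodbury vector)
        # posterior variance:  var = k_** - diag(K_*x W K_x*)  (W = Woodbury inverse)
        Kx = K_xstar_x.T                              # shape (N, Nnew), as in the code above
        WiKx = np.dot(woodbury_inv, Kx)               # W K_x*
        mu = np.dot(Kx.T, woodbury_vector)            # predictive mean
        var = K_starstar_diag - np.sum(WiKx * Kx, 0)  # diagonal predictive variance
        return mu, var.reshape(-1, 1)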
@@ -118,13 +120,13 @@ class GP(Model):

         """
         #predict the latent function values
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov)

         # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
         return mean, var, _025pm, _975pm

-    def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
+    def posterior_samples_f(self,X,size=10, full_cov=True):
         """
         Samples the posterior GP at the points X.

@@ -132,13 +134,11 @@ class GP(Model):
         :type X: np.ndarray, Nnew x self.input_dim.
         :param size: the number of a posteriori samples.
         :type size: int.
-        :param which_parts: which of the kernel functions to use (additively).
-        :type which_parts: 'all', or list of bools.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
+        m, v = self._raw_predict(X, full_cov=full_cov)
         v = v.reshape(m.size,-1) if len(v.shape)==3 else v
         if not full_cov:
             Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T

@@ -147,7 +147,7 @@ class GP(Model):

         return Ysim

-    def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
+    def posterior_samples(self,X,size=10, full_cov=True,noise_model=None):
         """
         Samples the posterior GP at the points X.

@@ -155,15 +155,13 @@ class GP(Model):
         :type X: np.ndarray, Nnew x self.input_dim.
         :param size: the number of a posteriori samples.
         :type size: int.
-        :param which_parts: which of the kernel functions to use (additively).
-        :type which_parts: 'all', or list of bools.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
         :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
         :type noise_model: integer.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
+        Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
         if isinstance(self.likelihood, Gaussian):
             noise_std = np.sqrt(self.likelihood._get_params())
             Ysim += np.random.normal(0,noise_std,Ysim.shape)
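Note: posterior_samples_f draws joint samples of the latent function from the posterior, and posterior_samples then adds Gaussian observation noise when the likelihood is Gaussian. A self-contained NumPy sketch of those two steps (standalone arrays, not the GPy API):

    import numpy as np

    mu = np.zeros(5)                    # posterior mean at 5 test points
    cov = np.eye(5) * 0.1               # posterior covariance (full_cov=True case)
    size = 10

    f_samples = np.random.multivariate_normal(mu, cov, size).T  # latent draws, shape (5, size)
    noise_std = 0.5                     # sqrt of the Gaussian likelihood variance
    y_samples = f_samples + np.random.normal(0, noise_std, f_samples.shape)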
@@ -4,12 +4,8 @@

 from .. import likelihoods
 from ..inference import optimization
-from ..util.linalg import jitchol
 from ..util.misc import opt_wrapper
 from parameterization import Parameterized
-from parameterization.parameterized import UNFIXED
-from parameterization.domains import _POSITIVE, _REAL
-from parameterization.index_operations import ParameterIndexOperations
 import multiprocessing as mp
 import numpy as np
 from numpy.linalg.linalg import LinAlgError

@@ -240,7 +236,7 @@ class Model(Parameterized):
         constrained positive.
         """
         raise DeprecationWarning, 'parameters now have default constraints'
-        positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
+        #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
         # param_names = self._get_param_names()

         # for s in positive_strings:

@@ -489,20 +485,17 @@ class Model(Parameterized):
         if not hasattr(self, 'kern'):
             raise ValueError, "this model has no kernel"

-        k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
-        if (not len(k) == 1):
-            raise ValueError, "cannot determine sensitivity for this kernel"
-        k = k[0]
-        from ..kern.parts.rbf import RBF
-        from ..kern.parts.rbf_inv import RBFInv
-        from ..kern.parts.linear import Linear
+        k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
+        from ..kern import RBF, Linear#, RBFInv
         if isinstance(k, RBF):
             return 1. / k.lengthscale
-        elif isinstance(k, RBFInv):
-            return k.inv_lengthscale
+        #elif isinstance(k, RBFInv):
+        #    return k.inv_lengthscale
         elif isinstance(k, Linear):
             return k.variances
+        else:
+            raise ValueError, "cannot determine sensitivity for this kernel"

     def pseudo_EM(self, stop_crit=.1, **kwargs):
         """
@@ -28,14 +28,20 @@ class ObservableArray(np.ndarray, Observable):
     """
     __array_priority__ = -1 # Never give back ObservableArray
     def __new__(cls, input_array):
-        obj = np.atleast_1d(input_array).view(cls)
+        if not isinstance(input_array, ObservableArray):
+            obj = np.atleast_1d(input_array).view(cls)
+        else: obj = input_array
         cls.__name__ = "ObservableArray\n "
-        obj._observers_ = {}
         return obj

+    def __init__(self, *a, **kw):
+        super(ObservableArray, self).__init__(*a, **kw)
+
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return
-        self._observers_ = getattr(obj, '_observers_', None)
+        self._observer_callables_ = getattr(obj, '_observer_callables_', None)

     def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(np.ndarray)
@@ -83,12 +83,22 @@ class ParameterIndexOperations(object):
     def iterproperties(self):
         return self._properties.iterkeys()

-    def shift(self, start, size):
+    def shift_right(self, start, size):
         for ind in self.iterindices():
             toshift = ind>=start
-            if toshift.size > 0:
-                ind[toshift] += size
+            ind[toshift] += size
+
+    def shift_left(self, start, size):
+        for v, ind in self.items():
+            todelete = (ind>=start) * (ind<start+size)
+            if todelete.size != 0:
+                ind = ind[~todelete]
+            toshift = ind>=start
+            if toshift.size != 0:
+                ind[toshift] -= size
+            if ind.size != 0: self._properties[v] = ind
+            else: del self._properties[v]

     def clear(self):
         self._properties.clear()
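Note: a small, hypothetical sketch of what the new shift_right/shift_left bookkeeping does to a stored index array (plain NumPy, not the ParameterIndexOperations API):

    import numpy as np

    ind = np.array([0, 1, 4, 8])        # indices carrying some constraint or prior

    # shift_right(start=2, size=3): indices at or after 2 move up by 3 -> [0, 1, 7, 11]
    right = ind.copy()
    right[right >= 2] += 3

    # shift_left(start=2, size=3): indices in [2, 5) are dropped, later ones move down by 3 -> [0, 1, 5]
    left = ind.copy()
    left = left[~((left >= 2) & (left < 2 + 3))]
    left[left >= 2] -= 3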
@@ -183,7 +193,7 @@ class ParameterIndexOperationsView(object):
             yield i


-    def shift(self, start, size):
+    def shift_right(self, start, size):
         raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations'

@@ -3,7 +3,7 @@

 import itertools
 import numpy
-from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing
+from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing
 from array_core import ObservableArray, ParamList

 ###### printing

@@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
 __print_threshold__ = 5
 ######

-class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable):
+class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
     """
     Parameter object for GPy models.

@@ -54,11 +54,11 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         obj._tied_to_me_ = SetDict()
         obj._tied_to_ = []
         obj._original_ = True
-        obj.gradient = None
+        obj._gradient_ = None
         return obj

-    def __init__(self, name, input_array, default_constraint=None):
-        super(Param, self).__init__(name=name, default_constraint=default_constraint)
+    def __init__(self, name, input_array, default_constraint=None, *a, **kw):
+        super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)

     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
@@ -76,10 +76,20 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         self._updated_ = getattr(obj, '_updated_', None)
         self._original_ = getattr(obj, '_original_', None)
         self._name = getattr(obj, 'name', None)
-        self.gradient = getattr(obj, 'gradient', None)
+        self._gradient_ = getattr(obj, '_gradient_', None)
         self.constraints = getattr(obj, 'constraints', None)
         self.priors = getattr(obj, 'priors', None)


+    @property
+    def gradient(self):
+        if self._gradient_ is None:
+            self._gradient_ = numpy.zeros(self._realshape_)
+        return self._gradient_[self._current_slice_]
+
+    @gradient.setter
+    def gradient(self, val):
+        self.gradient[:] = val
+
     #===========================================================================
     # Pickling operations
     #===========================================================================
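Note: the new gradient property allocates its backing array on first access and the setter writes values in place. A minimal, self-contained sketch of the same pattern (plain class with a hypothetical _realshape_ attribute, no ndarray subclassing):

    import numpy as np

    class LazyGradient(object):
        def __init__(self, shape):
            self._realshape_ = shape
            self._gradient_ = None          # nothing allocated until first use

        @property
        def gradient(self):
            if self._gradient_ is None:     # lazy allocation, mirrors Param.gradient above
                self._gradient_ = np.zeros(self._realshape_)
            return self._gradient_

        @gradient.setter
        def gradient(self, val):
            self.gradient[:] = val          # write in place so existing views stay valid

    p = LazyGradient((3,))
    p.gradient = 1.0                        # allocates, then broadcasts 1.0 into all entries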
@@ -115,6 +125,13 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         self._direct_parent_ = state.pop()
         self.name = state.pop()

+    def copy(self, *args):
+        constr = self.constraints.copy()
+        priors = self.priors.copy()
+        p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_)
+        p.constraints = constr
+        p.priors = priors
+        return p
     #===========================================================================
     # get/set parameters
     #===========================================================================

@@ -127,7 +144,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         return self.flat

     def _collect_gradient(self, target):
-        target[:] = self.gradient.flat
+        target += self.gradient.flat

+    def _set_gradient(self, g):
+        self.gradient = g.reshape(self._realshape_)

     #===========================================================================
     # Array operations -> done

@@ -214,7 +234,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
     def _description_str(self):
         if self.size <= 1: return ["%f" % self]
         else: return [str(self.shape)]
-    def parameter_names(self, add_name=False):
+    def parameter_names(self, add_self=False, adjust_for_printing=False):
+        if adjust_for_printing:
+            return [adjust_name_for_printing(self.name)]
         return [self.name]
     @property
     def flattened_parameters(self):

@@ -231,14 +253,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
     @property
     def _ties_str(self):
         return [t._short() for t in self._tied_to_] or ['']
-    @property
-    def name_hirarchical(self):
-        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
-        return adjust_name_for_printing(self.name)
     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format(
-            x=self.name_hirarchical)
+            x=self.hirarchy_name())
         return name + super(Param, self).__repr__(*args, **kwargs)
     def _ties_for(self, rav_index):
         # size = sum(p.size for p in self._tied_to_)

@@ -272,12 +289,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         gen = map(lambda x: " ".join(map(str, x)), gen)
         return reduce(lambda a, b:max(a, len(b)), gen, len(header))
     def _max_len_values(self):
-        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical))
+        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name()))
     def _max_len_index(self, ind):
         return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
     def _short(self):
         # short string to print
-        name = self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
+        name = self.hirarchy_name()
         if self._realsize_ < 2:
             return name
         ind = self._indices()

@@ -300,8 +317,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         if lp is None: lp = self._max_len_names(prirs, __tie_name__)
         sep = '-'
         header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}"
-        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
-        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
+        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
+        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
         if not ties: ties = itertools.cycle([''])
         return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
         # except: return super(Param, self).__str__()
@@ -11,14 +11,19 @@ def adjust_name_for_printing(name):
     return ''

 class Observable(object):
-    _observers_ = {}
+    def __init__(self, *args, **kwargs):
+        from collections import defaultdict
+        self._observer_callables_ = defaultdict(list)
+
     def add_observer(self, observer, callble):
-        self._observers_[observer] = callble
-        #callble(self)
-    def remove_observer(self, observer):
-        del self._observers_[observer]
+        self._observer_callables_[observer].append(callble)
+    def remove_observer(self, observer, callble):
+        del self._observer_callables_[observer][callble]

     def _notify_observers(self):
-        [callble(self) for callble in self._observers_.itervalues()]
+        [[callble(self) for callble in callables]
+         for callables in self._observer_callables_.itervalues()]

 class Pickleable(object):
     def _getstate(self):
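Note: the Observable rewrite keeps a list of callables per observer instead of a single callable. A small, standalone sketch of that pattern (toy class, not the GPy implementation; removal here uses list.remove rather than del):

    from collections import defaultdict

    class MiniObservable(object):
        def __init__(self):
            self._observer_callables_ = defaultdict(list)   # observer -> list of callbacks

        def add_observer(self, observer, callble):
            self._observer_callables_[observer].append(callble)

        def remove_observer(self, observer, callble):
            self._observer_callables_[observer].remove(callble)

        def _notify_observers(self):
            for callables in self._observer_callables_.values():
                for callble in callables:
                    callble(self)

    obs = MiniObservable()
    obs.add_observer('printer', lambda o: None)   # one observer key can now hold several callbacks
    obs._notify_observers()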
@@ -47,10 +52,8 @@ class Pickleable(object):
 #===============================================================================

 class Parentable(object):
-    def __init__(self, direct_parent=None, parent_index=None):
-        super(Parentable,self).__init__()
-        self._direct_parent_ = direct_parent
-        self._parent_index_ = parent_index
+    _direct_parent_ = None
+    _parent_index_ = None

     def has_parent(self):
         return self._direct_parent_ is not None

@@ -68,10 +71,13 @@ class Parentable(object):
             return self
         return self._direct_parent_._highest_parent_

+    def _notify_parameters_changed(self):
+        if self.has_parent():
+            self._direct_parent_._notify_parameters_changed()
+
 class Nameable(Parentable):
-    _name = None
-    def __init__(self, name, direct_parent=None, parent_index=None):
-        super(Nameable,self).__init__(direct_parent, parent_index)
+    def __init__(self, name, *a, **kw):
+        super(Nameable, self).__init__(*a, **kw)
         self._name = name or self.__class__.__name__

     @property

@@ -80,58 +86,21 @@ class Nameable(Parentable):
     @name.setter
     def name(self, name):
         from_name = self.name
+        assert isinstance(name, str)
         self._name = name
         if self.has_parent():
             self._direct_parent_._name_changed(self, from_name)
-
-class Parameterizable(Parentable):
-    def __init__(self, *args, **kwargs):
-        super(Parameterizable, self).__init__(*args, **kwargs)
-        from GPy.core.parameterization.array_core import ParamList
-        _parameters_ = ParamList()
-
-    def parameter_names(self, add_name=False):
-        if add_name:
-            return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
-        return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
-
-    def _collect_gradient(self, target):
-        import itertools
-        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-
-    def _get_params(self):
-        import numpy as np
-        # don't overwrite this anymore!
-        if not self.size:
-            return np.empty(shape=(0,), dtype=np.float64)
-        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
-
-    def _set_params(self, params, update=True):
-        # don't overwrite this anymore!
-        import itertools
-        [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-        self.parameters_changed()
-
-    def parameters_changed(self):
-        """
-        This method gets called when parameters have changed.
-        Another way of listening to param changes is to
-        add self as a listener to the param, such that
-        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
-        """
-        pass
-
-    def _notify_parameters_changed(self):
-        self.parameters_changed()
-        if self.has_parent():
-            self._direct_parent_._notify_parameters_changed()
+    def hirarchy_name(self, adjust_for_printing=True):
+        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
+        else: adjust = lambda x: x
+        if self.has_parent():
+            return self._direct_parent_.hirarchy_name() + "." + adjust(self.name)
+        return adjust(self.name)


 class Gradcheckable(Parentable):
-    #===========================================================================
-    # Gradchecking
-    #===========================================================================
+    def __init__(self, *a, **kw):
+        super(Gradcheckable, self).__init__(*a, **kw)
     def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
         if self.has_parent():
             return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)

@@ -139,6 +108,7 @@ class Gradcheckable(Parentable):
     def _checkgrad(self, param):
         raise NotImplementedError, "Need log likelihood to check gradient against"


 class Indexable(object):
     def _raveled_index(self):
         raise NotImplementedError, "Need to be able to get the raveled Index"

@@ -157,9 +127,10 @@ class Indexable(object):
         """
         raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"

-class Constrainable(Nameable, Indexable, Parameterizable):
-    def __init__(self, name, default_constraint=None):
-        super(Constrainable,self).__init__(name)
+class Constrainable(Nameable, Indexable):
+    def __init__(self, name, default_constraint=None, *a, **kw):
+        super(Constrainable, self).__init__(name=name, *a, **kw)
         self._default_constraint_ = default_constraint
         from index_operations import ParameterIndexOperations
         self.constraints = ParameterIndexOperations()

@@ -167,6 +138,16 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         if self._default_constraint_ is not None:
             self.constrain(self._default_constraint_)

+    def _disconnect_parent(self, constr=None):
+        if constr is None:
+            constr = self.constraints.copy()
+        self.constraints.clear()
+        self.constraints = constr
+        self._direct_parent_ = None
+        self._parent_index_ = None
+        self._connect_fixes()
+        self._notify_parent_change()
+
     #===========================================================================
     # Fixing Parameters:
     #===========================================================================
@@ -344,5 +325,108 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         return removed


+
+class Parameterizable(Constrainable):
+    def __init__(self, *args, **kwargs):
+        super(Parameterizable, self).__init__(*args, **kwargs)
+        from GPy.core.parameterization.array_core import ParamList
+        _parameters_ = ParamList()
+        self._added_names_ = set()
+
+    def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
+        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
+        else: adjust = lambda x: x
+        if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
+        else: names = [adjust(x.name) for x in self._parameters_]
+        if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
+        return names
+
+    def _add_parameter_name(self, param):
+        pname = adjust_name_for_printing(param.name)
+        # and makes sure to not delete programmatically added parameters
+        if pname in self.__dict__:
+            if not (param is self.__dict__[pname]):
+                if pname in self._added_names_:
+                    del self.__dict__[pname]
+                    self._add_parameter_name(param)
+        else:
+            self.__dict__[pname] = param
+            self._added_names_.add(pname)
+
+    def _remove_parameter_name(self, param=None, pname=None):
+        assert param is None or pname is None, "can only delete either param by name, or the name of a param"
+        pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name)
+        if pname in self._added_names_:
+            del self.__dict__[pname]
+            self._added_names_.remove(pname)
+        self._connect_parameters()
+
+    def _name_changed(self, param, old_name):
+        self._remove_parameter_name(None, old_name)
+        self._add_parameter_name(param)
+
+    def _collect_gradient(self, target):
+        import itertools
+        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+
+    def _set_gradient(self, g):
+        import itertools
+        [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+
+    def _get_params(self):
+        import numpy as np
+        # don't overwrite this anymore!
+        if not self.size:
+            return np.empty(shape=(0,), dtype=np.float64)
+        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
+
+    def _set_params(self, params, update=True):
+        # don't overwrite this anymore!
+        import itertools
+        [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+        self.parameters_changed()
+
+    def copy(self):
+        """Returns a (deep) copy of the current model"""
+        import copy
+        from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
+        from .array_core import ParamList
+
+        dc = dict()
+        for k, v in self.__dict__.iteritems():
+            if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names():
+                if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
+                    dc[k] = v.copy()
+                else:
+                    dc[k] = copy.deepcopy(v)
+            if k == '_parameters_':
+                params = [p.copy() for p in v]
+
+        dc['_direct_parent_'] = None
+        dc['_parent_index_'] = None
+        dc['_parameters_'] = ParamList()
+        dc['constraints'].clear()
+        dc['priors'].clear()
+        dc['size'] = 0
+
+        s = self.__new__(self.__class__)
+        s.__dict__ = dc
+
+        for p in params:
+            s.add_parameter(p)
+
+        return s
+
+    def _notify_parameters_changed(self):
+        self.parameters_changed()
+        if self.has_parent():
+            self._direct_parent_._notify_parameters_changed()
+
+    def parameters_changed(self):
+        """
+        This method gets called when parameters have changed.
+        Another way of listening to param changes is to
+        add self as a listener to the param, such that
+        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
+        """
+        pass
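Note: parameter_names in the new Parameterizable walks the parameter tree and optionally prefixes each name with its parent, producing dotted hierarchical names. A toy, standalone sketch of that recursion (hypothetical Node class and names, not the GPy types):

    class Node(object):
        def __init__(self, name, children=None):
            self.name = name
            self.children = children or []     # leaves have no children

        def parameter_names(self, add_self=False):
            if not self.children:
                names = [self.name]
            else:
                names = [n for c in self.children for n in c.parameter_names(add_self=True)]
            if add_self and self.children:
                names = [self.name + "." + n for n in names]
            return names

    m = Node('gp', [Node('rbf', [Node('variance'), Node('lengthscale')]), Node('noise_variance')])
    names = m.parameter_names()   # ['rbf.variance', 'rbf.lengthscale', 'noise_variance']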
@@ -3,16 +3,15 @@


 import numpy; np = numpy
-import copy
 import cPickle
 import itertools
 from re import compile, _pattern_type
-from param import ParamConcatenation, Param
-from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
-from transformations import __fixed__, FIXED, UNFIXED
+from param import ParamConcatenation
+from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable
+from transformations import __fixed__
 from array_core import ParamList

-class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
+class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
     """
     Parameterized class

@@ -54,8 +53,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
     If you want to operate on all parameters use m[''] to wildcard select all paramters
     and concatenate them. Printing m[''] will result in printing of all parameters in detail.
     """
-    def __init__(self, name=None):
-        super(Parameterized, self).__init__(name=name)
+    def __init__(self, name=None, *a, **kw):
+        super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
         self._in_init_ = True
         self._parameters_ = ParamList()
         self.size = sum(p.size for p in self._parameters_)

@@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         self._fixes_ = None
         self._param_slices_ = []
         self._connect_parameters()
-        self._added_names_ = set()
         del self._in_init_

     def add_parameter(self, param, index=None):

@@ -89,8 +87,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
             self._parameters_.append(param)
         else:
             start = sum(p.size for p in self._parameters_[:index])
-            self.constraints.shift(start, param.size)
-            self.priors.shift(start, param.size)
+            self.constraints.shift_right(start, param.size)
+            self.priors.shift_right(start, param.size)
             self.constraints.update(param.constraints, start)
             self.priors.update(param.priors, start)
             self._parameters_.insert(index, param)

@@ -115,21 +113,18 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         """
         if not param in self._parameters_:
             raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
-        del self._parameters_[param._parent_index_]
+        start = sum([p.size for p in self._parameters_[:param._parent_index_]])
+        self._remove_parameter_name(param)
         self.size -= param.size
-        constr = param.constraints.copy()
-        param.constraints.clear()
-        param.constraints = constr
-        param._direct_parent_ = None
-        param._parent_index_ = None
-        param._connect_fixes()
-        param._notify_parent_change()
-        pname = adjust_name_for_printing(param.name)
-        if pname in self._added_names_:
-            del self.__dict__[pname]
-        self._connect_parameters()
-        #self._notify_parent_change()
+        del self._parameters_[param._parent_index_]
+        param._disconnect_parent()
+        self.constraints.shift_left(start, param.size)
         self._connect_fixes()
+        self._connect_parameters()
+        self._notify_parent_change()


     def _connect_parameters(self):
         # connect parameterlist to this parameterized object

@@ -145,19 +140,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         for i, p in enumerate(self._parameters_):
             p._direct_parent_ = self
             p._parent_index_ = i
-            not_unique = []
             sizes.append(p.size + sizes[-1])
             self._param_slices_.append(slice(sizes[-2], sizes[-1]))
-            pname = adjust_name_for_printing(p.name)
-            # and makes sure to not delete programmatically added parameters
-            if pname in self.__dict__:
-                if isinstance(self.__dict__[pname], (Parameterized, Param)):
-                    if not p is self.__dict__[pname]:
-                        not_unique.append(pname)
-                        del self.__dict__[pname]
-            elif not (pname in not_unique):
-                self.__dict__[pname] = p
-                self._added_names_.add(pname)
+            self._add_parameter_name(p)

     #===========================================================================
     # Pickling operations

@@ -174,19 +159,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
             cPickle.dump(self, f, protocol)
         else:
             cPickle.dump(self, f, protocol)
-    def copy(self):
-        """Returns a (deep) copy of the current model """
-        # dc = dict()
-        # for k, v in self.__dict__.iteritems():
-        #     if k not in ['_highest_parent_', '_direct_parent_']:
-        #         dc[k] = copy.deepcopy(v)
-
-        # dc = copy.deepcopy(self.__dict__)
-        # dc['_highest_parent_'] = None
-        # dc['_direct_parent_'] = None
-        # s = self.__class__.new()
-        # s.__dict__ = dc
-        return copy.deepcopy(self)
     def __getstate__(self):
         if self._has_get_set_state():
             return self._getstate()

@@ -243,7 +216,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
     # Optimization handles:
     #===========================================================================
     def _get_param_names(self):
-        n = numpy.array([p.name_hirarchical + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
+        n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
         return n
     def _get_param_names_transformed(self):
         n = self._get_param_names()

@@ -265,14 +238,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
         [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
         return p
-    def _name_changed(self, param, old_name):
-        if hasattr(self, old_name) and old_name in self._added_names_:
-            delattr(self, old_name)
-            self._added_names_.remove(old_name)
-        pname = adjust_name_for_printing(param.name)
-        if pname not in self.__dict__:
-            self._added_names_.add(pname)
-            self.__dict__[pname] = param
     #===========================================================================
     # Indexable Handling
     #===========================================================================

@@ -335,10 +300,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         # you can retrieve the original param through this method, by passing
         # the copy here
         return self._parameters_[param._parent_index_]
-    def hirarchy_name(self):
-        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "."
-        return ''
     #===========================================================================
     # Get/set parameters:
     #===========================================================================

@@ -348,13 +309,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         """
         if not isinstance(regexp, _pattern_type): regexp = compile(regexp)
         found_params = []
-        for p in self._parameters_:
-            if regexp.match(p.name) is not None:
+        for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters):
+            if regexp.match(n) is not None:
                 found_params.append(p)
-            if isinstance(p, Parameterized):
-                found_params.extend(p.grep_param_names(regexp))
         return found_params
-        return [param for param in self._parameters_ if regexp.match(param.name) is not None]
     def __getitem__(self, name, paramlist=None):
         if paramlist is None:
             paramlist = self.grep_param_names(name)
@@ -366,36 +325,22 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
             return ParamConcatenation(paramlist)
             return paramlist[-1]
         return ParamConcatenation(paramlist)

     def __setitem__(self, name, value, paramlist=None):
         try: param = self.__getitem__(name, paramlist)
         except AttributeError as a: raise a
         param[:] = value
-    # def __getattr__(self, name):
-    #     return self.__getitem__(name)
-    # def __getattribute__(self, name):
-    #     #try:
-    #     return object.__getattribute__(self, name)
-    #     except AttributeError:
-    #         _, a, tb = sys.exc_info()
-    #         try:
-    #             return self.__getitem__(name)
-    #         except AttributeError:
-    #             raise AttributeError, a.message, tb
     def __setattr__(self, name, val):
         # override the default behaviour, if setting a param, so broadcasting can by used
-        if hasattr(self, "_parameters_"):
-            paramlist = self.grep_param_names(name)
-            if len(paramlist) == 1: self.__setitem__(name, val, paramlist); return
+        if hasattr(self, '_parameters_'):
+            pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
+            if name in pnames: self._parameters_[pnames.index(name)][:] = val; return
         object.__setattr__(self, name, val);
     #===========================================================================
     # Printing:
     #===========================================================================
     def _short(self):
-        # short string to print
-        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
-        else:
-            return adjust_name_for_printing(self.name)
+        return self.hirarchy_name()
     @property
     def flattened_parameters(self):
         return [xi for x in self._parameters_ for xi in x.flattened_parameters]
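Note: the reworked __setattr__ broadcasts an assignment into an existing child parameter instead of rebinding the attribute. A standalone sketch of that override pattern (toy classes and names, not the GPy implementation):

    import numpy as np

    class ToyParameterized(object):
        def __init__(self):
            object.__setattr__(self, '_parameters_', {'lengthscale': np.ones(3)})

        def __setattr__(self, name, val):
            # if the name matches a child parameter, write into it in place (broadcasting)
            if name in self._parameters_:
                self._parameters_[name][:] = val
                return
            object.__setattr__(self, name, val)

    t = ToyParameterized()
    t.lengthscale = 2.0          # fills the existing array with 2.0, identity unchanged
    t.other = 'plain attribute'  # ordinary attribute assignment still works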
@@ -3,21 +3,77 @@ Created on 6 Nov 2013

 @author: maxz
 '''

+import numpy as np
 from parameterized import Parameterized
 from param import Param
 from transformations import Logexp

-class Normal(Parameterized):
+class VariationalPrior(object):
+    def KL_divergence(self, variational_posterior):
+        raise NotImplementedError, "override this for variational inference of latent space"
+
+    def update_gradients_KL(self, variational_posterior):
+        """
+        updates the gradients for mean and variance **in place**
+        """
+        raise NotImplementedError, "override this for variational inference of latent space"
+
+class NormalPrior(VariationalPrior):
+    def KL_divergence(self, variational_posterior):
+        var_mean = np.square(variational_posterior.mean).sum()
+        var_S = (variational_posterior.variance - np.log(variational_posterior.variance)).sum()
+        return 0.5 * (var_mean + var_S) - 0.5 * variational_posterior.input_dim * variational_posterior.num_data
+
+    def update_gradients_KL(self, variational_posterior):
+        # dL:
+        variational_posterior.mean.gradient -= variational_posterior.mean
+        variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
+
+
+class VariationalPosterior(Parameterized):
+    def __init__(self, means=None, variances=None, name=None, **kw):
+        super(VariationalPosterior, self).__init__(name=name, **kw)
+        self.mean = Param("mean", means)
+        self.variance = Param("variance", variances, Logexp())
+        self.add_parameters(self.mean, self.variance)
+        self.num_data, self.input_dim = self.mean.shape
+        if self.has_uncertain_inputs():
+            assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
+
+    def has_uncertain_inputs(self):
+        return not self.variance is None
+
+
+class NormalPosterior(VariationalPosterior):
     '''
-    Normal distribution for variational approximations.
+    NormalPosterior distribution for variational approximations.

     holds the means and variances for a factorizing multivariate normal distribution
     '''
-    def __init__(self, means, variances, name='latent space'):
-        Parameterized.__init__(self, name=name)
-        self.mean = Param("mean", means)
-        self.variance = Param('variance', variances, Logexp())
-        self.add_parameters(self.mean, self.variance)
+    def plot(self, *args):
+        """
+        Plot latent space X in 1D:
+
+        See GPy.plotting.matplot_dep.variational_plots
+        """
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ...plotting.matplot_dep import variational_plots
+        return variational_plots.plot(self,*args)
+
+class SpikeAndSlabPosterior(VariationalPosterior):
+    '''
+    The SpikeAndSlab distribution for variational approximations.
+    '''
+    def __init__(self, means, variances, binary_prob, name='latent space'):
+        """
+        binary_prob : the probability of the distribution on the slab part.
+        """
+        super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
+        self.gamma = Param("binary_prob",binary_prob,)
+        self.add_parameter(self.gamma)

     def plot(self, *args):
         """
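Note: NormalPrior.KL_divergence above is the closed-form KL divergence between a factorized Gaussian posterior q(X) = N(mu, diag(S)) and a standard normal prior, KL = 0.5 * sum(mu^2 + S - log S - 1), summed over all N x Q entries. A standalone NumPy check of that formula (hypothetical arrays, not the GPy classes):

    import numpy as np

    def normal_prior_KL(mean, variance):
        # KL( N(mean, diag(variance)) || N(0, I) ), summed over all N x Q entries
        var_mean = np.square(mean).sum()
        var_S = (variance - np.log(variance)).sum()
        return 0.5 * (var_mean + var_S) - 0.5 * mean.size

    mu = np.random.randn(10, 2)
    S = np.exp(np.random.randn(10, 2))        # positive variances
    kl = normal_prior_KL(mu, S)
    ref = 0.5 * np.sum(mu**2 + S - np.log(S) - 1.)
    assert np.allclose(kl, ref)               # matches the elementwise form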
@@ -5,8 +5,9 @@ import numpy as np
 from ..util.linalg import mdot
 from gp import GP
 from parameterization.param import Param
-from GPy.inference.latent_function_inference import var_dtc
+from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
+from parameterization.variational import NormalPosterior

 class SparseGP(GP):
     """

@@ -45,45 +46,44 @@ class SparseGP(GP):
         self.Z = Param('inducing inputs', Z)
         self.num_inducing = Z.shape[0]

-        if not (X_variance is None):
-            assert X_variance.shape == X.shape
-        self.X_variance = X_variance
+        self.q = NormalPosterior(X, X_variance)

-        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
+        GP.__init__(self, self.q.mean, Y, kernel, likelihood, inference_method=inference_method, name=name)
         self.add_parameter(self.Z, index=0)
         self.parameters_changed()

-    def _update_gradients_Z(self, add=False):
-        #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
-        if not self.Z.is_fixed:
-            if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            if self.X_variance is None:
-                self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
-            else:
-                self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-                self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
+    def has_uncertain_inputs(self):
+        return self.q.has_uncertain_inputs()

     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._update_gradients_Z(add=False)
+        if self.has_uncertain_inputs():
+            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference_latent(self.kern, self.q, self.Z, self.likelihood, self.Y)
+        else:
+            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
+        self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
+        if self.has_uncertain_inputs():
+            self.kern.update_gradients_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
+            self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
+        else:
+            self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
+            self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)

-    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
         Make a prediction for the latent function values
         """
         if X_variance_new is None:
-            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
+            Kx = self.kern.K(self.Z, Xnew)
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||||
if full_cov:
|
if full_cov:
|
||||||
Kxx = self.kern.K(Xnew, which_parts=which_parts)
|
Kxx = self.kern.K(Xnew)
|
||||||
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
|
#var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
|
||||||
|
var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
|
||||||
else:
|
else:
|
||||||
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
|
Kxx = self.kern.Kdiag(Xnew)
|
||||||
var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
|
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
|
||||||
else:
|
else:
|
||||||
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
|
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
|
||||||
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
|
|
||||||
mu = np.dot(Kx, self.Cpsi1V)
|
mu = np.dot(Kx, self.Cpsi1V)
|
||||||
if full_cov:
|
if full_cov:
|
||||||
raise NotImplementedError, "TODO"
|
raise NotImplementedError, "TODO"
|
||||||
|
|
@ -91,7 +91,7 @@ class SparseGP(GP):
|
||||||
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
|
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
|
||||||
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
|
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
|
||||||
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
|
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
|
||||||
return mu, var[:,None]
|
return mu, var
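The prediction code above only needs the cached Woodbury quantities; a small numpy sketch of the single-output, diagonal-variance case (array names mirror the posterior attributes used above, the values here are placeholders):

import numpy as np
M, Nstar, D = 5, 7, 2                         # inducing points, test points, outputs
Kx = np.random.randn(M, Nstar)                # K(Z, Xnew)
Kxx_diag = np.ones(Nstar)                     # Kdiag(Xnew)
woodbury_inv = np.eye(M)                      # placeholder for posterior.woodbury_inv
woodbury_vector = np.random.randn(M, D)       # placeholder for posterior.woodbury_vector
mu = np.dot(Kx.T, woodbury_vector)                          # (Nstar, D) predictive mean
var = Kxx_diag - np.sum(Kx * np.dot(woodbury_inv, Kx), 0)   # (Nstar,) marginal predictive variances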
|
||||||
|
|
||||||
|
|
||||||
def _getstate(self):
|
def _getstate(self):
|
||||||
|
|
@ -101,12 +101,10 @@ class SparseGP(GP):
|
||||||
"""
|
"""
|
||||||
return GP._getstate(self) + [self.Z,
|
return GP._getstate(self) + [self.Z,
|
||||||
self.num_inducing,
|
self.num_inducing,
|
||||||
self.has_uncertain_inputs,
|
|
||||||
self.X_variance]
|
self.X_variance]
|
||||||
|
|
||||||
def _setstate(self, state):
|
def _setstate(self, state):
|
||||||
self.X_variance = state.pop()
|
self.X_variance = state.pop()
|
||||||
self.has_uncertain_inputs = state.pop()
|
|
||||||
self.num_inducing = state.pop()
|
self.num_inducing = state.pop()
|
||||||
self.Z = state.pop()
|
self.Z = state.pop()
|
||||||
GP._setstate(self, state)
|
GP._setstate(self, state)
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
import numpy as _np
|
import numpy as _np
|
||||||
default_seed = _np.random.seed(123344)
|
#default_seed = _np.random.seed(123344)
|
||||||
|
|
||||||
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
|
def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
|
||||||
"""
|
"""
|
||||||
model for testing purposes. Samples from a GP with rbf kernel and learns
|
model for testing purposes. Samples from a GP with rbf kernel and learns
|
||||||
the samples with a new kernel. Normally not for optimization, just model checking
|
the samples with a new kernel. Normally not for optimization, just model checking
|
||||||
|
|
@ -21,19 +21,20 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
|
||||||
|
|
||||||
# generate GPLVM-like data
|
# generate GPLVM-like data
|
||||||
X = _np.random.rand(num_inputs, input_dim)
|
X = _np.random.rand(num_inputs, input_dim)
|
||||||
lengthscales = _np.random.rand(input_dim)
|
#lengthscales = _np.random.rand(input_dim)
|
||||||
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
|
#k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
|
||||||
#+ GPy.kern.white(input_dim, 0.01)
|
##+ GPy.kern.white(input_dim, 0.01)
|
||||||
)
|
#)
|
||||||
|
k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
K = k.K(X)
|
K = k.K(X)
|
||||||
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
|
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
|
||||||
|
|
||||||
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
|
# k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
|
||||||
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
#k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
|
# k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
|
||||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
|
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
|
||||||
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
|
# k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
|
||||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
|
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
|
||||||
|
|
||||||
p = .3
|
p = .3
|
||||||
|
|
||||||
|
|
@ -41,14 +42,14 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
|
||||||
|
|
||||||
if nan:
|
if nan:
|
||||||
m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
|
m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
|
||||||
m.Y[_np.random.binomial(1,p,size=(Y.shape))] = _np.nan
|
m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan
|
||||||
m.parameters_changed()
|
m.parameters_changed()
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# randomly obstruct data with percentage p
|
# randomly obstruct data with percentage p
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
|
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.lengthscales = lengthscales
|
#m.lengthscales = lengthscales
|
||||||
|
|
||||||
if plot:
|
if plot:
|
||||||
import matplotlib.pyplot as pb
|
import matplotlib.pyplot as pb
|
||||||
|
|
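The data generation above is just a draw from a zero-mean GP over random latent inputs, one draw per output dimension; a standalone numpy sketch with the linear-kernel Gram matrix written out explicitly instead of GPy.kern.Linear (dimensions are illustrative):

import numpy as np
num_inputs, input_dim, output_dim = 30, 4, 8
X = np.random.rand(num_inputs, input_dim)
K = np.dot(X, X.T) + 1e-6 * np.eye(num_inputs)   # linear-kernel covariance plus a little jitter
Y = np.random.multivariate_normal(np.zeros(num_inputs), K, (output_dim,)).T  # (num_inputs, output_dim)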
@ -73,7 +74,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
|
||||||
data = GPy.util.datasets.oil_100()
|
data = GPy.util.datasets.oil_100()
|
||||||
Y = data['X']
|
Y = data['X']
|
||||||
# create simple GP model
|
# create simple GP model
|
||||||
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
|
kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
|
||||||
m = GPy.models.GPLVM(Y, 6, kernel=kernel)
|
m = GPy.models.GPLVM(Y, 6, kernel=kernel)
|
||||||
m.data_labels = data['Y'].argmax(axis=1)
|
m.data_labels = data['Y'].argmax(axis=1)
|
||||||
if optimize: m.optimize('scg', messages=verbose)
|
if optimize: m.optimize('scg', messages=verbose)
|
||||||
|
|
@ -88,7 +89,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
|
||||||
Y = Y - Y.mean(0)
|
Y = Y - Y.mean(0)
|
||||||
Y /= Y.std(0)
|
Y /= Y.std(0)
|
||||||
# Create the model
|
# Create the model
|
||||||
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q)
|
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q)
|
||||||
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
|
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
|
||||||
m.data_labels = data['Y'][:N].argmax(axis=1)
|
m.data_labels = data['Y'][:N].argmax(axis=1)
|
||||||
|
|
||||||
|
|
@ -138,7 +139,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
|
||||||
(1 - var))) + .001
|
(1 - var))) + .001
|
||||||
Z = _np.random.permutation(X)[:num_inducing]
|
Z = _np.random.permutation(X)[:num_inducing]
|
||||||
|
|
||||||
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
|
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
|
||||||
|
|
||||||
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
|
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
|
||||||
m.data_colors = c
|
m.data_colors = c
|
||||||
|
|
@ -158,46 +159,51 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
|
||||||
|
|
||||||
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
|
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
|
||||||
import GPy
|
import GPy
|
||||||
from GPy.likelihoods import Gaussian
|
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
_np.random.seed(0)
|
_np.random.seed(0)
|
||||||
data = GPy.util.datasets.oil()
|
data = GPy.util.datasets.oil()
|
||||||
|
|
||||||
kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
|
kernel = GPy.kern.RBF(Q, 1., [.1] * Q, ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
|
||||||
Y = data['X'][:N]
|
Y = data['X'][:N]
|
||||||
Yn = Gaussian(Y, normalize=True)
|
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
|
||||||
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
|
|
||||||
m.data_labels = data['Y'][:N].argmax(axis=1)
|
m.data_labels = data['Y'][:N].argmax(axis=1)
|
||||||
m['noise'] = Yn.Y.var() / 100.
|
m['.*noise.var'] = Y.var() / 100.
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
|
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
|
||||||
|
|
||||||
if plot:
|
if plot:
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.Y[0, :]
|
||||||
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
||||||
m.plot_latent(ax=latent_axes)
|
m.plot_latent(ax=latent_axes)
|
||||||
data_show = GPy.util.visualize.vector_show(y)
|
data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
|
||||||
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
|
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
|
||||||
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
return m
|
return m
|
||||||
|
|
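The line m['.*noise.var'] = Y.var() / 100. above relies on the params-branch regular-expression indexing, which sets every parameter whose name matches the pattern; a hypothetical sketch of the same idiom on a small regression model (the exact parameter names depend on the kernel and likelihood, so the patterns here are illustrative only):

import numpy as np
import GPy
X = np.random.rand(20, 1)
Y = np.sin(3 * X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
m['.*lengthscale'] = 2.          # every parameter whose name matches the pattern
m['.*noise'] = Y.var() / 100.    # hypothetical pattern; adjust to the model's actual parameter naming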
||||||
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
|
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
|
||||||
|
_np.random.seed(1234)
|
||||||
|
|
||||||
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
|
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
|
||||||
s1 = _np.vectorize(lambda x: _np.sin(x))
|
s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x)))
|
||||||
s2 = _np.vectorize(lambda x: _np.cos(x))
|
s2 = _np.vectorize(lambda x: _np.cos(x)**2)
|
||||||
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
|
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
|
||||||
sS = _np.vectorize(lambda x: _np.sin(2 * x))
|
sS = _np.vectorize(lambda x: x*_np.sin(x))
|
||||||
|
|
||||||
s1 = s1(x)
|
s1 = s1(x)
|
||||||
s2 = s2(x)
|
s2 = s2(x)
|
||||||
s3 = s3(x)
|
s3 = s3(x)
|
||||||
sS = sS(x)
|
sS = sS(x)
|
||||||
|
|
||||||
S1 = _np.hstack([s1, sS])
|
s1 -= s1.mean(); s1 /= s1.std(0)
|
||||||
|
s2 -= s2.mean(); s2 /= s2.std(0)
|
||||||
|
s3 -= s3.mean(); s3 /= s3.std(0)
|
||||||
|
sS -= sS.mean(); sS /= sS.std(0)
|
||||||
|
|
||||||
|
S1 = _np.hstack([s1, s2, sS])
|
||||||
S2 = _np.hstack([s2, s3, sS])
|
S2 = _np.hstack([s2, s3, sS])
|
||||||
S3 = _np.hstack([s3, sS])
|
S3 = _np.hstack([s3, sS])
|
||||||
|
|
||||||
|
|
@ -268,7 +274,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
||||||
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
|
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
|
||||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||||
Y = Ylist[0]
|
Y = Ylist[0]
|
||||||
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||||
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
|
|
@ -288,16 +294,18 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
from GPy.models import BayesianGPLVM
|
from GPy.models import BayesianGPLVM
|
||||||
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
|
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||||
|
|
||||||
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
|
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9
|
||||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||||
Y = Ylist[0]
|
Y = Ylist[0]
|
||||||
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||||
|
|
||||||
inan = _np.random.binomial(1, .3, size=Y.shape)
|
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
|
||||||
m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k)
|
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
||||||
m.inference_method = VarDTCMissingData()
|
m.inference_method = VarDTCMissingData()
|
||||||
m.Y[inan] = _np.nan
|
m.Y[inan] = _np.nan
|
||||||
|
m.q.variance *= .1
|
||||||
m.parameters_changed()
|
m.parameters_changed()
|
||||||
|
m.Yreal = Y
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
print "Optimizing model:"
|
print "Optimizing model:"
|
||||||
|
|
@ -318,7 +326,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||||
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
|
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
|
||||||
|
|
||||||
k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2))
|
k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2))
|
||||||
m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw)
|
m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
|
||||||
|
|
@ -345,15 +353,15 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
|
||||||
m = GPy.models.GPLVM(Yn, Q)
|
m = GPy.models.GPLVM(Yn, Q)
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
|
m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped())
|
||||||
|
|
||||||
if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
|
if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
|
||||||
|
|
||||||
if plot:
|
if plot:
|
||||||
ax = m.plot_latent(which_indices=(0, 1))
|
ax = m.plot_latent(which_indices=(0, 1))
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
|
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
|
||||||
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
@ -372,8 +380,8 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
|
||||||
if plot:
|
if plot:
|
||||||
ax = m.plot_latent(which_indices=(0, 1))
|
ax = m.plot_latent(which_indices=(0, 1))
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
|
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
|
||||||
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
@ -388,8 +396,8 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru
|
||||||
Y = data['Y'][range[0]:range[1], :].copy()
|
Y = data['Y'][range[0]:range[1], :].copy()
|
||||||
if plot:
|
if plot:
|
||||||
y = Y[0, :]
|
y = Y[0, :]
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
GPy.util.visualize.data_play(Y, data_show, frame_rate)
|
GPy.plotting.matplot_dep.visualize.data_play(Y, data_show, frame_rate)
|
||||||
return Y
|
return Y
|
||||||
|
|
||||||
def stick(kernel=None, optimize=True, verbose=True, plot=True):
|
def stick(kernel=None, optimize=True, verbose=True, plot=True):
|
||||||
|
|
@ -400,12 +408,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
|
||||||
# optimize
|
# optimize
|
||||||
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
|
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
|
||||||
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
||||||
if plot and GPy.util.visualize.visual_available:
|
if plot and GPy.plotting.matplot_dep.visualize.visual_available:
|
||||||
plt.clf
|
plt.clf
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
@ -419,12 +427,12 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
|
||||||
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
|
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
|
||||||
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
|
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
|
||||||
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
||||||
if plot and GPy.util.visualize.visual_available:
|
if plot and GPy.plotting.matplot_dep.visualize.visual_available:
|
||||||
plt.clf
|
plt.clf
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
@ -435,16 +443,16 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
|
||||||
|
|
||||||
data = GPy.util.datasets.osu_run1()
|
data = GPy.util.datasets.osu_run1()
|
||||||
# optimize
|
# optimize
|
||||||
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
|
back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
|
||||||
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
|
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
|
||||||
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
|
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
|
||||||
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
||||||
if plot and GPy.util.visualize.visual_available:
|
if plot and GPy.plotting.matplot_dep.visualize.visual_available:
|
||||||
plt.clf
|
plt.clf
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
@ -470,7 +478,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
|
||||||
|
|
||||||
data = GPy.util.datasets.osu_run1()
|
data = GPy.util.datasets.osu_run1()
|
||||||
Q = 6
|
Q = 6
|
||||||
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
|
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
|
||||||
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
|
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
|
@ -481,8 +489,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
|
||||||
plt.sca(latent_axes)
|
plt.sca(latent_axes)
|
||||||
m.plot_latent()
|
m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :].copy()
|
y = m.likelihood.Y[0, :].copy()
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
@ -501,8 +509,8 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
|
||||||
if plot:
|
if plot:
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
|
data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
|
||||||
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
lvm_visualizer.close()
|
lvm_visualizer.close()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True):
|
||||||
Y = np.vstack((Y1, Y2))
|
Y = np.vstack((Y1, Y2))
|
||||||
|
|
||||||
#build the kernel
|
#build the kernel
|
||||||
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
|
||||||
k2 = GPy.kern.coregionalize(2,1)
|
k2 = GPy.kern.coregionalize(2,1)
|
||||||
k = k1**k2
|
k = k1**k2
|
||||||
m = GPy.models.GPRegression(X, Y, kernel=k)
|
m = GPy.models.GPRegression(X, Y, kernel=k)
|
||||||
|
|
@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True):
|
||||||
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
|
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
|
||||||
# Y = np.vstack((Y1, Y2))
|
# Y = np.vstack((Y1, Y2))
|
||||||
#
|
#
|
||||||
# k1 = GPy.kern.rbf(1)
|
# k1 = GPy.kern.RBF(1)
|
||||||
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
|
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
|
||||||
# m.constrain_fixed('.*rbf_var', 1.)
|
# m.constrain_fixed('.*rbf_var', 1.)
|
||||||
# m.optimize(max_iters=100)
|
# m.optimize(max_iters=100)
|
||||||
|
|
@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
|
||||||
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
|
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
|
||||||
np.random.randint(0, 4, num_inducing)[:, None]))
|
np.random.randint(0, 4, num_inducing)[:, None]))
|
||||||
|
|
||||||
k1 = GPy.kern.rbf(1)
|
k1 = GPy.kern.RBF(1)
|
||||||
k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
|
k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
|
||||||
k = k1**k2
|
k = k1**k2
|
||||||
|
|
||||||
|
|
@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
|
||||||
|
|
||||||
data['Y'] = data['Y'] - np.mean(data['Y'])
|
data['Y'] = data['Y'] - np.mean(data['Y'])
|
||||||
|
|
||||||
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
|
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF)
|
||||||
if plot:
|
if plot:
|
||||||
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
|
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
|
||||||
ax = pb.gca()
|
ax = pb.gca()
|
||||||
|
|
@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
|
||||||
optim_point_y = np.empty(2)
|
optim_point_y = np.empty(2)
|
||||||
np.random.seed(seed=seed)
|
np.random.seed(seed=seed)
|
||||||
for i in range(0, model_restarts):
|
for i in range(0, model_restarts):
|
||||||
# kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
|
# kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
|
||||||
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
|
kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
|
||||||
|
|
||||||
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
|
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
|
||||||
m['noise_variance'] = np.random.uniform(1e-3, 1)
|
m['noise_variance'] = np.random.uniform(1e-3, 1)
|
||||||
|
|
@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
|
||||||
ax.set_ylim(ylim)
|
ax.set_ylim(ylim)
|
||||||
return m # (models, lls)
|
return m # (models, lls)
|
||||||
|
|
||||||
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
|
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
|
||||||
"""
|
"""
|
||||||
Evaluate the GP objective function for a given data set for a range of
|
Evaluate the GP objective function for a given data set for a range of
|
||||||
signal to noise ratios and a range of lengthscales.
|
signal to noise ratios and a range of lengthscales.
|
||||||
|
|
@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
|
||||||
optimizer='scg'
|
optimizer='scg'
|
||||||
x_len = 30
|
x_len = 30
|
||||||
X = np.linspace(0, 10, x_len)[:, None]
|
X = np.linspace(0, 10, x_len)[:, None]
|
||||||
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
|
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
|
||||||
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
|
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
|
||||||
|
|
||||||
kern = GPy.kern.rbf(1)
|
kern = GPy.kern.RBF(1)
|
||||||
poisson_lik = GPy.likelihoods.Poisson()
|
poisson_lik = GPy.likelihoods.Poisson()
|
||||||
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
|
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
|
||||||
|
|
||||||
|
|
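The toy Poisson setup above draws a latent function from a GP and then Poisson counts with log-intensity f; a self-contained numpy version with the RBF Gram matrix written out by hand instead of GPy.kern.RBF(1).K(X):

import numpy as np
x_len = 30
X = np.linspace(0, 10, x_len)[:, None]
sqdist = (X - X.T) ** 2
K = np.exp(-0.5 * sqdist) + 1e-8 * np.eye(x_len)   # unit-variance, unit-lengthscale RBF covariance
f_true = np.random.multivariate_normal(np.zeros(x_len), K)
Y = np.random.poisson(np.exp(f_true))[:, None]     # counts with log-intensity f_true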
@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
|
||||||
if kernel_type == 'linear':
|
if kernel_type == 'linear':
|
||||||
kernel = GPy.kern.linear(X.shape[1], ARD=1)
|
kernel = GPy.kern.linear(X.shape[1], ARD=1)
|
||||||
elif kernel_type == 'rbf_inv':
|
elif kernel_type == 'rbf_inv':
|
||||||
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
|
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
|
||||||
else:
|
else:
|
||||||
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
|
kernel = GPy.kern.RBF(X.shape[1], ARD=1)
|
||||||
kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||||
m = GPy.models.GPRegression(X, Y, kernel)
|
m = GPy.models.GPRegression(X, Y, kernel)
|
||||||
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
|
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
|
||||||
# m.set_prior('.*lengthscale',len_prior)
|
# m.set_prior('.*lengthscale',len_prior)
|
||||||
|
|
@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
|
||||||
if kernel_type == 'linear':
|
if kernel_type == 'linear':
|
||||||
kernel = GPy.kern.linear(X.shape[1], ARD=1)
|
kernel = GPy.kern.linear(X.shape[1], ARD=1)
|
||||||
elif kernel_type == 'rbf_inv':
|
elif kernel_type == 'rbf_inv':
|
||||||
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
|
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
|
||||||
else:
|
else:
|
||||||
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
|
kernel = GPy.kern.RBF(X.shape[1], ARD=1)
|
||||||
#kernel += GPy.kern.bias(X.shape[1])
|
#kernel += GPy.kern.bias(X.shape[1])
|
||||||
X_variance = np.ones(X.shape) * 0.5
|
X_variance = np.ones(X.shape) * 0.5
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
|
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
|
||||||
|
|
@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
|
||||||
X = np.random.uniform(-3., 3., (num_samples, 1))
|
X = np.random.uniform(-3., 3., (num_samples, 1))
|
||||||
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
|
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
|
||||||
# construct kernel
|
# construct kernel
|
||||||
rbf = GPy.kern.rbf(1)
|
rbf = GPy.kern.RBF(1)
|
||||||
# create simple GP Model
|
# create simple GP Model
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
|
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
|
||||||
m.checkgrad(verbose=1)
|
m.checkgrad(verbose=1)
|
||||||
|
|
@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
|
||||||
Y[inan] = np.nan
|
Y[inan] = np.nan
|
||||||
|
|
||||||
# construct kernel
|
# construct kernel
|
||||||
rbf = GPy.kern.rbf(2)
|
rbf = GPy.kern.RBF(2)
|
||||||
|
|
||||||
# create simple GP Model
|
# create simple GP Model
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
|
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
|
||||||
|
|
@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
||||||
# likelihood = GPy.likelihoods.Gaussian(Y)
|
# likelihood = GPy.likelihoods.Gaussian(Y)
|
||||||
Z = np.random.uniform(-3., 3., (7, 1))
|
Z = np.random.uniform(-3., 3., (7, 1))
|
||||||
|
|
||||||
k = GPy.kern.rbf(1)
|
k = GPy.kern.RBF(1)
|
||||||
# create simple GP Model - no input uncertainty on this one
|
# create simple GP Model - no input uncertainty on this one
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
|
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z)
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||||
|
|
@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
||||||
print m
|
print m
|
||||||
|
|
||||||
# the same Model with uncertainty
|
# the same Model with uncertainty
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
|
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
|
||||||
if optimize:
|
if optimize:
|
||||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||||
if plot:
|
if plot:
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference
|
||||||
is not tractable. We then resort to a Laplace approximation (laplace.py) or
|
is not tractable. We then resort to a Laplace approximation (laplace.py) or
|
||||||
expectation propagation (ep.py).
|
expectation propagation (ep.py).
|
||||||
|
|
||||||
The inference methods return a "Posterior" instance, which is a simple
|
The inference methods return a
|
||||||
|
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
|
||||||
|
instance, which is a simple
|
||||||
structure which contains a summary of the posterior. The model classes can then
|
structure which contains a summary of the posterior. The model classes can then
|
||||||
use this posterior object for making predictions, optimizing hyper-parameters,
|
use this posterior object for making predictions, optimizing hyper-parameters,
|
||||||
etc.
|
etc.
|
||||||
|
|
@ -29,3 +31,15 @@ expectation_propagation = 'foo' # TODO
|
||||||
from GPy.inference.latent_function_inference.var_dtc import VarDTC
|
from GPy.inference.latent_function_inference.var_dtc import VarDTC
|
||||||
from dtc import DTC
|
from dtc import DTC
|
||||||
from fitc import FITC
|
from fitc import FITC
|
||||||
|
|
||||||
|
# class FullLatentFunctionData(object):
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# class LatentFunctionInference(object):
|
||||||
|
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||||
|
# """
|
||||||
|
# Do inference on the latent functions given a covariance function `kern`,
|
||||||
|
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
|
||||||
|
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
|
||||||
|
# """
|
||||||
|
# raise NotImplementedError, "Abstract base class for full inference"
|
||||||
|
|
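A hypothetical skeleton of the interface these inference objects are expected to expose (the signature follows the DTC/vDTC classes later in this diff; nothing here is part of the real API beyond the return convention described above):

class MyInference(object):
    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        # ... build a Gaussian approximation to p(f | Y) ...
        post = None          # would be a Posterior(woodbury_inv=..., woodbury_vector=..., K=...)
        log_marginal = 0.0   # (approximate) log marginal likelihood of the model
        grad_dict = {}       # partials the model routes to kern.update_gradients_* / likelihood.update_gradients
        return post, log_marginal, grad_dict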
@ -32,7 +32,7 @@ class DTC(object):
|
||||||
#make sure the noise is not hetero
|
#make sure the noise is not hetero
|
||||||
beta = 1./np.squeeze(likelihood.variance)
|
beta = 1./np.squeeze(likelihood.variance)
|
||||||
if beta.size > 1:
|
if beta.size > 1:
|
||||||
raise NotImplementedError, "no hetero noise with this implementatino of DTC"
|
raise NotImplementedError, "no hetero noise with this implementation of DTC"
|
||||||
|
|
||||||
Kmm = kern.K(Z)
|
Kmm = kern.K(Z)
|
||||||
Knn = kern.Kdiag(X)
|
Knn = kern.Kdiag(X)
|
||||||
|
|
@ -89,4 +89,85 @@ class DTC(object):
|
||||||
|
|
||||||
return post, log_marginal, grad_dict
|
return post, log_marginal, grad_dict
|
||||||
|
|
||||||
|
class vDTC(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.const_jitter = 1e-6
|
||||||
|
|
||||||
|
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||||
|
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
|
||||||
|
|
||||||
|
#TODO: MAX! fix this!
|
||||||
|
from ...util.misc import param_to_array
|
||||||
|
Y = param_to_array(Y)
|
||||||
|
|
||||||
|
num_inducing, _ = Z.shape
|
||||||
|
num_data, output_dim = Y.shape
|
||||||
|
|
||||||
|
#make sure the noise is not hetero
|
||||||
|
beta = 1./np.squeeze(likelihood.variance)
|
||||||
|
if beta.size > 1:
|
||||||
|
raise NotImplementedError, "no hetero noise with this implementation of DTC"
|
||||||
|
|
||||||
|
Kmm = kern.K(Z)
|
||||||
|
Knn = kern.Kdiag(X)
|
||||||
|
Knm = kern.K(X, Z)
|
||||||
|
U = Knm
|
||||||
|
Uy = np.dot(U.T,Y)
|
||||||
|
|
||||||
|
#factor Kmm
|
||||||
|
Kmmi, L, Li, _ = pdinv(Kmm)
|
||||||
|
|
||||||
|
# Compute A
|
||||||
|
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
|
||||||
|
A_ = tdot(LiUTbeta)
|
||||||
|
trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_))
|
||||||
|
A = A_ + np.eye(num_inducing)
|
||||||
|
|
||||||
|
# factor A
|
||||||
|
LA = jitchol(A)
|
||||||
|
|
||||||
|
# back substitute to get b, P, v
|
||||||
|
tmp, _ = dtrtrs(L, Uy, lower=1)
|
||||||
|
b, _ = dtrtrs(LA, tmp*beta, lower=1)
|
||||||
|
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
|
||||||
|
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
|
||||||
|
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
|
||||||
|
P = tdot(tmp.T)
|
||||||
|
|
||||||
|
#compute log marginal
|
||||||
|
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
|
||||||
|
-np.sum(np.log(np.diag(LA)))*output_dim + \
|
||||||
|
0.5*num_data*output_dim*np.log(beta) + \
|
||||||
|
-0.5*beta*np.sum(np.square(Y)) + \
|
||||||
|
0.5*np.sum(np.square(b)) + \
|
||||||
|
trace_term
|
||||||
|
|
||||||
|
# Compute dL_dKmm
|
||||||
|
vvT_P = tdot(v.reshape(-1,1)) + P
|
||||||
|
LAL = Li.T.dot(A).dot(Li)
|
||||||
|
dL_dK = Kmmi - 0.5*(vvT_P + LAL)
|
||||||
|
|
||||||
|
# Compute dL_dU
|
||||||
|
vY = np.dot(v.reshape(-1,1),Y.T)
|
||||||
|
#dL_dU = vY - np.dot(vvT_P, U.T)
|
||||||
|
dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
|
||||||
|
dL_dU *= beta
|
||||||
|
|
||||||
|
#compute dL_dR
|
||||||
|
Uv = np.dot(U, v)
|
||||||
|
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
|
||||||
|
dL_dR -=beta*trace_term/num_data
|
||||||
|
|
||||||
|
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T}
|
||||||
|
|
||||||
|
#update gradients
|
||||||
|
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
||||||
|
likelihood.update_gradients(dL_dR)
|
||||||
|
|
||||||
|
#construct a posterior object
|
||||||
|
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||||
|
|
||||||
|
|
||||||
|
return post, log_marginal, grad_dict
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
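Reading off the code above, the quantity assembled in log_marginal is the collapsed variational (Titsias-style) sparse-GP bound, sketched here in the usual notation with Q_{nn} = K_{nm} K_{mm}^{-1} K_{mn} and noise precision beta:

    \log p(Y) \;\ge\; \log \mathcal{N}\!\left(Y \,\middle|\, 0,\; Q_{nn} + \beta^{-1} I\right) \;-\; \frac{\beta}{2}\,\mathrm{tr}\!\left(K_{nn} - Q_{nn}\right),

with the Gaussian term evaluated through the Cholesky factors L (of K_mm) and L_A (of A = I + beta L^{-1} K_mn K_nm L^{-T}), and the trace penalty appearing as trace_term.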
@ -3,390 +3,91 @@ from scipy import stats
|
||||||
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
|
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
|
||||||
from likelihood import likelihood
|
from likelihood import likelihood
|
||||||
|
|
||||||
class EP(likelihood):
|
class EP(object):
|
||||||
def __init__(self,data,noise_model):
|
def __init__(self, epsilon=1e-6, eta=1., delta=1.):
|
||||||
"""
|
|
||||||
Expectation Propagation
|
|
||||||
|
|
||||||
:param data: data to model
|
|
||||||
:type data: numpy array
|
|
||||||
:param noise_model: noise distribution
|
|
||||||
:type noise_model: A GPy noise model
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.noise_model = noise_model
|
|
||||||
self.data = data
|
|
||||||
self.num_data, self.output_dim = self.data.shape
|
|
||||||
self.is_heteroscedastic = True
|
|
||||||
self.num_params = 0
|
|
||||||
|
|
||||||
#Initial values - Likelihood approximation parameters:
|
|
||||||
#p(y|f) = t(f|tau_tilde,v_tilde)
|
|
||||||
self.tau_tilde = np.zeros(self.num_data)
|
|
||||||
self.v_tilde = np.zeros(self.num_data)
|
|
||||||
|
|
||||||
#initial values for the GP variables
|
|
||||||
self.Y = np.zeros((self.num_data,1))
|
|
||||||
self.covariance_matrix = np.eye(self.num_data)
|
|
||||||
self.precision = np.ones(self.num_data)[:,None]
|
|
||||||
self.Z = 0
|
|
||||||
self.YYT = None
|
|
||||||
self.V = self.precision * self.Y
|
|
||||||
self.VVT_factor = self.V
|
|
||||||
self.trYYT = 0.
|
|
||||||
|
|
||||||
super(EP, self).__init__()
|
|
||||||
|
|
||||||
def restart(self):
|
|
||||||
self.tau_tilde = np.zeros(self.num_data)
|
|
||||||
self.v_tilde = np.zeros(self.num_data)
|
|
||||||
self.Y = np.zeros((self.num_data,1))
|
|
||||||
self.covariance_matrix = np.eye(self.num_data)
|
|
||||||
self.precision = np.ones(self.num_data)[:,None]
|
|
||||||
self.Z = 0
|
|
||||||
self.YYT = None
|
|
||||||
self.V = self.precision * self.Y
|
|
||||||
self.VVT_factor = self.V
|
|
||||||
self.trYYT = 0.
|
|
||||||
|
|
||||||
def predictive_values(self,mu,var,full_cov,**noise_args):
|
|
||||||
if full_cov:
|
|
||||||
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
|
||||||
return self.noise_model.predictive_values(mu,var,**noise_args)
|
|
||||||
|
|
||||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
|
||||||
"""
|
|
||||||
Calculation of the log predictive density
|
|
||||||
|
|
||||||
.. math:
|
|
||||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
|
||||||
|
|
||||||
:param y_test: test observations (y_{*})
|
|
||||||
:type y_test: (Nx1) array
|
|
||||||
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
|
||||||
:type mu_star: (Nx1) array
|
|
||||||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
|
||||||
:type var_star: (Nx1) array
|
|
||||||
"""
|
|
||||||
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
#return np.zeros(0)
|
|
||||||
return self.noise_model._get_params()
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
#return []
|
|
||||||
return self.noise_model._get_param_names()
|
|
||||||
|
|
||||||
def _set_params(self,p):
|
|
||||||
#pass # TODO: the EP likelihood might want to take some parameters...
|
|
||||||
self.noise_model._set_params(p)
|
|
||||||
|
|
||||||
def _gradients(self,partial):
|
|
||||||
#return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
|
|
||||||
return self.noise_model._gradients(partial)
|
|
||||||
|
|
||||||
def _compute_GP_variables(self):
|
|
||||||
#Variables to be called from GP
|
|
||||||
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
|
|
||||||
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
|
|
||||||
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
|
|
||||||
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
|
|
||||||
self.Z += 0.5*self.num_data*np.log(2*np.pi)
|
|
||||||
|
|
||||||
self.Y = mu_tilde[:,None]
|
|
||||||
self.YYT = np.dot(self.Y,self.Y.T)
|
|
||||||
self.covariance_matrix = np.diag(1./self.tau_tilde)
|
|
||||||
self.precision = self.tau_tilde[:,None]
|
|
||||||
self.V = self.precision * self.Y
|
|
||||||
self.VVT_factor = self.V
|
|
||||||
self.trYYT = np.trace(self.YYT)
|
|
||||||
|
|
||||||
def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
|
|
||||||
"""
|
"""
|
||||||
The expectation-propagation algorithm.
|
The expectation-propagation algorithm.
|
||||||
For nomenclature see Rasmussen & Williams 2006.
|
For nomenclature see Rasmussen & Williams 2006.
|
||||||
|
|
||||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
||||||
:type epsilon: float
|
:type epsilon: float
|
||||||
:param power_ep: Power EP parameters
|
:param eta: Power EP thing TODO: Ricardo: what, exactly?
|
||||||
:type power_ep: list of floats
|
:type eta: float64
|
||||||
|
:param delta: Power EP thing TODO: Ricardo: what, exactly?
|
||||||
|
:type delta: float64
|
||||||
"""
|
"""
|
||||||
self.epsilon = epsilon
|
self.epsilon, self.eta, self.delta = epsilon, eta, delta
|
||||||
self.eta, self.delta = power_ep
|
self.reset()
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.old_mutilde, self.old_vtilde = None, None
|
||||||
|
|
||||||
|
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||||
|
|
||||||
|
K = kern.K(X)
|
||||||
|
|
||||||
|
mu, Sigma, mu_tilde, tau_tilde = self.expectation_propagation(K, Y, Y_metadata, likelihood)
|
||||||
|
|
||||||
|
|
||||||
|
def expectation_propagation(self, K, Y, Y_metadata, likelihood):
|
||||||
|
|
||||||
|
num_data, data_dim = Y.shape
|
||||||
|
assert data_dim == 1, "This EP method only works for 1D outputs"
|
||||||
|
|
||||||
|
|
||||||
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
|
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
|
||||||
mu = np.zeros(self.num_data)
|
mu = np.zeros(num_data)
|
||||||
Sigma = K.copy()
|
Sigma = K.copy()
|
||||||
|
|
||||||
"""
|
|
||||||
Initial values - Cavity distribution parameters:
|
|
||||||
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
|
|
||||||
sigma_ = 1./tau_
|
|
||||||
mu_ = v_/tau_
|
|
||||||
"""
|
|
||||||
self.tau_ = np.empty(self.num_data,dtype=float)
|
|
||||||
self.v_ = np.empty(self.num_data,dtype=float)
|
|
||||||
|
|
||||||
#Initial values - Marginal moments
|
#Initial values - Marginal moments
|
||||||
z = np.empty(self.num_data,dtype=float)
|
Z_hat = np.empty(num_data,dtype=np.float64)
|
||||||
self.Z_hat = np.empty(self.num_data,dtype=float)
|
mu_hat = np.empty(num_data,dtype=np.float64)
|
||||||
phi = np.empty(self.num_data,dtype=float)
|
sigma2_hat = np.empty(num_data,dtype=np.float64)
|
||||||
mu_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
sigma2_hat = np.empty(self.num_data,dtype=float)
|
#initial values - Gaussian factors
|
||||||
|
if self.old_mutilde is None:
|
||||||
|
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
|
||||||
|
else:
|
||||||
|
assert self.old_mutilde.size == num_data, "data size mismatch: did you change the data? try resetting!"
|
||||||
|
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
|
||||||
|
tau_tilde = v_tilde/mu_tilde
|
||||||
|
|
||||||
#Approximation
|
#Approximation
|
||||||
epsilon_np1 = self.epsilon + 1.
|
epsilon_np1 = self.epsilon + 1.
|
||||||
epsilon_np2 = self.epsilon + 1.
|
epsilon_np2 = self.epsilon + 1.
|
||||||
self.iterations = 0
|
iterations = 0
tau_tilde_old, v_tilde_old = tau_tilde.copy(), v_tilde.copy() # previous site parameters, needed for the first convergence check below
|
||||||
self.np1 = [self.tau_tilde.copy()]
|
while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon):
|
||||||
self.np2 = [self.v_tilde.copy()]
|
update_order = np.random.permutation(num_data)
|
||||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
|
||||||
update_order = np.random.permutation(self.num_data)
|
|
||||||
for i in update_order:
|
for i in update_order:
|
||||||
#Cavity distribution parameters
|
#Cavity distribution parameters
|
||||||
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
|
||||||
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
|
||||||
#Marginal moments
|
#Marginal moments
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i]))
|
||||||
#Site parameters update
|
#Site parameters update
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
||||||
self.tau_tilde[i] += Delta_tau
|
tau_tilde[i] += delta_tau
|
||||||
self.v_tilde[i] += Delta_v
|
v_tilde[i] += delta_v
|
||||||
#Posterior distribution parameters update
|
#Posterior distribution parameters update
|
||||||
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
|
DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
|
||||||
mu = np.dot(Sigma,self.v_tilde)
|
mu = np.dot(Sigma, v_tilde)
|
||||||
self.iterations += 1
|
iterations += 1
|
||||||
#Sigma recomptutation with Cholesky decompositon
|
|
||||||
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
|
#(re)compute Sigma and mu using a full Cholesky decomposition
|
||||||
B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
|
tau_tilde_root = np.sqrt(tau_tilde)
|
||||||
|
Sroot_tilde_K = tau_tilde_root[:,None] * K
|
||||||
|
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
|
||||||
L = jitchol(B)
|
L = jitchol(B)
|
||||||
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
|
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
|
||||||
Sigma = K - np.dot(V.T,V)
|
Sigma = K - np.dot(V.T,V)
|
||||||
mu = np.dot(Sigma,self.v_tilde)
|
mu = np.dot(Sigma,v_tilde)
|
||||||
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
|
|
||||||
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
|
|
||||||
self.np1.append(self.tau_tilde.copy())
|
|
||||||
self.np2.append(self.v_tilde.copy())
|
|
||||||
|
|
||||||
return self._compute_GP_variables()
|
#monitor convergence
|
||||||
|
epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old))
|
||||||
|
epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old))
|
||||||
|
tau_tilde_old = tau_tilde.copy()
|
||||||
|
v_tilde_old = v_tilde.copy()
|
||||||
|
|
||||||
def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]):
|
return mu, Sigma, mu_tilde, tau_tilde
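For reference, one sweep of the loop above performs the standard (power/damped) EP update for each site i, written here in the notation of the code:

    \tau_{\mathrm{cav}} = \frac{1}{\Sigma_{ii}} - \eta\,\tilde{\tau}_i, \qquad
    v_{\mathrm{cav}} = \frac{\mu_i}{\Sigma_{ii}} - \eta\,\tilde{v}_i, \qquad
    (\hat{Z}_i, \hat{\mu}_i, \hat{\sigma}^2_i) \leftarrow \text{likelihood.moments\_match},

    \Delta\tau = \frac{\delta}{\eta}\left(\frac{1}{\hat{\sigma}^2_i} - \frac{1}{\Sigma_{ii}}\right), \qquad
    \Delta v = \frac{\delta}{\eta}\left(\frac{\hat{\mu}_i}{\hat{\sigma}^2_i} - \frac{\mu_i}{\Sigma_{ii}}\right),

    \Sigma \leftarrow \Sigma - \frac{\Delta\tau}{1 + \Delta\tau\,\Sigma_{ii}}\,\Sigma_{:,i}\Sigma_{:,i}^{\top}
    \ \text{(the rank-1 DSYR call)}, \qquad \mu \leftarrow \Sigma\,\tilde{v},

followed by the full recomputation of Sigma and mu through the Cholesky factor of B = I + \tilde{S}^{1/2} K \tilde{S}^{1/2}.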
|
||||||
"""
|
|
||||||
The expectation-propagation algorithm with sparse pseudo-input.
|
|
||||||
For nomenclature see ... 2013.
|
|
||||||
|
|
||||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
|
||||||
:type epsilon: float
|
|
||||||
:param power_ep: Power EP parameters
|
|
||||||
:type power_ep: list of floats
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.epsilon = epsilon
|
|
||||||
self.eta, self.delta = power_ep
|
|
||||||
|
|
||||||
num_inducing = Kmm.shape[0]
|
|
||||||
|
|
||||||
#TODO: this doesn't work with uncertain inputs!
|
|
||||||
|
|
||||||
"""
|
|
||||||
Prior approximation parameters:
|
|
||||||
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
|
|
||||||
Sigma0 = Qnn = Knm*Kmmi*Kmn
|
|
||||||
"""
|
|
||||||
KmnKnm = np.dot(Kmn,Kmn.T)
|
|
||||||
Lm = jitchol(Kmm)
|
|
||||||
Lmi = chol_inv(Lm)
|
|
||||||
Kmmi = np.dot(Lmi.T,Lmi)
|
|
||||||
KmmiKmn = np.dot(Kmmi,Kmn)
|
|
||||||
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
|
|
||||||
LLT0 = Kmm.copy()
|
|
||||||
|
|
||||||
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
|
|
||||||
#KmnKnm = np.dot(Kmn, Kmn.T)
|
|
||||||
#KmmiKmn = np.dot(Kmmi,Kmn)
|
|
||||||
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
|
|
||||||
#LLT0 = Kmm.copy()
|
|
||||||
|
|
||||||
"""
|
|
||||||
Posterior approximation: q(f|y) = N(f| mu, Sigma)
|
|
||||||
Sigma = Diag + P*R.T*R*P.T + K
|
|
||||||
mu = w + P*Gamma
|
|
||||||
"""
|
|
||||||
mu = np.zeros(self.num_data)
|
|
||||||
LLT = Kmm.copy()
|
|
||||||
Sigma_diag = Qnn_diag.copy()
|
|
||||||
|
|
||||||
"""
|
|
||||||
Initial values - Cavity distribution parameters:
|
|
||||||
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
|
|
||||||
sigma_ = 1./tau_
|
|
||||||
mu_ = v_/tau_
|
|
||||||
"""
|
|
||||||
self.tau_ = np.empty(self.num_data,dtype=float)
|
|
||||||
self.v_ = np.empty(self.num_data,dtype=float)
|
|
||||||
|
|
||||||
#Initial values - Marginal moments
|
|
||||||
z = np.empty(self.num_data,dtype=float)
|
|
||||||
self.Z_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
phi = np.empty(self.num_data,dtype=float)
|
|
||||||
mu_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
sigma2_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
|
|
||||||
#Approximation
|
|
||||||
epsilon_np1 = 1
|
|
||||||
epsilon_np2 = 1
|
|
||||||
self.iterations = 0
|
|
||||||
np1 = [self.tau_tilde.copy()]
|
|
||||||
np2 = [self.v_tilde.copy()]
|
|
||||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
|
||||||
update_order = np.random.permutation(self.num_data)
|
|
||||||
for i in update_order:
|
|
||||||
#Cavity distribution parameters
|
|
||||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
|
||||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
|
||||||
#Marginal moments
|
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
|
||||||
#Site parameters update
|
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
|
||||||
self.tau_tilde[i] += Delta_tau
|
|
||||||
self.v_tilde[i] += Delta_v
|
|
||||||
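For orientation, a sketch of the single-site power-EP update performed in this loop (moments_match stands in for the noise model's moment-matching routine; eta and delta are the power-EP parameters):

def ep_site_update(i, y, mu, Sigma_diag, tau_tilde, v_tilde, moments_match, eta, delta):
    # Cavity parameters, moment matching, then a damped site update.
    tau_cav = 1. / Sigma_diag[i] - eta * tau_tilde[i]
    v_cav = mu[i] / Sigma_diag[i] - eta * v_tilde[i]
    Z_hat, mu_hat, sigma2_hat = moments_match(y[i], tau_cav, v_cav)
    delta_tau = delta / eta * (1. / sigma2_hat - 1. / Sigma_diag[i])
    delta_v = delta / eta * (mu_hat / sigma2_hat - mu[i] / Sigma_diag[i])
    tau_tilde[i] += delta_tau
    v_tilde[i] += delta_v
    return delta_tau, delta_v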
#Posterior distribution parameters update
|
|
||||||
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
|
|
||||||
L = jitchol(LLT)
|
|
||||||
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
|
|
||||||
V,info = dtrtrs(L,Kmn,lower=1)
|
|
||||||
Sigma_diag = np.sum(V*V,-2)
|
|
||||||
si = np.sum(V.T*V[:,i],-1)
|
|
||||||
mu += (Delta_v-Delta_tau*mu[i])*si
|
|
||||||
self.iterations += 1
|
|
||||||
#Sigma recomputation with Cholesky decomposition
|
|
||||||
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
|
|
||||||
L = jitchol(LLT)
|
|
||||||
V,info = dtrtrs(L,Kmn,lower=1)
|
|
||||||
V2,info = dtrtrs(L.T,V,lower=0)
|
|
||||||
Sigma_diag = np.sum(V*V,-2)
|
|
||||||
Knmv_tilde = np.dot(Kmn,self.v_tilde)
|
|
||||||
mu = np.dot(V2.T,Knmv_tilde)
|
|
||||||
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
|
|
||||||
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
|
|
||||||
np1.append(self.tau_tilde.copy())
|
|
||||||
np2.append(self.v_tilde.copy())
|
|
||||||
|
|
||||||
self._compute_GP_variables()
|
|
||||||
|
|
||||||
def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
|
|
||||||
"""
|
|
||||||
The expectation-propagation algorithm with sparse pseudo-input.
|
|
||||||
For nomenclature see Naish-Guzman and Holden, 2008.
|
|
||||||
|
|
||||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
|
||||||
:type epsilon: float
|
|
||||||
:param power_ep: Power EP parameters
|
|
||||||
:type power_ep: list of floats
|
|
||||||
"""
|
|
||||||
self.epsilon = epsilon
|
|
||||||
self.eta, self.delta = power_ep
|
|
||||||
|
|
||||||
num_inducing = Kmm.shape[0]
|
|
||||||
|
|
||||||
"""
|
|
||||||
Prior approximation parameters:
|
|
||||||
q(f|X) = int N(f | Kfu Kuu^{-1} u, diag(Kff - Qff)) N(u | 0, Kuu) du = N(f | 0, Sigma0)
|
|
||||||
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
|
|
||||||
"""
|
|
||||||
Lm = jitchol(Kmm)
|
|
||||||
Lmi = chol_inv(Lm)
|
|
||||||
Kmmi = np.dot(Lmi.T,Lmi)
|
|
||||||
P0 = Kmn.T
|
|
||||||
KmnKnm = np.dot(P0.T, P0)
|
|
||||||
KmmiKmn = np.dot(Kmmi,P0.T)
|
|
||||||
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
|
|
||||||
Diag0 = Knn_diag - Qnn_diag
|
|
||||||
R0 = jitchol(Kmmi).T
|
|
||||||
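A compact sketch of the FITC prior quantities assembled above (the Qnn diagonal and the diagonal correction), using SciPy in place of GPy's jitchol/chol_inv:

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def fitc_prior_terms(Kmm, Kmn, Knn_diag):
    # Qnn = Knm Kmm^-1 Kmn;  Diag0 = diag(Knn) - diag(Qnn)
    KmmiKmn = cho_solve(cho_factor(Kmm, lower=True), Kmn)
    Qnn_diag = np.sum(Kmn * KmmiKmn, axis=0)
    return Qnn_diag, Knn_diag - Qnn_diag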
|
|
||||||
"""
|
|
||||||
Posterior approximation: q(f|y) = N(f| mu, Sigma)
|
|
||||||
Sigma = Diag + P*R.T*R*P.T + K
|
|
||||||
mu = w + P*Gamma
|
|
||||||
"""
|
|
||||||
self.w = np.zeros(self.num_data)
|
|
||||||
self.Gamma = np.zeros(num_inducing)
|
|
||||||
mu = np.zeros(self.num_data)
|
|
||||||
P = P0.copy()
|
|
||||||
R = R0.copy()
|
|
||||||
Diag = Diag0.copy()
|
|
||||||
Sigma_diag = Knn_diag
|
|
||||||
RPT0 = np.dot(R0,P0.T)
|
|
||||||
|
|
||||||
"""
|
|
||||||
Initial values - Cavity distribution parameters:
|
|
||||||
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
|
|
||||||
sigma_ = 1./tau_
|
|
||||||
mu_ = v_/tau_
|
|
||||||
"""
|
|
||||||
self.tau_ = np.empty(self.num_data,dtype=float)
|
|
||||||
self.v_ = np.empty(self.num_data,dtype=float)
|
|
||||||
|
|
||||||
#Initial values - Marginal moments
|
|
||||||
z = np.empty(self.num_data,dtype=float)
|
|
||||||
self.Z_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
phi = np.empty(self.num_data,dtype=float)
|
|
||||||
mu_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
sigma2_hat = np.empty(self.num_data,dtype=float)
|
|
||||||
|
|
||||||
#Approximation
|
|
||||||
epsilon_np1 = 1
|
|
||||||
epsilon_np2 = 1
|
|
||||||
self.iterations = 0
|
|
||||||
self.np1 = [self.tau_tilde.copy()]
|
|
||||||
self.np2 = [self.v_tilde.copy()]
|
|
||||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
|
||||||
update_order = np.random.permutation(self.num_data)
|
|
||||||
for i in update_order:
|
|
||||||
#Cavity distribution parameters
|
|
||||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
|
||||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
|
||||||
#Marginal moments
|
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
|
||||||
#Site parameters update
|
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
|
||||||
self.tau_tilde[i] += Delta_tau
|
|
||||||
self.v_tilde[i] += Delta_v
|
|
||||||
#Posterior distribution parameters update
|
|
||||||
dtd1 = Delta_tau*Diag[i] + 1.
|
|
||||||
dii = Diag[i]
|
|
||||||
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
|
|
||||||
pi_ = P[i,:].reshape(1,num_inducing)
|
|
||||||
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
|
|
||||||
Rp_i = np.dot(R,pi_.T)
|
|
||||||
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
|
|
||||||
R = jitchol(RTR).T
|
|
||||||
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
|
|
||||||
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
|
|
||||||
RPT = np.dot(R,P.T)
|
|
||||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
|
||||||
mu = self.w + np.dot(P,self.Gamma)
|
|
||||||
self.iterations += 1
|
|
||||||
#Sigma recomputation with Cholesky decomposition
|
|
||||||
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
|
|
||||||
Diag = Diag0 * Iplus_Dprod_i
|
|
||||||
P = Iplus_Dprod_i[:,None] * P0
|
|
||||||
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
|
|
||||||
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
|
|
||||||
R,info = dtrtrs(L,R0,lower=1)
|
|
||||||
RPT = np.dot(R,P.T)
|
|
||||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
|
||||||
self.w = Diag * self.v_tilde
|
|
||||||
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
|
|
||||||
mu = self.w + np.dot(P,self.Gamma)
|
|
||||||
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
|
|
||||||
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
|
|
||||||
self.np1.append(self.tau_tilde.copy())
|
|
||||||
self.np2.append(self.v_tilde.copy())
|
|
||||||
|
|
||||||
return self._compute_GP_variables()
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify, jitchol, dtrtri
|
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
|
||||||
|
|
||||||
class Posterior(object):
|
class Posterior(object):
|
||||||
"""
|
"""
|
||||||
|
|
@ -81,13 +81,17 @@ class Posterior(object):
|
||||||
def covariance(self):
|
def covariance(self):
|
||||||
if self._covariance is None:
|
if self._covariance is None:
|
||||||
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
|
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
|
||||||
self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
|
self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
|
||||||
return self._covariance
|
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
|
||||||
|
return self._covariance.squeeze()
|
||||||
|
|
||||||
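The covariance property now broadcasts over a possible third dimension of woodbury_inv (one Woodbury inverse per output). A per-output sketch of the quantity being formed, assuming Sigma_d = K - K W_d K:

import numpy as np

def posterior_covariances(K, woodbury_inv):
    # One posterior covariance per output dimension, stacked on the last axis.
    W = np.atleast_3d(woodbury_inv)
    covs = [K - K.dot(W[:, :, d]).dot(K) for d in range(W.shape[-1])]
    return np.dstack(covs).squeeze()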
@property
|
@property
|
||||||
def precision(self):
|
def precision(self):
|
||||||
if self._precision is None:
|
if self._precision is None:
|
||||||
self._precision, _, _, _ = pdinv(self.covariance)
|
cov = np.atleast_3d(self.covariance)
|
||||||
|
self._precision = np.zeros(cov.shape) # if one covariance per dimension
|
||||||
|
for p in xrange(cov.shape[-1]):
|
||||||
|
self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
|
||||||
return self._precision
|
return self._precision
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -95,7 +99,10 @@ class Posterior(object):
|
||||||
if self._woodbury_chol is None:
|
if self._woodbury_chol is None:
|
||||||
#compute woodbury chol from
|
#compute woodbury chol from
|
||||||
if self._woodbury_inv is not None:
|
if self._woodbury_inv is not None:
|
||||||
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
|
winv = np.atleast_3d(self._woodbury_inv)
|
||||||
|
self._woodbury_chol = np.zeros(winv.shape)
|
||||||
|
for p in xrange(winv.shape[-1]):
|
||||||
|
self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
|
||||||
#Li = jitchol(self._woodbury_inv)
|
#Li = jitchol(self._woodbury_inv)
|
||||||
#self._woodbury_chol, _ = dtrtri(Li)
|
#self._woodbury_chol, _ = dtrtri(Li)
|
||||||
#W, _, _, _, = pdinv(self._woodbury_inv)
|
#W, _, _, _, = pdinv(self._woodbury_inv)
|
||||||
|
|
@ -129,7 +136,7 @@ class Posterior(object):
|
||||||
@property
|
@property
|
||||||
def K_chol(self):
|
def K_chol(self):
|
||||||
if self._K_chol is None:
|
if self._K_chol is None:
|
||||||
self._K_chol = dportf(self._K)
|
self._K_chol = jitchol(self._K)
|
||||||
return self._K_chol
|
return self._K_chol
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,9 +43,20 @@ class VarDTC(object):
|
||||||
return Y * prec # TODO: cache this and make it more efficient
|
return Y * prec # TODO: cache this and make it more efficient
|
||||||
|
|
||||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||||
|
"""Inference for normal sparseGP"""
|
||||||
|
uncertain_inputs = False
|
||||||
|
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
|
||||||
|
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
|
||||||
|
|
||||||
|
def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
|
||||||
|
"""Inference for GPLVM with uncertain inputs"""
|
||||||
|
uncertain_inputs = True
|
||||||
|
psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
|
||||||
|
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
|
||||||
|
|
||||||
|
def _inference(self, kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs):
|
||||||
|
|
||||||
#see whether we're using variational uncertain inputs
|
#see whether we're using variational uncertain inputs
|
||||||
uncertain_inputs = not (X_variance is None)
|
|
||||||
|
|
||||||
_, output_dim = Y.shape
|
_, output_dim = Y.shape
|
||||||
|
|
||||||
|
|
@ -60,20 +71,87 @@ class VarDTC(object):
|
||||||
trYYT = self.get_trYYT(Y)
|
trYYT = self.get_trYYT(Y)
|
||||||
|
|
||||||
# do the inference:
|
# do the inference:
|
||||||
dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \
|
het_noise = beta.size > 1
|
||||||
psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = _do_inference_on(
|
num_inducing = Z.shape[0]
|
||||||
kern, X, X_variance, Z, likelihood,
|
num_data = Y.shape[0]
|
||||||
uncertain_inputs, output_dim,
|
# kernel computations, using BGPLVM notation
|
||||||
beta, VVT_factor, trYYT)
|
Kmm = kern.K(Z)
|
||||||
|
|
||||||
likelihood.update_gradients(partial_for_likelihood)
|
Lm = jitchol(Kmm)
|
||||||
|
|
||||||
|
# The rather complex computations of A
|
||||||
|
if uncertain_inputs:
|
||||||
|
if het_noise:
|
||||||
|
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
|
||||||
|
else:
|
||||||
|
psi2_beta = psi2.sum(0) * beta
|
||||||
|
#if 0:
|
||||||
|
# evals, evecs = linalg.eigh(psi2_beta)
|
||||||
|
# clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
|
||||||
|
# if not np.array_equal(evals, clipped_evals):
|
||||||
|
# pass # print evals
|
||||||
|
# tmp = evecs * np.sqrt(clipped_evals)
|
||||||
|
# tmp = tmp.T
|
||||||
|
# no backsubstitution because of bound explosion on tr(A) if not...
|
||||||
|
LmInv = dtrtri(Lm)
|
||||||
|
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
||||||
|
else:
|
||||||
|
if het_noise:
|
||||||
|
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
|
||||||
|
else:
|
||||||
|
tmp = psi1 * (np.sqrt(beta))
|
||||||
|
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
|
||||||
|
A = tdot(tmp) #print A.sum()
|
||||||
|
|
||||||
|
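For the homoscedastic, certain-input branch above, the matrix A factorised into B = I + A is, in effect (SciPy stands in for dtrtrs/tdot; a sketch only):

import numpy as np
from scipy.linalg import solve_triangular

def compute_A(Lm, psi1, beta):
    # A = beta * Lm^-1 psi1^T psi1 Lm^-T
    tmp = solve_triangular(Lm, (np.sqrt(beta) * psi1).T, lower=True)
    return tmp.dot(tmp.T)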
# factor B
|
||||||
|
B = np.eye(num_inducing) + A
|
||||||
|
LB = jitchol(B)
|
||||||
|
psi1Vf = np.dot(psi1.T, VVT_factor)
|
||||||
|
# back substitute C into psi1Vf
|
||||||
|
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
|
||||||
|
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
|
||||||
|
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
|
||||||
|
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||||
|
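The four triangular solves above apply C = (Lm LB LB^T Lm^T)^{-1} to psi1Vf; a SciPy sketch of the same chain:

from scipy.linalg import solve_triangular

def back_substitute(Lm, LB, psi1Vf):
    # Two forward and two backward triangular solves.
    tmp = solve_triangular(Lm, psi1Vf, lower=True)
    LBi_Lmi_psi1Vf = solve_triangular(LB, tmp, lower=True)
    tmp = solve_triangular(LB, LBi_Lmi_psi1Vf, lower=True, trans='T')
    Cpsi1Vf = solve_triangular(Lm, tmp, lower=True, trans='T')
    return LBi_Lmi_psi1Vf, Cpsi1Vf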
|
||||||
|
# data fit and derivative of L w.r.t. Kmm
|
||||||
|
delit = tdot(_LBi_Lmi_psi1Vf)
|
||||||
|
data_fit = np.trace(delit)
|
||||||
|
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
|
||||||
|
delit = -0.5 * DBi_plus_BiPBi
|
||||||
|
delit += -0.5 * B * output_dim
|
||||||
|
delit += output_dim * np.eye(num_inducing)
|
||||||
|
# Compute dL_dKmm
|
||||||
|
dL_dKmm = backsub_both_sides(Lm, delit)
|
||||||
|
|
||||||
|
# derivatives of L w.r.t. psi
|
||||||
|
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
|
||||||
|
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
|
||||||
|
psi1, het_noise, uncertain_inputs)
|
||||||
|
|
||||||
|
# log marginal likelihood
|
||||||
|
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
|
||||||
|
psi0, A, LB, trYYT, data_fit)
|
||||||
|
|
||||||
|
#put the gradients in the right places
|
||||||
|
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
|
||||||
|
het_noise, uncertain_inputs, LB,
|
||||||
|
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||||
|
psi0, psi1, beta,
|
||||||
|
data_fit, num_data, output_dim, trYYT)
|
||||||
|
|
||||||
|
#likelihood.update_gradients(partial_for_likelihood)
|
||||||
|
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
|
'dL_dpsi0':dL_dpsi0,
|
||||||
|
'dL_dpsi1':dL_dpsi1,
|
||||||
|
'dL_dpsi2':dL_dpsi2,
|
||||||
|
'partial_for_likelihood':partial_for_likelihood}
|
||||||
else:
|
else:
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1}
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
'dL_dKdiag':dL_dpsi0,
|
||||||
|
'dL_dKnm':dL_dpsi1,
|
||||||
|
'partial_for_likelihood':partial_for_likelihood}
|
||||||
|
|
||||||
#get sufficient things for posterior prediction
|
#get sufficient things for posterior prediction
|
||||||
#TODO: do we really want to do this in the loop?
|
#TODO: do we really want to do this in the loop?
|
||||||
|
|
@ -125,21 +203,33 @@ class VarDTCMissingData(object):
|
||||||
return [Y], [(Y**2).sum()]
|
return [Y], [(Y**2).sum()]
|
||||||
|
|
||||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||||
|
"""Inference for normal sparseGP"""
|
||||||
|
uncertain_inputs = False
|
||||||
|
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
|
||||||
|
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
|
||||||
|
|
||||||
|
def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
|
||||||
|
"""Inference for GPLVM with uncertain inputs"""
|
||||||
|
uncertain_inputs = True
|
||||||
|
psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
|
||||||
|
return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
|
||||||
|
|
||||||
|
def _inference(self, kern, psi0_all, psi1_all, psi2_all, Z, likelihood, Y, uncertain_inputs):
|
||||||
Ys, traces = self._Y(Y)
|
Ys, traces = self._Y(Y)
|
||||||
beta_all = 1./likelihood.variance
|
beta_all = 1./likelihood.variance
|
||||||
uncertain_inputs = not (X_variance is None)
|
|
||||||
het_noise = beta_all.size != 1
|
het_noise = beta_all.size != 1
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
num_inducing = Z.shape[0]
|
num_inducing = Z.shape[0]
|
||||||
|
|
||||||
dL_dpsi0_all = np.zeros(X.shape[0])
|
dL_dpsi0_all = np.zeros(Y.shape[0])
|
||||||
dL_dpsi1_all = np.zeros((X.shape[0], num_inducing))
|
dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing))
|
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
|
||||||
|
|
||||||
partial_for_likelihood = 0
|
partial_for_likelihood = 0
|
||||||
LB_all = Cpsi1Vf_all = 0
|
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
|
||||||
|
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
|
||||||
dL_dKmm = 0
|
dL_dKmm = 0
|
||||||
log_marginal = 0
|
log_marginal = 0
|
||||||
|
|
||||||
|
|
@ -148,11 +238,10 @@ class VarDTCMissingData(object):
|
||||||
Lm = jitchol(Kmm)
|
Lm = jitchol(Kmm)
|
||||||
if uncertain_inputs: LmInv = dtrtri(Lm)
|
if uncertain_inputs: LmInv = dtrtri(Lm)
|
||||||
|
|
||||||
# kernel computations, using BGPLVM notation
|
|
||||||
psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
|
|
||||||
|
|
||||||
VVT_factor_all = np.empty(Y.shape)
|
VVT_factor_all = np.empty(Y.shape)
|
||||||
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
|
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
|
||||||
|
if not full_VVT_factor:
|
||||||
|
psi1V = np.dot(Y.T*beta_all, psi1_all).T
|
||||||
|
|
||||||
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
||||||
if het_noise: beta = beta_all[ind]
|
if het_noise: beta = beta_all[ind]
|
||||||
|
|
@ -183,10 +272,10 @@ class VarDTCMissingData(object):
|
||||||
LB = jitchol(B)
|
LB = jitchol(B)
|
||||||
|
|
||||||
psi1Vf = psi1.T.dot(VVT_factor)
|
psi1Vf = psi1.T.dot(VVT_factor)
|
||||||
_LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)
|
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
|
||||||
|
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
|
||||||
if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf
|
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
|
||||||
LB_all += LB
|
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||||
|
|
||||||
# data fit and derivative of L w.r.t. Kmm
|
# data fit and derivative of L w.r.t. Kmm
|
||||||
delit = tdot(_LBi_Lmi_psi1Vf)
|
delit = tdot(_LBi_Lmi_psi1Vf)
|
||||||
|
|
@ -219,92 +308,67 @@ class VarDTCMissingData(object):
|
||||||
psi0, psi1, beta,
|
psi0, psi1, beta,
|
||||||
data_fit, num_data, output_dim, trYYT)
|
data_fit, num_data, output_dim, trYYT)
|
||||||
|
|
||||||
# gradients:
|
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
|
||||||
likelihood.update_gradients(partial_for_likelihood)
|
|
||||||
|
|
||||||
if uncertain_inputs:
|
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all}
|
|
||||||
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
|
|
||||||
else:
|
else:
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all}
|
print 'foobar'
|
||||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
|
||||||
|
tmp, _ = dpotrs(LB, tmp, lower=1)
|
||||||
|
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
|
||||||
|
|
||||||
|
#import ipdb;ipdb.set_trace()
|
||||||
|
Bi, _ = dpotri(LB, lower=1)
|
||||||
|
symmetrify(Bi)
|
||||||
|
Bi = -dpotri(LB, lower=1)[0]
|
||||||
|
from ...util import diag
|
||||||
|
diag.add(Bi, 1)
|
||||||
|
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
|
||||||
|
|
||||||
|
# gradients:
|
||||||
|
if uncertain_inputs:
|
||||||
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
|
'dL_dpsi0':dL_dpsi0_all,
|
||||||
|
'dL_dpsi1':dL_dpsi1_all,
|
||||||
|
'dL_dpsi2':dL_dpsi2_all,
|
||||||
|
'partial_for_likelihood':partial_for_likelihood}
|
||||||
|
else:
|
||||||
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
|
'dL_dKdiag':dL_dpsi0_all,
|
||||||
|
'dL_dKnm':dL_dpsi1_all,
|
||||||
|
'partial_for_likelihood':partial_for_likelihood}
|
||||||
|
|
||||||
#get sufficient things for posterior prediction
|
#get sufficient things for posterior prediction
|
||||||
#TODO: do we really want to do this in the loop?
|
#TODO: do we really want to do this in the loop?
|
||||||
if full_VVT_factor:
|
#if not full_VVT_factor:
|
||||||
woodbury_vector = Cpsi1Vf_all # == Cpsi1V
|
# print 'foobar'
|
||||||
else:
|
# psi1V = np.dot(Y.T*beta_all, psi1_all).T
|
||||||
print 'foobar'
|
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
|
||||||
psi1V = np.dot(Y.T*beta_all, psi1_all).T
|
# tmp, _ = dpotrs(LB_all, tmp, lower=1)
|
||||||
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
|
# woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||||
tmp, _ = dpotrs(LB_all, tmp, lower=1)
|
#import ipdb;ipdb.set_trace()
|
||||||
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
#Bi, _ = dpotri(LB_all, lower=1)
|
||||||
|
#symmetrify(Bi)
|
||||||
|
#Bi = -dpotri(LB_all, lower=1)[0]
|
||||||
|
#from ...util import diag
|
||||||
|
#diag.add(Bi, 1)
|
||||||
|
|
||||||
Bi, _ = dpotri(LB_all, lower=1)
|
#woodbury_inv = backsub_both_sides(Lm, Bi)
|
||||||
symmetrify(Bi)
|
|
||||||
Bi = -dpotri(LB_all, lower=1)[0]
|
|
||||||
from ...util import diag
|
|
||||||
diag.add(Bi, 1)
|
|
||||||
|
|
||||||
woodbury_inv = backsub_both_sides(Lm, Bi)
|
post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
||||||
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
|
||||||
|
|
||||||
return post, log_marginal, grad_dict
|
return post, log_marginal, grad_dict
|
||||||
|
|
||||||
|
|
||||||
def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm):
|
def _compute_psi(kern, X, X_variance, Z):
|
||||||
# The rather complex computations of A
|
|
||||||
if uncertain_inputs:
|
|
||||||
if het_noise:
|
|
||||||
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
|
|
||||||
else:
|
|
||||||
psi2_beta = psi2.sum(0) * beta
|
|
||||||
#if 0:
|
|
||||||
# evals, evecs = linalg.eigh(psi2_beta)
|
|
||||||
# clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
|
|
||||||
# if not np.array_equal(evals, clipped_evals):
|
|
||||||
# pass # print evals
|
|
||||||
# tmp = evecs * np.sqrt(clipped_evals)
|
|
||||||
# tmp = tmp.T
|
|
||||||
# no backsubstitution because of bound explosion on tr(A) if not...
|
|
||||||
LmInv = dtrtri(Lm)
|
|
||||||
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
|
||||||
else:
|
|
||||||
if het_noise:
|
|
||||||
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
|
|
||||||
else:
|
|
||||||
tmp = psi1 * (np.sqrt(beta))
|
|
||||||
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
|
|
||||||
A = tdot(tmp) #print A.sum()
|
|
||||||
return A
|
|
||||||
|
|
||||||
|
|
||||||
def _compute_psi(kern, X, X_variance, Z, uncertain_inputs):
|
|
||||||
if uncertain_inputs:
|
|
||||||
psi0 = kern.psi0(Z, X, X_variance)
|
|
||||||
psi1 = kern.psi1(Z, X, X_variance)
|
|
||||||
psi2 = kern.psi2(Z, X, X_variance)
|
|
||||||
else:
|
|
||||||
psi0 = kern.Kdiag(X)
|
psi0 = kern.Kdiag(X)
|
||||||
psi1 = kern.K(X, Z)
|
psi1 = kern.K(X, Z)
|
||||||
psi2 = None
|
psi2 = None
|
||||||
return psi0, psi1, psi2
|
return psi0, psi1, psi2
|
||||||
|
|
||||||
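With deterministic inputs the psi statistics collapse to ordinary kernel evaluations, which is all _compute_psi does; an illustrative dispatch in the older (X, X_variance)-style signature:

def compute_psi_stats(kern, Z, X, X_variance=None):
    # No input uncertainty: psi0/psi1 are just Kdiag/K and psi2 is unused.
    if X_variance is None:
        return kern.Kdiag(X), kern.K(X, Z), None
    return (kern.psi0(Z, X, X_variance),
            kern.psi1(Z, X, X_variance),
            kern.psi2(Z, X, X_variance))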
def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs):
|
def _compute_psi_latent(kern, posterior_variational, Z):
|
||||||
Kmm = kern.K(Z)
|
psi0 = kern.psi0(Z, posterior_variational)
|
||||||
psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
|
psi1 = kern.psi1(Z, posterior_variational)
|
||||||
return Kmm, psi0, psi1, psi2
|
psi2 = kern.psi2(Z, posterior_variational)
|
||||||
|
return psi0, psi1, psi2
|
||||||
def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf):
|
|
||||||
# Compute dL_dKmm
|
|
||||||
delit = tdot(_LBi_Lmi_psi1Vf)
|
|
||||||
data_fit = np.trace(delit)
|
|
||||||
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
|
|
||||||
delit = -0.5 * DBi_plus_BiPBi
|
|
||||||
delit += -0.5 * B * output_dim
|
|
||||||
delit += output_dim * np.eye(num_inducing)
|
|
||||||
dL_dKmm = backsub_both_sides(Lm, delit)
|
|
||||||
return DBi_plus_BiPBi, data_fit, dL_dKmm
|
|
||||||
|
|
||||||
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
|
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
|
||||||
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
|
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
|
||||||
|
|
@ -329,15 +393,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
|
||||||
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
||||||
|
|
||||||
|
|
||||||
def _compute_psi1Vf(Lm, LB, psi1Vf):
|
|
||||||
# back substitute C into psi1Vf
|
|
||||||
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
|
|
||||||
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
|
|
||||||
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
|
|
||||||
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
|
||||||
return _LBi_Lmi_psi1Vf, Cpsi1Vf
|
|
||||||
|
|
||||||
|
|
||||||
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
|
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
|
||||||
# the partial derivative vector for the likelihood
|
# the partial derivative vector for the likelihood
|
||||||
if likelihood.size == 0:
|
if likelihood.size == 0:
|
||||||
|
|
@ -379,35 +434,3 @@ def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het
|
||||||
lik_4 = 0.5 * data_fit
|
lik_4 = 0.5 * data_fit
|
||||||
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
|
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
|
||||||
return log_marginal
|
return log_marginal
|
||||||
|
|
||||||
def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT):
|
|
||||||
het_noise = beta.size > 1
|
|
||||||
num_inducing = Z.shape[0]
|
|
||||||
num_data = X.shape[0]
|
|
||||||
# kernel computations, using BGPLVM notation
|
|
||||||
Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs)
|
|
||||||
#factor Kmm # TODO: cache?
|
|
||||||
Lm = jitchol(Kmm)
|
|
||||||
A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm)
|
|
||||||
# factor B
|
|
||||||
B = np.eye(num_inducing) + A
|
|
||||||
LB = jitchol(B)
|
|
||||||
psi1Vf = np.dot(psi1.T, VVT_factor)
|
|
||||||
_LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)
|
|
||||||
# data fit and derivative of L w.r.t. Kmm
|
|
||||||
DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim,
|
|
||||||
Lm, B, LB, _LBi_Lmi_psi1Vf)
|
|
||||||
# derivatives of L w.r.t. psi
|
|
||||||
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
|
|
||||||
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
|
|
||||||
psi1, het_noise, uncertain_inputs)
|
|
||||||
# log marginal likelihood
|
|
||||||
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
|
|
||||||
psi0, A, LB, trYYT, data_fit)
|
|
||||||
#put the gradients in the right places
|
|
||||||
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
|
|
||||||
het_noise, uncertain_inputs, LB,
|
|
||||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
|
||||||
psi0, psi1, beta,
|
|
||||||
data_fit, num_data, output_dim, trYYT)
|
|
||||||
return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,34 @@
|
||||||
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
|
from _src.rbf import RBF
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
from _src.white import White
|
||||||
|
from _src.kern import Kern
|
||||||
from constructors import *
|
from _src.linear import Linear
|
||||||
try:
|
from _src.bias import Bias
|
||||||
from constructors import rbf_sympy, sympykern # these depend on sympy
|
from _src.brownian import Brownian
|
||||||
except:
|
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
|
||||||
pass
|
#import coregionalize
|
||||||
from kern import *
|
#import exponential
|
||||||
|
#import eq_ode1
|
||||||
|
#import finite_dimensional
|
||||||
|
#import fixed
|
||||||
|
#import gibbs
|
||||||
|
#import hetero
|
||||||
|
#import hierarchical
|
||||||
|
#import independent_outputs
|
||||||
|
#import linear
|
||||||
|
#import Matern32
|
||||||
|
#import Matern52
|
||||||
|
#import mlp
|
||||||
|
#import ODE_1
|
||||||
|
#import periodic_exponential
|
||||||
|
#import periodic_Matern32
|
||||||
|
#import periodic_Matern52
|
||||||
|
#import poly
|
||||||
|
#import prod_orthogonal
|
||||||
|
#import prod
|
||||||
|
#import rational_quadratic
|
||||||
|
#import rbfcos
|
||||||
|
#import rbf
|
||||||
|
#import rbf_inv
|
||||||
|
#import spline
|
||||||
|
#import symmetric
|
||||||
|
#import white
|
||||||
|
|
|
||||||
0
GPy/kern/_src/__init__.py
Normal file
0
GPy/kern/_src/__init__.py
Normal file
215
GPy/kern/_src/add.py
Normal file
215
GPy/kern/_src/add.py
Normal file
|
|
@ -0,0 +1,215 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import itertools
|
||||||
|
from linear import Linear
|
||||||
|
from ...core.parameterization import Parameterized
|
||||||
|
from ...core.parameterization.param import Param
|
||||||
|
from kern import Kern
|
||||||
|
|
||||||
|
class Add(Kern):
|
||||||
|
def __init__(self, subkerns, tensor):
|
||||||
|
assert all([isinstance(k, Kern) for k in subkerns])
|
||||||
|
if tensor:
|
||||||
|
input_dim = sum([k.input_dim for k in subkerns])
|
||||||
|
self.input_slices = []
|
||||||
|
n = 0
|
||||||
|
for k in subkerns:
|
||||||
|
self.input_slices.append(slice(n, n+k.input_dim))
|
||||||
|
n += k.input_dim
|
||||||
|
else:
|
||||||
|
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
|
||||||
|
input_dim = subkerns[0].input_dim
|
||||||
|
self.input_slices = [slice(None) for k in subkerns]
|
||||||
|
super(Add, self).__init__(input_dim, 'add')
|
||||||
|
self.add_parameters(*subkerns)
|
||||||
|
|
||||||
|
|
||||||
|
def K(self, X, X2=None):
|
||||||
|
"""
|
||||||
|
Compute the kernel function.
|
||||||
|
|
||||||
|
:param X: the first set of inputs to the kernel
|
||||||
|
:param X2: (optional) the second set of arguments to the kernel. If X2
|
||||||
|
is None, this is passed through to the 'part' object, which
|
||||||
|
handles this as X2 == X.
|
||||||
|
"""
|
||||||
|
assert X.shape[1] == self.input_dim
|
||||||
|
if X2 is None:
|
||||||
|
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
|
||||||
|
else:
|
||||||
|
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X):
|
||||||
|
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
|
|
||||||
|
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||||
|
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
"""Compute the gradient of the objective function with respect to X.
|
||||||
|
|
||||||
|
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
||||||
|
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
||||||
|
:param X: Observed data inputs
|
||||||
|
:type X: np.ndarray (num_samples x input_dim)
|
||||||
|
:param X2: Observed data inputs (optional, defaults to X)
|
||||||
|
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||||
|
|
||||||
|
target = np.zeros_like(X)
|
||||||
|
if X2 is None:
|
||||||
|
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
|
else:
|
||||||
|
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
|
return target
|
||||||
|
|
||||||
|
def Kdiag(self, X):
|
||||||
|
assert X.shape[1] == self.input_dim
|
||||||
|
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
|
||||||
|
|
||||||
|
|
||||||
|
def psi0(self, Z, mu, S):
|
||||||
|
return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0)
|
||||||
|
|
||||||
|
def psi1(self, Z, mu, S):
|
||||||
|
return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
|
||||||
|
|
||||||
|
def psi2(self, Z, mu, S):
|
||||||
|
psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
|
||||||
|
|
||||||
|
# compute the "cross" terms
|
||||||
|
from white import White
|
||||||
|
from rbf import RBF
|
||||||
|
#from rbf_inv import RBFInv
|
||||||
|
#from bias import Bias
|
||||||
|
from linear import Linear
|
||||||
|
#ffrom fixed import Fixed
|
||||||
|
|
||||||
|
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
|
||||||
|
# white doesn't combine with anything
|
||||||
|
if isinstance(p1, White) or isinstance(p2, White):
|
||||||
|
pass
|
||||||
|
# rbf X bias
|
||||||
|
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
|
||||||
|
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
|
||||||
|
tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
|
||||||
|
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
|
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
|
||||||
|
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
|
||||||
|
tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
|
||||||
|
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
return psi2
|
||||||
|
|
||||||
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
from white import White
|
||||||
|
from rbf import RBF
|
||||||
|
#from rbf_inv import RBFInv
|
||||||
|
#from bias import Bias
|
||||||
|
from linear import Linear
|
||||||
|
#ffrom fixed import Fixed
|
||||||
|
|
||||||
|
for p1, is1 in zip(self._parameters_, self.input_slices):
|
||||||
|
|
||||||
|
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
|
||||||
|
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||||
|
for p2, is2 in zip(self._parameters_, self.input_slices):
|
||||||
|
if p2 is p1:
|
||||||
|
continue
|
||||||
|
if isinstance(p2, White):
|
||||||
|
continue
|
||||||
|
elif isinstance(p2, Bias):
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||||
|
else:
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
||||||
|
|
||||||
|
|
||||||
|
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
||||||
|
|
||||||
|
|
||||||
|
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
from white import White
|
||||||
|
from rbf import RBF
|
||||||
|
#from rbf_inv import rbfinv
|
||||||
|
#from bias import bias
|
||||||
|
from linear import Linear
|
||||||
|
#ffrom fixed import fixed
|
||||||
|
|
||||||
|
target = np.zeros(Z.shape)
|
||||||
|
for p1, is1 in zip(self._parameters_, self.input_slices):
|
||||||
|
|
||||||
|
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
|
||||||
|
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||||
|
for p2, is2 in zip(self._parameters_, self.input_slices):
|
||||||
|
if p2 is p1:
|
||||||
|
continue
|
||||||
|
if isinstance(p2, White):
|
||||||
|
continue
|
||||||
|
elif isinstance(p2, Bias):
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||||
|
else:
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
||||||
|
|
||||||
|
|
||||||
|
target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
||||||
|
return target
|
||||||
|
|
||||||
|
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
from white import White
|
||||||
|
from rbf import RBF
|
||||||
|
#from rbf_inv import rbfinv
|
||||||
|
#from bias import bias
|
||||||
|
from linear import Linear
|
||||||
|
#ffrom fixed import fixed
|
||||||
|
|
||||||
|
target_mu = np.zeros(mu.shape)
|
||||||
|
target_S = np.zeros(S.shape)
|
||||||
|
for p1, is1 in zip(self._parameters_, self.input_slices):
|
||||||
|
|
||||||
|
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
|
||||||
|
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||||
|
for p2, is2 in zip(self._parameters_, self.input_slices):
|
||||||
|
if p2 is p1:
|
||||||
|
continue
|
||||||
|
if isinstance(p2, White):
|
||||||
|
continue
|
||||||
|
elif isinstance(p2, Bias):
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||||
|
else:
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
||||||
|
|
||||||
|
|
||||||
|
a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
||||||
|
target_mu += a
|
||||||
|
target_S += b
|
||||||
|
return target_mu, target_S
|
||||||
|
|
||||||
|
def plot(self, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
See GPy.plotting.matplot_dep.plot
|
||||||
|
"""
|
||||||
|
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||||
|
from ..plotting.matplot_dep import kernel_plots
|
||||||
|
kernel_plots.plot(self,*args)
|
||||||
|
|
||||||
|
def _getstate(self):
|
||||||
|
"""
|
||||||
|
Get the current state of the class,
|
||||||
|
here just all the indices, rest can get recomputed
|
||||||
|
"""
|
||||||
|
return Parameterized._getstate(self) + [#self._parameters_,
|
||||||
|
self.input_dim,
|
||||||
|
self.input_slices,
|
||||||
|
self._param_slices_
|
||||||
|
]
|
||||||
|
|
||||||
|
def _setstate(self, state):
|
||||||
|
self._param_slices_ = state.pop()
|
||||||
|
self.input_slices = state.pop()
|
||||||
|
self.input_dim = state.pop()
|
||||||
|
Parameterized._setstate(self, state)
|
||||||
|
|
||||||
|
|
||||||
62
GPy/kern/_src/bias.py
Normal file
62
GPy/kern/_src/bias.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
from kern import Kern
|
||||||
|
from ...core.parameterization import Param
|
||||||
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class Bias(Kern):
|
||||||
|
def __init__(self,input_dim,variance=1.,name=None):
|
||||||
|
super(Bias, self).__init__(input_dim, name)
|
||||||
|
self.variance = Param("variance", variance, Logexp())
|
||||||
|
self.add_parameter(self.variance)
|
||||||
|
|
||||||
|
def K(self, X, X2=None):
|
||||||
|
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
|
||||||
|
ret = np.empty(shape, dtype=np.float64)
|
||||||
|
ret[:] = self.variance
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def Kdiag(self,X):
|
||||||
|
ret = np.empty((X.shape[0],), dtype=np.float64)
|
||||||
|
ret[:] = self.variance
|
||||||
|
return ret
|
||||||
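Both K and Kdiag of the Bias kernel are constant in the inputs; the pair above is equivalent to this small sketch:

import numpy as np

def bias_K(variance, X, X2=None):
    # Every covariance entry equals the (scalar) variance parameter.
    n2 = X.shape[0] if X2 is None else X2.shape[0]
    return np.full((X.shape[0], n2), float(variance))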
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
self.variance.gradient = dL_dK.sum()
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
self.variance.gradient = dL_dKdiag.sum()
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK,X, X2, target):
|
||||||
|
return np.zeros(X.shape)
|
||||||
|
|
||||||
|
def gradients_X_diag(self,dL_dKdiag,X,target):
|
||||||
|
return np.zeros(X.shape)
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------------#
|
||||||
|
# PSI statistics #
|
||||||
|
#---------------------------------------#
|
||||||
|
|
||||||
|
def psi0(self, Z, mu, S):
|
||||||
|
return self.Kdiag(mu)
|
||||||
|
|
||||||
|
def psi1(self, Z, mu, S, target):
|
||||||
|
return self.K(mu, Z)
|
||||||
|
|
||||||
|
def psi2(self, Z, mu, S, target):
|
||||||
|
ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||||
|
ret[:] = self.variance**2
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
|
||||||
|
|
||||||
|
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
return np.zeros(Z.shape)
|
||||||
|
|
||||||
|
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
return np.zeros(mu.shape), np.zeros(S.shape)
|
||||||
50
GPy/kern/_src/brownian.py
Normal file
50
GPy/kern/_src/brownian.py
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
from kern import Kern
|
||||||
|
from ...core.parameterization import Param
|
||||||
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class Brownian(Kern):
|
||||||
|
"""
|
||||||
|
Brownian motion in 1D only.
|
||||||
|
|
||||||
|
Negative times are treated as a separate (backwards!) Brownian motion.
|
||||||
|
|
||||||
|
:param input_dim: the number of input dimensions
|
||||||
|
:type input_dim: int
|
||||||
|
:param variance:
|
||||||
|
:type variance: float
|
||||||
|
"""
|
||||||
|
def __init__(self, input_dim=1, variance=1., name='Brownian'):
|
||||||
|
assert input_dim==1, "Brownian motion in 1D only"
|
||||||
|
super(Brownian, self).__init__(input_dim, name)
|
||||||
|
|
||||||
|
self.variance = Param('variance', variance, Logexp())
|
||||||
|
self.add_parameters(self.variance)
|
||||||
|
|
||||||
|
def K(self,X,X2=None):
|
||||||
|
if X2 is None:
|
||||||
|
X2 = X
|
||||||
|
return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)
|
||||||
|
|
||||||
|
def Kdiag(self,X):
|
||||||
|
return self.variance*np.abs(X.flatten())
|
||||||
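For non-negative times the covariance defined above reduces to the classic Brownian-motion kernel K(s, t) = variance * min(s, t); a quick illustrative check:

import numpy as np

X = np.array([[0.5], [1.0], [2.0]])
K = 1.0 * np.where(np.sign(X) == np.sign(X.T), np.fmin(np.abs(X), np.abs(X.T)), 0.)
# K[0, 2] == 0.5 and K[1, 2] == 1.0, i.e. min(s, t) with unit variance.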
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
if X2 is None:
|
||||||
|
X2 = X
|
||||||
|
self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.))
|
||||||
|
|
||||||
|
#def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
#self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag)
|
||||||
|
|
||||||
|
#def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
#if X2 is None:
|
||||||
|
#return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None]
|
||||||
|
#else:
|
||||||
|
#return np.sum(np.where(np.logical_and(np.abs(X)<np.abs(X2.T), np.sign(X)==np.sign(X2)), self.variance*dL_dK,0.),1)[:,None]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
# Copyright (c) 2012, James Hensman and Ricardo Andrade
|
# Copyright (c) 2012, James Hensman and Ricardo Andrade
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from kernpart import Kernpart
|
from kern import Kern
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class Coregionalize(Kernpart):
|
class Coregionalize(Kern):
|
||||||
"""
|
"""
|
||||||
Covariance function for intrinsic/linear coregionalization models
|
Covariance function for intrinsic/linear coregionalization models
|
||||||
|
|
||||||
|
|
@ -37,7 +38,7 @@ class Coregionalize(Kernpart):
|
||||||
super(Coregionalize, self).__init__(input_dim=1, name=name)
|
super(Coregionalize, self).__init__(input_dim=1, name=name)
|
||||||
self.output_dim = output_dim
|
self.output_dim = output_dim
|
||||||
self.rank = rank
|
self.rank = rank
|
||||||
if self.rank>output_dim-1:
|
if self.rank>output_dim:
|
||||||
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
|
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
|
||||||
if W is None:
|
if W is None:
|
||||||
W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
|
W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
|
||||||
|
|
@ -48,7 +49,7 @@ class Coregionalize(Kernpart):
|
||||||
kappa = 0.5*np.ones(self.output_dim)
|
kappa = 0.5*np.ones(self.output_dim)
|
||||||
else:
|
else:
|
||||||
assert kappa.shape==(self.output_dim, )
|
assert kappa.shape==(self.output_dim, )
|
||||||
self.kappa = Param('kappa', kappa)
|
self.kappa = Param('kappa', kappa, Logexp())
|
||||||
self.add_parameters(self.W, self.kappa)
|
self.add_parameters(self.W, self.kappa)
|
||||||
self.parameters_changed()
|
self.parameters_changed()
|
||||||
|
|
||||||
|
|
@ -56,8 +57,8 @@ class Coregionalize(Kernpart):
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||||
|
|
||||||
def K(self,index,index2,target):
|
def K(self, X, X2=None):
|
||||||
index = np.asarray(index,dtype=np.int)
|
index = np.asarray(X, dtype=np.int)
|
||||||
|
|
||||||
#here's the old code (numpy)
|
#here's the old code (numpy)
|
||||||
#if index2 is None:
|
#if index2 is None:
|
||||||
|
|
@ -69,41 +70,45 @@ class Coregionalize(Kernpart):
|
||||||
#ii, jj = ii.T, jj.T
|
#ii, jj = ii.T, jj.T
|
||||||
#false_target += self.B[ii, jj]
|
#false_target += self.B[ii, jj]
|
||||||
|
|
||||||
if index2 is None:
|
|
||||||
|
if X2 is None:
|
||||||
|
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
|
||||||
code="""
|
code="""
|
||||||
for(int i=0;i<N; i++){
|
for(int i=0;i<N; i++){
|
||||||
target[i+i*N] += B[index[i]+output_dim*index[i]];
|
target[i+i*N] = B[index[i]+output_dim*index[i]];
|
||||||
for(int j=0; j<i; j++){
|
for(int j=0; j<i; j++){
|
||||||
target[j+i*N] += B[index[i]+output_dim*index[j]];
|
target[j+i*N] = B[index[i]+output_dim*index[j]];
|
||||||
target[i+j*N] += target[j+i*N];
|
target[i+j*N] = target[j+i*N];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
N, B, output_dim = index.size, self.B, self.output_dim
|
N, B, output_dim = index.size, self.B, self.output_dim
|
||||||
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
|
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
|
||||||
else:
|
else:
|
||||||
index2 = np.asarray(index2,dtype=np.int)
|
index2 = np.asarray(X2, dtype=np.int)
|
||||||
|
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
|
||||||
code="""
|
code="""
|
||||||
for(int i=0;i<num_inducing; i++){
|
for(int i=0;i<num_inducing; i++){
|
||||||
for(int j=0; j<N; j++){
|
for(int j=0; j<N; j++){
|
||||||
target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
|
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
|
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
|
||||||
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
|
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
|
||||||
|
return target
|
||||||
|
|
||||||
|
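The weave kernels above fill target[i, j] = B[index[i], index2[j]]; a pure-NumPy equivalent (a sketch of the commented-out "old code" path):

import numpy as np

def coregionalize_K(B, X, X2=None):
    # X and X2 hold integer output indices; K[i, j] = B[index[i], index2[j]].
    index = np.asarray(X, dtype=int).flatten()
    index2 = index if X2 is None else np.asarray(X2, dtype=int).flatten()
    return B[index[:, None], index2[None, :]]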
|
||||||
def Kdiag(self,index,target):
|
def Kdiag(self, X):
|
||||||
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()]
|
||||||
|
|
||||||
def update_gradients_full(self,dL_dK, index, index2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
index = np.asarray(index,dtype=np.int)
|
index = np.asarray(X, dtype=np.int)
|
||||||
dL_dK_small = np.zeros_like(self.B)
|
dL_dK_small = np.zeros_like(self.B)
|
||||||
if index2 is None:
|
if X2 is None:
|
||||||
index2 = index
|
index2 = index
|
||||||
else:
|
else:
|
||||||
index2 = np.asarray(index2,dtype=np.int)
|
index2 = np.asarray(X2, dtype=np.int)
|
||||||
|
|
||||||
code="""
|
code="""
|
||||||
for(int i=0; i<num_inducing; i++){
|
for(int i=0; i<num_inducing; i++){
|
||||||
|
|
@ -122,17 +127,15 @@ class Coregionalize(Kernpart):
|
||||||
self.W.gradient = dW
|
self.W.gradient = dW
|
||||||
self.kappa.gradient = dkappa
|
self.kappa.gradient = dkappa
|
||||||
|
|
||||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
raise NotImplementedError, "some code below"
|
index = np.asarray(X, dtype=np.int).flatten()
|
||||||
#def dKdiag_dtheta(self,dL_dKdiag,index,target):
|
dL_dKdiag_small = np.array([dL_dKdiag[index==i] for i in xrange(output_dim)])
|
||||||
#index = np.asarray(index,dtype=np.int).flatten()
|
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
|
||||||
#dL_dKdiag_small = np.zeros(self.output_dim)
|
self.kappa.gradient = dL_dKdiag_small
|
||||||
#for i in range(self.output_dim):
|
|
||||||
#dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
#dW = 2.*self.W*dL_dKdiag_small[:,None]
|
return np.zeros(X.shape)
|
||||||
#dkappa = dL_dKdiag_small
|
|
||||||
#target += np.hstack([dW.flatten(),dkappa])
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
|
return np.zeros(X.shape)
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
|
||||||
#NOTE In this case, pass is equivalent to returning zero.
|
|
||||||
pass
|
|
||||||
328
GPy/kern/_src/kern.py
Normal file
328
GPy/kern/_src/kern.py
Normal file
|
|
@ -0,0 +1,328 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import itertools
|
||||||
|
from ...core.parameterization import Parameterized
|
||||||
|
from ...core.parameterization.param import Param
|
||||||
|
|
||||||
|
|
||||||
|
class Kern(Parameterized):
|
||||||
|
def __init__(self, input_dim, name, *a, **kw):
|
||||||
|
"""
|
||||||
|
The base class for a kernel: a positive definite function
|
||||||
|
which forms a covariance function (kernel).
|
||||||
|
|
||||||
|
:param input_dim: the number of input dimensions to the function
|
||||||
|
:type input_dim: int
|
||||||
|
|
||||||
|
Do not instantiate.
|
||||||
|
"""
|
||||||
|
super(Kern, self).__init__(name=name, *a, **kw)
|
||||||
|
self.input_dim = input_dim
|
||||||
|
|
||||||
|
def K(self, X, X2):
|
||||||
|
raise NotImplementedError
|
||||||
|
def Kdiag(self, Xa):
|
||||||
|
raise NotImplementedError
|
||||||
|
def psi0(self,Z,posterior_variational):
|
||||||
|
raise NotImplementedError
|
||||||
|
def psi1(self,Z,posterior_variational):
|
||||||
|
raise NotImplementedError
|
||||||
|
def psi2(self,Z,posterior_variational):
|
||||||
|
raise NotImplementedError
|
||||||
|
def gradients_X(self, dL_dK, X, X2):
|
||||||
|
raise NotImplementedError
|
||||||
|
def gradients_X_diag(self, dL_dK, X):
|
||||||
|
raise NotImplementedError
|
||||||
|
def update_gradients_full(self, dL_dK, X):
|
||||||
|
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||||
|
raise NotImplementedError
|
||||||
|
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||||
|
target = np.zeros(self.size)
|
||||||
|
self.update_gradients_diag(dL_dKdiag, X)
|
||||||
|
self._collect_gradient(target)
|
||||||
|
self.update_gradients_full(dL_dKnm, X, Z)
|
||||||
|
self._collect_gradient(target)
|
||||||
|
self.update_gradients_full(dL_dKmm, Z, None)
|
||||||
|
self._collect_gradient(target)
|
||||||
|
self._set_gradient(target)
|
||||||
|
|
||||||
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
|
||||||
|
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
|
||||||
|
raise NotImplementedError
|
||||||
|
def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||||
|
grad = self.gradients_X(dL_dKmm, Z)
|
||||||
|
grad += self.gradients_X(dL_dKnm.T, Z, X)
|
||||||
|
return grad
|
||||||
|
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
|
||||||
|
raise NotImplementedError
|
||||||
|
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def plot_ARD(self, *args):
|
||||||
|
"""If an ARD kernel is present, plot a bar representation using matplotlib
|
||||||
|
|
||||||
|
See GPy.plotting.matplot_dep.plot_ARD
|
||||||
|
"""
|
||||||
|
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||||
|
from ...plotting.matplot_dep import kernel_plots
|
||||||
|
return kernel_plots.plot_ARD(self,*args)
|
||||||
|
|
||||||
|
|
||||||
|
def __add__(self, other):
|
||||||
|
""" Overloading of the '+' operator. for more control, see self.add """
|
||||||
|
return self.add(other)
|
||||||
|
|
||||||
|
def add(self, other, tensor=False):
|
||||||
|
"""
|
||||||
|
Add another kernel to this one.
|
||||||
|
|
||||||
|
If tensor is False, both kernels are defined on the same _space_; then
|
||||||
|
the created kernel will have the same number of inputs as self and
|
||||||
|
other (which must be the same).
|
||||||
|
|
||||||
|
If tensor is True, then the dimensions are stacked 'horizontally', so
|
||||||
|
that the resulting kernel has self.input_dim + other.input_dim inputs.
|
||||||
|
|
||||||
|
:param other: the other kernel to be added
|
||||||
|
:type other: GPy.kern
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
||||||
|
from add import Add
|
||||||
|
return Add([self, other], tensor)
|
||||||
|
|
||||||
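A hypothetical usage of the overloaded operators on the new Kern base class (assuming the params branch exposes RBF and Linear from GPy.kern as in the kern __init__ above):

import GPy

k_sum = GPy.kern.RBF(2) + GPy.kern.Linear(2)                      # sum on the same 2-d input space
k_tensor = GPy.kern.RBF(1).add(GPy.kern.Linear(1), tensor=True)   # stacked inputs, 2-d in total
k_prod = GPy.kern.RBF(2) * GPy.kern.Linear(2)                     # product on the same space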
|
def __call__(self, X, X2=None):
|
||||||
|
return self.K(X, X2)
|
||||||
|
|
||||||
|
def __mul__(self, other):
|
||||||
|
""" Here we overload the '*' operator. See self.prod for more information"""
|
||||||
|
return self.prod(other)
|
||||||
|
|
||||||
|
def __pow__(self, other, tensor=False):
|
||||||
|
"""
|
||||||
|
Shortcut for tensor `prod`.
|
||||||
|
"""
|
||||||
|
return self.prod(other, tensor=True)
|
||||||
|
|
||||||
|
def prod(self, other, tensor=False):
|
||||||
|
"""
|
||||||
|
Multiply two kernels (either on the same space, or on the tensor product of the input space).
|
||||||
|
|
||||||
|
:param other: the other kernel to be added
|
||||||
|
:type other: GPy.kern
|
||||||
|
:param tensor: whether or not to use the tensor space (default is false).
|
||||||
|
:type tensor: bool
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
||||||
|
from prod import Prod
|
||||||
|
return Prod(self, other, tensor)
|
||||||
|
|
||||||
|
|
||||||
|
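def _example_kernel_composition_sketch():
    # Illustrative sketch (editor's addition, not part of the original file):
    # the overloaded operators above build composite kernels. '+' calls add(),
    # '*' calls prod() on a shared input space, and '**' builds the tensor
    # product. The GPy.kern.RBF / GPy.kern.Linear constructors are assumed to
    # be exposed as elsewhere in this changeset.
    import GPy
    k1 = GPy.kern.RBF(2)
    k2 = GPy.kern.Linear(2)
    k_sum = k1 + k2        # Add([k1, k2], tensor=False): still 2 inputs
    k_prod = k1 * k2       # Prod(k1, k2, tensor=False): still 2 inputs
    k_tensor = k1 ** k2    # Prod(k1, k2, tensor=True): 2 + 2 = 4 inputs
    return k_sum, k_prod, k_tensor
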
from GPy.core.model import Model

class Kern_check_model(Model):
    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        from GPy.kern import RBF
        Model.__init__(self, 'kernel_test_model')
        num_samples = 20
        num_samples2 = 10
        if kernel is None:
            kernel = RBF(1)
        if X is None:
            X = np.random.randn(num_samples, kernel.input_dim)
        if dL_dK is None:
            if X2 is None:
                dL_dK = np.ones((X.shape[0], X.shape[0]))
            else:
                dL_dK = np.ones((X.shape[0], X2.shape[0]))

        self.kernel = kernel
        self.add_parameter(kernel)
        self.X = X
        self.X2 = X2
        self.dL_dK = dL_dK

    def is_positive_definite(self):
        v = np.linalg.eig(self.kernel.K(self.X))[0]
        if any(v < -10 * sys.float_info.epsilon):
            return False
        else:
            return True

    def log_likelihood(self):
        return (self.dL_dK * self.kernel.K(self.X, self.X2)).sum()

    def _log_likelihood_gradients(self):
        raise NotImplementedError, "This needs to be implemented to use the Kern_check_model class."

class Kern_check_dK_dtheta(Kern_check_model):
    """This class allows gradient checks for the gradient of a kernel with respect to its parameters."""
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)

    def _log_likelihood_gradients(self):
        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)

class Kern_check_dKdiag_dtheta(Kern_check_model):
    """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
    def __init__(self, kernel=None, dL_dK=None, X=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
        if dL_dK is None:
            self.dL_dK = np.ones((self.X.shape[0]))

    def parameters_changed(self):
        self.kernel.update_gradients_full(self.dL_dK, self.X)

    def log_likelihood(self):
        return (self.dL_dK * self.kernel.Kdiag(self.X)).sum()

    def _log_likelihood_gradients(self):
        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)

class Kern_check_dK_dX(Kern_check_model):
    """This class allows gradient checks for the gradient of a kernel with respect to X."""
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
        self.remove_parameter(kernel)
        self.X = Param('X', self.X)
        self.add_parameter(self.X)

    def _log_likelihood_gradients(self):
        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()

class Kern_check_dKdiag_dX(Kern_check_dK_dX):
    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X."""
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_dK_dX.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
        if dL_dK is None:
            self.dL_dK = np.ones((self.X.shape[0]))

    def log_likelihood(self):
        return (self.dL_dK * self.kernel.Kdiag(self.X)).sum()

    def _log_likelihood_gradients(self):
        return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()

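def _example_checkgrad_sketch():
    # Illustrative sketch (editor's addition): each checker above wraps a
    # kernel in a dummy Model whose "log likelihood" is sum(dL_dK * K), so
    # Model.checkgrad() can compare _log_likelihood_gradients() against finite
    # differences. This assumes GPy.kern.RBF is importable as in
    # Kern_check_model.__init__, and that the kernel implements the
    # _param_grad_helper hook used by Kern_check_dK_dtheta.
    from GPy.kern import RBF
    m = Kern_check_dK_dtheta(kernel=RBF(1))
    return m.checkgrad(verbose=True)
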
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
    """
    This function runs on kernels to check the correctness of their
    implementation. It checks that the covariance function is positive definite
    for a randomly generated data set, and that the gradients with respect to
    the parameters and the inputs agree with finite differences.

    :param kern: the kernel to be tested.
    :type kern: GPy.kern.Kernpart
    :param X: X input values to test the covariance function.
    :type X: ndarray
    :param X2: X2 input values to test the covariance function.
    :type X2: ndarray
    """
    pass_checks = True
    if X is None:
        X = np.random.randn(10, kern.input_dim)
        if output_ind is not None:
            X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
    if X2 is None:
        X2 = np.random.randn(20, kern.input_dim)
        if output_ind is not None:
            X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])

    if verbose:
        print("Checking covariance function is positive definite.")
    result = Kern_check_model(kern, X=X).is_positive_definite()
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Positive definite check failed for " + kern.name + " covariance function.")
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt theta.")
    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt X.")
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    except NotImplementedError:
        result = True
        if verbose:
            print("gradients_X not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt X.")
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    except NotImplementedError:
        result = True
        if verbose:
            print("gradients_X not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt X.")
    try:
        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    except NotImplementedError:
        result = True
        if verbose:
            print("gradients_X not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    return pass_checks
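
def _example_kern_test_sketch():
    # Illustrative usage sketch (editor's addition): run the full battery of
    # checks defined by kern_test() on a kernel instance. Assumes GPy.kern.RBF
    # is importable as elsewhere in this file and implements the hooks used by
    # the checker classes above.
    from GPy.kern import RBF
    return kern_test(RBF(2), verbose=True)  # True if every check passes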
GPy/kern/_src/linear.py (new file, 254 additions)
@@ -0,0 +1,254 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import weave
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import cache_this

class Linear(Kern):
    """
    Linear kernel

    .. math::

       k(x,y) = \sum_{i=1}^{input\_dim} \sigma^2_i x_iy_i

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variances: the vector of variances :math:`\sigma^2_i`
    :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
    :param ARD: Auto Relevance Determination. If False, the kernel has only one variance parameter \sigma^2; otherwise there is one variance parameter per input dimension.
    :type ARD: Boolean
    :rtype: kernel object
    """

    def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
        super(Linear, self).__init__(input_dim, name)
        self.ARD = ARD
        if not ARD:
            if variances is not None:
                variances = np.asarray(variances)
                assert variances.size == 1, "Only one variance needed for non-ARD kernel"
            else:
                variances = np.ones(1)
            self._Xcache, self._X2cache = np.empty(shape=(2,))
        else:
            if variances is not None:
                variances = np.asarray(variances)
                assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
            else:
                variances = np.ones(self.input_dim)

        self.variances = Param('variances', variances, Logexp())
        self.add_parameter(self.variances)
        self.variances.add_observer(self, self._on_changed)

    def _on_changed(self, obj):
        # TODO: move this to the base class? Isn't it just for the caching?
        self._notify_observers()

    #@cache_this(limit=3, reset_on_self=True)
    def K(self, X, X2=None):
        if self.ARD:
            if X2 is None:
                return tdot(X * np.sqrt(self.variances))
            else:
                rv = np.sqrt(self.variances)
                return np.dot(X * rv, (X2 * rv).T)
        else:
            return self._dot_product(X, X2) * self.variances

    #@cache_this(limit=3, reset_on_self=False)
    def _dot_product(self, X, X2=None):
        if X2 is None:
            return tdot(X)
        else:
            return np.dot(X, X2.T)

    def Kdiag(self, X):
        return np.sum(self.variances * np.square(X), -1)

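    # --- Illustrative sketch (editor's addition) -----------------------------
    # For the ARD case the linear kernel is K = X diag(variances) X2^T, which
    # is what K() above computes via the scaled dot products. A minimal numpy
    # check, using only standard numpy calls:
    #
    #     X, X2 = np.random.randn(5, 3), np.random.randn(4, 3)
    #     variances = np.array([0.5, 1.0, 2.0])
    #     K_direct = X.dot(np.diag(variances)).dot(X2.T)
    #     rv = np.sqrt(variances)
    #     assert np.allclose(K_direct, np.dot(X * rv, (X2 * rv).T))
    # --------------------------------------------------------------------------
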
    def update_gradients_full(self, dL_dK, X, X2=None):
        if self.ARD:
            if X2 is None:
                self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
            else:
                product = X[:, None, :] * X2[None, :, :]
                self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
        else:
            self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        tmp = dL_dKdiag[:, None] * X ** 2
        if self.ARD:
            self.variances.gradient = tmp.sum(0)
        else:
            self.variances.gradient = np.atleast_1d(tmp.sum())

    def gradients_X(self, dL_dK, X, X2=None):
        if X2 is None:
            return 2. * ((X[None, :, :] * self.variances) * dL_dK[:, :, None]).sum(1)
        else:
            return ((X2[None, :, :] * self.variances) * dL_dK[:, :, None]).sum(1)

    def gradients_X_diag(self, dL_dKdiag, X):
        return 2. * self.variances * dL_dKdiag[:, None] * X

    #---------------------------------------#
    #             PSI statistics            #
    #              variational              #
    #---------------------------------------#

    def psi0(self, Z, posterior_variational):
        return np.sum(self.variances * self._mu2S(posterior_variational), 1)

    def psi1(self, Z, posterior_variational):
        return self.K(posterior_variational.mean, Z) # the variance S does nothing here

    def psi2(self, Z, posterior_variational):
        ZA = Z * self.variances
        ZAinner = self._ZAinner(posterior_variational, Z)
        return np.dot(ZAinner, ZA.T)

    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
        mu, S = posterior_variational.mean, posterior_variational.variance
        # psi0:
        tmp = dL_dpsi0[:, None] * self._mu2S(posterior_variational)
        if self.ARD: grad = tmp.sum(0)
        else: grad = np.atleast_1d(tmp.sum())
        # psi1
        self.update_gradients_full(dL_dpsi1, mu, Z)
        grad += self.variances.gradient
        # psi2
        tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(posterior_variational, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
        if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
        else: grad += tmp.sum()
        # from Kmm
        self.update_gradients_full(dL_dKmm, Z, None)
        self.variances.gradient += grad

    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
        # Kmm
        grad = self.gradients_X(dL_dKmm, Z, None)
        # psi1
        grad += self.gradients_X(dL_dpsi1.T, Z, posterior_variational.mean)
        # psi2
        self._weave_dpsi2_dZ(dL_dpsi2, Z, posterior_variational, grad)
        return grad

    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
        grad_mu, grad_S = np.zeros(posterior_variational.mean.shape), np.zeros(posterior_variational.mean.shape)
        # psi0
        grad_mu += dL_dpsi0[:, None] * (2.0 * posterior_variational.mean * self.variances)
        grad_S += dL_dpsi0[:, None] * self.variances
        # psi1
        grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
        # psi2
        self._weave_dpsi2_dmuS(dL_dpsi2, Z, posterior_variational, grad_mu, grad_S)

        return grad_mu, grad_S

    #--------------------------------------------------#
    #            Helpers for psi statistics            #
    #--------------------------------------------------#

    def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S):
        # Think N, num_inducing, num_inducing, input_dim
        ZA = Z * self.variances
        AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
        AZZA = AZZA + AZZA.swapaxes(1, 2)
        AZZA_2 = AZZA / 2.

        # Using weave, we can exploit the symmetry of this problem:
        code = """
        int n, m, mm, q, qq;
        double factor, tmp;
        #pragma omp parallel for private(m, mm, q, qq, factor, tmp)
        for(n=0; n<N; n++){
            for(m=0; m<num_inducing; m++){
                for(mm=0; mm<=m; mm++){
                    // add in a factor of 2 for the off-diagonal terms (and then count them only once)
                    if(m==mm)
                        factor = dL_dpsi2(n,m,mm);
                    else
                        factor = 2.0*dL_dpsi2(n,m,mm);

                    for(q=0; q<input_dim; q++){

                        // take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
                        tmp = 0.0;
                        for(qq=0; qq<input_dim; qq++){
                            tmp += mu(n,qq)*AZZA(qq,m,mm,q);
                        }

                        target_mu(n,q) += factor*tmp;
                        target_S(n,q) += factor*AZZA_2(q,m,mm,q);
                    }
                }
            }
        }
        """
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave_options = {'headers'            : ['<omp.h>'],
                         'extra_compile_args' : ['-fopenmp -O3'], #-march=native'],
                         'extra_link_args'    : ['-lgomp']}

        mu = pv.mean
        N, num_inducing, input_dim, mu = mu.shape[0], Z.shape[0], mu.shape[1], param_to_array(mu)
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'AZZA', 'AZZA_2', 'target_mu', 'target_S', 'dL_dpsi2'],
                     type_converters=weave.converters.blitz, **weave_options)

    def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target):
        AZA = self.variances * self._ZAinner(pv, Z)
        code = """
        int n, m, mm, q;
        #pragma omp parallel for private(n, mm, q)
        for(m=0; m<num_inducing; m++){
            for(q=0; q<input_dim; q++){
                for(mm=0; mm<num_inducing; mm++){
                    for(n=0; n<N; n++){
                        target(m,q) += 2*dL_dpsi2(n,m,mm)*AZA(n,mm,q);
                    }
                }
            }
        }
        """
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave_options = {'headers'            : ['<omp.h>'],
                         'extra_compile_args' : ['-fopenmp -O3'], #-march=native'],
                         'extra_link_args'    : ['-lgomp']}

        N, num_inducing, input_dim = pv.mean.shape[0], Z.shape[0], pv.mean.shape[1]
        mu = param_to_array(pv.mean)
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N', 'num_inducing', 'input_dim', 'AZA', 'target', 'dL_dpsi2'],
                     type_converters=weave.converters.blitz, **weave_options)

    def _mu2S(self, pv):
        return np.square(pv.mean) + pv.variance

    def _ZAinner(self, pv, Z):
        ZA = Z * self.variances
        inner = (pv.mean[:, None, :] * pv.mean[:, :, None])
        diag_indices = np.diag_indices(pv.mean.shape[1], 2)
        inner[:, diag_indices[0], diag_indices[1]] += pv.variance

        return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!

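def _example_linear_psi2_sketch():
    # Illustrative sketch (editor's addition): the linear-kernel psi2 statistic
    # computed by psi2()/_ZAinner() above should be, for each data point n,
    #     psi2[n] = (Z A) (mu_n mu_n^T + diag(S_n)) (Z A)^T,  with A = diag(variances),
    # which can be checked directly with a numpy einsum.
    import numpy as np
    N, M, Q = 4, 3, 2
    Z = np.random.randn(M, Q)
    mu = np.random.randn(N, Q)
    S = np.random.rand(N, Q)
    variances = np.random.rand(Q)
    ZA = Z * variances
    inner = mu[:, :, None] * mu[:, None, :] + np.array([np.diag(s) for s in S])
    psi2 = np.einsum('mq,nqr,kr->nmk', ZA, inner, ZA)  # shape (N, M, M)
    return psi2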
GPy/kern/_src/prod.py (new file, 65 additions)
@@ -0,0 +1,65 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kern import Kern
import numpy as np

class Prod(Kern):
    """
    Computes the product of two kernels.

    :param k1, k2: the kernels to multiply
    :type k1, k2: Kern
    :param tensor: the kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object
    """
    def __init__(self, k1, k2, tensor=False):
        if tensor:
            super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
            self.slice1 = slice(0, k1.input_dim)
            self.slice2 = slice(k1.input_dim, k1.input_dim + k2.input_dim)
        else:
            assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
            super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
            self.slice1 = slice(0, self.input_dim)
            self.slice2 = slice(0, self.input_dim)
        self.k1 = k1
        self.k2 = k2
        self.add_parameters(self.k1, self.k2)

    def K(self, X, X2=None):
        if X2 is None:
            return self.k1.K(X[:, self.slice1], None) * self.k2.K(X[:, self.slice2], None)
        else:
            return self.k1.K(X[:, self.slice1], X2[:, self.slice1]) * self.k2.K(X[:, self.slice2], X2[:, self.slice2])

    def Kdiag(self, X):
        return self.k1.Kdiag(X[:, self.slice1]) * self.k2.Kdiag(X[:, self.slice2])

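    # --- Illustrative sketch (editor's addition) -----------------------------
    # With tensor=False the product kernel is an elementwise product of the two
    # Gram matrices evaluated on the same inputs; with tensor=True each factor
    # sees its own slice of the columns. A toy numpy sketch with a stand-in
    # linear kernel k(X, X2) = X X2^T:
    #
    #     k_lin = lambda X, X2: X.dot(X2.T)
    #     X = np.random.randn(6, 2)
    #     K_shared = k_lin(X, X) * k_lin(X, X)                       # tensor=False
    #     Xt = np.random.randn(6, 4)                                 # 2 + 2 columns
    #     K_tensor = k_lin(Xt[:, :2], Xt[:, :2]) * k_lin(Xt[:, 2:], Xt[:, 2:])  # tensor=True
    # --------------------------------------------------------------------------
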
    def update_gradients_full(self, dL_dK, X):
        self.k1.update_gradients_full(dL_dK * self.k2(X[:, self.slice2]), X[:, self.slice1])
        self.k2.update_gradients_full(dL_dK * self.k1(X[:, self.slice1]), X[:, self.slice2])

    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:, self.slice2]), dL_dKnm * self.k2(X[:, self.slice2], Z[:, self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:, self.slice2]), X[:, self.slice1], Z[:, self.slice1])
        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:, self.slice1]), dL_dKnm * self.k1(X[:, self.slice1], Z[:, self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:, self.slice1]), X[:, self.slice2], Z[:, self.slice2])

    def gradients_X(self, dL_dK, X, X2=None):
        target = np.zeros(X.shape)
        if X2 is None:
            target[:, self.slice1] += self.k1.gradients_X(dL_dK * self.k2(X[:, self.slice2]), X[:, self.slice1], None)
            target[:, self.slice2] += self.k2.gradients_X(dL_dK * self.k1(X[:, self.slice1]), X[:, self.slice2], None)
        else:
            target[:, self.slice1] += self.k1.gradients_X(dL_dK * self.k2(X[:, self.slice2], X2[:, self.slice2]), X[:, self.slice1], X2[:, self.slice1])
            target[:, self.slice2] += self.k2.gradients_X(dL_dK * self.k1(X[:, self.slice1], X2[:, self.slice1]), X[:, self.slice2], X2[:, self.slice2])
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        target = np.zeros(X.shape)
        target[:, self.slice1] = self.k1.gradients_X(dL_dKdiag * self.k2.Kdiag(X[:, self.slice2]), X[:, self.slice1])
        target[:, self.slice2] += self.k2.gradients_X(dL_dKdiag * self.k1.Kdiag(X[:, self.slice1]), X[:, self.slice2])
        return target

@ -4,13 +4,13 @@
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
from kernpart import Kernpart
|
from kern import Kern
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
from ...util.misc import fast_array_equal, param_to_array
|
from ...util.misc import fast_array_equal, param_to_array
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from ...core.parameterization.transformations import Logexp
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class RBF(Kernpart):
|
class RBF(Kern):
|
||||||
"""
|
"""
|
||||||
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
|
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
|
||||||
|
|
||||||
|
|
@ -60,22 +60,8 @@ class RBF(Kernpart):
|
||||||
self.add_parameters(self.variance, self.lengthscale)
|
self.add_parameters(self.variance, self.lengthscale)
|
||||||
self.parameters_changed() # initializes cache
|
self.parameters_changed() # initializes cache
|
||||||
|
|
||||||
#self.update_inv_lengthscale(self.lengthscale)
|
|
||||||
#self.parameters_changed()
|
|
||||||
# initialize cache
|
|
||||||
#self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
|
||||||
#self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
|
|
||||||
|
|
||||||
# a set of optional args to pass to weave
|
|
||||||
# self.weave_options = {'headers' : ['<omp.h>'],
|
|
||||||
# 'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
|
|
||||||
# 'extra_link_args' : ['-lgomp']}
|
|
||||||
self.weave_options = {}
|
self.weave_options = {}
|
||||||
|
|
||||||
def on_input_change(self, X):
|
|
||||||
#self._K_computations(X, None)
|
|
||||||
pass
|
|
||||||
|
|
||||||
def update_lengthscale(self, l):
|
def update_lengthscale(self, l):
|
||||||
self.lengthscale2 = np.square(self.lengthscale)
|
self.lengthscale2 = np.square(self.lengthscale)
|
||||||
|
|
||||||
|
|
@ -84,23 +70,32 @@ class RBF(Kernpart):
|
||||||
self._X, self._X2 = np.empty(shape=(2, 1))
|
self._X, self._X2 = np.empty(shape=(2, 1))
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
|
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
|
||||||
|
|
||||||
def K(self, X, X2, target):
|
def K(self, X, X2=None):
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
target += self.variance * self._K_dvar
|
return self.variance * self._K_dvar
|
||||||
|
|
||||||
def Kdiag(self, X, target):
|
def Kdiag(self, X):
|
||||||
np.add(target, self.variance, target)
|
ret = np.ones(X.shape[0])
|
||||||
|
ret[:] = self.variance
|
||||||
|
return ret
|
||||||
|
|
||||||
def psi0(self, Z, mu, S, target):
|
def psi0(self, Z, posterior_variational):
|
||||||
target += self.variance
|
mu = posterior_variational.mean
|
||||||
|
ret = np.empty(mu.shape[0], dtype=np.float64)
|
||||||
|
ret[:] = self.variance
|
||||||
|
return ret
|
||||||
|
|
||||||
def psi1(self, Z, mu, S, target):
|
def psi1(self, Z, posterior_variational):
|
||||||
|
mu = posterior_variational.mean
|
||||||
|
S = posterior_variational.variance
|
||||||
self._psi_computations(Z, mu, S)
|
self._psi_computations(Z, mu, S)
|
||||||
target += self._psi1
|
return self._psi1
|
||||||
|
|
||||||
def psi2(self, Z, mu, S, target):
|
def psi2(self, Z, posterior_variational):
|
||||||
|
mu = posterior_variational.mean
|
||||||
|
S = posterior_variational.variance
|
||||||
self._psi_computations(Z, mu, S)
|
self._psi_computations(Z, mu, S)
|
||||||
target += self._psi2
|
return self._psi2
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
def update_gradients_full(self, dL_dK, X):
|
||||||
self._K_computations(X, None)
|
self._K_computations(X, None)
|
||||||
|
|
@ -131,7 +126,9 @@ class RBF(Kernpart):
|
||||||
else:
|
else:
|
||||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
|
||||||
|
mu = posterior_variational.mean
|
||||||
|
S = posterior_variational.variance
|
||||||
self._psi_computations(Z, mu, S)
|
self._psi_computations(Z, mu, S)
|
||||||
|
|
||||||
#contributions from psi0:
|
#contributions from psi0:
|
||||||
|
|
@ -165,7 +162,43 @@ class RBF(Kernpart):
|
||||||
else:
|
else:
|
||||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
|
||||||
|
mu = posterior_variational.mean
|
||||||
|
S = posterior_variational.variance
|
||||||
|
self._psi_computations(Z, mu, S)
|
||||||
|
|
||||||
|
#psi1
|
||||||
|
denominator = (self.lengthscale2 * (self._psi1_denom))
|
||||||
|
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
|
||||||
|
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
||||||
|
|
||||||
|
#psi2
|
||||||
|
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
|
||||||
|
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
|
||||||
|
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
|
||||||
|
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
||||||
|
|
||||||
|
grad += self.gradients_X(dL_dKmm, Z, None)
|
||||||
|
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def update_gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
|
||||||
|
mu = posterior_variational.mean
|
||||||
|
S = posterior_variational.variance
|
||||||
|
self._psi_computations(Z, mu, S)
|
||||||
|
#psi1
|
||||||
|
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
|
||||||
|
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
|
||||||
|
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
|
||||||
|
#psi2
|
||||||
|
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
|
||||||
|
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
|
||||||
|
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
|
||||||
|
|
||||||
|
posterior_variational.mean.gradient = grad_mu
|
||||||
|
posterior_variational.variance.gradient = grad_S
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
|
|
@ -173,44 +206,15 @@ class RBF(Kernpart):
|
||||||
else:
|
else:
|
||||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
||||||
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
return np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
def dKdiag_dX(self, dL_dKdiag, X):
|
||||||
pass
|
return np.zeros(X.shape[0])
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
# PSI statistics #
|
# PSI statistics #
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
denominator = (self.lengthscale2 * (self._psi1_denom))
|
|
||||||
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
|
|
||||||
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
|
|
||||||
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
|
|
||||||
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
|
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
|
|
||||||
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
|
|
||||||
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
|
|
||||||
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
|
||||||
|
|
||||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
|
||||||
"""Think N,num_inducing,num_inducing,input_dim """
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
|
|
||||||
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
|
|
||||||
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
|
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
# Precomputations #
|
# Precomputations #
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
@ -373,6 +377,7 @@ class RBF(Kernpart):
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
"""
|
"""
|
||||||
|
mu = param_to_array(mu)
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||||
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
||||||
type_converters=weave.converters.blitz, **self.weave_options)
|
type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
GPy/kern/_src/stationary.py (new file, 211 additions)
@@ -0,0 +1,211 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ... import util
import numpy as np
from scipy import integrate

class Stationary(Kern):
    def __init__(self, input_dim, variance, lengthscale, ARD, name):
        super(Stationary, self).__init__(input_dim, name)
        self.ARD = ARD
        if not ARD:
            if lengthscale is None:
                lengthscale = np.ones(1)
            else:
                lengthscale = np.asarray(lengthscale)
                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
        else:
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)
                assert lengthscale.size in [1, input_dim], "Bad lengthscales"
                if lengthscale.size != input_dim:
                    lengthscale = np.ones(input_dim) * lengthscale
            else:
                lengthscale = np.ones(self.input_dim)
        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
        self.variance = Param('variance', variance, Logexp())
        assert self.variance.size == 1
        self.add_parameters(self.variance, self.lengthscale)

    def _dist(self, X, X2):
        if X2 is None:
            X2 = X
        return X[:, None, :] - X2[None, :, :]

    def _scaled_dist(self, X, X2=None):
        return np.sqrt(np.sum(np.square(self._dist(X, X2) / self.lengthscale), -1))

    def Kdiag(self, X):
        ret = np.empty(X.shape[0])
        ret[:] = self.variance
        return ret

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = np.sum(dL_dKdiag)
        self.lengthscale.gradient = 0.

    def update_gradients_full(self, dL_dK, X, X2=None):
        K = self.K(X, X2)
        self.variance.gradient = np.sum(K * dL_dK) / self.variance

        rinv = self._inv_dist(X, X2)
        dL_dr = self.dK_dr(X, X2) * dL_dK
        x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3

        if self.ARD:
            self.lengthscale.gradient = -((dL_dr * rinv)[:, :, None] * x_xl3).sum(0).sum(0)
        else:
            self.lengthscale.gradient = -((dL_dr * rinv)[:, :, None] * x_xl3).sum()

    def _inv_dist(self, X, X2=None):
        dist = self._scaled_dist(X, X2)
        if X2 is None:
            nondiag = util.diag.offdiag_view(dist)
            nondiag[:] = 1. / nondiag
            return dist
        else:
            return 1. / np.where(dist != 0., dist, np.inf)

    def gradients_X(self, dL_dK, X, X2=None):
        dL_dr = self.dK_dr(X, X2) * dL_dK
        invdist = self._inv_dist(X, X2)
        ret = np.sum((invdist * dL_dr)[:, :, None] * self._dist(X, X2), 1) / self.lengthscale**2
        if X2 is None:
            ret *= 2.
        return ret

    def gradients_X_diag(self, dL_dKdiag, X):
        return np.zeros(X.shape)


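def _example_scaled_dist_sketch():
    # Illustrative sketch (editor's addition): _scaled_dist() above computes
    #     r_ij = sqrt( sum_q (X[i,q] - X2[j,q])^2 / lengthscale_q^2 ),
    # and each Stationary subclass below only has to supply K as a function of
    # r (plus dK_dr for the gradients).
    import numpy as np
    X = np.random.randn(5, 3)
    X2 = np.random.randn(4, 3)
    lengthscale = np.array([1.0, 0.5, 2.0])
    diff = X[:, None, :] - X2[None, :, :]
    r = np.sqrt(np.square(diff / lengthscale).sum(-1))
    variance = 2.0
    K_expquad = variance * np.exp(-0.5 * r**2)  # the ExpQuad/RBF form below
    return r, K_expquad
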
class Exponential(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
        super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        dist = self._scaled_dist(X, X2)
        return self.variance * np.exp(-0.5 * dist)

    def dK_dr(self, X, X2):
        return -0.5 * self.K(X, X2)

class Matern32(Stationary):
    """
    Matern 3/2 kernel:

    .. math::

       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{input\_dim} \\frac{(x_i-y_i)^2}{\ell_i^2} }

    """

    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        dist = self._scaled_dist(X, X2)
        return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)

    def dK_dr(self, X, X2):
        dist = self._scaled_dist(X, X2)
        return -3. * self.variance * dist * np.exp(-np.sqrt(3.) * dist)

    def Gram_matrix(self, F, F1, F2, lower, upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the
        RKHS norm. The use of this function is limited to input_dim=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.input_dim == 1
        def L(x, i):
            return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
        n = F.shape[0]
        G = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                G[i, j] = G[j, i] = integrate.quad(lambda x: L(x, i) * L(x, j), lower, upper)[0]
        Flower = np.array([f(lower) for f in F])[:, None]
        F1lower = np.array([f(lower) for f in F1])[:, None]
        return(self.lengthscale ** 3 / (12. * np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3. * self.variance) * np.dot(F1lower, F1lower.T))


class Matern52(Stationary):
    """
    Matern 5/2 kernel:

    .. math::

       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{input\_dim} \\frac{(x_i-y_i)^2}{\ell_i^2} }
    """

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
        return self.variance * (1 + np.sqrt(5.) * r + 5. / 3 * r**2) * np.exp(-np.sqrt(5.) * r)

    def dK_dr(self, X, X2):
        r = self._scaled_dist(X, X2)
        return self.variance * (10. / 3 * r - 5. * r - 5. * np.sqrt(5.) / 3 * r**2) * np.exp(-np.sqrt(5.) * r)

    def Gram_matrix(self, F, F1, F2, F3, lower, upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param F3: vector of third derivatives of F
        :type F3: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.input_dim == 1
        def L(x, i):
            return(5 * np.sqrt(5) / self.lengthscale**3 * F[i](x) + 15. / self.lengthscale**2 * F1[i](x) + 3 * np.sqrt(5) / self.lengthscale * F2[i](x) + F3[i](x))
        n = F.shape[0]
        G = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                G[i, j] = G[j, i] = integrate.quad(lambda x: L(x, i) * L(x, j), lower, upper)[0]
        G_coef = 3. * self.lengthscale**5 / (400 * np.sqrt(5))
        Flower = np.array([f(lower) for f in F])[:, None]
        F1lower = np.array([f(lower) for f in F1])[:, None]
        F2lower = np.array([f(lower) for f in F2])[:, None]
        orig = 9. / 8 * np.dot(Flower, Flower.T) + 9. * self.lengthscale**4 / 200 * np.dot(F2lower, F2lower.T)
        orig2 = 3. / 5 * self.lengthscale**2 * (np.dot(F1lower, F1lower.T) + 1. / 8 * np.dot(Flower, F2lower.T) + 1. / 8 * np.dot(F2lower, Flower.T))
        return(1. / self.variance * (G_coef * G + orig + orig2))


class ExpQuad(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
        return self.variance * np.exp(-0.5 * r**2)

    def dK_dr(self, X, X2):
        dist = self._scaled_dist(X, X2)
        return -dist * self.K(X, X2)

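def _example_matern32_sketch():
    # Illustrative sketch (editor's addition): evaluate the Matern 3/2 form
    #     k(r) = sigma^2 (1 + sqrt(3) r) exp(-sqrt(3) r)
    # directly from the scaled distances, mirroring Matern32.K above, and check
    # positive semi-definiteness of the resulting Gram matrix.
    import numpy as np
    X = np.random.randn(5, 1)
    lengthscale, variance = 0.8, 1.3
    r = np.sqrt(np.square((X[:, None, :] - X[None, :, :]) / lengthscale).sum(-1))
    K = variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
    assert np.all(np.linalg.eigvalsh(K) > -1e-10)  # eigenvalues >= 0 up to numerical error
    return K
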
GPy/kern/_src/sympykern.py (new file, 563 additions)
@@ -0,0 +1,563 @@
# Check Matthew Rocklin's blog post.
|
||||||
|
try:
|
||||||
|
import sympy as sp
|
||||||
|
sympy_available=True
|
||||||
|
except ImportError:
|
||||||
|
sympy_available=False
|
||||||
|
exit()
|
||||||
|
|
||||||
|
from sympy.core.cache import clear_cache
|
||||||
|
from sympy.utilities.codegen import codegen
|
||||||
|
|
||||||
|
try:
|
||||||
|
from scipy import weave
|
||||||
|
weave_available = True
|
||||||
|
except ImportError:
|
||||||
|
weave_available = False
|
||||||
|
|
||||||
|
import os
|
||||||
|
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
import pdb
|
||||||
|
import ast
|
||||||
|
|
||||||
|
from kernpart import Kernpart
|
||||||
|
from ...core.parameterization import Param
|
||||||
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
# TODO have this set up in a set up file!
|
||||||
|
user_code_storage = tempfile.gettempdir()
|
||||||
|
|
||||||
|
class spkern(Kernpart):
|
||||||
|
"""
|
||||||
|
A kernel object, where all the hard work in done by sympy.
|
||||||
|
|
||||||
|
:param k: the covariance function
|
||||||
|
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||||
|
|
||||||
|
To construct a new sympy kernel, you'll need to define:
|
||||||
|
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||||
|
- that's it! we'll extract the variables from the function k.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
- to handle multiple inputs, call them x_1, z_1, etc
|
||||||
|
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||||
|
"""
|
||||||
|
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
|
||||||
|
|
||||||
|
if name is None:
|
||||||
|
name='sympykern'
|
||||||
|
if k is None:
|
||||||
|
raise ValueError, "You must provide an argument for the covariance function."
|
||||||
|
super(spkern, self).__init__(input_dim, name)
|
||||||
|
|
||||||
|
self._sp_k = k
|
||||||
|
|
||||||
|
# pull the variable names out of the symbolic covariance function.
|
||||||
|
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||||
|
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||||
|
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
||||||
|
|
||||||
|
# Check that variable names make sense.
|
||||||
|
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
|
||||||
|
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
|
||||||
|
assert len(self._sp_x)==len(self._sp_z)
|
||||||
|
x_dim=len(self._sp_x)
|
||||||
|
|
||||||
|
# If it is a multi-output covariance, add an input for indexing the outputs.
|
||||||
|
self._real_input_dim = x_dim
|
||||||
|
# Check input dim is number of xs + 1 if output_dim is >1
|
||||||
|
assert self.input_dim == x_dim + int(output_dim > 1)
|
||||||
|
self.output_dim = output_dim
|
||||||
|
|
||||||
|
# extract parameter names from the covariance
|
||||||
|
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||||
|
|
||||||
|
|
||||||
|
# Look for parameters with index (subscripts), they are associated with different outputs.
|
||||||
|
if self.output_dim>1:
|
||||||
|
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||||
|
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
||||||
|
|
||||||
|
# Make sure parameter appears with both indices!
|
||||||
|
assert len(self._sp_theta_i)==len(self._sp_theta_j)
|
||||||
|
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
|
||||||
|
|
||||||
|
# Extract names of shared parameters (those without a subscript)
|
||||||
|
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
||||||
|
|
||||||
|
self.num_split_params = len(self._sp_theta_i)
|
||||||
|
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
||||||
|
for theta in self._split_theta_names:
|
||||||
|
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
|
||||||
|
self.add_parameters(getattr(self, theta))
|
||||||
|
|
||||||
|
#setattr(self, theta, np.ones(self.output_dim))
|
||||||
|
|
||||||
|
self.num_shared_params = len(self._sp_theta)
|
||||||
|
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.num_split_params = 0
|
||||||
|
self._split_theta_names = []
|
||||||
|
self._sp_theta = thetas
|
||||||
|
self.num_shared_params = len(self._sp_theta)
|
||||||
|
#self.num_params = self.num_shared_params
|
||||||
|
|
||||||
|
# Add parameters to the model.
|
||||||
|
for theta in self._sp_theta:
|
||||||
|
val = 1.0
|
||||||
|
if param is not None:
|
||||||
|
if param.has_key(theta):
|
||||||
|
val = param[theta]
|
||||||
|
#setattr(self, theta.name, val)
|
||||||
|
setattr(self, theta.name, Param(theta.name, val, None))
|
||||||
|
self.add_parameters(getattr(self, theta.name))
|
||||||
|
#deal with param
|
||||||
|
#self._set_params(self._get_params())
|
||||||
|
|
||||||
|
# Differentiate with respect to parameters.
|
||||||
|
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
||||||
|
if self.output_dim > 1:
|
||||||
|
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
|
||||||
|
|
||||||
|
# differentiate with respect to input variables.
|
||||||
|
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
||||||
|
|
||||||
|
# psi_stats aren't yet implemented.
|
||||||
|
if False:
|
||||||
|
self.compute_psi_stats()
|
||||||
|
|
||||||
|
self._code = {}
|
||||||
|
|
||||||
|
# generate the code for the covariance functions
|
||||||
|
self._gen_code()
|
||||||
|
|
||||||
|
if weave_available:
|
||||||
|
if False:
|
||||||
|
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
|
||||||
|
else:
|
||||||
|
extra_compile_args = []
|
||||||
|
|
||||||
|
self.weave_kwargs = {
|
||||||
|
'support_code': None, #self._function_code,
|
||||||
|
'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
|
||||||
|
'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
|
||||||
|
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
|
||||||
|
'extra_compile_args':extra_compile_args,
|
||||||
|
'extra_link_args':['-lgomp'],
|
||||||
|
'verbose':True}
|
||||||
|
self.parameters_changed() # initializes caches
|
||||||
|
|
||||||
|
|
||||||
|
def __add__(self,other):
|
||||||
|
return spkern(self._sp_k+other._sp_k)
|
||||||
|
|
||||||
|
def _gen_code(self):
|
||||||
|
|
||||||
|
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
|
||||||
|
code_list = [('k',self._sp_k)]
|
||||||
|
# gradients with respect to covariance input
|
||||||
|
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
|
||||||
|
# gradient with respect to parameters
|
||||||
|
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
|
||||||
|
# gradient with respect to multiple output parameters
|
||||||
|
if self.output_dim > 1:
|
||||||
|
argument_sequence += self._sp_theta_i + self._sp_theta_j
|
||||||
|
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
|
||||||
|
# generate c functions from sympy objects
|
||||||
|
if weave_available:
|
||||||
|
code_type = "C"
|
||||||
|
else:
|
||||||
|
code_type = "PYTHON"
|
||||||
|
# Need to add the sympy_helpers header in here.
|
||||||
|
(foo_c,self._function_code), (foo_h,self._function_header) = \
|
||||||
|
codegen(code_list,
|
||||||
|
code_type,
|
||||||
|
self.name,
|
||||||
|
argument_sequence=argument_sequence)
|
||||||
|
|
||||||
|
|
||||||
|
# Use weave to compute the underlying functions.
|
||||||
|
if weave_available:
|
||||||
|
# put the header file where we can find it
|
||||||
|
f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
|
||||||
|
f.write(self._function_header)
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
|
||||||
|
if weave_available:
|
||||||
|
# Substitute any known derivatives which sympy doesn't compute
|
||||||
|
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
||||||
|
# put the cpp file in user code storage (defaults to temp file location)
|
||||||
|
f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
|
||||||
|
else:
|
||||||
|
# put the python file in user code storage
|
||||||
|
f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
|
||||||
|
f.write(self._function_code)
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
if weave_available:
|
||||||
|
# arg_list will store the arguments required for the C code.
|
||||||
|
input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
|
||||||
|
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
|
||||||
|
|
||||||
|
# for multiple outputs reverse argument list is also required
|
||||||
|
if self.output_dim>1:
|
||||||
|
reverse_input_arg_list = list(input_arg_list)
|
||||||
|
reverse_input_arg_list.reverse()
|
||||||
|
|
||||||
|
# This gives the parameters for the arg list.
|
||||||
|
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
|
||||||
|
arg_list = input_arg_list + param_arg_list
|
||||||
|
|
||||||
|
precompute_list=[]
|
||||||
|
if self.output_dim > 1:
|
||||||
|
reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
|
||||||
|
# For multiple outputs, also need the split parameters.
|
||||||
|
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
|
||||||
|
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
|
||||||
|
arg_list += split_param_arg_list
|
||||||
|
reverse_arg_list += split_param_reverse_arg_list
|
||||||
|
# Extract the right output indices from the inputs.
|
||||||
|
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
|
||||||
|
precompute_list += c_define_output_indices
|
||||||
|
reverse_arg_string = ", ".join(reverse_arg_list)
|
||||||
|
arg_string = ", ".join(arg_list)
|
||||||
|
precompute_string = "\n".join(precompute_list)
|
||||||
|
|
||||||
|
# Now we use the arguments in code that computes the separate parts.
|
||||||
|
|
||||||
|
# Any precomputations will be done here eventually.
|
||||||
|
self._precompute = \
|
||||||
|
"""
|
||||||
|
// Precompute code would go here. It will be called when parameters are updated.
|
||||||
|
"""
|
||||||
|
|
    # Here's the code to do the looping for K
    self._code['K'] =\
    """
    // _K_code
    // Code for computing the covariance function.
    int i;
    int j;
    int n = target_array->dimensions[0];
    int num_inducing = target_array->dimensions[1];
    int input_dim = X_array->dimensions[1];
    //#pragma omp parallel for private(j)
    for (i=0;i<n;i++){
        for (j=0;j<num_inducing;j++){
            %s
            //target[i*num_inducing+j] =
            TARGET2(i, j) += k(%s);
        }
    }
    %s
    """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
    # adding a string representation of the function in the
    # comment forces recompile when needed
    self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')

    # Code to compute diagonal of covariance.
    diag_arg_string = re.sub('Z','X',arg_string)
    diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
    diag_arg_string = re.sub('j','i',diag_arg_string)
    diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
    diag_precompute_string = re.sub('Z','X',diag_precompute_string)
    diag_precompute_string = re.sub('j','i',diag_precompute_string)

    # Code to do the looping for Kdiag
    self._code['Kdiag'] =\
    """
    // _code['Kdiag']
    // Code for computing diagonal of covariance function.
    int i;
    int n = target_array->dimensions[0];
    int input_dim = X_array->dimensions[1];
    //#pragma omp parallel for
    for (i=0;i<n;i++){
        %s
        //target[i] =
        TARGET1(i)=k(%s);
    }
    %s
    """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
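To make the generated C easier to follow, here is a pure-NumPy sketch of what the K template computes once the %s slots are filled; k_func is an assumed stand-in for the compiled covariance expression and is not part of the GPy code.

import numpy as np

def dense_cov(X, Z, k_func, params):
    # Same double loop as the generated C: target[i, j] accumulates k(x_i, z_j).
    target = np.zeros((X.shape[0], Z.shape[0]))
    for i in range(X.shape[0]):
        for j in range(Z.shape[0]):
            target[i, j] += k_func(X[i], Z[j], *params)
    return target

# e.g. k_func = lambda x, z, variance, lengthscale: variance * np.exp(
#     -0.5 * np.sum((x - z) ** 2) / lengthscale ** 2)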
    # Code to compute gradients
    if self.output_dim > 1:
        for i, theta in enumerate(self._sp_theta_i):
            grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
            grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
            grad_func_list = c_define_output_indices + grad_func_list

            grad_func_string = '\n'.join(grad_func_list)
            self._code['dK_d' + theta.name] =\
            """
            int i;
            int j;
            int n = partial_array->dimensions[0];
            int num_inducing = partial_array->dimensions[1];
            int input_dim = X_array->dimensions[1];
            //#pragma omp parallel for private(j)
            for (i=0;i<n;i++){
                for (j=0;j<num_inducing;j++){
                    %s
                }
            }
            %s
            """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
            self._code['dK_d' + theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')

            # Code to compute gradients for Kdiag TODO: needs clean up
            diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
            diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
            diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
            diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
            self._code['dKdiag_d' + theta.name] =\
            """
            // _dKdiag_dtheta_code
            // Code for computing gradient of diagonal with respect to parameters.
            int i;
            int n = partial_array->dimensions[0];
            int input_dim = X_array->dimensions[1];
            for (i=0;i<n;i++){
                %s
            }
            %s
            """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

    for i, theta in enumerate(self._sp_theta):
        grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
        grad_func_string = '\n'.join(grad_func_list)

        self._code['dK_d' + theta.name] =\
        """
        // _dK_dtheta_code
        // Code for computing gradient of covariance with respect to parameters.
        int i;
        int j;
        int n = partial_array->dimensions[0];
        int num_inducing = partial_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<n;i++){
            for (j=0;j<num_inducing;j++){
                %s
            }
        }
        %s
        """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
        self._code['dK_d' + theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')

        # Code to compute gradients for Kdiag TODO: needs clean up
        diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
        diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
        diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
        diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
        self._code['dKdiag_d' + theta.name] =\
        """
        // _dKdiag_dtheta_code
        // Code for computing gradient of diagonal with respect to parameters.
        int i;
        int n = partial_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        for (i=0;i<n;i++){
            %s
        }
        %s
        """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
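The gradient templates above implement the usual chain rule, accumulating dL/dtheta = sum_ij dL_dK[i, j] * dK[i, j]/dtheta into TARGET1. A minimal NumPy sketch of the same accumulation, with dk_dtheta standing in for the analytic derivative of the kernel (an assumed placeholder, not a GPy function):

import numpy as np

def param_gradient(dL_dK, X, Z, dk_dtheta):
    # dk_dtheta(x, z) returns the derivative of k(x, z) w.r.t. a single parameter.
    grad = 0.0
    for i in range(X.shape[0]):
        for j in range(Z.shape[0]):
            grad += dL_dK[i, j] * dk_dtheta(X[i], Z[j])
    return grad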
    # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
    gradX_func_list = []
    if self.output_dim > 1:
        gradX_func_list += c_define_output_indices
    gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
    gradX_func_string = "\n".join(gradX_func_list)

    self._code['dK_dX'] = \
    """
    // _dK_dX_code
    // Code for computing gradient of covariance with respect to inputs.
    int i;
    int j;
    int n = partial_array->dimensions[0];
    int num_inducing = partial_array->dimensions[1];
    int input_dim = X_array->dimensions[1];
    //#pragma omp parallel for private(j)
    for (i=0;i<n; i++){
        for (j=0; j<num_inducing; j++){
            %s
        }
    }
    %s
    """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
    self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')

    diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
    diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
    diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
    diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)

    # Code for gradients of Kdiag wrt X
    self._code['dKdiag_dX'] = \
    """
    // _dKdiag_dX_code
    // Code for computing gradient of diagonal with respect to inputs.
    int n = partial_array->dimensions[0];
    int input_dim = X_array->dimensions[1];
    for (int i=0;i<n; i++){
        %s
    }
    %s
    """%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/")
    # adding a string representation forces recompile when needed. Get rid
    # of Zs in argument for diagonal. TODO: Why wasn't
    # diag_func_string called here? Need to check that.

#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
#TODO: similar functions when cython available.
#TODO: similar functions when only python available.
def _get_arg_names(self, target=None, Z=None, partial=None):
    arg_names = ['X']
    if target is not None:
        arg_names += ['target']
    for shared_params in self._sp_theta:
        arg_names += [shared_params.name]
    if Z is not None:
        arg_names += ['Z']
    if partial is not None:
        arg_names += ['partial']
    if self.output_dim > 1:
        arg_names += self._split_theta_names
        arg_names += ['output_dim']
    return arg_names

def _generate_inline(self, code, X, target=None, Z=None, partial=None):
    output_dim = self.output_dim
    # Need to extract parameters to local variables first
    for shared_params in self._sp_theta:
        locals()[shared_params.name] = getattr(self, shared_params.name)

    for split_params in self._split_theta_names:
        locals()[split_params] = np.asarray(getattr(self, split_params))
    arg_names = self._get_arg_names(target, Z, partial)

    if weave_available:
        return weave.inline(code=code, arg_names=arg_names, **self.weave_kwargs)
    else:
        raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
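The RuntimeError above is raised because only the weave backend is implemented here. As a rough idea of what a pure-Python fallback could look like, the same symbolic kernel can be evaluated through sympy's lambdify; the kernel expression below is an assumed one-dimensional example, not the code path of this commit.

import numpy as np
import sympy as sp

x, z, variance, lengthscale = sp.symbols('x z variance lengthscale')
k_expr = variance * sp.exp(-(x - z) ** 2 / (2 * lengthscale ** 2))
k_func = sp.lambdify((x, z, variance, lengthscale), k_expr, 'numpy')

X = np.random.randn(5, 1)
Z = np.random.randn(3, 1)
K = k_func(X, Z.T, 1.0, 2.0)  # broadcasting gives the 5 x 3 covariance matrix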
def K(self, X, Z, target):
    if Z is None:
        self._generate_inline(self._code['K_X'], X, target)
    else:
        self._generate_inline(self._code['K'], X, target, Z)

def Kdiag(self, X, target):
    self._generate_inline(self._code['Kdiag'], X, target)

def _param_grad_helper(self, partial, X, Z, target):
    if Z is None:
        self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
    else:
        self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)

def dKdiag_dtheta(self, partial, X, target):
    self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial)

def gradients_X(self, partial, X, Z, target):
    if Z is None:
        self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
    else:
        self._generate_inline(self._code['dK_dX'], X, target, Z, partial)

def dKdiag_dX(self, partial, X, target):
    self._generate_inline(self._code['dKdiag_dX'], X, target, Z=None, partial=partial)
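Note that these wrappers keep the old in-place convention: the caller allocates target and each kernel part adds its contribution into it, rather than returning a matrix as the new Kern interface elsewhere in this commit does. A small illustration of that calling pattern, with kern_parts standing in for any objects exposing the old K(X, Z, target) signature:

import numpy as np

def accumulate_K(kern_parts, X, Z):
    target = np.zeros((X.shape[0], Z.shape[0]))
    for part in kern_parts:
        part.K(X, Z, target)  # each part adds its contribution in place
    return target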
def compute_psi_stats(self):
    # define some normal distributions
    mus = [sp.var('mu_%i'%i, real=True) for i in range(self.input_dim)]
    Ss = [sp.var('S_%i'%i, positive=True) for i in range(self.input_dim)]
    normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]

    # do some integration!
    # self._sp_psi0 = ??
    self._sp_psi1 = self._sp_k
    for i in range(self.input_dim):
        print 'performing integrals %i of %i'%(i+1, 2*self.input_dim)
        sys.stdout.flush()
        self._sp_psi1 *= normals[i]
        self._sp_psi1 = sp.integrate(self._sp_psi1, (self._sp_x[i], -sp.oo, sp.oo))
        clear_cache()
    self._sp_psi1 = self._sp_psi1.simplify()

    # and here's psi2 (eek!)
    zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
    self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z, zprime))
    for i in range(self.input_dim):
        print 'performing integrals %i of %i'%(self.input_dim+i+1, 2*self.input_dim)
        sys.stdout.flush()
        self._sp_psi2 *= normals[i]
        self._sp_psi2 = sp.integrate(self._sp_psi2, (self._sp_x[i], -sp.oo, sp.oo))
        clear_cache()
    self._sp_psi2 = self._sp_psi2.simplify()

def parameters_changed(self):
    # Reset the caches
    self._cache, self._cache2 = np.empty(shape=(2, 1))
    self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
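compute_psi_stats integrates the symbolic kernel against Gaussian input densities (psi1 is the expectation of k(x, z) under N(x | mu, S)), one input dimension at a time. A self-contained one-dimensional sympy sketch of the same integral, using an assumed RBF-style kernel rather than the general self._sp_k:

import sympy as sp

x, z, mu = sp.symbols('x z mu', real=True)
S, variance, lengthscale = sp.symbols('S variance lengthscale', positive=True)

k = variance * sp.exp(-(x - z) ** 2 / (2 * lengthscale ** 2))
normal = (2 * sp.pi * S) ** sp.Rational(-1, 2) * sp.exp(-(x - mu) ** 2 / (2 * S))

# psi1 for a single data point and inducing input: integrate x out analytically.
psi1 = sp.simplify(sp.integrate(k * normal, (x, -sp.oo, sp.oo)))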
def update_gradients_full(self, dL_dK, X):
    # Need to extract parameters to local variables first
    self._K_computations(X, None)
    for shared_params in self._sp_theta:
        parameter = getattr(self, shared_params.name)
        code = self._code['dK_d' + shared_params.name]
        setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))

    for split_params in self._split_theta_names:
        parameter = getattr(self, split_params.name)
        code = self._code['dK_d' + split_params.name]
        setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))

# def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
#     #contributions from Kdiag
#     self.variance.gradient = np.sum(dL_dKdiag)
#
#     #from Knm
#     self._K_computations(X, Z)
#     self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
#     if self.ARD:
#         self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
#     else:
#         self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
#
#     #from Kmm
#     self._K_computations(Z, None)
#     self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
#     if self.ARD:
#         self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
#     else:
#         self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)

#---------------------------------------#
#            Precomputations            #
#---------------------------------------#

def _K_computations(self, X, Z):
    if Z is None:
        self._generate_inline(self._precompute, X)
    else:
        self._generate_inline(self._precompute, X, Z=Z)
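Since update_gradients_full fills each parameter's gradient from dL_dK, a cheap sanity check for any such implementation is a central finite difference on the scalar objective L(theta) = sum(dL_dK * K(theta)). The helper below is a generic sketch; k_of_theta is an assumed callable mapping a parameter vector to a covariance matrix, not a GPy function.

import numpy as np

def fd_gradient(k_of_theta, theta, dL_dK, eps=1e-6):
    # Central finite differences of sum(dL_dK * K(theta)) w.r.t. each parameter.
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        tp, tm = theta.copy(), theta.copy()
        tp[i] += eps
        tm[i] -= eps
        grad[i] = np.sum(dL_dK * (k_of_theta(tp) - k_of_theta(tm))) / (2 * eps)
    return grad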
@ -1,12 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kernpart import Kernpart
from kern import Kern
import numpy as np
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...core.parameterization.transformations import Logexp

class White(Kernpart):
class White(Kern):
    """
    """
    White noise kernel.
    White noise kernel.

@ -20,14 +20,17 @@ class White(Kernpart):
        self.input_dim = input_dim
        self.input_dim = input_dim
        self.variance = Param('variance', variance, Logexp())
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)
        self.add_parameters(self.variance)
        self._psi1 = 0 # TODO: more elegance here

    def K(self,X,X2,target):
    def K(self, X, X2=None):
        if X2 is None:
        if X2 is None:
            target += np.eye(X.shape[0])*self.variance
            return np.eye(X.shape[0])*self.variance
        else:
            return np.zeros((X.shape[0], X2.shape[0]))

    def Kdiag(self,X,target):
    def Kdiag(self,X):
        target += self.variance
        ret = np.ones(X.shape[0])
        ret[:] = self.variance
        return ret

    def update_gradients_full(self, dL_dK, X):
    def update_gradients_full(self, dL_dK, X):
        self.variance.gradient = np.trace(dL_dK)
        self.variance.gradient = np.trace(dL_dK)

@ -38,14 +41,8 @@ class White(Kernpart):
    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
        raise NotImplementedError
        raise NotImplementedError

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
    def gradients_X(self,dL_dK,X,X2):
        target += np.sum(dL_dKdiag)
        return np.zeros_like(X)

    def gradients_X(self,dL_dK,X,X2,target):
        pass

    def dKdiag_dX(self,dL_dKdiag,X,target):
        pass

    def psi0(self,Z,mu,S,target):
    def psi0(self,Z,mu,S,target):
        pass # target += self.variance
        pass # target += self.variance
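The reworked White kernel returns its results instead of accumulating into a target array. The following quick check sketches the behaviour the new K is expected to have; it is written against plain NumPy rather than the GPy class, so the function name is illustrative only.

import numpy as np

def white_K(X, X2=None, variance=1.0):
    if X2 is None:
        return np.eye(X.shape[0]) * variance    # noise appears only on the diagonal
    return np.zeros((X.shape[0], X2.shape[0]))  # independent noise has no cross covariance

X = np.random.randn(4, 1)
assert np.allclose(white_K(X), np.eye(4))
assert white_K(X, np.random.randn(3, 1)).shape == (4, 3)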
680  GPy/kern/kern.py
@ -1,680 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
|
||||||
import itertools
|
|
||||||
from parts.prod import Prod as prod
|
|
||||||
from parts.linear import Linear
|
|
||||||
from parts.kernpart import Kernpart
|
|
||||||
from ..core.parameterization import Parameterized
|
|
||||||
from GPy.core.parameterization.param import Param
|
|
||||||
|
|
||||||
class kern(Parameterized):
|
|
||||||
def __init__(self, input_dim, parts=[], input_slices=None):
|
|
||||||
"""
|
|
||||||
This is the main kernel class for GPy. It handles multiple
|
|
||||||
(additive) kernel functions, and keeps track of various things
|
|
||||||
like which parameters live where.
|
|
||||||
|
|
||||||
The technical code for kernels is divided into _parts_ (see
|
|
||||||
e.g. rbf.py). This object contains a list of parts, which are
|
|
||||||
computed additively. For multiplication, special _prod_ parts
|
|
||||||
are used.
|
|
||||||
|
|
||||||
:param input_dim: The dimensionality of the kernel's input space
|
|
||||||
:type input_dim: int
|
|
||||||
:param parts: the 'parts' (PD functions) of the kernel
|
|
||||||
:type parts: list of Kernpart objects
|
|
||||||
:param input_slices: the slices on the inputs which apply to each kernel
|
|
||||||
:type input_slices: list of slice objects, or list of bools
|
|
||||||
|
|
||||||
"""
|
|
||||||
super(kern, self).__init__('kern')
|
|
||||||
self.add_parameters(*parts)
|
|
||||||
self.input_dim = input_dim
|
|
||||||
|
|
||||||
if input_slices is None:
|
|
||||||
self.input_slices = [slice(None) for p in self._parameters_]
|
|
||||||
else:
|
|
||||||
assert len(input_slices) == len(self._parameters_)
|
|
||||||
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
|
|
||||||
|
|
||||||
for p in self._parameters_:
|
|
||||||
assert isinstance(p, Kernpart), "bad kernel part"
|
|
||||||
|
|
||||||
def parameters_changed(self):
|
|
||||||
[p.parameters_changed() for p in self._parameters_]
|
|
||||||
|
|
||||||
def connect_input(self, Xparam):
|
|
||||||
[p.connect_input(Xparam) for p in self._parameters_]
|
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
"""
|
|
||||||
return Parameterized._getstate(self) + [#self._parameters_,
|
|
||||||
#self.num_params,
|
|
||||||
self.input_dim,
|
|
||||||
self.input_slices,
|
|
||||||
self._param_slices_
|
|
||||||
]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self._param_slices_ = state.pop()
|
|
||||||
self.input_slices = state.pop()
|
|
||||||
self.input_dim = state.pop()
|
|
||||||
#self.num_params = state.pop()
|
|
||||||
#self._parameters_ = state.pop()
|
|
||||||
Parameterized._setstate(self, state)
|
|
||||||
|
|
||||||
|
|
||||||
def plot_ARD(self, *args):
|
|
||||||
"""If an ARD kernel is present, plot a bar representation using matplotlib
|
|
||||||
|
|
||||||
See GPy.plotting.matplot_dep.plot_ARD
|
|
||||||
"""
|
|
||||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
|
||||||
from ..plotting.matplot_dep import kernel_plots
|
|
||||||
return kernel_plots.plot_ARD(self,*args)
|
|
||||||
|
|
||||||
# def _transform_gradients(self, g):
|
|
||||||
# """
|
|
||||||
# Apply the transformations of the kernel so that the returned vector
|
|
||||||
# represents the gradient in the transformed space (i.e. that given by
|
|
||||||
# get_params_transformed())
|
|
||||||
#
|
|
||||||
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
|
|
||||||
# """
|
|
||||||
# x = self._get_params()
|
|
||||||
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
|
|
||||||
# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__]
|
|
||||||
# # for constraint, index in self.constraints.iteritems():
|
|
||||||
# # if constraint != __fixed__:
|
|
||||||
# # g[index] = g[index] * constraint.gradfactor(x[index])
|
|
||||||
# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
|
|
||||||
# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]]
|
|
||||||
# # if len(self.tied_indices) or len(self.fixed_indices):
|
|
||||||
# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
|
|
||||||
# # return np.delete(g, to_remove)
|
|
||||||
# # else:
|
|
||||||
# if self._fixes_ is not None: return g[self._fixes_]
|
|
||||||
# return g
|
|
||||||
# x = self._get_params()
|
|
||||||
# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
|
|
||||||
# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
|
|
||||||
# if len(self.tied_indices) or len(self.fixed_indices):
|
|
||||||
# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
|
|
||||||
# return np.delete(g, to_remove)
|
|
||||||
# else:
|
|
||||||
# return g
|
|
||||||
|
|
||||||
def __add__(self, other):
|
|
||||||
""" Overloading of the '+' operator. for more control, see self.add """
|
|
||||||
return self.add(other)
|
|
||||||
|
|
||||||
def add(self, other, tensor=False):
|
|
||||||
"""
|
|
||||||
Add another kernel to this one.
|
|
||||||
|
|
||||||
If Tensor is False, both kernels are defined on the same _space_. then
|
|
||||||
the created kernel will have the same number of inputs as self and
|
|
||||||
other (which must be the same).
|
|
||||||
|
|
||||||
If Tensor is True, then the dimensions are stacked 'horizontally', so
|
|
||||||
that the resulting kernel has self.input_dim + other.input_dim
|
|
||||||
|
|
||||||
:param other: the other kernel to be added
|
|
||||||
:type other: GPy.kern
|
|
||||||
|
|
||||||
"""
|
|
||||||
if tensor:
|
|
||||||
D = self.input_dim + other.input_dim
|
|
||||||
self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
|
|
||||||
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
|
|
||||||
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
|
|
||||||
|
|
||||||
newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)
|
|
||||||
|
|
||||||
# transfer constraints:
|
|
||||||
# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
|
|
||||||
# newkern.constraints = self.constraints + other.constraints
|
|
||||||
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
|
|
||||||
# newkern.fixed_values = self.fixed_values + other.fixed_values
|
|
||||||
# newkern.constraints = self.constraints + other.constraints
|
|
||||||
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
|
|
||||||
else:
|
|
||||||
assert self.input_dim == other.input_dim
|
|
||||||
newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
|
|
||||||
# transfer constraints:
|
|
||||||
# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
|
|
||||||
# newkern.constraints = self.constraints + other.constraints
|
|
||||||
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
|
|
||||||
# newkern.fixed_values = self.fixed_values + other.fixed_values
|
|
||||||
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
|
|
||||||
|
|
||||||
[newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
|
|
||||||
[newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
|
|
||||||
newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
|
|
||||||
|
|
||||||
return newkern
|
|
||||||
|
|
||||||
def __call__(self, X, X2=None):
|
|
||||||
return self.K(X, X2)
|
|
||||||
|
|
||||||
def __mul__(self, other):
|
|
||||||
""" Here we overload the '*' operator. See self.prod for more information"""
|
|
||||||
return self.prod(other)
|
|
||||||
|
|
||||||
def __pow__(self, other, tensor=False):
|
|
||||||
"""
|
|
||||||
Shortcut for tensor `prod`.
|
|
||||||
"""
|
|
||||||
return self.prod(other, tensor=True)
|
|
||||||
|
|
||||||
def prod(self, other, tensor=False):
|
|
||||||
"""
|
|
||||||
Multiply two kernels (either on the same space, or on the tensor product of the input space).
|
|
||||||
|
|
||||||
:param other: the other kernel to be added
|
|
||||||
:type other: GPy.kern
|
|
||||||
:param tensor: whether or not to use the tensor space (default is false).
|
|
||||||
:type tensor: bool
|
|
||||||
|
|
||||||
"""
|
|
||||||
K1 = self
|
|
||||||
K2 = other
|
|
||||||
#K1 = self.copy()
|
|
||||||
#K2 = other.copy()
|
|
||||||
|
|
||||||
slices = []
|
|
||||||
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
|
|
||||||
s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
|
|
||||||
s1[sl1], s2[sl2] = [True], [True]
|
|
||||||
slices += [s1 + s2]
|
|
||||||
|
|
||||||
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]
|
|
||||||
|
|
||||||
if tensor:
|
|
||||||
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
|
|
||||||
else:
|
|
||||||
newkern = kern(K1.input_dim, newkernparts, slices)
|
|
||||||
|
|
||||||
#newkern._follow_constrains(K1, K2)
|
|
||||||
return newkern
|
|
||||||
|
|
||||||
# def _follow_constrains(self, K1, K2):
|
|
||||||
#
|
|
||||||
# # Build the array that allows to go from the initial indices of the param to the new ones
|
|
||||||
# K1_param = []
|
|
||||||
# n = 0
|
|
||||||
# for k1 in K1.parts:
|
|
||||||
# K1_param += [range(n, n + k1.num_params)]
|
|
||||||
# n += k1.num_params
|
|
||||||
# n = 0
|
|
||||||
# K2_param = []
|
|
||||||
# for k2 in K2.parts:
|
|
||||||
# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
|
|
||||||
# n += k2.num_params
|
|
||||||
# index_param = []
|
|
||||||
# for p1 in K1_param:
|
|
||||||
# for p2 in K2_param:
|
|
||||||
# index_param += p1 + p2
|
|
||||||
# index_param = np.array(index_param)
|
|
||||||
#
|
|
||||||
# # Get the ties and constrains of the kernels before the multiplication
|
|
||||||
# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
|
|
||||||
#
|
|
||||||
# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
|
|
||||||
# prev_constr = K1.constraints + K2.constraints
|
|
||||||
#
|
|
||||||
# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
|
|
||||||
# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values
|
|
||||||
#
|
|
||||||
# # follow the previous ties
|
|
||||||
# for arr in prev_ties:
|
|
||||||
# for j in arr:
|
|
||||||
# index_param[np.where(index_param == j)[0]] = arr[0]
|
|
||||||
#
|
|
||||||
# # ties and constrains
|
|
||||||
# for i in range(K1.num_params + K2.num_params):
|
|
||||||
# index = np.where(index_param == i)[0]
|
|
||||||
# if index.size > 1:
|
|
||||||
# self.tie_params(index)
|
|
||||||
# for i, t in zip(prev_constr_ind, prev_constr):
|
|
||||||
# self.constrain(np.where(index_param == i)[0], t)
|
|
||||||
#
|
|
||||||
# def _get_params(self):
|
|
||||||
# return np.hstack(self._parameters_)
|
|
||||||
# return np.hstack([p._get_params() for p in self._parameters_])
|
|
||||||
|
|
||||||
# def _set_params(self, x):
|
|
||||||
# import ipdb;ipdb.set_trace()
|
|
||||||
# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)]
|
|
||||||
|
|
||||||
# def _get_param_names(self):
|
|
||||||
# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count
|
|
||||||
# part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
|
|
||||||
# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
|
|
||||||
# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
|
|
||||||
# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
|
|
||||||
#
|
|
||||||
# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
|
|
||||||
|
|
||||||
def K(self, X, X2=None, which_parts='all'):
|
|
||||||
"""
|
|
||||||
Compute the kernel function.
|
|
||||||
|
|
||||||
:param X: the first set of inputs to the kernel
|
|
||||||
:param X2: (optional) the second set of arguments to the kernel. If X2
|
|
||||||
is None, this is passed throgh to the 'part' object, which
|
|
||||||
handles this as X2 == X.
|
|
||||||
:param which_parts: a list of booleans detailing whether to include
|
|
||||||
each of the part functions. By default, 'all'
|
|
||||||
indicates all parts
|
|
||||||
"""
|
|
||||||
if which_parts == 'all':
|
|
||||||
which_parts = [True] * self.size
|
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
if X2 is None:
|
|
||||||
target = np.zeros((X.shape[0], X.shape[0]))
|
|
||||||
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
|
|
||||||
else:
|
|
||||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
|
||||||
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
[p.update_gradients_full(dL_dK, X) for p in self._parameters_]
|
|
||||||
|
|
||||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
|
||||||
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
|
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
|
||||||
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
|
|
||||||
|
|
||||||
def _param_grad_helper(self, dL_dK, X, X2=None):
|
|
||||||
"""
|
|
||||||
Compute the gradient of the covariance function with respect to the parameters.
|
|
||||||
|
|
||||||
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
|
||||||
:type dL_dK: Np.ndarray (num_samples x num_inducing)
|
|
||||||
:param X: Observed data inputs
|
|
||||||
:type X: np.ndarray (num_samples x input_dim)
|
|
||||||
:param X2: Observed data inputs (optional, defaults to X)
|
|
||||||
:type X2: np.ndarray (num_inducing x input_dim)
|
|
||||||
|
|
||||||
returns: dL_dtheta
|
|
||||||
"""
|
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
target = np.zeros(self.size)
|
|
||||||
if X2 is None:
|
|
||||||
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
|
||||||
else:
|
|
||||||
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
|
||||||
"""Compute the gradient of the objective function with respect to X.
|
|
||||||
|
|
||||||
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
|
||||||
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
|
||||||
:param X: Observed data inputs
|
|
||||||
:type X: np.ndarray (num_samples x input_dim)
|
|
||||||
:param X2: Observed data inputs (optional, defaults to X)
|
|
||||||
:type X2: np.ndarray (num_inducing x input_dim)"""
|
|
||||||
|
|
||||||
target = np.zeros_like(X)
|
|
||||||
if X2 is None:
|
|
||||||
[p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
else:
|
|
||||||
[p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def Kdiag(self, X, which_parts='all'):
|
|
||||||
"""Compute the diagonal of the covariance function for inputs X."""
|
|
||||||
if which_parts == 'all':
|
|
||||||
which_parts = [True] * self.size
|
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
target = np.zeros(X.shape[0])
|
|
||||||
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self, dL_dKdiag, X):
|
|
||||||
"""Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
|
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
assert dL_dKdiag.size == X.shape[0]
|
|
||||||
target = np.zeros(self.size)
|
|
||||||
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
|
||||||
return self._transform_gradients(target)
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X):
|
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
target = np.zeros_like(X)
|
|
||||||
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def psi0(self, Z, mu, S):
|
|
||||||
target = np.zeros(mu.shape[0])
|
|
||||||
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
|
|
||||||
target = np.zeros(self.size)
|
|
||||||
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
|
|
||||||
return self._transform_gradients(target)
|
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
|
|
||||||
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
|
|
||||||
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target_mu, target_S
|
|
||||||
|
|
||||||
def psi1(self, Z, mu, S):
|
|
||||||
target = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
|
|
||||||
target = np.zeros((self.size))
|
|
||||||
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
|
|
||||||
return self._transform_gradients(target)
|
|
||||||
|
|
||||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
|
|
||||||
target = np.zeros_like(Z)
|
|
||||||
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
|
|
||||||
"""return shapes are num_samples,num_inducing,input_dim"""
|
|
||||||
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
|
||||||
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target_mu, target_S
|
|
||||||
|
|
||||||
def psi2(self, Z, mu, S):
|
|
||||||
"""
|
|
||||||
Computer the psi2 statistics for the covariance function.
|
|
||||||
|
|
||||||
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
|
|
||||||
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
|
|
||||||
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
|
|
||||||
|
|
||||||
"""
|
|
||||||
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
|
||||||
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
|
|
||||||
# compute the "cross" terms
|
|
||||||
# TODO: input_slices needed
|
|
||||||
crossterms = 0
|
|
||||||
|
|
||||||
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
|
|
||||||
if i_s1 == i_s2:
|
|
||||||
# TODO psi1 this must be faster/better/precached/more nice
|
|
||||||
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
|
|
||||||
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
|
|
||||||
|
|
||||||
prod = np.multiply(tmp1, tmp2)
|
|
||||||
crossterms += prod[:, :, None] + prod[:, None, :]
|
|
||||||
|
|
||||||
target += crossterms
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
|
|
||||||
"""Gradient of the psi2 statistics with respect to the parameters."""
|
|
||||||
target = np.zeros(self.size)
|
|
||||||
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
|
||||||
|
|
||||||
# compute the "cross" terms
|
|
||||||
# TODO: better looping, input_slices
|
|
||||||
for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
|
|
||||||
p1, p2 = self._parameters_[i1], self._parameters_[i2]
|
|
||||||
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
|
||||||
ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
|
|
||||||
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
p1.psi1(Z, mu, S, tmp)
|
|
||||||
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
|
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
|
|
||||||
target = np.zeros_like(Z)
|
|
||||||
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
# target *= 2
|
|
||||||
|
|
||||||
# compute the "cross" terms
|
|
||||||
# TODO: we need input_slices here.
|
|
||||||
for p1, p2 in itertools.permutations(self._parameters_, 2):
|
|
||||||
# if p1.name == 'linear' and p2.name == 'linear':
|
|
||||||
# raise NotImplementedError("We don't handle linear/linear cross-terms")
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
p1.psi1(Z, mu, S, tmp)
|
|
||||||
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
|
|
||||||
|
|
||||||
return target * 2
|
|
||||||
|
|
||||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
|
|
||||||
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
|
||||||
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
|
|
||||||
# compute the "cross" terms
|
|
||||||
# TODO: we need input_slices here.
|
|
||||||
for p1, p2 in itertools.permutations(self._parameters_, 2):
|
|
||||||
# if p1.name == 'linear' and p2.name == 'linear':
|
|
||||||
# raise NotImplementedError("We don't handle linear/linear cross-terms")
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
p1.psi1(Z, mu, S, tmp)
|
|
||||||
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
|
|
||||||
|
|
||||||
return target_mu, target_S
|
|
||||||
|
|
||||||
def plot(self, *args, **kwargs):
|
|
||||||
"""
|
|
||||||
See GPy.plotting.matplot_dep.plot
|
|
||||||
"""
|
|
||||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
|
||||||
from ..plotting.matplot_dep import kernel_plots
|
|
||||||
kernel_plots.plot(self,*args)
|
|
||||||
|
|
||||||
from GPy.core.model import Model
|
|
||||||
|
|
||||||
class Kern_check_model(Model):
|
|
||||||
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
|
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
|
||||||
Model.__init__(self, 'kernel_test_model')
|
|
||||||
num_samples = 20
|
|
||||||
num_samples2 = 10
|
|
||||||
if kernel==None:
|
|
||||||
kernel = GPy.kern.rbf(1)
|
|
||||||
if X==None:
|
|
||||||
X = np.random.randn(num_samples, kernel.input_dim)
|
|
||||||
if dL_dK==None:
|
|
||||||
if X2==None:
|
|
||||||
dL_dK = np.ones((X.shape[0], X.shape[0]))
|
|
||||||
else:
|
|
||||||
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
|
||||||
|
|
||||||
self.kernel=kernel
|
|
||||||
self.add_parameter(kernel)
|
|
||||||
self.X = X
|
|
||||||
self.X2 = X2
|
|
||||||
self.dL_dK = dL_dK
|
|
||||||
|
|
||||||
def is_positive_definite(self):
|
|
||||||
v = np.linalg.eig(self.kernel.K(self.X))[0]
|
|
||||||
if any(v<-10*sys.float_info.epsilon):
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
|
||||||
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
|
|
||||||
|
|
||||||
class Kern_check_dK_dtheta(Kern_check_model):
|
|
||||||
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
|
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
|
|
||||||
|
|
||||||
class Kern_check_dKdiag_dtheta(Kern_check_model):
|
|
||||||
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
|
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None):
|
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
|
||||||
if dL_dK==None:
|
|
||||||
self.dL_dK = np.ones((self.X.shape[0]))
|
|
||||||
def parameters_changed(self):
|
|
||||||
self.kernel.update_gradients_full(self.dL_dK, self.X)
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
|
||||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
|
|
||||||
|
|
||||||
class Kern_check_dK_dX(Kern_check_model):
|
|
||||||
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
|
||||||
self.remove_parameter(kernel)
|
|
||||||
self.X = Param('X', self.X)
|
|
||||||
self.add_parameter(self.X)
|
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
|
|
||||||
|
|
||||||
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
|
||||||
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
|
||||||
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
|
||||||
if dL_dK==None:
|
|
||||||
self.dL_dK = np.ones((self.X.shape[0]))
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
|
||||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
|
|
||||||
|
|
||||||
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
|
||||||
"""
|
|
||||||
This function runs on kernels to check the correctness of their
|
|
||||||
implementation. It checks that the covariance function is positive definite
|
|
||||||
for a randomly generated data set.
|
|
||||||
|
|
||||||
:param kern: the kernel to be tested.
|
|
||||||
:type kern: GPy.kern.Kernpart
|
|
||||||
:param X: X input values to test the covariance function.
|
|
||||||
:type X: ndarray
|
|
||||||
:param X2: X2 input values to test the covariance function.
|
|
||||||
:type X2: ndarray
|
|
||||||
|
|
||||||
"""
|
|
||||||
pass_checks = True
|
|
||||||
if X==None:
|
|
||||||
X = np.random.randn(10, kern.input_dim)
|
|
||||||
if output_ind is not None:
|
|
||||||
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
|
|
||||||
if X2==None:
|
|
||||||
X2 = np.random.randn(20, kern.input_dim)
|
|
||||||
if output_ind is not None:
|
|
||||||
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking covariance function is positive definite.")
|
|
||||||
result = Kern_check_model(kern, X=X).is_positive_definite()
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Positive definite check failed for " + kern.name + " covariance function.")
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking gradients of K(X, X) wrt theta.")
|
|
||||||
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
|
||||||
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking gradients of K(X, X2) wrt theta.")
|
|
||||||
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
|
||||||
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking gradients of Kdiag(X) wrt theta.")
|
|
||||||
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
|
||||||
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking gradients of K(X, X) wrt X.")
|
|
||||||
try:
|
|
||||||
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
|
|
||||||
except NotImplementedError:
|
|
||||||
result=True
|
|
||||||
if verbose:
|
|
||||||
print("gradients_X not implemented for " + kern.name)
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
|
||||||
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking gradients of K(X, X2) wrt X.")
|
|
||||||
try:
|
|
||||||
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
|
|
||||||
except NotImplementedError:
|
|
||||||
result=True
|
|
||||||
if verbose:
|
|
||||||
print("gradients_X not implemented for " + kern.name)
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
|
||||||
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
print("Checking gradients of Kdiag(X) wrt X.")
|
|
||||||
try:
|
|
||||||
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
|
|
||||||
except NotImplementedError:
|
|
||||||
result=True
|
|
||||||
if verbose:
|
|
||||||
print("gradients_X not implemented for " + kern.name)
|
|
||||||
if result and verbose:
|
|
||||||
print("Check passed.")
|
|
||||||
if not result:
|
|
||||||
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
|
||||||
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
|
|
||||||
pass_checks = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
return pass_checks
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
def theta(x):
|
|
||||||
"""Heavisdie step function"""
|
|
||||||
return np.where(x>=0.,1.,0.)
|
|
||||||
|
|
||||||
class Brownian(Kernpart):
|
|
||||||
"""
|
|
||||||
Brownian Motion kernel.
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variance:
|
|
||||||
:type variance: float
|
|
||||||
"""
|
|
||||||
def __init__(self,input_dim,variance=1.):
|
|
||||||
self.input_dim = input_dim
|
|
||||||
assert self.input_dim==1, "Brownian motion in 1D only"
|
|
||||||
self.num_params = 1
|
|
||||||
self.name = 'Brownian'
|
|
||||||
self._set_params(np.array([variance]).flatten())
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
return self.variance
|
|
||||||
|
|
||||||
def _set_params(self,x):
|
|
||||||
assert x.shape==(1,)
|
|
||||||
self.variance = x
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
return ['variance']
|
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
|
||||||
if X2 is None:
|
|
||||||
X2 = X
|
|
||||||
target += self.variance*np.fmin(X,X2.T)
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
target += self.variance*X.flatten()
|
|
||||||
|
|
||||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
|
||||||
if X2 is None:
|
|
||||||
X2 = X
|
|
||||||
target += np.sum(np.fmin(X,X2.T)*dL_dK)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
|
||||||
target += np.dot(X.flatten(), dL_dKdiag)
|
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
|
||||||
raise NotImplementedError, "TODO"
|
|
||||||
#target += self.variance
|
|
||||||
#target -= self.variance*theta(X-X2.T)
|
|
||||||
#if X.shape==X2.shape:
|
|
||||||
#if np.all(X==X2):
|
|
||||||
#np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0])
|
|
||||||
|
|
||||||
|
|
||||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
|
||||||
target += self.variance*dL_dKdiag[:,None]
|
|
||||||
|
|
||||||
|
|
@ -1,139 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
import numpy as np
|
|
||||||
from scipy import integrate
|
|
||||||
|
|
||||||
class Matern32(Kernpart):
|
|
||||||
"""
|
|
||||||
Matern 3/2 kernel:
|
|
||||||
|
|
||||||
.. math::
|
|
||||||
|
|
||||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variance: the variance :math:`\sigma^2`
|
|
||||||
:type variance: float
|
|
||||||
:param lengthscale: the vector of lengthscale :math:`\ell_i`
|
|
||||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
|
||||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
|
|
||||||
:type ARD: Boolean
|
|
||||||
:rtype: kernel object
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
|
|
||||||
self.input_dim = input_dim
|
|
||||||
self.ARD = ARD
|
|
||||||
if ARD == False:
|
|
||||||
self.num_params = 2
|
|
||||||
self.name = 'Mat32'
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(1)
|
|
||||||
else:
|
|
||||||
self.num_params = self.input_dim + 1
|
|
||||||
self.name = 'Mat32'
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(self.input_dim)
|
|
||||||
self._set_params(np.hstack((variance, lengthscale.flatten())))
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
"""return the value of the parameters."""
|
|
||||||
return np.hstack((self.variance, self.lengthscale))
|
|
||||||
|
|
||||||
def _set_params(self, x):
|
|
||||||
"""set the value of the parameters."""
|
|
||||||
assert x.size == self.num_params
|
|
||||||
self.variance = x[0]
|
|
||||||
self.lengthscale = x[1:]
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
"""return parameter names."""
|
|
||||||
if self.num_params == 2:
|
|
||||||
return ['variance', 'lengthscale']
|
|
||||||
else:
|
|
||||||
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
|
|
||||||
|
|
||||||
def K(self, X, X2, target):
|
|
||||||
"""Compute the covariance matrix between X and X2."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
|
||||||
np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target)
|
|
||||||
|
|
||||||
def Kdiag(self, X, target):
|
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
|
||||||
np.add(target, self.variance, target)
|
|
||||||
|
|
||||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
|
||||||
dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
|
|
||||||
invdist = 1. / np.where(dist != 0., dist, np.inf)
|
|
||||||
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
|
|
||||||
# dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
|
|
||||||
target[0] += np.sum(dvar * dL_dK)
|
|
||||||
if self.ARD == True:
|
|
||||||
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis]
|
|
||||||
# dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
|
|
||||||
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
|
|
||||||
else:
|
|
||||||
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist
|
|
||||||
# dl = self.variance*dvar*dist2M.sum(-1)*invdist
|
|
||||||
target[1] += np.sum(dl * dL_dK)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
|
||||||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
|
||||||
target[0] += np.sum(dL_dKdiag)
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
|
||||||
"""derivative of the covariance matrix with respect to X."""
|
|
||||||
if X2 is None:
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
|
|
||||||
ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
|
|
||||||
|
|
||||||
else:
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
|
|
||||||
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
|
|
||||||
gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
|
|
||||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def Gram_matrix(self, F, F1, F2, lower, upper):
|
|
||||||
"""
|
|
||||||
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
|
|
||||||
|
|
||||||
:param F: vector of functions
|
|
||||||
:type F: np.array
|
|
||||||
:param F1: vector of derivatives of F
|
|
||||||
:type F1: np.array
|
|
||||||
:param F2: vector of second derivatives of F
|
|
||||||
:type F2: np.array
|
|
||||||
:param lower,upper: boundaries of the input domain
|
|
||||||
:type lower,upper: floats
|
|
||||||
"""
|
|
||||||
assert self.input_dim == 1
|
|
||||||
def L(x, i):
|
|
||||||
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
|
|
||||||
n = F.shape[0]
|
|
||||||
G = np.zeros((n, n))
|
|
||||||
for i in range(n):
|
|
||||||
for j in range(i, n):
|
|
||||||
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
|
|
||||||
Flower = np.array([f(lower) for f in F])[:, None]
|
|
||||||
F1lower = np.array([f(lower) for f in F1])[:, None]
|
|
||||||
# print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
|
|
||||||
# return(G)
|
|
||||||
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
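# Illustrative sketch (hypothetical helper, assuming numpy is available as np in this
# module): a direct evaluation of the Matern 3/2 covariance used by K() above,
# k(r) = sigma^2 (1 + sqrt(3) r) exp(-sqrt(3) r), useful as a cross-check.
def _matern32_reference(X, X2, variance, lengthscale):
    """Reference Matern 3/2 covariance between the rows of the 2-d arrays X and X2."""
    r = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / lengthscale), -1))
    return variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)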
|
|
||||||
|
|
@ -1,145 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
import numpy as np
|
|
||||||
import hashlib
|
|
||||||
from scipy import integrate
|
|
||||||
|
|
||||||
class Matern52(Kernpart):
|
|
||||||
"""
|
|
||||||
Matern 5/2 kernel:
|
|
||||||
|
|
||||||
.. math::
|
|
||||||
|
|
||||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac{5}{3} r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input\_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variance: the variance :math:`\sigma^2`
|
|
||||||
:type variance: float
|
|
||||||
:param lengthscale: the vector of lengthscale :math:`\ell_i`
|
|
||||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
|
||||||
:param ARD: Automatic Relevance Determination. If False, the kernel is isotropic (i.e. a single lengthscale parameter \ell); otherwise there is one lengthscale parameter per dimension.
|
|
||||||
:type ARD: Boolean
|
|
||||||
:rtype: kernel object
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False):
|
|
||||||
self.input_dim = input_dim
|
|
||||||
self.ARD = ARD
|
|
||||||
if ARD == False:
|
|
||||||
self.num_params = 2
|
|
||||||
self.name = 'Mat52'
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(1)
|
|
||||||
else:
|
|
||||||
self.num_params = self.input_dim + 1
|
|
||||||
self.name = 'Mat52'
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(self.input_dim)
|
|
||||||
self._set_params(np.hstack((variance,lengthscale.flatten())))
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
"""return the value of the parameters."""
|
|
||||||
return np.hstack((self.variance,self.lengthscale))
|
|
||||||
|
|
||||||
def _set_params(self,x):
|
|
||||||
"""set the value of the parameters."""
|
|
||||||
assert x.size == self.num_params
|
|
||||||
self.variance = x[0]
|
|
||||||
self.lengthscale = x[1:]
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
"""return parameter names."""
|
|
||||||
if self.num_params == 2:
|
|
||||||
return ['variance','lengthscale']
|
|
||||||
else:
|
|
||||||
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
|
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
|
||||||
"""Compute the covariance matrix between X and X2."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
|
|
||||||
np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
|
||||||
np.add(target,self.variance,target)
|
|
||||||
|
|
||||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
|
|
||||||
invdist = 1./np.where(dist!=0.,dist,np.inf)
|
|
||||||
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
|
|
||||||
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
|
|
||||||
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
|
|
||||||
target[0] += np.sum(dvar*dL_dK)
|
|
||||||
if self.ARD:
|
|
||||||
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
|
|
||||||
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
|
|
||||||
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
|
|
||||||
else:
|
|
||||||
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
|
|
||||||
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
|
|
||||||
target[1] += np.sum(dl*dL_dK)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
|
||||||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
|
||||||
target[0] += np.sum(dL_dKdiag)
|
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
|
||||||
"""derivative of the covariance matrix with respect to X."""
|
|
||||||
if X2 is None:
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
|
|
||||||
ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
|
|
||||||
else:
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
|
|
||||||
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
|
|
||||||
gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
|
|
||||||
target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
|
|
||||||
|
|
||||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def Gram_matrix(self,F,F1,F2,F3,lower,upper):
|
|
||||||
"""
|
|
||||||
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
|
|
||||||
|
|
||||||
:param F: vector of functions
|
|
||||||
:type F: np.array
|
|
||||||
:param F1: vector of derivatives of F
|
|
||||||
:type F1: np.array
|
|
||||||
:param F2: vector of second derivatives of F
|
|
||||||
:type F2: np.array
|
|
||||||
:param F3: vector of third derivatives of F
|
|
||||||
:type F3: np.array
|
|
||||||
:param lower,upper: boundaries of the input domain
|
|
||||||
:type lower,upper: floats
|
|
||||||
"""
|
|
||||||
assert self.input_dim == 1
|
|
||||||
def L(x,i):
|
|
||||||
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
|
|
||||||
n = F.shape[0]
|
|
||||||
G = np.zeros((n,n))
|
|
||||||
for i in range(n):
|
|
||||||
for j in range(i,n):
|
|
||||||
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
|
|
||||||
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
|
|
||||||
Flower = np.array([f(lower) for f in F])[:,None]
|
|
||||||
F1lower = np.array([f(lower) for f in F1])[:,None]
|
|
||||||
F2lower = np.array([f(lower) for f in F2])[:,None]
|
|
||||||
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
|
|
||||||
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
|
|
||||||
return(1./self.variance* (G_coef*G + orig + orig2))
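# Illustrative usage sketch (hypothetical `k` instance, illustration only): the Kernpart
# API accumulates into a preallocated target, so a Matern52 kernpart on inputs X of
# shape (n, input_dim) would be evaluated like this:
#     K = np.zeros((X.shape[0], X.shape[0]))
#     k.K(X, None, K)        # adds sigma^2 (1 + sqrt(5) r + 5/3 r^2) exp(-sqrt(5) r)
#     Kdiag = np.zeros(X.shape[0])
#     k.Kdiag(X, Kdiag)      # the diagonal is constant and equal to the variance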
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
import bias
|
|
||||||
import Brownian
|
|
||||||
import coregionalize
|
|
||||||
import exponential
|
|
||||||
import eq_ode1
|
|
||||||
import finite_dimensional
|
|
||||||
import fixed
|
|
||||||
import gibbs
|
|
||||||
import hetero
|
|
||||||
import hierarchical
|
|
||||||
import independent_outputs
|
|
||||||
import linear
|
|
||||||
import Matern32
|
|
||||||
import Matern52
|
|
||||||
import mlp
|
|
||||||
import ODE_1
|
|
||||||
import periodic_exponential
|
|
||||||
import periodic_Matern32
|
|
||||||
import periodic_Matern52
|
|
||||||
import poly
|
|
||||||
import prod_orthogonal
|
|
||||||
import prod
|
|
||||||
import rational_quadratic
|
|
||||||
import rbfcos
|
|
||||||
import rbf
|
|
||||||
import rbf_inv
|
|
||||||
import spline
|
|
||||||
import symmetric
|
|
||||||
import white
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
from ...core.parameterization import Param
|
|
||||||
|
|
||||||
class Bias(Kernpart):
|
|
||||||
def __init__(self,input_dim,variance=1.,name=None):
|
|
||||||
"""
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variance: the variance of the kernel
|
|
||||||
:type variance: float
|
|
||||||
"""
|
|
||||||
super(Bias, self).__init__(input_dim, name)
|
|
||||||
from ...core.parameterization.transformations import Logexp
|
|
||||||
self.variance = Param("variance", variance, Logexp())
|
|
||||||
self.add_parameter(self.variance)
|
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
|
||||||
target += self.variance
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
target += self.variance
|
|
||||||
|
|
||||||
#def dK_dtheta(self,dL_dKdiag,X,X2,target):
|
|
||||||
#target += dL_dKdiag.sum()
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
self.variance.gradient = dL_dK.sum()
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
|
||||||
target += dL_dKdiag.sum()
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK,X, X2, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
#---------------------------------------#
|
|
||||||
# PSI statistics #
|
|
||||||
#---------------------------------------#
|
|
||||||
|
|
||||||
def psi0(self, Z, mu, S, target):
|
|
||||||
target += self.variance
|
|
||||||
|
|
||||||
def psi1(self, Z, mu, S, target):
|
|
||||||
self._psi1 = self.variance
|
|
||||||
target += self._psi1
|
|
||||||
|
|
||||||
def psi2(self, Z, mu, S, target):
|
|
||||||
target += self.variance**2
|
|
||||||
|
|
||||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
|
|
||||||
target += dL_dpsi0.sum()
|
|
||||||
|
|
||||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
|
|
||||||
target += dL_dpsi1.sum()
|
|
||||||
|
|
||||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
target += 2.*self.variance*dL_dpsi2.sum()
|
|
||||||
|
|
||||||
def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
|
||||||
pass
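# Illustrative sketch (hypothetical `b` instance, illustration only): the Bias kernpart
# above gives a constant covariance, so with n inputs
#     K = np.zeros((n, n)); b.K(X, None, K)     # K == b.variance * np.ones((n, n))
# and its psi0, psi1 and psi2 statistics are the constants sigma^2, sigma^2 and sigma^4.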
|
|
||||||
|
|
@ -1,129 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
import numpy as np
|
|
||||||
from scipy import integrate
|
|
||||||
|
|
||||||
class Exponential(Kernpart):
|
|
||||||
"""
|
|
||||||
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
|
|
||||||
|
|
||||||
.. math::
|
|
||||||
|
|
||||||
k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input\_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variance: the variance :math:`\sigma^2`
|
|
||||||
:type variance: float
|
|
||||||
:param lengthscale: the vector of lengthscale :math:`\ell_i`
|
|
||||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
|
||||||
:param ARD: Automatic Relevance Determination. If False, the kernel is isotropic (i.e. a single lengthscale parameter \ell); otherwise there is one lengthscale parameter per dimension.
|
|
||||||
:type ARD: Boolean
|
|
||||||
:param name: the name of the kernel
|
|
||||||
:rtype: kernel object
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='exp'):
|
|
||||||
self.input_dim = input_dim
|
|
||||||
self.ARD = ARD
|
|
||||||
self.variance = variance
|
|
||||||
self.name = name
|
|
||||||
if ARD == False:
|
|
||||||
self.num_params = 2
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(1)
|
|
||||||
else:
|
|
||||||
self.num_params = self.input_dim + 1
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(self.input_dim)
|
|
||||||
#self._set_params(np.hstack((variance, lengthscale.flatten())))
|
|
||||||
self.set_as_parameter('variance', 'lengthscale')
|
|
||||||
|
|
||||||
# def _get_params(self):
|
|
||||||
# """return the value of the parameters."""
|
|
||||||
# return np.hstack((self.variance, self.lengthscale))
|
|
||||||
#
|
|
||||||
# def _set_params(self, x):
|
|
||||||
# """set the value of the parameters."""
|
|
||||||
# assert x.size == self.num_params
|
|
||||||
# self.variance = x[0]
|
|
||||||
# self.lengthscale = x[1:]
|
|
||||||
#
|
|
||||||
# def _get_param_names(self):
|
|
||||||
# """return parameter names."""
|
|
||||||
# if self.num_params == 2:
|
|
||||||
# return ['variance', 'lengthscale']
|
|
||||||
# else:
|
|
||||||
# return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
|
|
||||||
|
|
||||||
def K(self, X, X2, target):
|
|
||||||
"""Compute the covariance matrix between X and X2."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
|
||||||
np.add(self.variance * np.exp(-dist), target, target)
|
|
||||||
|
|
||||||
def Kdiag(self, X, target):
|
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
|
||||||
np.add(target, self.variance, target)
|
|
||||||
|
|
||||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
|
||||||
invdist = 1. / np.where(dist != 0., dist, np.inf)
|
|
||||||
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
|
|
||||||
dvar = np.exp(-dist)
|
|
||||||
target[0] += np.sum(dvar * dL_dK)
|
|
||||||
if self.ARD:
|
|
||||||
dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None]
|
|
||||||
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
|
|
||||||
else:
|
|
||||||
dl = self.variance * dvar * dist2M.sum(-1) * invdist
|
|
||||||
target[1] += np.sum(dl * dL_dK)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
|
||||||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
|
||||||
# NB: derivative of diagonal elements wrt lengthscale is 0
|
|
||||||
target[0] += np.sum(dL_dKdiag)
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
|
||||||
"""derivative of the covariance matrix with respect to X."""
|
|
||||||
if X2 is None: X2 = X
|
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
|
|
||||||
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
|
|
||||||
gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
|
|
||||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def Gram_matrix(self, F, F1, lower, upper):
|
|
||||||
"""
|
|
||||||
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
|
|
||||||
|
|
||||||
:param F: vector of functions
|
|
||||||
:type F: np.array
|
|
||||||
:param F1: vector of derivatives of F
|
|
||||||
:type F1: np.array
|
|
||||||
:param lower,upper: boundaries of the input domain
|
|
||||||
:type lower,upper: floats
|
|
||||||
"""
|
|
||||||
assert self.input_dim == 1
|
|
||||||
def L(x, i):
|
|
||||||
return(1. / self.lengthscale * F[i](x) + F1[i](x))
|
|
||||||
n = F.shape[0]
|
|
||||||
G = np.zeros((n, n))
|
|
||||||
for i in range(n):
|
|
||||||
for j in range(i, n):
|
|
||||||
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
|
|
||||||
Flower = np.array([f(lower) for f in F])[:, None]
|
|
||||||
return(self.lengthscale / 2. / self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T))
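# Illustrative sketch (hypothetical `k` instance and grid X, illustration only): drawing
# one sample path from a zero-mean GP with this exponential (Ornstein-Uhlenbeck)
# covariance, with a small jitter added for numerical stability:
#     K = np.zeros((X.shape[0], X.shape[0]))
#     k.K(X, None, K)
#     f = np.random.multivariate_normal(np.zeros(X.shape[0]), K + 1e-8 * np.eye(X.shape[0]))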
|
|
||||||
|
|
@ -1,176 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
#from ...core.parameterized.Parameterized import set_as_parameter
|
|
||||||
from ...core.parameterization import Parameterized
|
|
||||||
|
|
||||||
class Kernpart(Parameterized):
|
|
||||||
def __init__(self,input_dim,name):
|
|
||||||
"""
|
|
||||||
The base class for a kernpart: a positive definite function
|
|
||||||
which forms part of a covariance function (kernel).
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions to the function
|
|
||||||
:type input_dim: int
|
|
||||||
|
|
||||||
Do not instantiate.
|
|
||||||
"""
|
|
||||||
super(Kernpart, self).__init__(name)
|
|
||||||
# the input dimensionality for the covariance
|
|
||||||
self.input_dim = input_dim
|
|
||||||
# the number of optimisable parameters
|
|
||||||
# the name of the covariance function.
|
|
||||||
# link to parameterized objects
|
|
||||||
#self._X = None
|
|
||||||
|
|
||||||
def connect_input(self, X):
|
|
||||||
X.add_observer(self, self.on_input_change)
|
|
||||||
#self._X = X
|
|
||||||
|
|
||||||
def on_input_change(self, X):
|
|
||||||
"""
|
|
||||||
During optimization this function will be called when
|
|
||||||
the inputs X changed. Use this to update caches dependent
|
|
||||||
on the inputs X.
|
|
||||||
"""
|
|
||||||
# overwrite this to update kernel when inputs X change
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs):
|
|
||||||
# """
|
|
||||||
# :param names: name of parameter to set as parameter
|
|
||||||
# :param gradient: gradient method to get the gradient of this parameter
|
|
||||||
# :param index: index of where to place parameter in printing
|
|
||||||
# :param args, kwargs: additional arguments to gradient
|
|
||||||
#
|
|
||||||
# Convenience method to connect Kernpart parameters:
|
|
||||||
# parameter with name (attribute of this Kernpart) will be set as parameter with following name:
|
|
||||||
#
|
|
||||||
# kernel_name + _ + parameter_name
|
|
||||||
#
|
|
||||||
# To add the kernels name to the parameter name use this method to
|
|
||||||
# add parameters.
|
|
||||||
# """
|
|
||||||
# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs)
|
|
||||||
# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs):
|
|
||||||
# """
|
|
||||||
# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter`
|
|
||||||
#
|
|
||||||
# Note: this method adds the kernels name in front of the parameter.
|
|
||||||
# """
|
|
||||||
# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs)
|
|
||||||
# if index is None:
|
|
||||||
# self._parameters_.append(p)
|
|
||||||
# else:
|
|
||||||
# self._parameters_.insert(index, p)
|
|
||||||
# self.__dict__[name] = p
|
|
||||||
#set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable
|
|
||||||
# def _get_params(self):
|
|
||||||
# raise NotImplementedError
|
|
||||||
# def _set_params(self,x):
|
|
||||||
# raise NotImplementedError
|
|
||||||
# def _get_param_names(self):
|
|
||||||
# raise NotImplementedError
|
|
||||||
def K(self,X,X2,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
|
||||||
# In the base case compute this by calling _param_grad_helper. Need to
|
|
||||||
# override for stationary covariances (for example) to save
|
|
||||||
# time.
|
|
||||||
for i in range(X.shape[0]):
|
|
||||||
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
|
||||||
def psi0(self,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
|
|
||||||
raise NotImplementedError
|
|
||||||
def psi1(self,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi1_dtheta(self,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
|
|
||||||
raise NotImplementedError
|
|
||||||
def psi2(self,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
|
|
||||||
raise NotImplementedError
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def dKdiag_dX(self, dL_dK, X, target):
|
|
||||||
raise NotImplementedError
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
|
||||||
raise NotImplementedError
|
|
||||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
|
||||||
"""Set the gradients of all parameters when doing sparse (M) inference."""
|
|
||||||
raise NotImplementedError
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
|
||||||
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
|
|
||||||
raise NotImplementedError
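# Illustrative sketch (hypothetical class, illustration only): a minimal Kernpart
# subclass implementing the abstract interface above with a constant, bias-like
# covariance; parameter handling is deliberately left out.
class _ConstantKernpart(Kernpart):
    def __init__(self, input_dim, value=1., name='constant'):
        super(_ConstantKernpart, self).__init__(input_dim, name)
        self.value = value

    def K(self, X, X2, target):
        target += self.value

    def Kdiag(self, X, target):
        target += self.value

    def update_gradients_full(self, dL_dK, X):
        pass  # no free parameters in this toy sketch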
|
|
||||||
|
|
||||||
class Kernpart_stationary(Kernpart):
|
|
||||||
def __init__(self, input_dim, lengthscale=None, ARD=False):
|
|
||||||
self.input_dim = input_dim
|
|
||||||
self.ARD = ARD
|
|
||||||
if not ARD:
|
|
||||||
self.num_params = 2
|
|
||||||
if lengthscale is not None:
|
|
||||||
self.lengthscale = np.asarray(lengthscale)
|
|
||||||
assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
|
|
||||||
else:
|
|
||||||
self.lengthscale = np.ones(1)
|
|
||||||
else:
|
|
||||||
self.num_params = self.input_dim + 1
|
|
||||||
if lengthscale is not None:
|
|
||||||
self.lengthscale = np.asarray(lengthscale)
|
|
||||||
assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
|
|
||||||
else:
|
|
||||||
self.lengthscale = np.ones(self.input_dim)
|
|
||||||
|
|
||||||
# initialize cache
|
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
|
||||||
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
|
|
||||||
|
|
||||||
def _set_params(self, x):
|
|
||||||
self.lengthscale = x
|
|
||||||
self.lengthscale2 = np.square(self.lengthscale)
|
|
||||||
# reset cached results
|
|
||||||
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
|
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
|
|
||||||
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
|
||||||
# For stationary covariances, derivative of diagonal elements
|
|
||||||
# wrt lengthscale is 0.
|
|
||||||
target[0] += np.sum(dL_dKdiag)
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dK, X, target):
|
|
||||||
pass # true for all stationary kernels
|
|
||||||
|
|
||||||
|
|
||||||
class Kernpart_inner(Kernpart):
|
|
||||||
def __init__(self,input_dim):
|
|
||||||
"""
|
|
||||||
The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions to the function
|
|
||||||
:type input_dim: int
|
|
||||||
|
|
||||||
Do not instantiate.
|
|
||||||
"""
|
|
||||||
Kernpart.__init__(self, input_dim, 'kernpart_inner')
|
|
||||||
|
|
||||||
# initialize cache
|
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
|
||||||
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
|
|
||||||
|
|
@ -1,306 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from scipy import weave
|
|
||||||
from kernpart import Kernpart
|
|
||||||
from ...util.linalg import tdot
|
|
||||||
from ...util.misc import fast_array_equal, param_to_array
|
|
||||||
from ...core.parameterization import Param
|
|
||||||
from ...core.parameterization.transformations import Logexp
|
|
||||||
|
|
||||||
class Linear(Kernpart):
|
|
||||||
"""
|
|
||||||
Linear kernel
|
|
||||||
|
|
||||||
.. math::
|
|
||||||
|
|
||||||
k(x,y) = \sum_{i=1}^{\\text{input\_dim}} \sigma^2_i x_i y_i
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variances: the vector of variances :math:`\sigma^2_i`
|
|
||||||
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
|
|
||||||
:param ARD: Automatic Relevance Determination. If False, the kernel has a single variance parameter \sigma^2; otherwise there is one variance parameter per dimension.
|
|
||||||
:type ARD: Boolean
|
|
||||||
:rtype: kernel object
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
|
|
||||||
super(Linear, self).__init__(input_dim, name)
|
|
||||||
self.ARD = ARD
|
|
||||||
if ARD == False:
|
|
||||||
if variances is not None:
|
|
||||||
variances = np.asarray(variances)
|
|
||||||
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
|
|
||||||
else:
|
|
||||||
variances = np.ones(1)
|
|
||||||
self._Xcache, self._X2cache = np.empty(shape=(2,))
|
|
||||||
else:
|
|
||||||
if variances is not None:
|
|
||||||
variances = np.asarray(variances)
|
|
||||||
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
|
|
||||||
else:
|
|
||||||
variances = np.ones(self.input_dim)
|
|
||||||
|
|
||||||
self.variances = Param('variances', variances, Logexp())
|
|
||||||
self.variances.gradient = np.zeros(self.variances.shape)
|
|
||||||
self.add_parameter(self.variances)
|
|
||||||
self.variances.add_observer(self, self.update_variance)
|
|
||||||
|
|
||||||
# initialize cache
|
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
|
||||||
self._X, self._X2 = np.empty(shape=(2, 1))
|
|
||||||
|
|
||||||
def update_variance(self, v):
|
|
||||||
self.variances2 = np.square(self.variances)
|
|
||||||
|
|
||||||
def on_input_change(self, X):
|
|
||||||
self._K_computations(X, None)
|
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
self.variances.gradient[:] = 0
|
|
||||||
self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
|
|
||||||
|
|
||||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
|
||||||
tmp = dL_dKdiag[:, None] * X ** 2
|
|
||||||
if self.ARD:
|
|
||||||
self.variances.gradient = tmp.sum(0)
|
|
||||||
else:
|
|
||||||
self.variances.gradient = tmp.sum()
|
|
||||||
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
|
|
||||||
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
|
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
# psi0:
|
|
||||||
tmp = dL_dpsi0[:, None] * self.mu2_S
|
|
||||||
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
|
|
||||||
else: self.variances.gradient[:] = tmp.sum()
|
|
||||||
#psi1
|
|
||||||
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
|
|
||||||
#psi2
|
|
||||||
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
|
||||||
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
|
|
||||||
else: self.variances.gradient += tmp.sum()
|
|
||||||
#from Kmm
|
|
||||||
self._K_computations(Z, None)
|
|
||||||
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
|
|
||||||
|
|
||||||
def K(self, X, X2, target):
|
|
||||||
if self.ARD:
|
|
||||||
XX = X * np.sqrt(self.variances)
|
|
||||||
if X2 is None:
|
|
||||||
target += tdot(XX)
|
|
||||||
else:
|
|
||||||
XX2 = X2 * np.sqrt(self.variances)
|
|
||||||
target += np.dot(XX, XX2.T)
|
|
||||||
else:
|
|
||||||
if X is not self._X or X2 is not None:
|
|
||||||
self._K_computations(X, X2)
|
|
||||||
target += self.variances * self._dot_product
|
|
||||||
|
|
||||||
def Kdiag(self, X, target):
|
|
||||||
np.add(target, np.sum(self.variances * np.square(X), -1), target)
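# Illustrative sketch (illustration only): the linear covariance computed above is
# X diag(variances) X2^T, so a direct NumPy reference is
#     K = np.dot(X * variances, X2.T)
#     Kdiag = np.sum(variances * np.square(X), -1)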
|
|
||||||
|
|
||||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
|
||||||
if self.ARD:
|
|
||||||
if X2 is None:
|
|
||||||
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
|
|
||||||
else:
|
|
||||||
product = X[:, None, :] * X2[None, :, :]
|
|
||||||
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
|
|
||||||
else:
|
|
||||||
if X is not self._X or X2 is not None:
|
|
||||||
self._K_computations(X, X2)
|
|
||||||
target += np.sum(self._dot_product * dL_dK)
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
|
||||||
if X2 is None:
|
|
||||||
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
|
||||||
else:
|
|
||||||
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
|
||||||
|
|
||||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
|
||||||
target += 2.*self.variances*dL_dKdiag[:,None]*X
|
|
||||||
|
|
||||||
#---------------------------------------#
|
|
||||||
# PSI statistics #
|
|
||||||
#---------------------------------------#
|
|
||||||
|
|
||||||
def psi0(self, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
target += np.sum(self.variances * self.mu2_S, 1)
|
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
|
||||||
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
|
|
||||||
target_S += dL_dpsi0[:, None] * self.variances
|
|
||||||
|
|
||||||
def psi1(self, Z, mu, S, target):
|
|
||||||
"""the variance, it does nothing"""
|
|
||||||
self._psi1 = self.K(mu, Z, target)
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
|
||||||
"""Do nothing for S, it does not affect psi1"""
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
|
|
||||||
|
|
||||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
|
||||||
self.gradients_X(dL_dpsi1.T, Z, mu, target)
|
|
||||||
|
|
||||||
def psi2(self, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
target += self._psi2
|
|
||||||
|
|
||||||
def psi2_new(self,Z,mu,S,target):
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
self.K(mu,Z,tmp)
|
|
||||||
target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1)
|
|
||||||
|
|
||||||
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
self.K(mu,Z,tmp)
|
|
||||||
self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
|
|
||||||
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
|
|
||||||
if self.ARD:
|
|
||||||
target += result
|
|
||||||
else:
|
|
||||||
target += result.sum()
|
|
||||||
|
|
||||||
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
|
||||||
self.K(mu,Z,tmp)
|
|
||||||
self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
|
|
||||||
|
|
||||||
Zs = Z*self.variances
|
|
||||||
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
|
|
||||||
target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1)
|
|
||||||
|
|
||||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
|
||||||
"""Think N,num_inducing,num_inducing,input_dim """
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
|
|
||||||
AZZA = AZZA + AZZA.swapaxes(1, 2)
|
|
||||||
AZZA_2 = AZZA/2.
|
|
||||||
#muAZZA = np.tensordot(mu,AZZA,(-1,0))
|
|
||||||
#target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
|
|
||||||
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
|
|
||||||
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
|
|
||||||
|
|
||||||
# Using weave, we can exploit the symmetry of this problem:
|
|
||||||
code = """
|
|
||||||
int n, m, mm,q,qq;
|
|
||||||
double factor,tmp;
|
|
||||||
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
|
|
||||||
for(n=0;n<N;n++){
|
|
||||||
for(m=0;m<num_inducing;m++){
|
|
||||||
for(mm=0;mm<=m;mm++){
|
|
||||||
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
|
|
||||||
if(m==mm)
|
|
||||||
factor = dL_dpsi2(n,m,mm);
|
|
||||||
else
|
|
||||||
factor = 2.0*dL_dpsi2(n,m,mm);
|
|
||||||
|
|
||||||
for(q=0;q<input_dim;q++){
|
|
||||||
|
|
||||||
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
|
|
||||||
tmp = 0.0;
|
|
||||||
for(qq=0;qq<input_dim;qq++){
|
|
||||||
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
|
|
||||||
}
|
|
||||||
|
|
||||||
target_mu(n,q) += factor*tmp;
|
|
||||||
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
support_code = """
|
|
||||||
#include <omp.h>
|
|
||||||
#include <math.h>
|
|
||||||
"""
|
|
||||||
weave_options = {'headers' : ['<omp.h>'],
|
|
||||||
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
|
||||||
'extra_link_args' : ['-lgomp']}
|
|
||||||
|
|
||||||
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
|
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
|
||||||
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
|
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
|
||||||
|
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
#psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
|
|
||||||
#dummy_target = np.zeros_like(target)
|
|
||||||
#dummy_target += psi2_dZ.sum(0).sum(0)
|
|
||||||
|
|
||||||
AZA = self.variances*self.ZAinner
|
|
||||||
code="""
|
|
||||||
int n,m,mm,q;
|
|
||||||
#pragma omp parallel for private(n,mm,q)
|
|
||||||
for(m=0;m<num_inducing;m++){
|
|
||||||
for(q=0;q<input_dim;q++){
|
|
||||||
for(mm=0;mm<num_inducing;mm++){
|
|
||||||
for(n=0;n<N;n++){
|
|
||||||
target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
support_code = """
|
|
||||||
#include <omp.h>
|
|
||||||
#include <math.h>
|
|
||||||
"""
|
|
||||||
weave_options = {'headers' : ['<omp.h>'],
|
|
||||||
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
|
||||||
'extra_link_args' : ['-lgomp']}
|
|
||||||
|
|
||||||
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
|
|
||||||
mu, AZA, target, dL_dpsi2 = param_to_array(mu, AZA, target, dL_dpsi2)
|
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
|
||||||
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
|
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#---------------------------------------#
|
|
||||||
# Precomputations #
|
|
||||||
#---------------------------------------#
|
|
||||||
|
|
||||||
def _K_computations(self, X, X2):
|
|
||||||
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
|
|
||||||
self._X = X.copy()
|
|
||||||
if X2 is None:
|
|
||||||
self._dot_product = tdot(param_to_array(X))
|
|
||||||
self._X2 = None
|
|
||||||
else:
|
|
||||||
self._X2 = X2.copy()
|
|
||||||
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
|
|
||||||
|
|
||||||
def _psi_computations(self, Z, mu, S):
|
|
||||||
# here are the "statistics" for psi1 and psi2
|
|
||||||
Zv_changed = not (fast_array_equal(Z, self._Z) and fast_array_equal(self.variances, self._variances))
|
|
||||||
muS_changed = not (fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S))
|
|
||||||
if Zv_changed:
|
|
||||||
# Z has changed, compute Z specific stuff
|
|
||||||
# self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim
|
|
||||||
# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
|
|
||||||
# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
|
|
||||||
self.ZA = Z * self.variances
|
|
||||||
self._Z = Z.copy()
|
|
||||||
self._variances = self.variances.copy()
|
|
||||||
if muS_changed:
|
|
||||||
self.mu2_S = np.square(mu) + S
|
|
||||||
self.inner = (mu[:, None, :] * mu[:, :, None])
|
|
||||||
diag_indices = np.diag_indices(mu.shape[1], 2)
|
|
||||||
self.inner[:, diag_indices[0], diag_indices[1]] += S
|
|
||||||
self._mu, self._S = mu.copy(), S.copy()
|
|
||||||
if Zv_changed or muS_changed:
|
|
||||||
self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
|
|
||||||
self._psi2 = np.dot(self.ZAinner, self.ZA.T)
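# Illustrative sketch (illustration only): for the linear kernel the psi2 statistic of
# data point n computed above equals ZA (mu_n mu_n^T + diag(S_n)) ZA^T with
# ZA = Z * variances, i.e. per point
#     inner_n = np.outer(mu[n], mu[n]) + np.diag(S[n])
#     psi2_n = ZA.dot(inner_n).dot(ZA.T)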
|
|
||||||
|
|
@ -1,125 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
from coregionalize import Coregionalize
|
|
||||||
import numpy as np
|
|
||||||
import hashlib
|
|
||||||
|
|
||||||
class Prod(Kernpart):
|
|
||||||
"""
|
|
||||||
Computes the product of 2 kernels
|
|
||||||
|
|
||||||
:param k1, k2: the kernels to multiply
|
|
||||||
:type k1, k2: Kernpart
|
|
||||||
:param tensor: The kernels are either multiplied as functions defined on the same input space (default) or as functions defined on the product of the input spaces
|
|
||||||
:type tensor: Boolean
|
|
||||||
:rtype: kernel object
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self,k1,k2,tensor=False):
|
|
||||||
if tensor:
|
|
||||||
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
|
|
||||||
self.slice1 = slice(0,k1.input_dim)
|
|
||||||
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
|
|
||||||
else:
|
|
||||||
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
|
|
||||||
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
|
|
||||||
self.slice1 = slice(0,self.input_dim)
|
|
||||||
self.slice2 = slice(0,self.input_dim)
|
|
||||||
self.k1 = k1
|
|
||||||
self.k2 = k2
|
|
||||||
self.add_parameters(self.k1, self.k2)
|
|
||||||
|
|
||||||
#initialize cache
|
|
||||||
self._X, self._X2 = np.empty(shape=(2,1))
|
|
||||||
self._params = None
|
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
|
||||||
self._K_computations(X,X2)
|
|
||||||
target += self._K1 * self._K2
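# Illustrative sketch (hypothetical k1, k2, slice1, slice2; illustration only): the
# product covariance above is the elementwise product K1 * K2, where in the tensor case
# each factor only sees its own slice of the input columns:
#     K1 = np.zeros((X.shape[0], X.shape[0])); k1.K(X[:, slice1], None, K1)
#     K2 = np.zeros((X.shape[0], X.shape[0])); k2.K(X[:, slice2], None, K2)
#     K = K1 * K2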
|
|
||||||
|
|
||||||
def K1(self,X, X2):
|
|
||||||
"""Compute the part of the kernel associated with k1."""
|
|
||||||
self._K_computations(X, X2)
|
|
||||||
return self._K1
|
|
||||||
|
|
||||||
def K2(self, X, X2):
|
|
||||||
"""Compute the part of the kernel associated with k2."""
|
|
||||||
self._K_computations(X, X2)
|
|
||||||
return self._K2
|
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
self._K_computations(X, None)
|
|
||||||
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
|
|
||||||
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
|
|
||||||
|
|
||||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
|
||||||
"""Derivative of the covariance matrix with respect to the parameters."""
|
|
||||||
self._K_computations(X,X2)
|
|
||||||
if X2 is None:
|
|
||||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
|
||||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
|
||||||
else:
|
|
||||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
|
||||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
|
||||||
target1 = np.zeros(X.shape[0])
|
|
||||||
target2 = np.zeros(X.shape[0])
|
|
||||||
self.k1.Kdiag(X[:,self.slice1],target1)
|
|
||||||
self.k2.Kdiag(X[:,self.slice2],target2)
|
|
||||||
target += target1 * target2
|
|
||||||
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
|
||||||
K1 = np.zeros(X.shape[0])
|
|
||||||
K2 = np.zeros(X.shape[0])
|
|
||||||
self.k1.Kdiag(X[:,self.slice1],K1)
|
|
||||||
self.k2.Kdiag(X[:,self.slice2],K2)
|
|
||||||
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
|
|
||||||
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
|
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
|
||||||
"""derivative of the covariance matrix with respect to X."""
|
|
||||||
self._K_computations(X,X2)
|
|
||||||
if X2 is None:
|
|
||||||
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
|
|
||||||
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
|
|
||||||
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
|
|
||||||
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
|
|
||||||
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
|
|
||||||
X2 = X
|
|
||||||
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
|
|
||||||
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
|
|
||||||
else:
|
|
||||||
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
|
|
||||||
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
|
||||||
K1 = np.zeros(X.shape[0])
|
|
||||||
K2 = np.zeros(X.shape[0])
|
|
||||||
self.k1.Kdiag(X[:,self.slice1],K1)
|
|
||||||
self.k2.Kdiag(X[:,self.slice2],K2)
|
|
||||||
|
|
||||||
self.k1.dKdiag_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
|
|
||||||
self.k2.dKdiag_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
|
|
||||||
|
|
||||||
def _K_computations(self,X,X2):
|
|
||||||
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
|
|
||||||
self._X = X.copy()
|
|
||||||
self._params = self._get_params().copy()
|
|
||||||
if X2 is None:
|
|
||||||
self._X2 = None
|
|
||||||
self._K1 = np.zeros((X.shape[0],X.shape[0]))
|
|
||||||
self._K2 = np.zeros((X.shape[0],X.shape[0]))
|
|
||||||
self.k1.K(X[:,self.slice1],None,self._K1)
|
|
||||||
self.k2.K(X[:,self.slice2],None,self._K2)
|
|
||||||
else:
|
|
||||||
self._X2 = X2.copy()
|
|
||||||
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
|
|
||||||
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
|
|
||||||
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
|
|
||||||
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
|
|
||||||
|
|
||||||
|
|
@ -1,352 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from kernpart import Kernpart
|
|
||||||
from ...util.linalg import tdot
|
|
||||||
from ...util.misc import fast_array_equal, param_to_array
|
|
||||||
from ...core.parameterization import Param
from scipy import weave
|
|
||||||
|
|
||||||
class SS_RBF(Kernpart):
|
|
||||||
"""
|
|
||||||
The RBF kernel for Spike-and-Slab GPLVM
|
|
||||||
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
|
|
||||||
|
|
||||||
.. math::
|
|
||||||
|
|
||||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
|
|
||||||
|
|
||||||
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
|
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions
|
|
||||||
:type input_dim: int
|
|
||||||
:param variance: the variance of the kernel
|
|
||||||
:type variance: float
|
|
||||||
:param lengthscale: the vector of lengthscale of the kernel
|
|
||||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
|
||||||
:rtype: kernel object
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'):
|
|
||||||
super(SS_RBF, self).__init__(input_dim, name)
|
|
||||||
self.input_dim = input_dim
|
|
||||||
|
|
||||||
if lengthscale is not None:
|
|
||||||
lengthscale = np.asarray(lengthscale)
|
|
||||||
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
|
|
||||||
else:
|
|
||||||
lengthscale = np.ones(self.input_dim)
|
|
||||||
|
|
||||||
self.variance = Param('variance', variance)
|
|
||||||
self.lengthscale = Param('lengthscale', lengthscale)
|
|
||||||
self.lengthscale.add_observer(self, self.update_lengthscale)
|
|
||||||
self.add_parameters(self.variance, self.lengthscale)
|
|
||||||
self.parameters_changed() # initializes cache
|
|
||||||
|
|
||||||
def on_input_change(self, X):
|
|
||||||
#self._K_computations(X, None)
|
|
||||||
pass
|
|
||||||
|
|
||||||
def update_lengthscale(self, l):
|
|
||||||
self.lengthscale2 = np.square(self.lengthscale)
|
|
||||||
|
|
||||||
def parameters_changed(self):
|
|
||||||
# reset cached results
|
|
||||||
self._X, self._X2 = np.empty(shape=(2, 1))
|
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
|
|
||||||
|
|
||||||
def K(self, X, X2, target):
|
|
||||||
self._K_computations(X, X2)
|
|
||||||
target += self.variance * self._K_dvar
|
|
||||||
|
|
||||||
def Kdiag(self, X, target):
|
|
||||||
np.add(target, self.variance, target)
|
|
||||||
|
|
||||||
def psi0(self, Z, mu, S, target):
|
|
||||||
target += self.variance
|
|
||||||
|
|
||||||
def psi1(self, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
target += self._psi1
|
|
||||||
|
|
||||||
def psi2(self, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
target += self._psi2
|
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
self._K_computations(X, None)
|
|
||||||
self.variance.gradient = np.sum(self._K_dvar * dL_dK)
|
|
||||||
if self.ARD:
|
|
||||||
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None)
|
|
||||||
else:
|
|
||||||
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
|
|
||||||
|
|
||||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
|
||||||
#contributions from Kdiag
|
|
||||||
self.variance.gradient = np.sum(dL_dKdiag)
|
|
||||||
|
|
||||||
#from Knm
|
|
||||||
self._K_computations(X, Z)
|
|
||||||
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
|
|
||||||
if self.ARD:
|
|
||||||
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
|
|
||||||
|
|
||||||
else:
|
|
||||||
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
|
|
||||||
|
|
||||||
#from Kmm
|
|
||||||
self._K_computations(Z, None)
|
|
||||||
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
|
||||||
if self.ARD:
|
|
||||||
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
|
||||||
else:
|
|
||||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
|
|
||||||
#contributions from psi0:
|
|
||||||
self.variance.gradient = np.sum(dL_dpsi0)
|
|
||||||
|
|
||||||
#from psi1
|
|
||||||
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
|
|
||||||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
|
||||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
|
||||||
if not self.ARD:
|
|
||||||
self.lengthscale.gradient = dpsi1_dlength.sum()
|
|
||||||
else:
|
|
||||||
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
|
|
||||||
|
|
||||||
#from psi2
|
|
||||||
d_var = 2.*self._psi2 / self.variance
|
|
||||||
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
|
|
||||||
|
|
||||||
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
|
|
||||||
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
|
|
||||||
if not self.ARD:
|
|
||||||
self.lengthscale.gradient += dpsi2_dlength.sum()
|
|
||||||
else:
|
|
||||||
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
|
|
||||||
|
|
||||||
#from Kmm
|
|
||||||
self._K_computations(Z, None)
|
|
||||||
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
|
||||||
if self.ARD:
|
|
||||||
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
|
||||||
else:
|
|
||||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
|
||||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
|
||||||
self._K_computations(X, X2)
|
|
||||||
if X2 is None:
|
|
||||||
_K_dist = 2*(X[:, None, :] - X[None, :, :])
|
|
||||||
else:
|
|
||||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
|
||||||
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
|
||||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
|
||||||
pass
|
|
||||||
|
|
||||||
#---------------------------------------#
|
|
||||||
# PSI statistics #
|
|
||||||
#---------------------------------------#
|
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
denominator = (self.lengthscale2 * (self._psi1_denom))
|
|
||||||
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
|
|
||||||
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
|
|
||||||
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
|
|
||||||
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
|
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
|
|
||||||
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
|
|
||||||
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
|
|
||||||
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
|
||||||
|
|
||||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
|
||||||
"""Think N,num_inducing,num_inducing,input_dim """
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
|
|
||||||
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
|
|
||||||
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
|
|
||||||
|
|
||||||
#---------------------------------------#
|
|
||||||
# Precomputations #
|
|
||||||
#---------------------------------------#
|
|
||||||
|
|
||||||
def _K_computations(self, X, X2):
|
|
||||||
#params = self._get_params()
|
|
||||||
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)):
|
|
||||||
#self._X = X.copy()
|
|
||||||
#self._params_save = params.copy()
|
|
||||||
if X2 is None:
|
|
||||||
self._X2 = None
|
|
||||||
X = X / self.lengthscale
|
|
||||||
Xsquare = np.sum(np.square(X), 1)
|
|
||||||
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
|
|
||||||
else:
|
|
||||||
self._X2 = X2.copy()
|
|
||||||
X = X / self.lengthscale
|
|
||||||
X2 = X2 / self.lengthscale
|
|
||||||
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
|
|
||||||
self._K_dvar = np.exp(-0.5 * self._K_dist2)
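# Illustrative sketch (illustration only): the cached squared distances above use
# ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b on lengthscale-scaled inputs, so an
# equivalent standalone reference for the RBF covariance is
#     Xs, X2s = X / lengthscale, X2 / lengthscale
#     r2 = np.sum(np.square(Xs), 1)[:, None] + np.sum(np.square(X2s), 1)[None, :] - 2. * np.dot(Xs, X2s.T)
#     K = variance * np.exp(-0.5 * r2)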
|
|
||||||
|
|
||||||
def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
|
|
||||||
"""
|
|
||||||
A helper function for update_gradients_* methods
|
|
||||||
|
|
||||||
Computes the derivative of the objective L wrt the lengthscales via
|
|
||||||
|
|
||||||
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)
|
|
||||||
|
|
||||||
assumes self._K_computations has just been called.
|
|
||||||
|
|
||||||
This is only valid if self.ARD=True
|
|
||||||
"""
|
|
||||||
target = np.zeros(self.input_dim)
|
|
||||||
dvardLdK = self._K_dvar * dL_dK
|
|
||||||
var_len3 = self.variance / np.power(self.lengthscale, 3)
|
|
||||||
if X2 is None:
|
|
||||||
# save computation for the symmetrical case
|
|
||||||
dvardLdK = dvardLdK + dvardLdK.T
|
|
||||||
code = """
|
|
||||||
int q,i,j;
|
|
||||||
double tmp;
|
|
||||||
for(q=0; q<input_dim; q++){
|
|
||||||
tmp = 0;
|
|
||||||
for(i=0; i<num_data; i++){
|
|
||||||
for(j=0; j<i; j++){
|
|
||||||
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
target(q) += var_len3(q)*tmp;
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
|
|
||||||
X, dvardLdK = param_to_array(X, dvardLdK)
|
|
||||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
|
||||||
else:
|
|
||||||
code = """
|
|
||||||
int q,i,j;
|
|
||||||
double tmp;
|
|
||||||
for(q=0; q<input_dim; q++){
|
|
||||||
tmp = 0;
|
|
||||||
for(i=0; i<num_data; i++){
|
|
||||||
for(j=0; j<num_inducing; j++){
|
|
||||||
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
target(q) += var_len3(q)*tmp;
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
|
|
||||||
X, X2, dvardLdK = param_to_array(X, X2, dvardLdK)
|
|
||||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
|
||||||
return target
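    # Added note (not in the original source): the weave loops above compute, for
    # each ARD dimension q,
    #
    #     dL/dl_q = sum_{i,j} dL_dK[i,j] * dK[i,j]/dl_q
    #             = (variance / l_q**3) * sum_{i,j} (X[i,q] - X2[j,q])**2 * _K_dvar[i,j] * dL_dK[i,j]
    #
    # A rough pure-NumPy equivalent (slower and more memory hungry, shown only for
    # illustration; `diff2` is a hypothetical temporary) would be:
    #
    #     diff2 = np.square(X[:, None, :] - X2[None, :, :])   # (num_data, num_inducing, input_dim)
    #     target += var_len3 * np.einsum('ijq,ij->q', diff2, self._K_dvar * dL_dK)
    #
    # with X2 taken equal to X in the symmetric case.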

    def _psi_computations(self, Z, mu, S):
        # here are the "statistics" for psi1 and psi2
        Z_changed = not fast_array_equal(Z, self._Z)
        if Z_changed:
            # Z has changed, compute Z specific stuff
            self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
            self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
            self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q

        if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
            # something's changed. recompute EVERYTHING

            # psi1
            self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
            self._psi1_dist = Z[None, :, :] - mu[:, None, :]
            self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
            self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
            self._psi1 = self.variance * np.exp(self._psi1_exponent)

            # psi2
            self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
            self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
            # self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
            # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
            # self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
            self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M

        # store matrices for caching
        self._Z, self._mu, self._S = Z, mu, S

    def weave_psi2(self, mu, Zhat):
        N, input_dim = mu.shape
        num_inducing = Zhat.shape[0]

        mudist = np.empty((N, num_inducing, num_inducing, input_dim))
        mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
        psi2_exponent = np.zeros((N, num_inducing, num_inducing))
        psi2 = np.empty((N, num_inducing, num_inducing))

        psi2_Zdist_sq = self._psi2_Zdist_sq
        _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
        variance_sq = float(np.square(self.variance))
        if self.ARD:
            lengthscale2 = self.lengthscale2
        else:
            lengthscale2 = np.ones(input_dim) * self.lengthscale2
        code = """
        double tmp;

        #pragma omp parallel for private(tmp)
        for (int n=0; n<N; n++){
            for (int m=0; m<num_inducing; m++){
                for (int mm=0; mm<(m+1); mm++){
                    for (int q=0; q<input_dim; q++){
                        //compute mudist
                        tmp = mu(n,q) - Zhat(m,mm,q);
                        mudist(n,m,mm,q) = tmp;
                        mudist(n,mm,m,q) = tmp;

                        //now mudist_sq
                        tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
                        mudist_sq(n,m,mm,q) = tmp;
                        mudist_sq(n,mm,m,q) = tmp;

                        //now psi2_exponent
                        tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
                        psi2_exponent(n,mm,m) += tmp;
                        if (m !=mm){
                            psi2_exponent(n,m,mm) += tmp;
                        }
                        //psi2 would be computed like this, but np is faster
                        //tmp = variance_sq*exp(psi2_exponent(n,m,mm));
                        //psi2(n,m,mm) = tmp;
                        //psi2(n,mm,m) = tmp;
                    }
                }
            }
        }
        """

        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
                     type_converters=weave.converters.blitz, **self.weave_options)

        return mudist, mudist_sq, psi2_exponent, psi2
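    # Added note (not in the original source): with the RBF kernel
    # k(x, x') = variance * exp(-0.5 * sum_q (x_q - x'_q)^2 / l_q^2) and a Gaussian
    # posterior q(x_n) = N(mu_n, diag(S_n)), the statistics cached above are
    #
    #     psi1[n, m]     = variance * prod_q (S_nq/l_q^2 + 1)^(-1/2)
    #                      * exp(-0.5 * (Z_mq - mu_nq)^2 / (l_q^2 * (S_nq/l_q^2 + 1)))
    #     psi2[n, m, m'] = variance^2 * prod_q (2*S_nq/l_q^2 + 1)^(-1/2)
    #                      * exp(-(Z_mq - Z_m'q)^2 / (4*l_q^2)
    #                            - (mu_nq - (Z_mq + Z_m'q)/2)^2 / (l_q^2 * (2*S_nq/l_q^2 + 1)))
    #
    # which is what _psi_computations and weave_psi2 evaluate term by term.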

@@ -1,423 +0,0 @@
import numpy as np
import sympy as sp
from sympy.utilities.codegen import codegen
from sympy.core.cache import clear_cache
from scipy import weave
import re
import os
import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile
import pdb
import ast
from kernpart import Kernpart

class spkern(Kernpart):
    """
    A kernel object, where all the hard work is done by sympy.

    :param k: the covariance function
    :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...

    To construct a new sympy kernel, you'll need to define:
     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
     - that's it! we'll extract the variables from the function k.

    Note:
     - to handle multiple inputs, call them x_1, z_1, etc
     - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
    """
|
|
||||||
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
|
|
||||||
if name is None:
|
|
||||||
self.name='sympykern'
|
|
||||||
else:
|
|
||||||
self.name = name
|
|
||||||
if k is None:
|
|
||||||
raise ValueError, "You must provide an argument for the covariance function."
|
|
||||||
self._sp_k = k
|
|
||||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
|
||||||
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
|
||||||
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
|
||||||
# Check that variable names make sense.
|
|
||||||
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
|
|
||||||
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
|
|
||||||
assert len(self._sp_x)==len(self._sp_z)
|
|
||||||
self.input_dim = len(self._sp_x)
|
|
||||||
self._real_input_dim = self.input_dim
|
|
||||||
if output_dim > 1:
|
|
||||||
self.input_dim += 1
|
|
||||||
assert self.input_dim == input_dim
|
|
||||||
self.output_dim = output_dim
|
|
||||||
# extract parameter names
|
|
||||||
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
|
||||||
|
|
||||||
|
|
||||||
# Look for parameters with index.
|
|
||||||
if self.output_dim>1:
|
|
||||||
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
|
||||||
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
|
||||||
# Make sure parameter appears with both indices!
|
|
||||||
assert len(self._sp_theta_i)==len(self._sp_theta_j)
|
|
||||||
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
|
|
||||||
|
|
||||||
# Extract names of shared parameters
|
|
||||||
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
|
||||||
|
|
||||||
self.num_split_params = len(self._sp_theta_i)
|
|
||||||
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
|
||||||
for theta in self._split_theta_names:
|
|
||||||
setattr(self, theta, np.ones(self.output_dim))
|
|
||||||
|
|
||||||
self.num_shared_params = len(self._sp_theta)
|
|
||||||
self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
|
||||||
|
|
||||||
else:
|
|
||||||
self.num_split_params = 0
|
|
||||||
self._split_theta_names = []
|
|
||||||
self._sp_theta = thetas
|
|
||||||
self.num_shared_params = len(self._sp_theta)
|
|
||||||
self.num_params = self.num_shared_params
|
|
||||||
|
|
||||||
for theta in self._sp_theta:
|
|
||||||
val = 1.0
|
|
||||||
if param is not None:
|
|
||||||
if param.has_key(theta):
|
|
||||||
val = param[theta]
|
|
||||||
setattr(self, theta.name, val)
|
|
||||||
#deal with param
|
|
||||||
self._set_params(self._get_params())
|
|
||||||
|
|
||||||
#Differentiate!
|
|
||||||
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
|
||||||
if self.output_dim > 1:
|
|
||||||
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
|
|
||||||
|
|
||||||
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
|
||||||
|
|
||||||
if False:
|
|
||||||
self.compute_psi_stats()
|
|
||||||
|
|
||||||
self._gen_code()
|
|
||||||
|
|
||||||
if False:
|
|
||||||
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
|
|
||||||
else:
|
|
||||||
extra_compile_args = []
|
|
||||||
|
|
||||||
self.weave_kwargs = {
|
|
||||||
'support_code':self._function_code,
|
|
||||||
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
|
|
||||||
'headers':['"sympy_helpers.h"'],
|
|
||||||
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
|
|
||||||
'extra_compile_args':extra_compile_args,
|
|
||||||
'extra_link_args':['-lgomp'],
|
|
||||||
'verbose':True}
|
|
||||||
|
|
||||||
def __add__(self,other):
|
|
||||||
return spkern(self._sp_k+other._sp_k)
|
|
||||||
|
|
||||||
def _gen_code(self):
|
|
||||||
#generate c functions from sympy objects
|
|
||||||
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
|
|
||||||
code_list = [('k',self._sp_k)]
|
|
||||||
# gradients with respect to covariance input
|
|
||||||
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
|
|
||||||
# gradient with respect to parameters
|
|
||||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
|
|
||||||
# gradient with respect to multiple output parameters
|
|
||||||
if self.output_dim > 1:
|
|
||||||
argument_sequence += self._sp_theta_i + self._sp_theta_j
|
|
||||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
|
|
||||||
(foo_c,self._function_code), (foo_h,self._function_header) = \
|
|
||||||
codegen(code_list, "C",'foobar',argument_sequence=argument_sequence)
|
|
||||||
#put the header file where we can find it
|
|
||||||
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
|
|
||||||
f.write(self._function_header)
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
# Substitute any known derivatives which sympy doesn't compute
|
|
||||||
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
|
||||||
|
|
||||||
# This is the basic argument construction for the C code.
|
|
||||||
#arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x]
|
|
||||||
# + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z])
|
|
||||||
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
|
|
||||||
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
|
|
||||||
if self.output_dim>1:
|
|
||||||
reverse_arg_list = list(arg_list)
|
|
||||||
reverse_arg_list.reverse()
|
|
||||||
|
|
||||||
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
|
|
||||||
arg_list += param_arg_list
|
|
||||||
|
|
||||||
precompute_list=[]
|
|
||||||
if self.output_dim > 1:
|
|
||||||
reverse_arg_list+=list(param_arg_list)
|
|
||||||
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
|
|
||||||
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
|
|
||||||
arg_list += split_param_arg_list
|
|
||||||
reverse_arg_list += split_param_reverse_arg_list
|
|
||||||
# Extract the right output indices from the inputs.
|
|
||||||
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
|
|
||||||
precompute_list += c_define_output_indices
|
|
||||||
reverse_arg_string = ", ".join(reverse_arg_list)
|
|
||||||
arg_string = ", ".join(arg_list)
|
|
||||||
precompute_string = "\n".join(precompute_list)
|
|
||||||
# Here's the code to do the looping for K
|
|
||||||
self._K_code =\
|
|
||||||
"""
|
|
||||||
// _K_code
|
|
||||||
// Code for computing the covariance function.
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int N = target_array->dimensions[0];
|
|
||||||
int num_inducing = target_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
%s
|
|
||||||
//target[i*num_inducing+j] =
|
|
||||||
TARGET2(i, j) += k(%s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
|
|
||||||
# Code to compute diagonal of covariance.
|
|
||||||
diag_arg_string = re.sub('Z','X',arg_string)
|
|
||||||
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
|
|
||||||
diag_arg_string = re.sub('j','i',diag_arg_string)
|
|
||||||
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
|
|
||||||
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
|
|
||||||
diag_precompute_string = re.sub('j','i',diag_precompute_string)
|
|
||||||
# Code to do the looping for Kdiag
|
|
||||||
self._Kdiag_code =\
|
|
||||||
"""
|
|
||||||
// _Kdiag_code
|
|
||||||
// Code for computing diagonal of covariance function.
|
|
||||||
int i;
|
|
||||||
int N = target_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
%s
|
|
||||||
//target[i] =
|
|
||||||
TARGET1(i)=k(%s);
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Code to compute gradients
|
|
||||||
grad_func_list = []
|
|
||||||
if self.output_dim>1:
|
|
||||||
grad_func_list += c_define_output_indices
|
|
||||||
grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
|
|
||||||
grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
|
|
||||||
grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
|
|
||||||
grad_func_string = '\n'.join(grad_func_list)
|
|
||||||
|
|
||||||
self._dK_dtheta_code =\
|
|
||||||
"""
|
|
||||||
// _dK_dtheta_code
|
|
||||||
// Code for computing gradient of covariance with respect to parameters.
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
|
|
||||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
|
||||||
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
|
||||||
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
|
||||||
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
|
||||||
diag_grad_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_grad_func_string)
|
|
||||||
self._dKdiag_dtheta_code =\
|
|
||||||
"""
|
|
||||||
// _dKdiag_dtheta_code
|
|
||||||
// Code for computing gradient of diagonal with respect to parameters.
|
|
||||||
int i;
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
|
|
||||||
gradX_func_list = []
|
|
||||||
if self.output_dim>1:
|
|
||||||
gradX_func_list += c_define_output_indices
|
|
||||||
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
|
|
||||||
gradX_func_string = "\n".join(gradX_func_list)
|
|
||||||
|
|
||||||
self._dK_dX_code = \
|
|
||||||
"""
|
|
||||||
// _dK_dX_code
|
|
||||||
// Code for computing gradient of covariance with respect to inputs.
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<N; i++){
|
|
||||||
for (j=0; j<num_inducing; j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
|
|
||||||
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
|
|
||||||
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
|
|
||||||
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
|
|
||||||
diag_gradX_func_string = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradX_func_string)
|
|
||||||
|
|
||||||
# Code for gradients of Kdiag wrt X
|
|
||||||
self._dKdiag_dX_code= \
|
|
||||||
"""
|
|
||||||
// _dKdiag_dX_code
|
|
||||||
// Code for computing gradient of diagonal with respect to inputs.
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (int i=0;i<N; i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
|
|
||||||
# string representation forces recompile when needed Get rid
|
|
||||||
# of Zs in argument for diagonal. TODO: Why wasn't
|
|
||||||
# diag_func_string called here? Need to check that.
|
|
||||||
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
|
|
||||||
|
|
||||||
# Code to use when only X is provided.
|
|
||||||
self._K_code_X = self._K_code.replace('Z[', 'X[')
|
|
||||||
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
|
|
||||||
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
|
|
||||||
self._K_code_X = self._K_code.replace('Z2(', 'X2(')
|
|
||||||
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
|
|
||||||
self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
|
|
||||||
|
|
||||||
|
|
||||||
#TODO: insert multiple functions here via string manipulation
|
|
||||||
#TODO: similar functions for psi_stats
|
|
||||||
def _get_arg_names(self, Z=None, partial=None):
|
|
||||||
arg_names = ['target','X']
|
|
||||||
for shared_params in self._sp_theta:
|
|
||||||
arg_names += [shared_params.name]
|
|
||||||
if Z is not None:
|
|
||||||
arg_names += ['Z']
|
|
||||||
if partial is not None:
|
|
||||||
arg_names += ['partial']
|
|
||||||
if self.output_dim>1:
|
|
||||||
arg_names += self._split_theta_names
|
|
||||||
arg_names += ['output_dim']
|
|
||||||
return arg_names
|
|
||||||
|
|
||||||
def _weave_inline(self, code, X, target, Z=None, partial=None):
|
|
||||||
output_dim = self.output_dim
|
|
||||||
for shared_params in self._sp_theta:
|
|
||||||
locals()[shared_params.name] = getattr(self, shared_params.name)
|
|
||||||
|
|
||||||
# Need to extract parameters first
|
|
||||||
for split_params in self._split_theta_names:
|
|
||||||
locals()[split_params] = getattr(self, split_params)
|
|
||||||
arg_names = self._get_arg_names(Z, partial)
|
|
||||||
weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
|
|
||||||
|
|
||||||
def K(self,X,Z,target):
|
|
||||||
if Z is None:
|
|
||||||
self._weave_inline(self._K_code_X, X, target)
|
|
||||||
else:
|
|
||||||
self._weave_inline(self._K_code, X, target, Z)
|
|
||||||
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
self._weave_inline(self._Kdiag_code, X, target)
|
|
||||||
|
|
||||||
def _param_grad_helper(self,partial,X,Z,target):
|
|
||||||
if Z is None:
|
|
||||||
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
|
|
||||||
else:
|
|
||||||
self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,partial,X,target):
|
|
||||||
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
|
|
||||||
|
|
||||||
def gradients_X(self,partial,X,Z,target):
|
|
||||||
if Z is None:
|
|
||||||
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
|
|
||||||
else:
|
|
||||||
self._weave_inline(self._dK_dX_code, X, target, Z, partial)
|
|
||||||
|
|
||||||
    def dKdiag_dX(self,partial,X,target):
        self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
|
|
||||||
|
|
||||||
def compute_psi_stats(self):
|
|
||||||
#define some normal distributions
|
|
||||||
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
|
|
||||||
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
|
|
||||||
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
|
||||||
|
|
||||||
#do some integration!
|
|
||||||
#self._sp_psi0 = ??
|
|
||||||
self._sp_psi1 = self._sp_k
|
|
||||||
for i in range(self.input_dim):
|
|
||||||
print 'performing integrals %i of %i'%(i+1,2*self.input_dim)
|
|
||||||
sys.stdout.flush()
|
|
||||||
self._sp_psi1 *= normals[i]
|
|
||||||
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
|
|
||||||
clear_cache()
|
|
||||||
self._sp_psi1 = self._sp_psi1.simplify()
|
|
||||||
|
|
||||||
#and here's psi2 (eek!)
|
|
||||||
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
|
|
||||||
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
|
|
||||||
for i in range(self.input_dim):
|
|
||||||
print 'performing integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
|
|
||||||
sys.stdout.flush()
|
|
||||||
self._sp_psi2 *= normals[i]
|
|
||||||
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
|
|
||||||
clear_cache()
|
|
||||||
self._sp_psi2 = self._sp_psi2.simplify()
|
|
||||||
|
|
||||||
|
|
||||||
def _set_params(self,param):
|
|
||||||
assert param.size == (self.num_params)
|
|
||||||
for i, shared_params in enumerate(self._sp_theta):
|
|
||||||
setattr(self, shared_params.name, param[i])
|
|
||||||
|
|
||||||
if self.output_dim>1:
|
|
||||||
for i, split_params in enumerate(self._split_theta_names):
|
|
||||||
start = self.num_shared_params + i*self.output_dim
|
|
||||||
end = self.num_shared_params + (i+1)*self.output_dim
|
|
||||||
setattr(self, split_params, param[start:end])
|
|
||||||
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
params = np.zeros(0)
|
|
||||||
for shared_params in self._sp_theta:
|
|
||||||
params = np.hstack((params, getattr(self, shared_params.name)))
|
|
||||||
if self.output_dim>1:
|
|
||||||
for split_params in self._split_theta_names:
|
|
||||||
params = np.hstack((params, getattr(self, split_params).flatten()))
|
|
||||||
return params
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
if self.output_dim>1:
|
|
||||||
return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)]
|
|
||||||
else:
|
|
||||||
return [x.name for x in self._sp_theta]
|
|
||||||
|
|
@ -8,7 +8,7 @@ from ..core import SparseGP
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from ..inference.optimization import SCG
|
from ..inference.optimization import SCG
|
||||||
from ..util import linalg
|
from ..util import linalg
|
||||||
from ..core.parameterization.variational import Normal
|
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
||||||
|
|
||||||
class BayesianGPLVM(SparseGP, GPLVM):
|
class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
"""
|
"""
|
||||||
|
|
@ -29,18 +29,20 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
self.init = init
|
self.init = init
|
||||||
|
|
||||||
if X_variance is None:
|
if X_variance is None:
|
||||||
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
|
X_variance = np.random.uniform(0,.1,X.shape)
|
||||||
|
|
||||||
if Z is None:
|
if Z is None:
|
||||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||||
assert Z.shape[1] == X.shape[1]
|
assert Z.shape[1] == X.shape[1]
|
||||||
|
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(input_dim) # + kern.white(input_dim)
|
kernel = kern.RBF(input_dim) # + kern.white(input_dim)
|
||||||
|
|
||||||
if likelihood is None:
|
if likelihood is None:
|
||||||
likelihood = Gaussian()
|
likelihood = Gaussian()
|
||||||
self.q = Normal(X, X_variance)
|
self.q = NormalPosterior(X, X_variance)
|
||||||
|
self.variational_prior = NormalPrior()
|
||||||
|
|
||||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
|
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
|
||||||
self.add_parameter(self.q, index=0)
|
self.add_parameter(self.q, index=0)
|
||||||
#self.ensure_default_constraints()
|
#self.ensure_default_constraints()
|
||||||
|
|
@ -57,34 +59,15 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
self.init = state.pop()
|
self.init = state.pop()
|
||||||
SparseGP._setstate(self, state)
|
SparseGP._setstate(self, state)
|
||||||
|
|
||||||
def dL_dmuS(self):
|
|
||||||
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
|
|
||||||
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
|
|
||||||
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
|
|
||||||
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
|
|
||||||
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
|
|
||||||
|
|
||||||
return dL_dmu, dL_dS
|
|
||||||
|
|
||||||
def KL_divergence(self):
|
|
||||||
var_mean = np.square(self.X).sum()
|
|
||||||
var_S = np.sum(self.X_variance - np.log(self.X_variance))
|
|
||||||
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
|
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
|
super(BayesianGPLVM, self).parameters_changed()
|
||||||
self._update_gradients_Z(add=False)
|
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q)
|
||||||
|
|
||||||
self._log_marginal_likelihood -= self.KL_divergence()
|
self.kern.update_gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
|
||||||
dL_dmu, dL_dS = self.dL_dmuS()
|
|
||||||
|
|
||||||
# dL:
|
# update for the KL divergence
|
||||||
self.q.mean.gradient = dL_dmu
|
self.variational_prior.update_gradients_KL(self.q)
|
||||||
self.q.variance.gradient = dL_dS
|
|
||||||
|
|
||||||
# dKL:
|
|
||||||
self.q.mean.gradient -= self.X
|
|
||||||
self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
|
|
||||||
|
|
||||||
def plot_latent(self, plot_inducing=True, *args, **kwargs):
|
def plot_latent(self, plot_inducing=True, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
|
@ -157,6 +140,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
"""
|
"""
|
||||||
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map
|
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map
|
||||||
"""
|
"""
|
||||||
|
import sys
|
||||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||||
from ..plotting.matplot_dep import dim_reduction_plots
|
from ..plotting.matplot_dep import dim_reduction_plots
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ class GPRegression(GP):
|
||||||
def __init__(self, X, Y, kernel=None):
|
def __init__(self, X, Y, kernel=None):
|
||||||
|
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(X.shape[1])
|
kernel = kern.RBF(X.shape[1])
|
||||||
|
|
||||||
likelihood = likelihoods.Gaussian()
|
likelihood = likelihoods.Gaussian()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,25 @@ from GPy.util.linalg import PCA
|
||||||
import numpy
|
import numpy
|
||||||
import itertools
|
import itertools
|
||||||
import pylab
|
import pylab
|
||||||
from GPy.kern.kern import kern
|
from GPy.kern import Kern
|
||||||
from GPy.models.bayesian_gplvm import BayesianGPLVM
|
from GPy.models.bayesian_gplvm import BayesianGPLVM
|
||||||
|
|
||||||
|
class MRD2(Model):
|
||||||
|
"""
|
||||||
|
Apply MRD to all given datasets Y in Ylist.
|
||||||
|
|
||||||
|
Y_i in [n x p_i]
|
||||||
|
|
||||||
|
The samples n in the datasets need
|
||||||
|
to match up, whereas the dimensionality p_d can differ.
|
||||||
|
|
||||||
|
:param [array-like] Ylist: List of datasets to apply MRD on
|
||||||
|
:param array-like q_mean: mean of starting latent space q in [n x q]
|
||||||
|
:param array-like q_variance: variance of starting latent space q in [n x q]
|
||||||
|
:param :class:`~GPy.inference.latent_function_inference
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class MRD(Model):
|
class MRD(Model):
|
||||||
"""
|
"""
|
||||||
Do MRD on given Datasets in Ylist.
|
Do MRD on given Datasets in Ylist.
|
||||||
|
|
@ -48,11 +64,11 @@ class MRD(Model):
|
||||||
# sort out the kernels
|
# sort out the kernels
|
||||||
if kernels is None:
|
if kernels is None:
|
||||||
kernels = [None] * len(likelihood_or_Y_list)
|
kernels = [None] * len(likelihood_or_Y_list)
|
||||||
elif isinstance(kernels, kern):
|
elif isinstance(kernels, Kern):
|
||||||
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
|
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
|
||||||
else:
|
else:
|
||||||
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
|
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
|
||||||
assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
|
assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
|
||||||
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
|
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
|
||||||
|
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ... import util
|
|
||||||
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
|
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
|
||||||
from GPy.util.misc import param_to_array
|
from ...util.misc import param_to_array
|
||||||
|
from .base_plots import x_frame2D
|
||||||
import itertools
|
import itertools
|
||||||
import Tango
|
import Tango
|
||||||
from matplotlib.cm import get_cmap
|
from matplotlib.cm import get_cmap
|
||||||
|
|
@ -37,7 +37,7 @@ def plot_latent(model, labels=None, which_indices=None,
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pb.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
util.plot.Tango.reset()
|
Tango.reset()
|
||||||
|
|
||||||
if labels is None:
|
if labels is None:
|
||||||
labels = np.ones(model.num_data)
|
labels = np.ones(model.num_data)
|
||||||
|
|
@ -46,7 +46,7 @@ def plot_latent(model, labels=None, which_indices=None,
|
||||||
X = param_to_array(model.X)
|
X = param_to_array(model.X)
|
||||||
|
|
||||||
# first, plot the output variance as a function of the latent space
|
# first, plot the output variance as a function of the latent space
|
||||||
Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(X[:, [input_1, input_2]], resolution=resolution)
|
Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution)
|
||||||
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
|
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
|
||||||
|
|
||||||
def plot_function(x):
|
def plot_function(x):
|
||||||
|
|
@ -87,7 +87,7 @@ def plot_latent(model, labels=None, which_indices=None,
|
||||||
else:
|
else:
|
||||||
x = X[index, input_1]
|
x = X[index, input_1]
|
||||||
y = X[index, input_2]
|
y = X[index, input_2]
|
||||||
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
|
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
|
||||||
|
|
||||||
ax.set_xlabel('latent dimension %i' % input_1)
|
ax.set_xlabel('latent dimension %i' % input_1)
|
||||||
ax.set_ylabel('latent dimension %i' % input_2)
|
ax.set_ylabel('latent dimension %i' % input_2)
|
||||||
|
|
@ -120,7 +120,7 @@ def plot_magnification(model, labels=None, which_indices=None,
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pb.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
util.plot.Tango.reset()
|
Tango.reset()
|
||||||
|
|
||||||
if labels is None:
|
if labels is None:
|
||||||
labels = np.ones(model.num_data)
|
labels = np.ones(model.num_data)
|
||||||
|
|
@ -128,7 +128,7 @@ def plot_magnification(model, labels=None, which_indices=None,
|
||||||
input_1, input_2 = most_significant_input_dimensions(model, which_indices)
|
input_1, input_2 = most_significant_input_dimensions(model, which_indices)
|
||||||
|
|
||||||
# first, plot the output variance as a function of the latent space
|
# first, plot the output variance as a function of the latent space
|
||||||
Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
|
Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
|
||||||
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
|
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
|
||||||
|
|
||||||
def plot_function(x):
|
def plot_function(x):
|
||||||
|
|
@ -165,7 +165,7 @@ def plot_magnification(model, labels=None, which_indices=None,
|
||||||
else:
|
else:
|
||||||
x = model.X[index, input_1]
|
x = model.X[index, input_1]
|
||||||
y = model.X[index, input_2]
|
y = model.X[index, input_2]
|
||||||
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
|
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
|
||||||
|
|
||||||
ax.set_xlabel('latent dimension %i' % input_1)
|
ax.set_xlabel('latent dimension %i' % input_1)
|
||||||
ax.set_ylabel('latent dimension %i' % input_2)
|
ax.set_ylabel('latent dimension %i' % input_2)
|
||||||
|
|
@ -205,7 +205,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
|
||||||
return dmu_dX[indices, argmax], np.array(labels)[argmax]
|
return dmu_dX[indices, argmax], np.array(labels)[argmax]
|
||||||
|
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pyplot.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
|
|
||||||
if data_labels is None:
|
if data_labels is None:
|
||||||
|
|
@ -241,7 +241,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
|
||||||
ax.legend()
|
ax.legend()
|
||||||
ax.figure.tight_layout()
|
ax.figure.tight_layout()
|
||||||
if updates:
|
if updates:
|
||||||
pyplot.show()
|
pb.show()
|
||||||
clear = raw_input('Enter to continue')
|
clear = raw_input('Enter to continue')
|
||||||
if clear.lower() in 'yes' or clear == '':
|
if clear.lower() in 'yes' or clear == '':
|
||||||
controller.deactivate()
|
controller.deactivate()
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,12 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
import Tango
|
import Tango
|
||||||
from matplotlib.textpath import TextPath
|
from matplotlib.textpath import TextPath
|
||||||
from matplotlib.transforms import offset_copy
|
from matplotlib.transforms import offset_copy
|
||||||
from ...kern.parts.linear import Linear
|
from ...kern import Linear
|
||||||
|
|
||||||
|
|
||||||
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
||||||
|
|
@ -29,7 +28,8 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
||||||
xticklabels = []
|
xticklabels = []
|
||||||
bars = []
|
bars = []
|
||||||
x0 = 0
|
x0 = 0
|
||||||
for p in kernel._parameters_:
|
#for p in kernel._parameters_:
|
||||||
|
p = kernel
|
||||||
c = Tango.nextMedium()
|
c = Tango.nextMedium()
|
||||||
if hasattr(p, 'ARD') and p.ARD:
|
if hasattr(p, 'ARD') and p.ARD:
|
||||||
if title is None:
|
if title is None:
|
||||||
|
|
@ -40,9 +40,9 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
||||||
ard_params = p.variances
|
ard_params = p.variances
|
||||||
else:
|
else:
|
||||||
ard_params = 1. / p.lengthscale
|
ard_params = 1. / p.lengthscale
|
||||||
|
|
||||||
x = np.arange(x0, x0 + len(ard_params))
|
x = np.arange(x0, x0 + len(ard_params))
|
||||||
bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," ")))
|
from ...util.misc import param_to_array
|
||||||
|
bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," ")))
|
||||||
xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
|
xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
|
||||||
x0 += len(ard_params)
|
x0 += len(ard_params)
|
||||||
x = np.arange(x0)
|
x = np.arange(x0)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from ...util.misc import param_to_array
|
||||||
|
|
||||||
|
|
||||||
def plot_fit(model, plot_limits=None, which_data_rows='all',
|
def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
which_data_ycols='all', which_parts='all', fixed_inputs=[],
|
which_data_ycols='all', fixed_inputs=[],
|
||||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||||
plot_raw=False,
|
plot_raw=False,
|
||||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
||||||
|
|
@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
|
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
|
||||||
|
|
||||||
Can plot only part of the data and part of the posterior functions
|
Can plot only part of the data and part of the posterior functions
|
||||||
using which_data_rowsm which_data_ycols and which_parts
|
using which_data_rowsm which_data_ycols.
|
||||||
|
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
||||||
:type plot_limits: np.array
|
:type plot_limits: np.array
|
||||||
|
|
@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
|
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
|
||||||
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
|
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
|
||||||
:type which_data_rows: 'all' or a list of integers
|
:type which_data_rows: 'all' or a list of integers
|
||||||
:param which_parts: which of the kernel functions to plot (additively)
|
|
||||||
:type which_parts: 'all', or list of bools
|
|
||||||
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
||||||
:type fixed_inputs: a list of tuples
|
:type fixed_inputs: a list of tuples
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||||
|
|
@ -59,6 +57,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
fig = pb.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
|
|
||||||
|
X, Y, Z = param_to_array(model.X, model.Y, model.Z)
|
||||||
|
if model.has_uncertain_inputs(): X_variance = param_to_array(model.q.variance)
|
||||||
|
|
||||||
#work out what the inputs are for plotting (1D or 2D)
|
#work out what the inputs are for plotting (1D or 2D)
|
||||||
fixed_dims = np.array([i for i,v in fixed_inputs])
|
fixed_dims = np.array([i for i,v in fixed_inputs])
|
||||||
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
|
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
|
||||||
|
|
@ -68,7 +69,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
|
|
||||||
#define the frame on which to plot
|
#define the frame on which to plot
|
||||||
resolution = resolution or 200
|
resolution = resolution or 200
|
||||||
Xnew, xmin, xmax = x_frame1D(model.X[:,free_dims], plot_limits=plot_limits)
|
Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits)
|
||||||
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
|
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
|
||||||
Xgrid[:,free_dims] = Xnew
|
Xgrid[:,free_dims] = Xnew
|
||||||
for i,v in fixed_inputs:
|
for i,v in fixed_inputs:
|
||||||
|
|
@ -76,30 +77,30 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
|
|
||||||
#make a prediction on the frame and plot it
|
#make a prediction on the frame and plot it
|
||||||
if plot_raw:
|
if plot_raw:
|
||||||
m, v = model._raw_predict(Xgrid, which_parts=which_parts)
|
m, v = model._raw_predict(Xgrid)
|
||||||
lower = m - 2*np.sqrt(v)
|
lower = m - 2*np.sqrt(v)
|
||||||
upper = m + 2*np.sqrt(v)
|
upper = m + 2*np.sqrt(v)
|
||||||
Y = model.Y
|
Y = Y
|
||||||
else:
|
else:
|
||||||
m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts)
|
m, v, lower, upper = model.predict(Xgrid)
|
||||||
Y = model.Y
|
Y = Y
|
||||||
for d in which_data_ycols:
|
for d in which_data_ycols:
|
||||||
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
|
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
|
||||||
ax.plot(model.X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
|
ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
|
||||||
|
|
||||||
#optionally plot some samples
|
#optionally plot some samples
|
||||||
if samples: #NOTE not tested with fixed_inputs
|
if samples: #NOTE not tested with fixed_inputs
|
||||||
Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts)
|
Ysim = model.posterior_samples(Xgrid, samples)
|
||||||
for yi in Ysim.T:
|
for yi in Ysim.T:
|
||||||
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||||
|
|
||||||
|
|
||||||
#add error bars for uncertain (if input uncertainty is being modelled)
|
#add error bars for uncertain (if input uncertainty is being modelled)
|
||||||
if hasattr(model,"has_uncertain_inputs"):
|
#if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
|
||||||
ax.errorbar(model.X[which_data, free_dims], model.likelihood.data[which_data, 0],
|
# ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
|
||||||
xerr=2 * np.sqrt(model.X_variance[which_data, free_dims]),
|
# xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
|
||||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
# ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||||
|
|
||||||
|
|
||||||
#set the limits of the plot to some sensible values
|
#set the limits of the plot to some sensible values
|
||||||
|
|
@ -111,7 +112,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
#add inducing inputs (if a sparse model is used)
|
#add inducing inputs (if a sparse model is used)
|
||||||
if hasattr(model,"Z"):
|
if hasattr(model,"Z"):
|
||||||
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
||||||
Zu = param_to_array(model.Z[:,free_dims])
|
Zu = Z[:,free_dims]
|
||||||
z_height = ax.get_ylim()[0]
|
z_height = ax.get_ylim()[0]
|
||||||
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
|
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
|
||||||
|
|
||||||
|
|
@ -122,7 +123,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
|
|
||||||
#define the frame for plotting on
|
#define the frame for plotting on
|
||||||
resolution = resolution or 50
|
resolution = resolution or 50
|
||||||
Xnew, _, _, xmin, xmax = x_frame2D(model.X[:,free_dims], plot_limits, resolution)
|
Xnew, _, _, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
|
||||||
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
|
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
|
||||||
Xgrid[:,free_dims] = Xnew
|
Xgrid[:,free_dims] = Xnew
|
||||||
for i,v in fixed_inputs:
|
for i,v in fixed_inputs:
|
||||||
|
|
@ -131,15 +132,15 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
|
|
||||||
#predict on the frame and plot
|
#predict on the frame and plot
|
||||||
if plot_raw:
|
if plot_raw:
|
||||||
m, _ = model._raw_predict(Xgrid, which_parts=which_parts)
|
m, _ = model._raw_predict(Xgrid)
|
||||||
Y = model.Y
|
Y = Y
|
||||||
else:
|
else:
|
||||||
m, _, _, _ = model.predict(Xgrid, which_parts=which_parts)
|
m, _, _, _ = model.predict(Xgrid)
|
||||||
Y = model.data
|
Y = Y
|
||||||
for d in which_data_ycols:
|
for d in which_data_ycols:
|
||||||
m_d = m[:,d].reshape(resolution, resolution).T
|
m_d = m[:,d].reshape(resolution, resolution).T
|
||||||
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||||
ax.scatter(model.X[which_data_rows, free_dims[0]], model.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||||
|
|
||||||
#set the limits of the plot to some sensible values
|
#set the limits of the plot to some sensible values
|
||||||
ax.set_xlim(xmin[0], xmax[0])
|
ax.set_xlim(xmin[0], xmax[0])
|
||||||
|
|
@ -151,7 +152,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
#add inducing inputs (if a sparse model is used)
|
#add inducing inputs (if a sparse model is used)
|
||||||
if hasattr(model,"Z"):
|
if hasattr(model,"Z"):
|
||||||
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
||||||
Zu = model.Z[:,free_dims]
|
Zu = Z[:,free_dims]
|
||||||
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
|
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,18 @@ class Test(unittest.TestCase):
|
||||||
self.param_index.remove(one, [1])
|
self.param_index.remove(one, [1])
|
||||||
self.assertListEqual(self.param_index[one].tolist(), [3])
|
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||||
|
|
||||||
|
def test_shift_left(self):
|
||||||
|
self.param_index.shift_left(1, 2)
|
||||||
|
self.assertListEqual(self.param_index[three].tolist(), [2,5])
|
||||||
|
self.assertListEqual(self.param_index[two].tolist(), [0,3])
|
||||||
|
self.assertListEqual(self.param_index[one].tolist(), [1])
|
||||||
|
|
||||||
|
def test_shift_right(self):
|
||||||
|
self.param_index.shift_right(5, 2)
|
||||||
|
self.assertListEqual(self.param_index[three].tolist(), [2,4,9])
|
||||||
|
self.assertListEqual(self.param_index[two].tolist(), [0,7])
|
||||||
|
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||||
|
|
||||||
def test_index_view(self):
|
def test_index_view(self):
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
# 0 1 2 3 4 5 6 7 8 9
|
# 0 1 2 3 4 5 6 7 8 9
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,8 @@ import numpy as np
|
||||||
class Test(unittest.TestCase):
|
class Test(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.rbf = GPy.kern.rbf(1)
|
self.rbf = GPy.kern.RBF(1)
|
||||||
self.white = GPy.kern.white(1)
|
self.white = GPy.kern.White(1)
|
||||||
from GPy.core.parameterization import Param
|
from GPy.core.parameterization import Param
|
||||||
from GPy.core.parameterization.transformations import Logistic
|
from GPy.core.parameterization.transformations import Logistic
|
||||||
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
||||||
|
|
@ -39,14 +39,13 @@ class Test(unittest.TestCase):
|
||||||
|
|
||||||
|
|
||||||
def test_remove_parameter(self):
|
def test_remove_parameter(self):
|
||||||
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__
|
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
|
||||||
self.white.fix()
|
self.white.fix()
|
||||||
self.test1.remove_parameter(self.white)
|
self.test1.remove_parameter(self.white)
|
||||||
self.assertIs(self.test1._fixes_,None)
|
self.assertIs(self.test1._fixes_,None)
|
||||||
|
|
||||||
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
|
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
|
||||||
self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
|
self.assertEquals(self.white.constraints._offset, 0)
|
||||||
self.assertEquals(self.white.white.constraints._offset, 0)
|
|
||||||
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
|
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
|
||||||
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
|
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
|
||||||
|
|
||||||
|
|
@@ -57,18 +56,19 @@ class Test(unittest.TestCase):
         self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0])
         self.assertIs(self.white._fixes_,None)
         self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)

         self.test1.remove_parameter(self.white)
         self.assertIs(self.test1._fixes_,None)
         self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
-        self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
+        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])

     def test_add_parameter_already_in_hirarchy(self):
         self.test1.add_parameter(self.white._parameters_[0])

     def test_default_constraints(self):
-        self.assertIs(self.rbf.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
+        self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
         self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
         from GPy.core.parameterization.transformations import Logexp

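The new assertion above checks that, once the fixed White kernel has been removed, the two remaining parameters (the RBF variance and lengthscale at indices 0 and 1) still carry the default positivity constraint. A small, hedged illustration of that lookup; only the constraints[Logexp()] indexing is taken from the test, the standalone kernel construction here is an assumption:

    # Sketch, assuming the params-branch API shown in this diff.
    from GPy.core.parameterization.transformations import Logexp
    import GPy

    rbf = GPy.kern.RBF(1)                # variance and lengthscale are positively constrained by default
    print rbf.constraints[Logexp()]      # expected to list the indices of both parameters, e.g. [0 1]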
@@ -12,6 +12,7 @@ import decorators
 import classification
 import subarray_and_sorting
 import caching
+import diag

 try:
     import sympy

@@ -1,44 +1,86 @@
-from ..core.parameterization.array_core import ObservableArray, ParamList
+from ..core.parameterization.parameter_core import Observable

 class Cacher(object):
-    def __init__(self, operation, limit=5):
+    def __init__(self, operation, limit=5, reset_on_first=False):
         self.limit = int(limit)
+        self._reset_on_first = reset_on_first
         self.operation=operation
-        self.cached_inputs = ParamList([])
+        self.cached_inputs = []
         self.cached_outputs = []
         self.inputs_changed = []

-    def __call__(self, X):
-        assert isinstance(X, ObservableArray)
-        if X in self.cached_inputs:
-            i = self.cached_inputs.index(X)
+    def __call__(self, *args):
+        if self._reset_on_first:
+            assert isinstance(args[0], Observable)
+            args[0].add_observer(self, self.reset)
+            cached_args = args
+        else:
+            cached_args = args[1:]
+
+        if not all([isinstance(arg, Observable) for arg in cached_args]):
+            return self.operation(*args)
+        if cached_args in self.cached_inputs:
+            i = self.cached_inputs.index(cached_args)
             if self.inputs_changed[i]:
-                self.cached_outputs[i] = self.operation(X)
+                self.cached_outputs[i] = self.operation(*args)
                 self.inputs_changed[i] = False
             return self.cached_outputs[i]
         else:
             if len(self.cached_inputs) == self.limit:
-                X_ = self.cached_inputs.pop(0)
-                X_.remove_observer(self)
+                args_ = self.cached_inputs.pop(0)
+                [a.remove_observer(self, self.on_cache_changed) for a in args_]
                 self.inputs_changed.pop(0)
                 self.cached_outputs.pop(0)

-            self.cached_inputs.append(X)
-            self.cached_outputs.append(self.operation(X))
+            self.cached_inputs.append(cached_args)
+            self.cached_outputs.append(self.operation(*args))
             self.inputs_changed.append(False)
-            X.add_observer(self, self.on_cache_changed)
+            [a.add_observer(self, self.on_cache_changed) for a in args]
             return self.cached_outputs[-1]

-    def on_cache_changed(self, X):
-        #print id(X)
-        Xbase = X
-        while Xbase is not None:
-            try:
-                i = self.cached_inputs.index(X)
-                break
-            except ValueError:
-                Xbase = X.base
-                continue
-        self.inputs_changed[i] = True
+    def on_cache_changed(self, arg):
+        self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
+
+    def reset(self, obj):
+        [[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs]
+        self.cached_inputs = []
+        self.cached_outputs = []
+        self.inputs_changed = []
+
+def cache_this(limit=5, reset_on_self=False):
+    def limited_cache(f):
+        c = Cacher(f, limit, reset_on_first=reset_on_self)
+        def f_wrap(*args):
+            return c(*args)
+        f_wrap._cacher = c
+        return f_wrap
+    return limited_cache
+
+        #Xbase = X
+        #while Xbase is not None:
+            #try:
+                #i = self.cached_inputs.index(X)
+                #break
+            #except ValueError:
+                #Xbase = X.base
+                #continue
+        #self.inputs_changed[i] = True

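The rewrite above generalises the cacher from a single ObservableArray to arbitrary tuples of Observables, adds a cache_this decorator, and adds a reset_on_first mode that wipes the whole cache whenever the first argument (typically self) changes. The stand-alone sketch below mirrors that bookkeeping in miniature to show how the pieces interact; the Observable and Value classes here are hypothetical stand-ins for GPy's observer machinery, not the library's code:

    # Hypothetical stand-ins for GPy's observer interface (add_observer/remove_observer/notify).
    class Observable(object):
        def __init__(self):
            self._observers = []
        def add_observer(self, who, callble):
            self._observers.append((who, callble))
        def remove_observer(self, who, callble):
            self._observers.remove((who, callble))
        def notify(self):
            for _, callble in list(self._observers):
                callble(self)

    class Value(Observable):
        def __init__(self, x):
            Observable.__init__(self)
            self.x = x
        def set(self, x):
            self.x = x
            self.notify()                      # marks every cache entry using this value as stale

    class MiniCacher(object):
        """Stripped-down analogue of the Cacher above: one stored output per argument tuple."""
        def __init__(self, operation, limit=5):
            self.operation, self.limit = operation, limit
            self.inputs, self.outputs, self.stale = [], [], []
        def __call__(self, *args):
            if args in self.inputs:
                i = self.inputs.index(args)
                if self.stale[i]:              # recompute only if an argument has changed
                    self.outputs[i] = self.operation(*args)
                    self.stale[i] = False
                return self.outputs[i]
            if len(self.inputs) == self.limit:  # evict the oldest entry
                for a in self.inputs.pop(0):
                    a.remove_observer(self, self._changed)
                self.outputs.pop(0); self.stale.pop(0)
            self.inputs.append(args)
            self.outputs.append(self.operation(*args))
            self.stale.append(False)
            for a in args:
                a.add_observer(self, self._changed)
            return self.outputs[-1]
        def _changed(self, arg):
            self.stale = [any(a is arg for a in args) or s
                          for args, s in zip(self.inputs, self.stale)]

    square = MiniCacher(lambda v: v.x ** 2)
    v = Value(3.0)
    assert square(v) == 9.0    # computed and cached
    assert square(v) == 9.0    # cache hit, no recomputation
    v.set(4.0)                 # notify -> the cached entry is marked stale
    assert square(v) == 16.0   # recomputed on the next call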
@@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500):
     num_in = 1
     X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in))
     X.sort(axis=0)
-    rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,)))
-    white = GPy.kern.white(num_in, variance=1e-2)
+    rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,)))
+    white = GPy.kern.White(num_in, variance=1e-2)
     kernel = rbf + white
     K = kernel.K(X)
     y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))

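The hunk above only renames the kernel classes (rbf/white become RBF/White), but the surrounding code is a compact recipe for drawing a toy dataset from a GP prior: y ~ N(0, K) with K an RBF covariance plus white noise. The same draw in plain numpy, as an illustrative re-derivation rather than the dataset code itself:

    import numpy as np

    num_samples, num_in = 500, 1
    X = np.random.uniform(low=-1.0, high=1.0, size=(num_samples, num_in))
    X.sort(axis=0)
    # RBF covariance (variance 1, lengthscale 0.25) plus white noise 1e-2 on the diagonal
    r2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
    K = 1.0 * np.exp(-0.5 * r2 / 0.25 ** 2) + 1e-2 * np.eye(num_samples)
    y = np.random.multivariate_normal(np.zeros(num_samples), K).reshape(num_samples, 1)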
@@ -44,6 +44,12 @@ def view(A, offset=0):
     else:
         return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, ))

+def offdiag_view(A, offset=0):
+    from numpy.lib.stride_tricks import as_strided
+    assert A.ndim == 2, "only implemented for 2 dimensions"
+    Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,))
+    return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1]))
+
 def _diag_ufunc(A,b,offset,func):
     dA = view(A, offset); func(dA,b,dA)
     return A

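The new offdiag_view returns a writable strided view that covers exactly the off-diagonal entries of a square C-contiguous array (n^2 - n elements laid out as n-1 rows of n). A small usage check; the function body is adapted verbatim from the hunk above so the snippet runs on its own:

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    def offdiag_view(A, offset=0):
        assert A.ndim == 2, "only implemented for 2 dimensions"
        Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,))
        return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]),
                          strides=(A.strides[0] + A.itemsize, A.strides[1]))

    A = np.arange(9.).reshape(3, 3)
    offdiag_view(A)[:] = 0.                    # zero every off-diagonal entry in place
    assert np.allclose(A, np.diag([0., 4., 8.]))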
@@ -3,8 +3,6 @@

 import numpy as np
-import scipy as sp
-import pylab as plt

 class WarpingFunction(object):
     """

@@ -39,6 +37,7 @@ class WarpingFunction(object):
     def plot(self, psi, xmin, xmax):
         y = np.arange(xmin, xmax, 0.01)
         f_y = self.f(y, psi)
+        from matplotlib import pyplot as plt
         plt.figure()
         plt.plot(y, f_y)
         plt.xlabel('y')