Mirror of https://github.com/SheffieldML/GPy.git

Commit 8c5fe76bb1: "Minor merges."
51 changed files with 1342 additions and 2752 deletions
@@ -11,6 +11,7 @@ from parameterization import ObservableArray
 from .. import likelihoods
 from ..likelihoods.gaussian import Gaussian
 from ..inference.latent_function_inference import exact_gaussian_inference
+from parameterization.variational import VariationalPosterior

 class GP(Model):
     """
@@ -30,7 +31,10 @@ class GP(Model):
         super(GP, self).__init__(name)

         assert X.ndim == 2
-        self.X = ObservableArray(X)
+        if isinstance(X, ObservableArray) or isinstance(X, VariationalPosterior):
+            self.X = X
+        else: self.X = ObservableArray(X)

         self.num_data, self.input_dim = self.X.shape

         assert Y.ndim == 2
@@ -28,7 +28,9 @@ class ObservableArray(np.ndarray, Observable):
     """
     __array_priority__ = -1 # Never give back ObservableArray
     def __new__(cls, input_array):
-        obj = np.atleast_1d(input_array).view(cls)
+        if not isinstance(input_array, ObservableArray):
+            obj = np.atleast_1d(input_array).view(cls)
+        else: obj = input_array
         cls.__name__ = "ObservableArray\n "
         return obj
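Note (background, not part of the patch): the __new__/view idiom above is numpy's standard way to subclass ndarray without copying data; the added isinstance guard simply avoids re-wrapping an array that is already an ObservableArray. A minimal standalone illustration of the pattern:

    import numpy as np

    class Wrapped(np.ndarray):
        def __new__(cls, input_array):
            if not isinstance(input_array, Wrapped):
                # no-copy view cast of the incoming array
                return np.atleast_1d(np.asarray(input_array)).view(cls)
            return input_array  # already wrapped: hand it back unchanged

    a = Wrapped([1., 2.])
    assert isinstance(a, Wrapped)
    assert Wrapped(a) is a  # the guard prevents double wrapping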
@@ -340,6 +340,10 @@ class Parameterizable(Constrainable):
         if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
         return names

+    @property
+    def num_params(self):
+        return len(self._parameters_)
+
     def _add_parameter_name(self, param):
         pname = adjust_name_for_printing(param.name)
         # and makes sure to not delete programmatically added parameters
@@ -3,21 +3,56 @@ Created on 6 Nov 2013

 @author: maxz
 '''

+import numpy as np
 from parameterized import Parameterized
 from param import Param
 from transformations import Logexp

-class Normal(Parameterized):
+class VariationalPrior(object):
+    def KL_divergence(self, variational_posterior):
+        raise NotImplementedError, "override this for variational inference of latent space"
+
+    def update_gradients_KL(self, variational_posterior):
+        """
+        updates the gradients for mean and variance **in place**
+        """
+        raise NotImplementedError, "override this for variational inference of latent space"
+
+class NormalPrior(VariationalPrior):
+    def KL_divergence(self, variational_posterior):
+        var_mean = np.square(variational_posterior.mean).sum()
+        var_S = (variational_posterior.variance - np.log(variational_posterior.variance)).sum()
+        return 0.5 * (var_mean + var_S) - 0.5 * variational_posterior.input_dim * variational_posterior.num_data
+
+    def update_gradients_KL(self, variational_posterior):
+        # dL:
+        variational_posterior.mean.gradient -= variational_posterior.mean
+        variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
+
+class VariationalPosterior(Parameterized):
+    def __init__(self, means=None, variances=None, name=None, **kw):
+        super(VariationalPosterior, self).__init__(name=name, **kw)
+        self.mean = Param("mean", means)
+        self.ndim = self.mean.ndim
+        self.shape = self.mean.shape
+        self.variance = Param("variance", variances, Logexp())
+        self.add_parameters(self.mean, self.variance)
+        self.num_data, self.input_dim = self.mean.shape
+        if self.has_uncertain_inputs():
+            assert self.variance.shape == self.mean.shape, "need one variance per sample and dimension"
+
+    def has_uncertain_inputs(self):
+        return not self.variance is None
+
+class NormalPosterior(VariationalPosterior):
     '''
-    Normal distribution for variational approximations.
+    NormalPosterior distribution for variational approximations.

     holds the means and variances for a factorizing multivariate normal distribution
     '''
-    def __init__(self, means, variances, name='latent space'):
-        Parameterized.__init__(self, name=name)
-        self.mean = Param("mean", means)
-        self.variance = Param('variance', variances, Logexp())
-        self.add_parameters(self.mean, self.variance)
-
     def plot(self, *args):
         """
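Note (background, not part of the patch): NormalPrior.KL_divergence is the KL divergence of a factorizing Gaussian posterior q = N(mean, variance) from a standard normal prior N(0, I), i.e. 0.5 * sum(mean**2 + variance - log(variance) - 1) over all num_data * input_dim entries. A quick numerical check with hypothetical arrays:

    import numpy as np

    mean = np.random.randn(4, 2)            # N=4 points, Q=2 latent dimensions
    variance = np.random.rand(4, 2) + 0.1   # strictly positive variances

    # elementwise KL(N(m, s) || N(0, 1)), summed over all entries
    kl = 0.5 * np.sum(mean**2 + variance - np.log(variance) - 1.)
    # the grouped form used by the new NormalPrior.KL_divergence
    kl_grouped = 0.5 * (np.sum(np.square(mean)) + np.sum(variance - np.log(variance))) - 0.5 * mean.size
    assert np.allclose(kl, kl_grouped)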
@@ -30,8 +65,7 @@ class Normal(Parameterized):
         from ...plotting.matplot_dep import variational_plots
         return variational_plots.plot(self,*args)

-class SpikeAndSlab(Parameterized):
+class SpikeAndSlab(VariationalPosterior):
     '''
     The SpikeAndSlab distribution for variational approximations.
     '''
@@ -39,11 +73,9 @@ class SpikeAndSlab(Parameterized):
        """
        binary_prob : the probability of the distribution on the slab part.
        """
-        Parameterized.__init__(self, name=name)
-        self.mean = Param("mean", means)
-        self.variance = Param('variance', variances, Logexp())
+        super(SpikeAndSlab, self).__init__(means, variances, name)
         self.gamma = Param("binary_prob",binary_prob,)
-        self.add_parameters(self.mean, self.variance, self.gamma)
+        self.add_parameter(self.gamma)

     def plot(self, *args):
         """
@@ -5,8 +5,9 @@ import numpy as np
 from ..util.linalg import mdot
 from gp import GP
 from parameterization.param import Param
-from GPy.inference.latent_function_inference import var_dtc
+from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
+from parameterization.variational import VariationalPosterior

 class SparseGP(GP):
     """
@@ -31,7 +32,7 @@ class SparseGP(GP):

     """

-    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'):
+    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp'):

         #pick a sensible inference method
         if inference_method is None:
|
@ -45,26 +46,20 @@ class SparseGP(GP):
|
||||||
self.Z = Param('inducing inputs', Z)
|
self.Z = Param('inducing inputs', Z)
|
||||||
self.num_inducing = Z.shape[0]
|
self.num_inducing = Z.shape[0]
|
||||||
|
|
||||||
self.X_variance = X_variance
|
|
||||||
if self.has_uncertain_inputs():
|
|
||||||
assert X_variance.shape == X.shape
|
|
||||||
|
|
||||||
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
|
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
|
||||||
|
|
||||||
self.add_parameter(self.Z, index=0)
|
self.add_parameter(self.Z, index=0)
|
||||||
self.parameters_changed()
|
self.parameters_changed()
|
||||||
|
|
||||||
def has_uncertain_inputs(self):
|
def has_uncertain_inputs(self):
|
||||||
return not (self.X_variance is None)
|
return isinstance(self.X, VariationalPosterior)
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
if self.has_uncertain_inputs():
|
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
|
||||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference_latent(self.kern, self.q, self.Z, self.likelihood, self.Y)
|
|
||||||
else:
|
|
||||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
|
|
||||||
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
|
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
|
||||||
if self.has_uncertain_inputs():
|
if isinstance(self.X, VariationalPosterior):
|
||||||
self.kern.update_gradients_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
|
self.kern.update_gradients_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
|
||||||
self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
|
self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
|
||||||
else:
|
else:
|
||||||
self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
|
self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
|
||||||
self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
|
self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
|
||||||
|
|
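Note (background, not part of the patch): the X_variance argument is gone; input uncertainty now travels with X itself, and the model dispatches on the type of X. A standalone sketch of the mechanism with a stand-in class (not GPy's):

    import numpy as np

    class VariationalPosterior(object):       # stand-in for the class added above
        def __init__(self, means, variances):
            self.mean, self.variance = means, variances

    def has_uncertain_inputs(X):
        # the new rule from this hunk: the type of X carries the information
        return isinstance(X, VariationalPosterior)

    X_plain = np.random.randn(10, 2)
    X_uncertain = VariationalPosterior(np.random.randn(10, 2), np.ones((10, 2)))
    assert not has_uncertain_inputs(X_plain)
    assert has_uncertain_inputs(X_uncertain)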
@@ -78,10 +73,11 @@ class SparseGP(GP):
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
             if full_cov:
                 Kxx = self.kern.K(Xnew)
-                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
+                #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
+                var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
             else:
                 Kxx = self.kern.Kdiag(Xnew)
-                var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
+                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
         else:
             Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
             mu = np.dot(Kx, self.Cpsi1V)
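Note (background, not part of the patch): the rewritten variance lines compute the quadratic-form diagonal Kx.T W Kx for a stack of woodbury_inv matrices, one slice per output dimension, via np.atleast_3d broadcasting. A standalone check of the idiom with hypothetical shapes:

    import numpy as np

    M, Nnew, P = 5, 3, 2                  # inducing points, test points, output dims
    W = np.random.randn(M, M, P)          # stacked woodbury_inv, one slice per output
    Kx = np.random.randn(M, Nnew)

    # vectorized form, as in the diff (without the Kxx offset)
    quad = np.sum(np.dot(np.atleast_3d(W).T, Kx) * Kx[None, :, :], 1).T   # (Nnew, P)
    # per-output reference loop
    ref = np.stack([np.sum(Kx * np.dot(W[:, :, p], Kx), 0) for p in range(P)], axis=1)
    assert np.allclose(quad, ref)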
@@ -91,7 +87,7 @@ class SparseGP(GP):
                 Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
                 psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
                 var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
-        return mu, var[:,None]
+        return mu, var

     def _getstate(self):
@@ -89,7 +89,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
     Y = Y - Y.mean(0)
     Y /= Y.std(0)
     # Create the model
-    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q)
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q)
     m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
     m.data_labels = data['Y'][:N].argmax(axis=1)
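Note: this and most of the remaining example hunks track a single mechanical change, the renaming of kernel constructors to CapWords class names, matching the classes exported from _src in the kern/__init__.py hunk near the end of this diff:

    # before: GPy.kern.bias(Q), GPy.kern.white(Q), kern.linear(Q), GPy.kern.coregionalize(2,1)
    # after:  GPy.kern.Bias(Q), GPy.kern.White(Q), kern.Linear(Q), GPy.kern.Coregionalize(2,1)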
@@ -139,7 +139,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
     (1 - var))) + .001
     Z = _np.random.permutation(X)[:num_inducing]

-    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))

     m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
     m.data_colors = c
@@ -159,28 +159,25 @@

 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
     import GPy
-    from GPy.likelihoods import Gaussian
     from matplotlib import pyplot as plt

     _np.random.seed(0)
     data = GPy.util.datasets.oil()

-    kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, 1., _np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
     Y = data['X'][:N]
-    Yn = Gaussian(Y, normalize=True)
-    m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
+    m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
     m.data_labels = data['Y'][:N].argmax(axis=1)
-    m['noise'] = Yn.Y.var() / 100.

     if optimize:
         m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)

     if plot:
-        y = m.likelihood.Y[0, :]
+        y = m.Y[0, :]
         fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
         m.plot_latent(ax=latent_axes)
-        data_show = GPy.util.visualize.vector_show(y)
-        lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
+        data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
+        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
             m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
         raw_input('Press enter to finish')
         plt.close(fig)
@@ -190,8 +187,8 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
     _np.random.seed(1234)

     x = _np.linspace(0, 4 * _np.pi, N)[:, None]
-    s1 = _np.vectorize(lambda x: -_np.sin(x))
-    s2 = _np.vectorize(lambda x: _np.cos(x))
+    s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x)))
+    s2 = _np.vectorize(lambda x: _np.cos(x)**2)
     s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
     sS = _np.vectorize(lambda x: x*_np.sin(x))
@@ -328,7 +325,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
     _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
     likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]

-    k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2))
+    k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2))
     m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw)
     m.ensure_default_constraints()
@@ -355,15 +352,15 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
     m = GPy.models.GPLVM(Yn, Q)

     # optimize
-    m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
+    m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped())

     if optimize: m.optimize('scg', messages=verbose, max_iters=1000)

     if plot:
         ax = m.plot_latent(which_indices=(0, 1))
         y = m.likelihood.Y[0, :]
-        data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
-        GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
+        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')

     return m
@@ -382,8 +379,8 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
     if plot:
         ax = m.plot_latent(which_indices=(0, 1))
         y = m.likelihood.Y[0, :]
-        data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
-        GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
+        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')

     return m
@@ -398,8 +395,8 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru
     Y = data['Y'][range[0]:range[1], :].copy()
     if plot:
         y = Y[0, :]
-        data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
-        GPy.util.visualize.data_play(Y, data_show, frame_rate)
+        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
+        GPy.plotting.matplot_dep.visualize.data_play(Y, data_show, frame_rate)
     return Y

 def stick(kernel=None, optimize=True, verbose=True, plot=True):
@@ -410,12 +407,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
     # optimize
     m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)
-    if plot and GPy.util.visualize.visual_available:
+    if plot and GPy.plotting.matplot_dep.visualize.visual_available:
         plt.clf
         ax = m.plot_latent()
         y = m.likelihood.Y[0, :]
-        data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
-        GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
+        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')

     return m
@@ -429,12 +426,12 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
     mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
     m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)
-    if plot and GPy.util.visualize.visual_available:
+    if plot and GPy.plotting.matplot_dep.visualize.visual_available:
         plt.clf
         ax = m.plot_latent()
         y = m.likelihood.Y[0, :]
-        data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
-        GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
+        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')

     return m
@@ -449,12 +446,12 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
     mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
     m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)
-    if plot and GPy.util.visualize.visual_available:
+    if plot and GPy.plotting.matplot_dep.visualize.visual_available:
         plt.clf
         ax = m.plot_latent()
         y = m.likelihood.Y[0, :]
-        data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
-        GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
+        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')

     return m
@@ -480,7 +477,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):

     data = GPy.util.datasets.osu_run1()
     Q = 6
-    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
     m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
     # optimize
     m.ensure_default_constraints()
@@ -491,8 +488,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
         plt.sca(latent_axes)
         m.plot_latent()
         y = m.likelihood.Y[0, :].copy()
-        data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
-        GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
+        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
+        GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
         raw_input('Press enter to finish')

     return m
@@ -511,8 +508,8 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
     if plot:
         ax = m.plot_latent()
         y = m.likelihood.Y[0, :]
-        data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
-        lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
+        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')
         lvm_visualizer.close()
@@ -16,7 +16,7 @@ def olympic_marathon_men(optimize=True, plot=True):
     m = GPy.models.GPRegression(data['X'], data['Y'])

     # set the lengthscale to be something sensible (defaults to 1)
-    m['rbf_lengthscale'] = 10
+    m.kern.lengthscale = 10.

     if optimize:
         m.optimize('bfgs', max_iters=200)
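Note (background, not part of the patch): the new parameterization framework replaces string-keyed parameter access with attribute access on the parameter tree, which is what this hunk switches to:

    # old interface: parameters addressed by flattened name
    # m['rbf_lengthscale'] = 10
    # new interface: parameters are attributes of their owning objects
    # m.kern.lengthscale = 10.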
@@ -41,11 +41,10 @@ def coregionalization_toy2(optimize=True, plot=True):
     Y = np.vstack((Y1, Y2))

     #build the kernel
-    k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
-    k2 = GPy.kern.coregionalize(2,1)
+    k1 = GPy.kern.RBF(1) + GPy.kern.Bias(1)
+    k2 = GPy.kern.Coregionalize(2,1)
     k = k1**k2
     m = GPy.models.GPRegression(X, Y, kernel=k)
-    m.constrain_fixed('.*rbf_var', 1.)

     if optimize:
         m.optimize('bfgs', max_iters=100)
@@ -86,11 +85,13 @@ def coregionalization_sparse(optimize=True, plot=True):
     """
     #fetch the data from the non sparse examples
     m = coregionalization_toy2(optimize=False, plot=False)
-    X, Y = m.X, m.likelihood.Y
+    X, Y = m.X, m.Y
+
+    k = GPy.kern.RBF(1)**GPy.kern.Coregionalize(2)

     #construct a model
-    m = GPy.models.SparseGPRegression(X,Y)
-    m.constrain_fixed('iip_\d+_1') # don't optimize the inducing input indexes
+    m = GPy.models.SparseGPRegression(X,Y, num_inducing=25, kernel=k)
+    m.Z[:,1].fix() # don't optimize the inducing input indexes

     if optimize:
         m.optimize('bfgs', max_iters=100, messages=1)
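Note (background, not part of the patch): coregionalized models in GPy stack the outputs and append an index column to X so the Coregionalize part of the product kernel can tell which task each row belongs to. A hypothetical data-layout sketch:

    import numpy as np

    X1, X2 = np.random.rand(10, 1), np.random.rand(20, 1)
    X = np.vstack([np.hstack([X1, np.zeros_like(X1)]),   # rows of task 0
                   np.hstack([X2, np.ones_like(X2)])])   # rows of task 1
    # a kernel such as GPy.kern.RBF(1)**GPy.kern.Coregionalize(2) then applies the
    # RBF to column 0 and the low-rank coregionalization matrix to the index column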
@@ -128,7 +129,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
     np.random.randint(0, 4, num_inducing)[:, None]))

     k1 = GPy.kern.RBF(1)
-    k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
+    k2 = GPy.kern.Coregionalize(output_dim=5, rank=5)
     k = k1**k2

     m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
@@ -322,7 +323,7 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
         kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
         kernel = GPy.kern.RBF(X.shape[1], ARD=1)
-    kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
+    kernel += GPy.kern.White(X.shape[1]) + GPy.kern.Bias(X.shape[1])
     m = GPy.models.GPRegression(X, Y, kernel)
     # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
     # m.set_prior('.*lengthscale',len_prior)
@@ -361,7 +362,7 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
         kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
         kernel = GPy.kern.RBF(X.shape[1], ARD=1)
-    #kernel += GPy.kern.bias(X.shape[1])
+    #kernel += GPy.kern.Bias(X.shape[1])
     X_variance = np.ones(X.shape) * 0.5
    m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
     # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
@@ -3,390 +3,91 @@ from scipy import stats
 from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
 from likelihood import likelihood

-class EP(likelihood):
-    def __init__(self,data,noise_model):
-        """
-        Expectation Propagation
-
-        :param data: data to model
-        :type data: numpy array
-        :param noise_model: noise distribution
-        :type noise_model: A GPy noise model
-
-        """
-        self.noise_model = noise_model
-        self.data = data
-        self.num_data, self.output_dim = self.data.shape
-        self.is_heteroscedastic = True
-        self.num_params = 0
-
-        #Initial values - Likelihood approximation parameters:
-        #p(y|f) = t(f|tau_tilde,v_tilde)
-        self.tau_tilde = np.zeros(self.num_data)
-        self.v_tilde = np.zeros(self.num_data)
-
-        #initial values for the GP variables
-        self.Y = np.zeros((self.num_data,1))
-        self.covariance_matrix = np.eye(self.num_data)
-        self.precision = np.ones(self.num_data)[:,None]
-        self.Z = 0
-        self.YYT = None
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.V
-        self.trYYT = 0.
-
-        super(EP, self).__init__()
-
-    def restart(self):
-        self.tau_tilde = np.zeros(self.num_data)
-        self.v_tilde = np.zeros(self.num_data)
-        self.Y = np.zeros((self.num_data,1))
-        self.covariance_matrix = np.eye(self.num_data)
-        self.precision = np.ones(self.num_data)[:,None]
-        self.Z = 0
-        self.YYT = None
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.V
-        self.trYYT = 0.
-
-    def predictive_values(self,mu,var,full_cov,**noise_args):
-        if full_cov:
-            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
-        return self.noise_model.predictive_values(mu,var,**noise_args)
-
-    def log_predictive_density(self, y_test, mu_star, var_star):
-        """
-        Calculation of the log predictive density
-
-        .. math:
-            p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
-
-        :param y_test: test observations (y_{*})
-        :type y_test: (Nx1) array
-        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
-        :type mu_star: (Nx1) array
-        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
-        :type var_star: (Nx1) array
-        """
-        return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
-
-    def _get_params(self):
-        #return np.zeros(0)
-        return self.noise_model._get_params()
-
-    def _get_param_names(self):
-        #return []
-        return self.noise_model._get_param_names()
-
-    def _set_params(self,p):
-        #pass # TODO: the EP likelihood might want to take some parameters...
-        self.noise_model._set_params(p)
-
-    def _gradients(self,partial):
-        #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
-        return self.noise_model._gradients(partial)
-
-    def _compute_GP_variables(self):
-        #Variables to be called from GP
-        mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
-        sigma_sum = 1./self.tau_ + 1./self.tau_tilde
-        mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
-        self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
-        self.Z += 0.5*self.num_data*np.log(2*np.pi)
-
-        self.Y = mu_tilde[:,None]
-        self.YYT = np.dot(self.Y,self.Y.T)
-        self.covariance_matrix = np.diag(1./self.tau_tilde)
-        self.precision = self.tau_tilde[:,None]
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.V
-        self.trYYT = np.trace(self.YYT)
-
-    def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
+class EP(object):
+    def __init__(self, epsilon=1e-6, eta=1., delta=1.):
         """
         The expectation-propagation algorithm.
         For nomenclature see Rasmussen & Williams 2006.

         :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
         :type epsilon: float
-        :param power_ep: Power EP parameters
-        :type power_ep: list of floats
+        :param eta: Power EP thing TODO: Ricardo: what, exactly?
+        :type eta: float64
+        :param delta: Power EP thing TODO: Ricardo: what, exactly?
+        :type delta: float64
         """
-        self.epsilon = epsilon
-        self.eta, self.delta = power_ep
+        self.epsilon, self.eta, self.delta = epsilon, eta, delta
+        self.reset()
+
+    def reset(self):
+        self.old_mutilde, self.old_vtilde = None, None
+
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
+        K = kern.K(X)
+        mu_tilde, tau_tilde = self.expectation_propagation()
+
+    def expectation_propagation(self, K, Y, Y_metadata, likelihood):
+        num_data, data_dim = Y.shape
+        assert data_dim == 1, "This EP method only works for 1D outputs"

         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
         mu = np.zeros(self.num_data)
         Sigma = K.copy()

-        """
-        Initial values - Cavity distribution parameters:
-        q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
-        sigma_ = 1./tau_
-        mu_ = v_/tau_
-        """
-        self.tau_ = np.empty(self.num_data,dtype=float)
-        self.v_ = np.empty(self.num_data,dtype=float)
-
         #Initial values - Marginal moments
-        z = np.empty(self.num_data,dtype=float)
-        self.Z_hat = np.empty(self.num_data,dtype=float)
-        phi = np.empty(self.num_data,dtype=float)
-        mu_hat = np.empty(self.num_data,dtype=float)
-        sigma2_hat = np.empty(self.num_data,dtype=float)
+        Z_hat = np.empty(num_data,dtype=np.float64)
+        mu_hat = np.empty(num_data,dtype=np.float64)
+        sigma2_hat = np.empty(num_data,dtype=np.float64)
+
+        #initial values - Gaussian factors
+        if self.old_mutilde is None:
+            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data, num_data))
+        else:
+            assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
+            mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
+            tau_tilde = v_tilde/mu_tilde
+        # previous site parameters, needed by the convergence check below
+        tau_tilde_old, v_tilde_old = tau_tilde.copy(), v_tilde.copy()

         #Approximation
         epsilon_np1 = self.epsilon + 1.
         epsilon_np2 = self.epsilon + 1.
-        self.iterations = 0
-        self.np1 = [self.tau_tilde.copy()]
-        self.np2 = [self.v_tilde.copy()]
-        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.num_data)
+        iterations = 0
+        while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon):
+            update_order = np.random.permutation(num_data)
             for i in update_order:
                 #Cavity distribution parameters
-                self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
+                tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
+                v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
+                Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i]))
                 #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
-                self.tau_tilde[i] += Delta_tau
-                self.v_tilde[i] += Delta_v
+                delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
+                delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
+                tau_tilde[i] += delta_tau
+                v_tilde[i] += delta_v
                 #Posterior distribution parameters update
-                DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
-                mu = np.dot(Sigma,self.v_tilde)
-                self.iterations += 1
-            #Sigma recomptutation with Cholesky decompositon
-            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
-            B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
+                DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
+                mu = np.dot(Sigma, v_tilde)
+                iterations += 1
+            #(re)compute Sigma and mu using full Cholesky decomposition
+            tau_tilde_root = np.sqrt(tau_tilde)
+            Sroot_tilde_K = tau_tilde_root[:,None] * K
+            B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
             L = jitchol(B)
-            V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
+            V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
             Sigma = K - np.dot(V.T,V)
-            mu = np.dot(Sigma,self.v_tilde)
-            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
-            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
-            self.np1.append(self.tau_tilde.copy())
-            self.np2.append(self.v_tilde.copy())
-
-        return self._compute_GP_variables()
-
-    def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]):
-        """
-        The expectation-propagation algorithm with sparse pseudo-input.
-        For nomenclature see ... 2013.
-
-        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-        :type epsilon: float
-        :param power_ep: Power EP parameters
-        :type power_ep: list of floats
-
-        """
-        self.epsilon = epsilon
-        self.eta, self.delta = power_ep
-
-        num_inducing = Kmm.shape[0]
-
-        #TODO: this doesn't work with uncertain inputs!
-
-        """
-        Prior approximation parameters:
-        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
-        Sigma0 = Qnn = Knm*Kmmi*Kmn
-        """
-        KmnKnm = np.dot(Kmn,Kmn.T)
-        Lm = jitchol(Kmm)
-        Lmi = chol_inv(Lm)
-        Kmmi = np.dot(Lmi.T,Lmi)
-        KmmiKmn = np.dot(Kmmi,Kmn)
-        Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
-        LLT0 = Kmm.copy()
-
-        #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
-        #KmnKnm = np.dot(Kmn, Kmn.T)
-        #KmmiKmn = np.dot(Kmmi,Kmn)
-        #Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
-        #LLT0 = Kmm.copy()
-
-        """
-        Posterior approximation: q(f|y) = N(f| mu, Sigma)
-        Sigma = Diag + P*R.T*R*P.T + K
-        mu = w + P*Gamma
-        """
-        mu = np.zeros(self.num_data)
-        LLT = Kmm.copy()
-        Sigma_diag = Qnn_diag.copy()
-
-        """
-        Initial values - Cavity distribution parameters:
-        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
-        sigma_ = 1./tau_
-        mu_ = v_/tau_
-        """
-        self.tau_ = np.empty(self.num_data,dtype=float)
-        self.v_ = np.empty(self.num_data,dtype=float)
-
-        #Initial values - Marginal moments
-        z = np.empty(self.num_data,dtype=float)
-        self.Z_hat = np.empty(self.num_data,dtype=float)
-        phi = np.empty(self.num_data,dtype=float)
-        mu_hat = np.empty(self.num_data,dtype=float)
-        sigma2_hat = np.empty(self.num_data,dtype=float)
-
-        #Approximation
-        epsilon_np1 = 1
-        epsilon_np2 = 1
-        self.iterations = 0
-        np1 = [self.tau_tilde.copy()]
-        np2 = [self.v_tilde.copy()]
-        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.num_data)
-            for i in update_order:
-                #Cavity distribution parameters
-                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
-                #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
-                #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                self.tau_tilde[i] += Delta_tau
-                self.v_tilde[i] += Delta_v
-                #Posterior distribution parameters update
-                DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
-                L = jitchol(LLT)
-                #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
-                V,info = dtrtrs(L,Kmn,lower=1)
-                Sigma_diag = np.sum(V*V,-2)
-                si = np.sum(V.T*V[:,i],-1)
-                mu += (Delta_v-Delta_tau*mu[i])*si
-                self.iterations += 1
-            #Sigma recomputation with Cholesky decompositon
-            LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
-            L = jitchol(LLT)
-            V,info = dtrtrs(L,Kmn,lower=1)
-            V2,info = dtrtrs(L.T,V,lower=0)
-            Sigma_diag = np.sum(V*V,-2)
-            Knmv_tilde = np.dot(Kmn,self.v_tilde)
-            mu = np.dot(V2.T,Knmv_tilde)
-            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
-            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
-            np1.append(self.tau_tilde.copy())
-            np2.append(self.v_tilde.copy())
-
-        self._compute_GP_variables()
-
-    def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
-        """
-        The expectation-propagation algorithm with sparse pseudo-input.
-        For nomenclature see Naish-Guzman and Holden, 2008.
-
-        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-        :type epsilon: float
-        :param power_ep: Power EP parameters
-        :type power_ep: list of floats
-        """
-        self.epsilon = epsilon
-        self.eta, self.delta = power_ep
-
-        num_inducing = Kmm.shape[0]
-
-        """
-        Prior approximation parameters:
-        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
-        Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
-        """
-        Lm = jitchol(Kmm)
-        Lmi = chol_inv(Lm)
-        Kmmi = np.dot(Lmi.T,Lmi)
-        P0 = Kmn.T
-        KmnKnm = np.dot(P0.T, P0)
-        KmmiKmn = np.dot(Kmmi,P0.T)
-        Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
-        Diag0 = Knn_diag - Qnn_diag
-        R0 = jitchol(Kmmi).T
-
-        """
-        Posterior approximation: q(f|y) = N(f| mu, Sigma)
-        Sigma = Diag + P*R.T*R*P.T + K
-        mu = w + P*Gamma
-        """
-        self.w = np.zeros(self.num_data)
-        self.Gamma = np.zeros(num_inducing)
-        mu = np.zeros(self.num_data)
-        P = P0.copy()
-        R = R0.copy()
-        Diag = Diag0.copy()
-        Sigma_diag = Knn_diag
-        RPT0 = np.dot(R0,P0.T)
-
-        """
-        Initial values - Cavity distribution parameters:
-        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
-        sigma_ = 1./tau_
-        mu_ = v_/tau_
-        """
-        self.tau_ = np.empty(self.num_data,dtype=float)
-        self.v_ = np.empty(self.num_data,dtype=float)
-
-        #Initial values - Marginal moments
-        z = np.empty(self.num_data,dtype=float)
-        self.Z_hat = np.empty(self.num_data,dtype=float)
-        phi = np.empty(self.num_data,dtype=float)
-        mu_hat = np.empty(self.num_data,dtype=float)
-        sigma2_hat = np.empty(self.num_data,dtype=float)
-
-        #Approximation
-        epsilon_np1 = 1
-        epsilon_np2 = 1
-        self.iterations = 0
-        self.np1 = [self.tau_tilde.copy()]
-        self.np2 = [self.v_tilde.copy()]
-        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.num_data)
-            for i in update_order:
-                #Cavity distribution parameters
-                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
-                #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
-                #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                self.tau_tilde[i] += Delta_tau
-                self.v_tilde[i] += Delta_v
-                #Posterior distribution parameters update
-                dtd1 = Delta_tau*Diag[i] + 1.
-                dii = Diag[i]
-                Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
-                pi_ = P[i,:].reshape(1,num_inducing)
-                P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
-                Rp_i = np.dot(R,pi_.T)
-                RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
-                R = jitchol(RTR).T
-                self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
-                self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
-                RPT = np.dot(R,P.T)
-                Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
-                mu = self.w + np.dot(P,self.Gamma)
-                self.iterations += 1
-            #Sigma recomptutation with Cholesky decompositon
-            Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
-            Diag = Diag0 * Iplus_Dprod_i
-            P = Iplus_Dprod_i[:,None] * P0
-            safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
-            L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
-            R,info = dtrtrs(L,R0,lower=1)
-            RPT = np.dot(R,P.T)
-            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
-            self.w = Diag * self.v_tilde
-            self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
-            mu = self.w + np.dot(P,self.Gamma)
-            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
-            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
-            self.np1.append(self.tau_tilde.copy())
-            self.np2.append(self.v_tilde.copy())
-
-        return self._compute_GP_variables()
+            mu = np.dot(Sigma,v_tilde)
+
+            #monitor convergence
+            epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old))
+            epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old))
+            tau_tilde_old = tau_tilde.copy()
+            v_tilde_old = v_tilde.copy()
+
+        return mu, Sigma, mu_tilde, tau_tilde
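Note (background, answering the two TODOs in the new docstring as I understand them): in Power EP with power eta and damping delta, the cavity for site i removes a fraction eta of that site, tau_cav = 1/Sigma[i,i] - eta*tau_tilde[i], and the site parameters step a fraction delta/eta toward the matched moments, tau_tilde[i] += delta/eta * (1/sigma2_hat[i] - 1/Sigma[i,i]); eta = delta = 1 recovers standard EP, which is exactly the loop implemented above.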
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify, jitchol, dtrtri
+from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol

 class Posterior(object):
     """
@@ -83,14 +83,15 @@ class Posterior(object):
             #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
             self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
             #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
-        return self._covariance
+        return self._covariance.squeeze()

     @property
     def precision(self):
         if self._precision is None:
-            self._precision = np.zeros(np.atleast_3d(self.covariance).shape) # if one covariance per dimension
-            for p in xrange(self.covariance.shape[-1]):
-                self._precision[:,:,p] = pdinv(self.covariance[:,:,p])[0]
+            cov = np.atleast_3d(self.covariance)
+            self._precision = np.zeros(cov.shape) # if one covariance per dimension
+            for p in xrange(cov.shape[-1]):
+                self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
         return self._precision

     @property
@@ -98,7 +99,10 @@ class Posterior(object):
         if self._woodbury_chol is None:
             #compute woodbury chol from
             if self._woodbury_inv is not None:
-                _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
+                winv = np.atleast_3d(self._woodbury_inv)
+                self._woodbury_chol = np.zeros(winv.shape)
+                for p in xrange(winv.shape[-1]):
+                    self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
                 #Li = jitchol(self._woodbury_inv)
                 #self._woodbury_chol, _ = dtrtri(Li)
                 #W, _, _, _, = pdinv(self._woodbury_inv)
@@ -132,7 +136,7 @@ class Posterior(object):
     @property
     def K_chol(self):
         if self._K_chol is None:
-            self._K_chol = dportf(self._K)
+            self._K_chol = jitchol(self._K)
         return self._K_chol
@@ -3,6 +3,7 @@

 from posterior import Posterior
 from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
+from ...core.parameterization.variational import VariationalPosterior
 import numpy as np
 from ...util.misc import param_to_array
 log_2_pi = np.log(2*np.pi)
@@ -42,19 +43,17 @@ class VarDTC(object):
     def get_VVTfactor(self, Y, prec):
         return Y * prec # TODO chache this, and make it effective

-    def inference(self, kern, X, X_variance, Z, likelihood, Y):
-        """Inference for normal sparseGP"""
-        uncertain_inputs = False
-        psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
-        return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
-
-    def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
-        """Inference for GPLVM with uncertain inputs"""
-        uncertain_inputs = True
-        psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
-        return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
-
-    def _inference(self, kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs):
+    def inference(self, kern, X, Z, likelihood, Y):
+        if isinstance(X, VariationalPosterior):
+            uncertain_inputs = True
+            psi0 = kern.psi0(Z, X)
+            psi1 = kern.psi1(Z, X)
+            psi2 = kern.psi2(Z, X)
+        else:
+            uncertain_inputs = False
+            psi0 = kern.Kdiag(X)
+            psi1 = kern.K(X, Z)
+            psi2 = None

         #see whether we're using variational uncertain inputs
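Note (background, not part of the patch): the psi quantities are kernel expectations under the variational input distribution q(X): psi0 = E_q[Kdiag(X)], psi1 = E_q[K(X, Z)], psi2 = E_q[K(Z, X) K(X, Z)]. With certain inputs they collapse to the exact Kdiag(X) and K(X, Z), and psi2 is passed as None (the downstream code can form the needed products from psi1 directly).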
@@ -202,19 +201,19 @@ class VarDTCMissingData(object):
             self._subarray_indices = [[slice(None),slice(None)]]
         return [Y], [(Y**2).sum()]

-    def inference(self, kern, X, X_variance, Z, likelihood, Y):
-        """Inference for normal sparseGP"""
-        uncertain_inputs = False
-        psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
-        return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
-
-    def inference_latent(self, kern, posterior_variational, Z, likelihood, Y):
-        """Inference for GPLVM with uncertain inputs"""
-        uncertain_inputs = True
-        psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z)
-        return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs)
-
-    def _inference(self, kern, psi0_all, psi1_all, psi2_all, Z, likelihood, Y, uncertain_inputs):
+    def inference(self, kern, X, Z, likelihood, Y):
+        if isinstance(X, VariationalPosterior):
+            uncertain_inputs = True
+            psi0 = kern.psi0(Z, X)
+            psi1 = kern.psi1(Z, X)
+            psi2 = kern.psi2(Z, X)
+        else:
+            uncertain_inputs = False
+            psi0 = kern.Kdiag(X)
+            psi1 = kern.K(X, Z)
+            psi2 = None
+
         Ys, traces = self._Y(Y)
         beta_all = 1./likelihood.variance
         het_noise = beta_all.size != 1
@@ -357,19 +356,6 @@ class VarDTCMissingData(object):
return post, log_marginal, grad_dict

- def _compute_psi(kern, X, X_variance, Z):
- psi0 = kern.Kdiag(X)
- psi1 = kern.K(X, Z)
- psi2 = None
- return psi0, psi1, psi2
-
- def _compute_psi_latent(kern, posterior_variational, Z):
- psi0 = kern.psi0(Z, posterior_variational)
- psi1 = kern.psi1(Z, posterior_variational)
- psi2 = kern.psi2(Z, posterior_variational)
- return psi0, psi1, psi2
-
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)

@@ -1,12 +1,11 @@
- from _src.rbf import RBF
- from _src.white import White
from _src.kern import Kern
+ from _src.rbf import RBF
from _src.linear import Linear
+ from _src.static import Bias, White
from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
from _src.sympykern import Sympykern
#from _src.kern import kern_test
- from _src.bias import Bias
#import coregionalize
#import exponential
#import eq_ode1

@@ -34,3 +33,10 @@ from _src.bias import Bias
#import spline
#import symmetric
#import white
+ =======
+ from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
+ from _src.mlp import MLP
+ from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
+ from _src.independent_outputs import IndependentOutputs, Hierarchical
+ from _src.coregionalize import Coregionalize
+ >>>>>>> da4686dd3c8db8639b0c3c6e30609d0b3fa59130

@@ -45,9 +45,6 @@ class Add(Kern):
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

- def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
- [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.

@@ -83,7 +80,7 @@ class Add(Kern):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
- #from bias import Bias
+ from bias import Bias
from linear import Linear
#ffrom fixed import Fixed

@@ -131,11 +128,11 @@ class Add(Kern):

def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
- from white import white
+ from white import White
- from rbf import rbf
+ from rbf import RBF
#from rbf_inv import rbfinv
- #from bias import bias
+ from bias import Bias
- from linear import linear
+ from linear import Linear
#ffrom fixed import fixed

target = np.zeros(Z.shape)

@@ -146,15 +143,15 @@ class Add(Kern):
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
- if isinstance(p2, white):
+ if isinstance(p2, White):
continue
- elif isinstance(p2, bias):
+ elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
- eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
+ eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.

- target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
+ target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
return target

def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):

@@ -195,6 +192,12 @@ class Add(Kern):
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)

+ def input_sensitivity(self):
+ in_sen = np.zeros((self.input_dim, self.num_params))
+ for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
+ in_sen[i_s, i] = p.input_sensitivity()
+ return in_sen
+
def _getstate(self):
"""
Get the current state of the class,

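For intuition about the input_sensitivity added above: column i of the result holds part i's sensitivities, written only into the input dimensions that part's slice covers. A hypothetical standalone rendering of the same scatter:

import numpy as np

def add_input_sensitivity(part_sensitivities, input_slices, input_dim):
    # Mirrors Add.input_sensitivity: one column per summand kernel.
    in_sen = np.zeros((input_dim, len(part_sensitivities)))
    for i, (sens, i_s) in enumerate(zip(part_sensitivities, input_slices)):
        in_sen[i_s, i] = sens
    return in_sen

# e.g. two 1-D parts acting on separate columns of a 2-D input:
print(add_input_sensitivity([np.ones(1), 2 * np.ones(1)],
                            [slice(0, 1), slice(1, 2)], 2))
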
@@ -1,568 +0,0 @@
- # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
- # Licensed under the BSD 3-clause license (see LICENSE.txt)
-
- import numpy as np
- from kern import kern
- import parts
-
- def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False,name='inverse rbf'):
- """
- Construct an RBF kernel
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param ARD: Auto Relevance Determination (one lengthscale per dimension)
- :type ARD: Boolean
-
- """
- part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD,name=name)
- return kern(input_dim, [part])
-
- def rbf(input_dim,variance=1., lengthscale=None,ARD=False, name='rbf'):
- """
- Construct an RBF kernel
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param ARD: Auto Relevance Determination (one lengthscale per dimension)
- :type ARD: Boolean
-
- """
- part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD, name=name)
- return kern(input_dim, [part])
-
- def linear(input_dim,variances=None,ARD=False,name='linear'):
- """
- Construct a linear kernel.
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variances:
- :type variances: np.ndarray
- :param ARD: Auto Relevance Determination (one lengthscale per dimension)
- :type ARD: Boolean
-
- """
- part = parts.linear.Linear(input_dim,variances,ARD,name=name)
- return kern(input_dim, [part])
-
- def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False):
- """
- Construct an MLP kernel
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param weight_scale: the lengthscale of the kernel
- :type weight_scale: vector of weight variances for input weights in neural network (length 1 if kernel is isotropic)
- :param bias_variance: the variance of the biases in the neural network.
- :type bias_variance: float
- :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
- :type ARD: Boolean
-
- """
- part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
- return kern(input_dim, [part])
-
- def gibbs(input_dim,variance=1., mapping=None):
- """
-
- Gibbs and MacKay non-stationary covariance function.
-
- .. math::
-
-    r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}
-
-    k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
-
-    Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
-
- Where :math:`l(x)` is a function giving the length scale as a function of space.
-
- This is the non stationary kernel proposed by Mark Gibbs in his 1997
- thesis. It is similar to an RBF but has a length scale that varies
- with input location. This leads to an additional term in front of
- the kernel.
-
- The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
-
- :param input_dim: the number of input dimensions
- :type input_dim: int
- :param variance: the variance :math:`\\sigma^2`
- :type variance: float
- :param mapping: the mapping that gives the lengthscale across the input space.
- :type mapping: GPy.core.Mapping
- :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
- :type ARD: Boolean
- :rtype: Kernpart object
-
- """
- part = parts.gibbs.Gibbs(input_dim,variance,mapping)
- return kern(input_dim, [part])
-
- def hetero(input_dim, mapping=None, transform=None):
- """
- """
- part = parts.hetero.Hetero(input_dim,mapping,transform)
- return kern(input_dim, [part])
-
- def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
- """
- Construct a polynomial kernel
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param weight_scale: the lengthscale of the kernel
- :type weight_scale: vector of weight variances for input weights.
- :param bias_variance: the variance of the biases.
- :type bias_variance: float
- :param degree: the degree of the polynomial
- :type degree: int
- :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
- :type ARD: Boolean
-
- """
- part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
- return kern(input_dim, [part])
-
- def white(input_dim,variance=1.,name='white'):
- """
- Construct a white kernel.
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
-
- """
- part = parts.white.White(input_dim,variance,name=name)
- return kern(input_dim, [part])
-
- def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None):
- """Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
-
- This outputs of this kernel have the form
- .. math::
-    \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
-
- where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
-
- :param output_dim: number of outputs driven by latent function.
- :type output_dim: int
- :param W: sensitivities of each output to the latent driving function.
- :type W: ndarray (output_dim x rank).
- :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
- :type rank: int
- :param decay: decay rates for the first order system.
- :type decay: array of length output_dim.
- :param delay: delay between latent force and output response.
- :type delay: array of length output_dim.
- :param kappa: diagonal term that allows each latent output to have an independent component to the response.
- :type kappa: array of length output_dim.
-
- .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
- """
- part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
- return kern(2, [part])
-
-
- def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
- """
- Construct an exponential kernel
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param ARD: Auto Relevance Determination (one lengthscale per dimension)
- :type ARD: Boolean
-
- """
- part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
- return kern(input_dim, [part])
-
- def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
- """
- Construct a Matern 3/2 kernel.
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param ARD: Auto Relevance Determination (one lengthscale per dimension)
- :type ARD: Boolean
-
- """
- part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
- return kern(input_dim, [part])
-
- def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
- """
- Construct a Matern 5/2 kernel.
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param ARD: Auto Relevance Determination (one lengthscale per dimension)
- :type ARD: Boolean
-
- """
- part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
- return kern(input_dim, [part])
-
- def bias(input_dim, variance=1., name='bias'):
- """
- Construct a bias kernel.
-
- :param input_dim: dimensionality of the kernel, obligatory
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
-
- """
- part = parts.bias.Bias(input_dim, variance, name=name)
- return kern(input_dim, [part])
-
- def finite_dimensional(input_dim, F, G, variances=1., weights=None):
- """
- Construct a finite dimensional kernel.
-
- :param input_dim: the number of input dimensions
- :type input_dim: int
- :param F: np.array of functions with shape (n,) - the n basis functions
- :type F: np.array
- :param G: np.array with shape (n,n) - the Gram matrix associated to F
- :type G: np.array
- :param variances: np.ndarray with shape (n,)
- :type: np.ndarray
- """
- part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
- return kern(input_dim, [part])
-
- def spline(input_dim, variance=1.):
- """
- Construct a spline kernel.
-
- :param input_dim: Dimensionality of the kernel
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
-
- """
- part = parts.spline.Spline(input_dim, variance)
- return kern(input_dim, [part])
-
- def Brownian(input_dim, variance=1.):
- """
- Construct a Brownian motion kernel.
-
- :param input_dim: Dimensionality of the kernel
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
-
- """
- part = parts.Brownian.Brownian(input_dim, variance)
- return kern(input_dim, [part])
-
- try:
- import sympy as sp
- sympy_available = True
- except ImportError:
- sympy_available = False
-
- if sympy_available:
- from parts.sympykern import spkern
- from sympy.parsing.sympy_parser import parse_expr
-
- def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
- """
- Radial Basis Function covariance.
- """
- X = sp.symbols('x_:' + str(input_dim))
- Z = sp.symbols('z_:' + str(input_dim))
- variance = sp.var('variance',positive=True)
- if ARD:
- lengthscales = sp.symbols('lengthscale_:' + str(input_dim))
- dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale%i**2' % (i, i, i) for i in range(input_dim)])
- dist = parse_expr(dist_string)
- f =  variance*sp.exp(-dist/2.)
- else:
- lengthscale = sp.var('lengthscale',positive=True)
- dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
- dist = parse_expr(dist_string)
- f =  variance*sp.exp(-dist/(2*lengthscale**2))
- return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
-
- def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.):
- """
- Exponentiated quadratic with multiple outputs.
- """
- real_input_dim = input_dim
- if output_dim>1:
- real_input_dim -= 1
- X = sp.symbols('x_:' + str(real_input_dim))
- Z = sp.symbols('z_:' + str(real_input_dim))
- scale = sp.var('scale_i scale_j',positive=True)
- if ARD:
- lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)]
- shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)]
- dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)])
- dist = parse_expr(dist_string)
- f =  variance*sp.exp(-dist/2.)
- else:
- lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True)
- shared_lengthscale = sp.var('shared_lengthscale',positive=True)
- dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)])
- dist = parse_expr(dist_string)
- f =  scale_i*scale_j*sp.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
- return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
-
- def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
- """
- A base kernel object, where all the hard work in done by sympy.
-
- :param k: the covariance function
- :type k: a positive definite sympy function of x1, z1, x2, z2...
-
- To construct a new sympy kernel, you'll need to define:
- - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- - that's it! we'll extract the variables from the function k.
-
- Note:
- - to handle multiple inputs, call them x1, z1, etc
- - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
- """
- return kern(input_dim, [spkern(input_dim, k=k, output_dim=output_dim, name=name, param=param)])
- del sympy_available
-
- def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
- """
- Construct an periodic exponential kernel
-
- :param input_dim: dimensionality, only defined for input_dim=1
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param period: the period
- :type period: float
- :param n_freq: the number of frequencies considered for the periodic subspace
- :type n_freq: int
-
- """
- part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
- return kern(input_dim, [part])
-
- def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
- """
- Construct a periodic Matern 3/2 kernel.
-
- :param input_dim: dimensionality, only defined for input_dim=1
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param period: the period
- :type period: float
- :param n_freq: the number of frequencies considered for the periodic subspace
- :type n_freq: int
-
- """
- part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
- return kern(input_dim, [part])
-
- def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
- """
- Construct a periodic Matern 5/2 kernel.
-
- :param input_dim: dimensionality, only defined for input_dim=1
- :type input_dim: int
- :param variance: the variance of the kernel
- :type variance: float
- :param lengthscale: the lengthscale of the kernel
- :type lengthscale: float
- :param period: the period
- :type period: float
- :param n_freq: the number of frequencies considered for the periodic subspace
- :type n_freq: int
-
- """
- part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
- return kern(input_dim, [part])
-
- def prod(k1,k2,tensor=False):
- """
- Construct a product kernel over input_dim from two kernels over input_dim
-
- :param k1, k2: the kernels to multiply
- :type k1, k2: kernpart
- :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
- :type tensor: Boolean
- :rtype: kernel object
-
- """
- part = parts.prod.Prod(k1, k2, tensor)
- return kern(part.input_dim, [part])
-
- def symmetric(k):
- """
- Construct a symmetric kernel from an existing kernel
- """
- k_ = k.copy()
- k_.parts = [symmetric.Symmetric(p) for p in k.parts]
- return k_
-
- def coregionalize(output_dim,rank=1, W=None, kappa=None):
- """
- Coregionlization matrix B, of the form:
-
- .. math::
-    \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
-
- An intrinsic/linear coregionalization kernel of the form:
-
- .. math::
-    k_2(x, y)=\mathbf{B} k(x, y)
-
- it is obtainded as the tensor product between a kernel k(x,y) and B.
-
- :param output_dim: the number of outputs to corregionalize
- :type output_dim: int
- :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
- :type rank: int
- :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
- :type W: numpy array of dimensionality (num_outpus, rank)
- :param kappa: a vector which allows the outputs to behave independently
- :type kappa: numpy array of dimensionality (output_dim,)
- :rtype: kernel object
-
- """
- p = parts.coregionalize.Coregionalize(output_dim,rank,W,kappa)
- return kern(1,[p])
-
-
- def rational_quadratic(input_dim, variance=1., lengthscale=1., power=1.):
- """
- Construct rational quadratic kernel.
-
- :param input_dim: the number of input dimensions
- :type input_dim: int (input_dim=1 is the only value currently supported)
- :param variance: the variance :math:`\sigma^2`
- :type variance: float
- :param lengthscale: the lengthscale :math:`\ell`
- :type lengthscale: float
- :rtype: kern object
-
- """
- part = parts.rational_quadratic.RationalQuadratic(input_dim, variance, lengthscale, power)
- return kern(input_dim, [part])
-
- def fixed(input_dim, K, variance=1.):
- """
- Construct a Fixed effect kernel.
-
- :param input_dim: the number of input dimensions
- :type input_dim: int (input_dim=1 is the only value currently supported)
- :param K: the variance :math:`\sigma^2`
- :type K: np.array
- :param variance: kernel variance
- :type variance: float
- :rtype: kern object
- """
- part = parts.fixed.Fixed(input_dim, K, variance)
- return kern(input_dim, [part])
-
- def rbfcos(input_dim, variance=1., frequencies=None, bandwidths=None, ARD=False):
- """
- construct a rbfcos kernel
- """
- part = parts.rbfcos.RBFCos(input_dim, variance, frequencies, bandwidths, ARD)
- return kern(input_dim, [part])
-
- def independent_outputs(k):
- """
- Construct a kernel with independent outputs from an existing kernel
- """
- for sl in k.input_slices:
- assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
- _parts = [parts.independent_outputs.IndependentOutputs(p) for p in k.parts]
- return kern(k.input_dim+1,_parts)
-
- def hierarchical(k):
- """
- TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
- """
- # for sl in k.input_slices:
- # assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
- _parts = [parts.hierarchical.Hierarchical(k.parts)]
- return kern(k.input_dim+len(k.parts),_parts)
-
- def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None):
- """
- Builds a kernel of a linear coregionalization model
-
- :input_dim: Input dimensionality
- :output_dim: Number of outputs
- :kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different corregionalization matrix
- :type kernel_list: list of GPy kernels
- :param rank: number tuples of the corregionalization parameters 'coregion_W'
- :type rank: integer
-
- ..note the kernels dimensionality is overwritten to fit input_dim
-
- """
-
- for k in kernel_list:
- if k.input_dim <> input_dim:
- k.input_dim = input_dim
- warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
-
- k_coreg = coregionalize(output_dim,rank,W,kappa)
- kernel = kernel_list[0]**k_coreg.copy()
-
- for k in kernel_list[1:]:
- k_coreg = coregionalize(output_dim,rank,W,kappa)
- kernel += k**k_coreg.copy()
-
- return kernel
-
- def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
- """
- kernel resultiong from a first order ODE with OU driving GP
-
- :param input_dim: the number of input dimension, has to be equal to one
- :type input_dim: int
- :param varianceU: variance of the driving GP
- :type varianceU: float
- :param lengthscaleU: lengthscale of the driving GP
- :type lengthscaleU: float
- :param varianceY: 'variance' of the transfer function
- :type varianceY: float
- :param lengthscaleY: 'lengthscale' of the transfer function
- :type lengthscaleY: float
- :rtype: kernel object
-
- """
- part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
- return kern(input_dim, [part])

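With this module deleted, kernels are constructed directly from the classes exported by the new kern/__init__.py instead of these lower-case factory functions. A sketch of the migration, assuming the post-refactor public names shown in the import hunk above:

import GPy
# old style (removed):
#   k = GPy.kern.rbf(1) + GPy.kern.white(1)
# new style:
k = GPy.kern.RBF(1) + GPy.kern.White(1)
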
@@ -129,7 +129,7 @@ class Coregionalize(Kern):

def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten()
- dL_dKdiag_small = np.array([dL_dKdiag[index==i] for i in xrange(output_dim)])
+ dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)])
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
self.kappa.gradient = dL_dKdiag_small

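The .sum() added above matters because the boolean mask selects a different number of entries per output; without the reduction, the comprehension builds a ragged array instead of one scalar per output. A small numeric check of the intended reduction:

import numpy as np

dL_dKdiag = np.array([0.1, 0.2, 0.3, 0.4])
index = np.array([0, 1, 0, 1])   # output index of each data point
output_dim = 2
dL_small = np.array([dL_dKdiag[index == i].sum() for i in range(output_dim)])
print(dL_small)                   # [0.4 0.6] -- one scalar per output
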
@@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

- from kernpart import Kernpart
+ from kern import Kern
import numpy as np

def index_to_slices(index):

@@ -31,67 +31,130 @@ def index_to_slices(index):
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret

- class IndependentOutputs(Kernpart):
+ class IndependentOutputs(Kern):
"""
- A kernel part shich can reopresent several independent functions.
+ A kernel which can reopresent several independent functions.
this kernel 'switches off' parts of the matrix where the output indexes are different.

The index of the functions is given by the last column in the input X
- the rest of the columns of X are passed to the kernel for computation (in blocks).
+ the rest of the columns of X are passed to the underlying kernel for computation (in blocks).

"""
- def __init__(self,k):
- self.input_dim = k.input_dim + 1
- self.num_params = k.num_params
- self.name = 'iops('+ k.name + ')'
- self.k = k
+ def __init__(self, kern, name='independ'):
+ super(IndependentOutputs, self).__init__(kern.input_dim+1, name)
+ self.kern = kern
+ self.add_parameters(self.kern)

- def _get_params(self):
- return self.k._get_params()
+ def K(self,X ,X2=None):
+ X, slices = X[:,:-1], index_to_slices(X[:,-1])
+ if X2 is None:
+ target = np.zeros((X.shape[0], X.shape[0]))
+ [[np.copyto(target[s,s], self.kern.K(X[s], None)) for s in slices_i] for slices_i in slices]
+ else:
+ X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+ target = np.zeros((X.shape[0], X2.shape[0]))
+ [[[np.copyto(target[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+ return target

- def _set_params(self,x):
- self.k._set_params(x)
- self.params = x
+ def Kdiag(self,X):
+ X, slices = X[:,:-1], index_to_slices(X[:,-1])
+ target = np.zeros(X.shape[0])
+ [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices]
+ return target

- def _get_param_names(self):
- return self.k._get_param_names()
+ def update_gradients_full(self,dL_dK,X,X2=None):
+ target = np.zeros(self.kern.size)
+ def collate_grads(dL, X, X2):
+ self.kern.update_gradients_full(dL,X,X2)
+ self.kern._collect_gradient(target)

- def K(self,X,X2,target):
- #Sort out the slices from the input data
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
- X2,slices2 = X,slices
+ [[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices]
+ else:
+ X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
+ [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+ self.kern._set_gradient(target)

+ def gradients_X(self,dL_dK, X, X2=None):
+ target = np.zeros_like(X)
+ X, slices = X[:,:-1],index_to_slices(X[:,-1])
+ if X2 is None:
+ [[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s],X[s],None)) for s in slices_i] for slices_i in slices]
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+ [[[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+ return target

- [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+ def gradients_X_diag(self, dL_dKdiag, X):
+ X, slices = X[:,:-1], index_to_slices(X[:,-1])
+ target = np.zeros(X.shape)
+ [[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
+ return target

- def Kdiag(self,X,target):
+ def update_gradients_diag(self,dL_dKdiag,X,target):
+ target = np.zeros(self.kern.size)
+ def collate_grads(dL, X):
+ self.kern.update_gradients_diag(dL,X)
+ self.kern._collect_gradient(target)
X,slices = X[:,:-1],index_to_slices(X[:,-1])
- [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
+ [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
+ self.kern._set_gradient(target)

- def _param_grad_helper(self,dL_dK,X,X2,target):
+ class Hierarchical(Kern):
+ """
+ A kernel which can reopresent a simple hierarchical model.
+
+ See Hensman et al 2013, "Hierarchical Bayesian modelling of gene expression time
+ series across irregularly sampled replicates and clusters"
+ http://www.biomedcentral.com/1471-2105/14/252
+
+ The index of the functions is given by additional columns in the input X.
+
+ """
+ def __init__(self, kerns, name='hierarchy'):
+ assert all([k.input_dim==kerns[0].input_dim for k in kerns])
+ super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
+ self.kerns = kerns
+ self.add_parameters(self.kerns)
+
+ def K(self,X ,X2=None):
+ X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
+ K = self.kerns[0].K(X, X2)
+ if X2 is None:
+ [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
+ else:
+ X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+ [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)]
+ return target
+
+ def Kdiag(self,X):
+ X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
+ K = self.kerns[0].K(X, X2)
+ [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
+ return target
+
+ def update_gradients_full(self,dL_dK,X,X2=None):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
- X2,slices2 = X,slices
+ self.kerns[0].update_gradients_full(dL_dK, X, None)
+ for k, slices_k in zip(self.kerns[1:], slices):
+ target = np.zeros(k.size)
+ def collate_grads(dL, X, X2):
+ k.update_gradients_full(dL,X,X2)
+ k._collect_gradient(target)
+ [[k.update_gradients_full(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices_k]
+ k._set_gradient(target)
else:
- X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
- [[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+ X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
+ self.kerns[0].update_gradients_full(dL_dK, X, None)
+ for k, slices_k in zip(self.kerns[1:], slices):
+ target = np.zeros(k.size)
+ def collate_grads(dL, X, X2):
+ k.update_gradients_full(dL,X,X2)
+ k._collect_gradient(target)
+ [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+ k._set_gradient(target)

- def gradients_X(self,dL_dK,X,X2,target):
- X,slices = X[:,:-1],index_to_slices(X[:,-1])
- if X2 is None:
- X2,slices2 = X,slices
- else:
- X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
- [[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
- def dKdiag_dX(self,dL_dKdiag,X,target):
- X,slices = X[:,:-1],index_to_slices(X[:,-1])
- [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
-
- def dKdiag_dtheta(self,dL_dKdiag,X,target):
- X,slices = X[:,:-1],index_to_slices(X[:,-1])
- [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

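Both classes above lean on index_to_slices to turn the index column into contiguous per-output slices, so K can be filled block by block. A standalone re-implementation of the behaviour they rely on (a sketch; the real helper is the function defined earlier in this file):

import numpy as np

def index_to_slices_sketch(index):
    # [0,0,1,1,1,0] -> [[slice(0,2), slice(5,6)], [slice(2,5)]]
    index = np.asarray(index, dtype=int)
    ret = [[] for _ in range(index.max() + 1)]
    start = 0
    for i in range(1, len(index) + 1):
        if i == len(index) or index[i] != index[start]:
            ret[index[start]].append(slice(start, i))
            start = i
    return ret

print(index_to_slices_sketch([0, 0, 1, 1, 1, 0]))
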
@@ -26,11 +26,11 @@ class Kern(Parameterized):
raise NotImplementedError
def Kdiag(self, Xa):
raise NotImplementedError
- def psi0(self,Z,posterior_variational):
+ def psi0(self,Z,variational_posterior):
raise NotImplementedError
- def psi1(self,Z,posterior_variational):
+ def psi1(self,Z,variational_posterior):
raise NotImplementedError
- def psi2(self,Z,posterior_variational):
+ def psi2(self,Z,variational_posterior):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError

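For reference, the psi0/psi1/psi2 stubs above are the standard kernel expectations under the variational posterior q(X) used in sparse GP inference, in the per-data-point form this code works with:

\psi_0^{(n)} = \mathbb{E}_{q(x_n)}\big[k(x_n, x_n)\big], \qquad
(\Psi_1)_{nm} = \mathbb{E}_{q(x_n)}\big[k(x_n, z_m)\big], \qquad
(\Psi_2)_{nmm'} = \mathbb{E}_{q(x_n)}\big[k(x_n, z_m)\,k(x_n, z_{m'})\big]

where the z_m are the inducing inputs Z.
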
@@ -49,27 +49,31 @@ class Kern(Parameterized):
self._collect_gradient(target)
self._set_gradient(target)

- def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+ def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
grad = self.gradients_X(dL_dKmm, Z)
grad += self.gradients_X(dL_dKnm.T, Z, X)
return grad
- def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+ def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
raise NotImplementedError
- def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+ def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
raise NotImplementedError

- def plot_ARD(self, *args):
- """If an ARD kernel is present, plot a bar representation using matplotlib
-
- See GPy.plotting.matplot_dep.plot_ARD
- """
+ def plot_ARD(self, *args, **kw):
+ if "matplotlib" in sys.modules:
+ from ...plotting.matplot_dep import kernel_plots
+ self.plot_ARD.__doc__ += kernel_plots.plot_ARD.__doc__
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
- return kernel_plots.plot_ARD(self,*args)
+ return kernel_plots.plot_ARD(self,*args,**kw)

+ def input_sensitivity(self):
+ """
+ Returns the sensitivity for each dimension of this kernel.
+ """
+ return np.zeros(self.input_dim)
+
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """

@@ -101,7 +105,7 @@ class Kern(Parameterized):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)

- def __pow__(self, other, tensor=False):
+ def __pow__(self, other):
"""
Shortcut for tensor `prod`.
"""

@@ -127,11 +131,12 @@ from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+ from GPy.kern import RBF
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
- kernel = GPy.kern.rbf(1)
+ kernel = RBF(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:

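Kern_check_model exists so that checkgrad() can exercise a kernel's gradients in isolation from any likelihood. The underlying idea is a comparison of analytic gradients against central finite differences; a generic sketch:

import numpy as np

def finite_diff_check(f, grad, x, eps=1e-6, rtol=1e-4):
    # f: scalar objective, grad: its claimed analytic gradient.
    g = np.empty_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e.flat[i] = eps
        g.flat[i] = (f(x + e) - f(x - e)) / (2 * eps)
    return np.allclose(g, grad(x), rtol=rtol)
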
@@ -106,51 +106,52 @@ class Linear(Kern):
# variational #
#---------------------------------------#

- def psi0(self, Z, mu, S):
- return np.sum(self.variances * self._mu2S(mu, S), 1)
+ def psi0(self, Z, variational_posterior):
+ return np.sum(self.variances * self._mu2S(variational_posterior), 1)

- def psi1(self, Z, mu, S):
- return self.K(mu, Z) #the variance, it does nothing
+ def psi1(self, Z, variational_posterior):
+ return self.K(variational_posterior.mean, Z) #the variance, it does nothing

- def psi2(self, Z, mu, S):
+ def psi2(self, Z, variational_posterior):
ZA = Z * self.variances
- ZAinner = self._ZAinner(mu, S, Z)
+ ZAinner = self._ZAinner(variational_posterior, Z)
return np.dot(ZAinner, ZA.T)

- def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+ def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+ mu, S = variational_posterior.mean, variational_posterior.variance
# psi0:
- tmp = dL_dpsi0[:, None] * self._mu2S(mu, S)
+ tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
if self.ARD: grad = tmp.sum(0)
else: grad = np.atleast_1d(tmp.sum())
#psi1
self.update_gradients_full(dL_dpsi1, mu, Z)
grad += self.variances.gradient
#psi2
- tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(mu, S, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
+ tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
else: grad += tmp.sum()
#from Kmm
self.update_gradients_full(dL_dKmm, Z, None)
self.variances.gradient += grad

- def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+ def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
# Kmm
grad = self.gradients_X(dL_dKmm, Z, None)
#psi1
- grad += self.gradients_X(dL_dpsi1.T, Z, mu)
+ grad += self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
#psi2
- self._weave_dpsi2_dZ(dL_dpsi2, Z, mu, S, grad)
+ self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
return grad

- def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
- grad_mu, grad_S = np.zeros(mu.shape), np.zeros(mu.shape)
+ def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+ grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
# psi0
- grad_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
+ grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
grad_S += dL_dpsi0[:, None] * self.variances
# psi1
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
# psi2
- self._weave_dpsi2_dmuS(dL_dpsi2, Z, mu, S, grad_mu, grad_S)
+ self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)

return grad_mu, grad_S

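The refactor above only changes how the posterior is passed (an object exposing .mean and .variance instead of separate mu, S arrays); the expectations themselves are unchanged. A plain-NumPy sketch matching the formulas visible in psi0/psi1/psi2 and _ZAinner (all names local to this sketch):

import numpy as np

def linear_psi_stats(mean, var, Z, variances):
    # q(X) = N(mean, var) with elementwise variances; k(x,z) = sum_q variances_q x_q z_q
    psi0 = np.sum(variances * (np.square(mean) + var), 1)   # E[Kdiag(X)]
    psi1 = mean.dot((Z * variances).T)                      # E[K(X, Z)]
    ZA = Z * variances
    inner = mean[:, :, None] * mean[:, None, :]             # mu mu^T ...
    q = mean.shape[1]
    inner[:, np.arange(q), np.arange(q)] += var             # ... + diag(S)
    psi2 = np.einsum('mq,nqr,kr->nmk', ZA, inner, ZA)       # E[K(Z,x_n) K(x_n,Z)]
    return psi0, psi1, psi2
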
@@ -159,7 +160,7 @@ class Linear(Kern):
#--------------------------------------------------#

- def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
+ def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]

@@ -203,14 +204,15 @@ class Linear(Kern):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}

+ mu = pv.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)

- def _weave_dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
- AZA = self.variances*self._ZAinner(mu, S, Z)
+ def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target):
+ AZA = self.variances*self._ZAinner(pv, Z)
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)

@@ -232,21 +234,24 @@ class Linear(Kern):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}

- N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
- mu = param_to_array(mu)
+ N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1]
+ mu = param_to_array(pv.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)

- def _mu2S(self, mu, S):
- return np.square(mu) + S
+ def _mu2S(self, pv):
+ return np.square(pv.mean) + pv.variance

- def _ZAinner(self, mu, S, Z):
+ def _ZAinner(self, pv, Z):
ZA = Z*self.variances
- inner = (mu[:, None, :] * mu[:, :, None])
- diag_indices = np.diag_indices(mu.shape[1], 2)
- inner[:, diag_indices[0], diag_indices[1]] += S
+ inner = (pv.mean[:, None, :] * pv.mean[:, :, None])
+ diag_indices = np.diag_indices(pv.mean.shape[1], 2)
+ inner[:, diag_indices[0], diag_indices[1]] += pv.variance

return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!

+ def input_sensitivity(self):
+ if self.ARD: return self.variances
+ else: return self.variances.repeat(self.input_dim)

@@ -1,11 +1,13 @@
 # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from kernpart import Kernpart
+from kern import Kern
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp
 import numpy as np
 four_over_tau = 2./np.pi

-class MLP(Kernpart):
+class MLP(Kern):
     """

     Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
@@ -29,85 +31,58 @@ class MLP(Kernpart):

     """

-    def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
-        self.input_dim = input_dim
-        self.ARD = ARD
-        if not ARD:
-            self.num_params=3
-            if weight_variance is not None:
-                weight_variance = np.asarray(weight_variance)
-                assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
-            else:
-                weight_variance = 100.*np.ones(1)
-        else:
-            self.num_params = self.input_dim + 2
-            if weight_variance is not None:
-                weight_variance = np.asarray(weight_variance)
-                assert weight_variance.size == self.input_dim, "bad number of weight variances"
-            else:
-                weight_variance = np.ones(self.input_dim)
-            raise NotImplementedError
-
-        self.name='mlp'
-        self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
-
-    def _get_params(self):
-        return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
-
-    def _set_params(self, x):
-        assert x.size == (self.num_params)
-        self.variance = x[0]
-        self.weight_variance = x[1:-1]
-        self.weight_std = np.sqrt(self.weight_variance)
-        self.bias_variance = x[-1]
-
-    def _get_param_names(self):
-        if self.num_params == 3:
-            return ['variance', 'weight_variance', 'bias_variance']
-        else:
-            return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
-
-    def K(self, X, X2, target):
-        """Return covariance between X and X2."""
+    def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., name='mlp'):
+        super(MLP, self).__init__(input_dim, name)
+        self.variance = Param('variance', variance, Logexp())
+        self.weight_variance = Param('weight_variance', weight_variance, Logexp())
+        self.bias_variance = Param('bias_variance', bias_variance, Logexp())
+        self.add_parameters(self.variance, self.weight_variance, self.bias_variance)
+
+    def K(self, X, X2=None):
         self._K_computations(X, X2)
-        target += self.variance*self._K_dvar
+        return self.variance*self._K_dvar

-    def Kdiag(self, X, target):
+    def Kdiag(self, X):
         """Compute the diagonal of the covariance matrix for X."""
         self._K_diag_computations(X)
-        target+= self.variance*self._K_diag_dvar
+        return self.variance*self._K_diag_dvar

-    def _param_grad_helper(self, dL_dK, X, X2, target):
+    def update_gradients_full(self, dL_dK, X, X2=None):
         """Derivative of the covariance with respect to the parameters."""
         self._K_computations(X, X2)
-        denom3 = self._K_denom*self._K_denom*self._K_denom
+        self.variance.gradient = np.sum(self._K_dvar*dL_dK)

+        denom3 = self._K_denom**3
         base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
         base_cov_grad = base*dL_dK

         if X2 is None:
             vec = np.diag(self._K_inner_prod)
-            target[1] += ((self._K_inner_prod/self._K_denom
+            self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
                 -.5*self._K_numer/denom3
                 *(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
                 +np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
-            target[2] += ((1./self._K_denom
+            self.bias_variance.gradient = ((1./self._K_denom
                 -.5*self._K_numer/denom3
                 *((vec[None, :]+vec[:, None])*self.weight_variance
                 +2.*self.bias_variance + 2.))*base_cov_grad).sum()
         else:
             vec1 = (X*X).sum(1)
             vec2 = (X2*X2).sum(1)
-            target[1] += ((self._K_inner_prod/self._K_denom
+            self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
                 -.5*self._K_numer/denom3
                 *(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
-            target[2] += ((1./self._K_denom
+            self.bias_variance.gradient = ((1./self._K_denom
                 -.5*self._K_numer/denom3
                 *((vec1[:, None]+vec2[None, :])*self.weight_variance
                 + 2*self.bias_variance + 2.))*base_cov_grad).sum()

-        target[0] += np.sum(self._K_dvar*dL_dK)
+    def update_gradients_diag(self, X):
+        raise NotImplementedError, "TODO"

-    def gradients_X(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         arg = self._K_asin_arg
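The rewritten constructor registers `variance`, `weight_variance` and `bias_variance` as `Param` objects constrained positive through `Logexp`. As a rough illustration of the idea (an assumption about the transform, not a reading of GPy's implementation), a softplus-style log-exp map keeps the optimiser's variable unconstrained while the parameter it produces stays strictly positive:

# Illustrative softplus-style positivity transform (assumed behaviour of a
# Logexp-like constraint; not GPy's actual code).
import numpy as np

def logexp(x):
    return np.log(1. + np.exp(x))      # maps R -> (0, inf)

def logexp_inv(f):
    return np.log(np.exp(f) - 1.)     # back to the unconstrained space

x = np.linspace(-5., 5., 11)
assert np.all(logexp(x) > 0)
assert np.allclose(logexp_inv(logexp(x)), x)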
@@ -116,10 +91,10 @@ class MLP(Kernpart):
         denom3 = denom*denom*denom
         if X2 is not None:
             vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
-            target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+            return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
         else:
             vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
-            target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+            return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)

     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X"""
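`gradients_X` now returns its result rather than accumulating into a `target` buffer. A generic finite-difference sketch for checking a method with this return-style signature (a hypothetical helper, not a test shipped with this commit):

# Compares the analytic d(sum(dL_dK*K))/dX against central differences.
# `kern` is any object with K(X, X2) and gradients_X(dL_dK, X, X2) in the
# new return-style API sketched above.
import numpy as np

def check_gradients_X(kern, X, X2, eps=1e-6):
    dL_dK = np.ones((X.shape[0], X2.shape[0]))
    analytic = kern.gradients_X(dL_dK, X, X2)
    numeric = np.empty_like(X)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            Xp, Xm = X.copy(), X.copy()
            Xp[i, j] += eps
            Xm[i, j] -= eps
            numeric[i, j] = (np.sum(kern.K(Xp, X2)) - np.sum(kern.K(Xm, X2)))/(2*eps)
    return np.allclose(analytic, numeric, rtol=1e-4)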
@@ -127,36 +102,28 @@ class MLP(Kernpart):
         arg = self._K_diag_asin_arg
         denom = self._K_diag_denom
         numer = self._K_diag_numer
-        target += four_over_tau*2.*self.weight_variance*self.variance*X*(1/denom*(1 - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
+        return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]

     def _K_computations(self, X, X2):
         """Pre-computations for the covariance matrix (used for computing the covariance and its gradients)."""
-        if self.ARD:
-            pass
-        else:
-            if X2 is None:
-                self._K_inner_prod = np.dot(X,X.T)
-                self._K_numer = self._K_inner_prod*self.weight_variance+self.bias_variance
-                vec = np.diag(self._K_numer) + 1.
-                self._K_denom = np.sqrt(np.outer(vec,vec))
-                self._K_asin_arg = self._K_numer/self._K_denom
-                self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
-            else:
-                self._K_inner_prod = np.dot(X,X2.T)
-                self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
-                vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
-                vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
-                self._K_denom = np.sqrt(np.outer(vec1,vec2))
-                self._K_asin_arg = self._K_numer/self._K_denom
-                self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
+        if X2 is None:
+            self._K_inner_prod = np.dot(X,X.T)
+            self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
+            vec = np.diag(self._K_numer) + 1.
+            self._K_denom = np.sqrt(np.outer(vec,vec))
+        else:
+            self._K_inner_prod = np.dot(X,X2.T)
+            self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
+            vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
+            vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
+            self._K_denom = np.sqrt(np.outer(vec1,vec2))
+        self._K_asin_arg = self._K_numer/self._K_denom
+        self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)

     def _K_diag_computations(self, X):
         """Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
-        if self.ARD:
-            pass
-        else:
-            self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
-            self._K_diag_denom = self._K_diag_numer+1.
-            self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
-            self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
+        self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
+        self._K_diag_denom = self._K_diag_numer+1.
+        self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
+        self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
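For reference, `_K_computations` above evaluates the arc-sine (MLP) covariance, and the quantities it caches can be reproduced standalone. A minimal numpy sketch of the same covariance, useful for sanity-checking the refactored class (plain numpy, none of the GPy machinery):

# k(x, x2) = variance*(2/pi)*arcsin((s_w*x.x2 + s_b)
#            / sqrt((s_w*x.x + s_b + 1)*(s_w*x2.x2 + s_b + 1)))
# with s_w = weight_variance and s_b = bias_variance, as in the code above.
import numpy as np

def mlp_K(X, X2, variance=1., s_w=1., s_b=100.):
    numer = np.dot(X, X2.T)*s_w + s_b
    v1 = (X*X).sum(1)*s_w + s_b + 1.
    v2 = (X2*X2).sum(1)*s_w + s_b + 1.
    return variance*(2./np.pi)*np.arcsin(numer/np.sqrt(np.outer(v1, v2)))

X = np.random.randn(5, 3)
K = mlp_K(X, X)
assert np.allclose(K, K.T)    # symmetric, as a covariance must be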
@@ -2,12 +2,287 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from kernpart import Kernpart
 import numpy as np
-from GPy.util.linalg import mdot
-from GPy.util.decorators import silence_errors
+from kern import Kern
+from ...util.linalg import mdot
+from ...util.decorators import silence_errors
+from ...core.parameterization.param import Param
+from ...core.parameterization.transformations import Logexp

-class PeriodicMatern52(Kernpart):
+class Periodic(Kern):
+    def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, name):
+        """
+        :type input_dim: int
+        :param variance: the variance of the Matern kernel
+        :type variance: float
+        :param lengthscale: the lengthscale of the Matern kernel
+        :type lengthscale: np.ndarray of size (input_dim,)
+        :param period: the period
+        :type period: float
+        :param n_freq: the number of frequencies considered for the periodic subspace
+        :type n_freq: int
+        :rtype: kernel object
+        """
+
+        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
+        super(Periodic, self).__init__(input_dim, name)
+        self.input_dim = input_dim
+        self.lower,self.upper = lower, upper
+        self.n_freq = n_freq
+        self.n_basis = 2*n_freq
+        self.variance = Param('variance', np.float64(variance), Logexp())
+        self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp())
+        self.period = Param('period', np.float64(period), Logexp())
+        self.add_parameters(self.variance, self.lengthscale, self.period)
+        self.parameters_changed()
+
+    def _cos(self, alpha, omega, phase):
+        def f(x):
+            return alpha*np.cos(omega*x + phase)
+        return f
+
+    @silence_errors
+    def _cos_factorization(self, alpha, omega, phase):
+        r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
+        r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
+        r = np.sqrt(r1**2 + r2**2)
+        psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
+        return r,omega[:,0:1], psi
+
+    @silence_errors
+    def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
+        Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
+        Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
+        Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
+        return Gint
+
+    def K(self, X, X2=None):
+        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
+        if X2 is None:
+            FX2 = FX
+        else:
+            FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
+        return mdot(FX,self.Gi,FX2.T)
+
+    def Kdiag(self,X):
+        return np.diag(self.K(X))
+
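`_cos_factorization` in the new `Periodic` base class collapses each weighted row of cosines sharing a frequency into a single cosine, sum_j a_j*cos(w*x + p_j) = r*cos(w*x + psi). A quick numpy check of that identity (using `arctan2` for the phase, where the source handles the branch corrections by hand):

# r = sqrt((sum a_j cos p_j)^2 + (sum a_j sin p_j)^2),
# psi = atan2(sum a_j sin p_j, sum a_j cos p_j).
import numpy as np

a = np.array([1.3, -0.7, 2.1])
p = np.array([0.2, 1.5, -0.9])
w = 3.0
r = np.hypot((a*np.cos(p)).sum(), (a*np.sin(p)).sum())
psi = np.arctan2((a*np.sin(p)).sum(), (a*np.cos(p)).sum())

x = np.linspace(0., 2.*np.pi, 50)
lhs = (a[None, :]*np.cos(w*x[:, None] + p[None, :])).sum(1)
assert np.allclose(lhs, r*np.cos(w*x + psi))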
+
+class PeriodicExponential(Periodic):
+    """
+    Kernel of the periodic subspace (up to a given frequency) of a exponential
+    (Matern 1/2) RKHS.
+
+    Only defined for input_dim=1.
+    """
+
+    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_exponential'):
+        super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name)
+
+    def parameters_changed(self):
+        self.a = [1./self.lengthscale, 1.]
+        self.b = [1]
+
+        self.basis_alpha = np.ones((self.n_basis,))
+        self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))[:,0]
+        self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
+
+        self.G = self.Gram_matrix()
+        self.Gi = np.linalg.inv(self.G)
+
+    def Gram_matrix(self):
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
+        Lo = np.column_stack((self.basis_omega,self.basis_omega))
+        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
+        r,omega,phi = self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+        return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
+
+    #@silence_errors
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
+        if X2 is None: X2 = X
+        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
+        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
+
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
+        Lo = np.column_stack((self.basis_omega,self.basis_omega))
+        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
+        r,omega,phi = self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+
+        #dK_dvar
+        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
+
+        #dK_dlen
+        da_dlen = [-1./self.lengthscale**2,0.]
+        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
+        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
+        dGint_dlen = dGint_dlen + dGint_dlen.T
+        dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
+        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
+
+        #dK_dper
+        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
+        dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
+
+        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
+        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
+
+        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
+        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
+        # SIMPLIFY!!! IPPprim1 = (self.upper - self.lower)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
+        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
+        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
+        IPPprim = np.where(np.logical_or(np.isnan(IPPprim1), np.isinf(IPPprim1)), IPPprim2, IPPprim1)
+
+        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
+        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
+        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
+        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
+        #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
+        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
+
+        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
+        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
+        r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
+
+        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
+        dGint_dper = dGint_dper + dGint_dper.T
+
+        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+
+        dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
+
+        dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
+
+        self.variance.gradient = np.sum(dK_dvar*dL_dK)
+        self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
+        self.period.gradient = np.sum(dK_dper*dL_dK)
+
+
+class PeriodicMatern32(Periodic):
+    """
+    Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
+
+    :param input_dim: the number of input dimensions
+    :type input_dim: int
+    :param variance: the variance of the Matern kernel
+    :type variance: float
+    :param lengthscale: the lengthscale of the Matern kernel
+    :type lengthscale: np.ndarray of size (input_dim,)
+    :param period: the period
+    :type period: float
+    :param n_freq: the number of frequencies considered for the periodic subspace
+    :type n_freq: int
+    :rtype: kernel object
+
+    """
+
+    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_Matern32'):
+        super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name)
+
+    def parameters_changed(self):
+        self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
+        self.b = [1,self.lengthscale**2/3]
+
+        self.basis_alpha = np.ones((self.n_basis,))
+        self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
+        self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
+
+        self.G = self.Gram_matrix()
+        self.Gi = np.linalg.inv(self.G)
+
+    def Gram_matrix(self):
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
+        Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
+        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r,omega,phi = self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+        return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
+
+    @silence_errors
+    def update_gradients_full(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
+        if X2 is None: X2 = X
+        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
+        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
+
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
+        Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
+        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r,omega,phi = self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+
+        #dK_dvar
+        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
+
+        #dK_dlen
+        da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
+        db_dlen = [0.,2*self.lengthscale/3.]
+        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
+        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
+        dGint_dlen = dGint_dlen + dGint_dlen.T
+        dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
+        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
+
+        #dK_dper
+        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
+        dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
+
+        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
+        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
+
+        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
+        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
+        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
+        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
+        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
+
+        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
+        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
+        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
+        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
+        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
+
+        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
+        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
+
+        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
+        dGint_dper = dGint_dper + dGint_dper.T
+
+        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+
+        dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
+
+        dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
+
+        self.variance.gradient = np.sum(dK_dvar*dL_dK)
+        self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
+        self.period.gradient = np.sum(dK_dper*dL_dK)
+
+
+class PeriodicMatern52(Periodic):
     """
     Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
@@ -25,53 +300,10 @@ class PeriodicMatern52(Kernpart):

     """

-    def __init__(self,input_dim=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
-        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
-        self.name = 'periodic_Mat52'
-        self.input_dim = input_dim
-        if lengthscale is not None:
-            lengthscale = np.asarray(lengthscale)
-            assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
-        else:
-            lengthscale = np.ones(1)
-        self.lower,self.upper = lower, upper
-        self.num_params = 3
-        self.n_freq = n_freq
-        self.n_basis = 2*n_freq
-        self._set_params(np.hstack((variance,lengthscale,period)))
-
-    def _cos(self,alpha,omega,phase):
-        def f(x):
-            return alpha*np.cos(omega*x+phase)
-        return f
-
-    @silence_errors
-    def _cos_factorization(self,alpha,omega,phase):
-        r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
-        r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
-        r = np.sqrt(r1**2 + r2**2)
-        psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
-        return r,omega[:,0:1], psi
-
-    @silence_errors
-    def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
-        Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
-        Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
-        #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
-        Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
-        return Gint
-
-    def _get_params(self):
-        """return the value of the parameters."""
-        return np.hstack((self.variance,self.lengthscale,self.period))
-
-    def _set_params(self,x):
-        """set the value of the parameters."""
-        assert x.size==3
-        self.variance = x[0]
-        self.lengthscale = x[1]
-        self.period = x[2]
-
+    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_Matern52'):
+        super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name)
+
+    def parameters_changed(self):
         self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
         self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]

@@ -82,10 +314,6 @@ class PeriodicMatern52(Kernpart):
         self.G = self.Gram_matrix()
         self.Gi = np.linalg.inv(self.G)

-    def _get_param_names(self):
-        """return parameter names."""
-        return ['variance','lengthscale','period']
-
     def Gram_matrix(self):
         La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
         Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))

@@ -99,23 +327,8 @@ class PeriodicMatern52(Kernpart):
         lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T)
         return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms)

-    def K(self,X,X2,target):
-        """Compute the covariance matrix between X and X2."""
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-        if X2 is None:
-            FX2 = FX
-        else:
-            FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
-        np.add(mdot(FX,self.Gi,FX2.T), target,target)
-
-    def Kdiag(self,X,target):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-        np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
-
     @silence_errors
-    def _param_grad_helper(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
+    def update_gradients_full(self, dL_dK, X, X2=None):
         if X2 is None: X2 = X
         FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

@@ -156,14 +369,12 @@ class PeriodicMatern52(Kernpart):
         IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
         IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
         IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
-        #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
         IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

         IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
         IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
         IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
         IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
-        #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
         IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

         dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))

@@ -186,81 +397,7 @@ class PeriodicMatern52(Kernpart):
         dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
         dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)

-        # np.add(target[:,:,0],dK_dvar, target[:,:,0])
-        target[0] += np.sum(dK_dvar*dL_dK)
-        #np.add(target[:,:,1],dK_dlen, target[:,:,1])
-        target[1] += np.sum(dK_dlen*dL_dK)
-        #np.add(target[:,:,2],dK_dper, target[:,:,2])
-        target[2] += np.sum(dK_dper*dL_dK)
+        self.variance.gradient = np.sum(dK_dvar*dL_dK)
+        self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
+        self.period.gradient = np.sum(dK_dper*dL_dK)

-    @silence_errors
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        """derivative of the diagonal of the covariance matrix with respect to the parameters"""
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-
-        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
-        Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
-        Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
-        r,omega,phi = self._cos_factorization(La,Lo,Lp)
-        Gint = self._int_computation( r,omega,phi, r,omega,phi)
-
-        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
-        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-        F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
-
-        #dK_dvar
-        dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX.T)
-
-        #dK_dlen
-        da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
-        db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
-        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
-        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
-        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
-        dGint_dlen = dGint_dlen + dGint_dlen.T
-        dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
-        dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
-        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
-
-        #dK_dper
-        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
-
-        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
-        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
-        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
-
-        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
-        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
-        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
-        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
-        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
-
-        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
-        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
-        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + .5*self.upper**2*np.cos(phi-phi1.T)
-        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + .5*self.lower**2*np.cos(phi-phi1.T)
-        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
-
-        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
-        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
-        r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
-
-        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
-        dGint_dper = dGint_dper + dGint_dper.T
-
-        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-        dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
-
-        dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower.T,dFlower_dper))
-        dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
-        dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
-        dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
-        dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
-
-        dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
-        dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
-
-        target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
-        target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
-        target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
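All three subclasses inherit `K` from `Periodic`, which is a degenerate (finite-basis) kernel: k(x, x') = f(x)^T Gi f(x') for a fixed bank of cosines f and the inverse Gram matrix Gi of the truncated RKHS. A standalone numpy sketch of that structure (`Gi` here is an arbitrary SPD stand-in, not the Gram matrix the diff computes):

import numpy as np

n_basis = 6
omega = np.repeat(np.arange(1, n_basis//2 + 1), 2).astype(float)
phi = np.tile([-np.pi/2, 0.], n_basis//2)

def F(x):                       # x: (N,1) -> (N, n_basis) bank of cosines
    return np.cos(omega[None, :]*x + phi[None, :])

A = np.random.randn(n_basis, n_basis)
Gi = np.linalg.inv(A.dot(A.T) + np.eye(n_basis))    # stand-in for self.Gi

X = np.linspace(0., 4*np.pi, 15)[:, None]
K = F(X).dot(Gi).dot(F(X).T)
assert np.allclose(K, K.T)                          # symmetric by construction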
@@ -1,248 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-from kernpart import Kernpart
-import numpy as np
-from GPy.util.linalg import mdot
-from GPy.util.decorators import silence_errors
-
-class PeriodicMatern32(Kernpart):
-    """
-    Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
-
-    :param input_dim: the number of input dimensions
-    :type input_dim: int
-    :param variance: the variance of the Matern kernel
-    :type variance: float
-    :param lengthscale: the lengthscale of the Matern kernel
-    :type lengthscale: np.ndarray of size (input_dim,)
-    :param period: the period
-    :type period: float
-    :param n_freq: the number of frequencies considered for the periodic subspace
-    :type n_freq: int
-    :rtype: kernel object
-
-    """
-
-    def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
-        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
-        self.name = 'periodic_Mat32'
-        self.input_dim = input_dim
-        if lengthscale is not None:
-            lengthscale = np.asarray(lengthscale)
-            assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
-        else:
-            lengthscale = np.ones(1)
-        self.lower,self.upper = lower, upper
-        self.num_params = 3
-        self.n_freq = n_freq
-        self.n_basis = 2*n_freq
-        self._set_params(np.hstack((variance,lengthscale,period)))
-
-    def _cos(self,alpha,omega,phase):
-        def f(x):
-            return alpha*np.cos(omega*x+phase)
-        return f
-
-    @silence_errors
-    def _cos_factorization(self,alpha,omega,phase):
-        r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
-        r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
-        r = np.sqrt(r1**2 + r2**2)
-        psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
-        return r,omega[:,0:1], psi
-
-    @silence_errors
-    def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
-        Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
-        Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
-        #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
-        Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
-        return Gint
-
-    def _get_params(self):
-        """return the value of the parameters."""
-        return np.hstack((self.variance,self.lengthscale,self.period))
-
-    def _set_params(self,x):
-        """set the value of the parameters."""
-        assert x.size==3
-        self.variance = x[0]
-        self.lengthscale = x[1]
-        self.period = x[2]
-
-        self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
-        self.b = [1,self.lengthscale**2/3]
-
-        self.basis_alpha = np.ones((self.n_basis,))
-        self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
-        self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
-
-        self.G = self.Gram_matrix()
-        self.Gi = np.linalg.inv(self.G)
-
-    def _get_param_names(self):
-        """return parameter names."""
-        return ['variance','lengthscale','period']
-
-    def Gram_matrix(self):
-        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
-        Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
-        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
-        r,omega,phi = self._cos_factorization(La,Lo,Lp)
-        Gint = self._int_computation( r,omega,phi, r,omega,phi)
-
-        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
-        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-        return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
-
-    def K(self,X,X2,target):
-        """Compute the covariance matrix between X and X2."""
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-        if X2 is None:
-            FX2 = FX
-        else:
-            FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
-        np.add(mdot(FX,self.Gi,FX2.T), target,target)
-
-    def Kdiag(self,X,target):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-        np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
-
-    @silence_errors
-    def _param_grad_helper(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
-        if X2 is None: X2 = X
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
-
-        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
-        Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
-        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
-        r,omega,phi = self._cos_factorization(La,Lo,Lp)
-        Gint = self._int_computation( r,omega,phi, r,omega,phi)
-
-        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
-        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-
-        #dK_dvar
-        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
-
-        #dK_dlen
-        da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
-        db_dlen = [0.,2*self.lengthscale/3.]
-        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
-        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
-        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
-        dGint_dlen = dGint_dlen + dGint_dlen.T
-        dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
-        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
-
-        #dK_dper
-        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
-        dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
-
-        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
-        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
-        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
-
-        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
-        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
-        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
-        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
-        #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
-        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
-
-        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
-        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
-        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
-        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
-        #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
-        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
-
-        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
-        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
-        r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
-
-        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
-        dGint_dper = dGint_dper + dGint_dper.T
-
-        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
-
-        dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
-
-        dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
-
-        # np.add(target[:,:,0],dK_dvar, target[:,:,0])
-        target[0] += np.sum(dK_dvar*dL_dK)
-        #np.add(target[:,:,1],dK_dlen, target[:,:,1])
-        target[1] += np.sum(dK_dlen*dL_dK)
-        #np.add(target[:,:,2],dK_dper, target[:,:,2])
-        target[2] += np.sum(dK_dper*dL_dK)
-
-    @silence_errors
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        """derivative of the diagonal covariance matrix with respect to the parameters"""
-        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
-
-        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2))
-        Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
-        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
-        r,omega,phi = self._cos_factorization(La,Lo,Lp)
-        Gint = self._int_computation( r,omega,phi, r,omega,phi)
-
-        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
|
|
||||||
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
|
|
||||||
|
|
||||||
#dK_dvar
|
|
||||||
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
|
|
||||||
|
|
||||||
#dK_dlen
|
|
||||||
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
|
|
||||||
db_dlen = [0.,2*self.lengthscale/3.]
|
|
||||||
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
|
|
||||||
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
|
|
||||||
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
|
|
||||||
dGint_dlen = dGint_dlen + dGint_dlen.T
|
|
||||||
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
|
|
||||||
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
|
|
||||||
|
|
||||||
#dK_dper
|
|
||||||
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
|
|
||||||
|
|
||||||
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
|
|
||||||
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
|
|
||||||
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
|
|
||||||
|
|
||||||
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
|
|
||||||
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
|
|
||||||
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
|
|
||||||
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
|
|
||||||
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
|
|
||||||
|
|
||||||
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
|
|
||||||
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
|
|
||||||
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
|
|
||||||
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
|
|
||||||
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
|
|
||||||
|
|
||||||
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
|
|
||||||
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
|
|
||||||
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
|
|
||||||
|
|
||||||
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
|
|
||||||
dGint_dper = dGint_dper + dGint_dper.T
|
|
||||||
|
|
||||||
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
|
|
||||||
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
|
|
||||||
|
|
||||||
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
|
|
||||||
|
|
||||||
dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
|
|
||||||
|
|
||||||
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
|
|
||||||
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
|
|
||||||
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
|
|
||||||
|
|
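Each gradient helper above folds the per-parameter derivative into the objective via target[i] += np.sum(dK_dtheta * dL_dK). The quickest way to validate that algebra is a central finite-difference check on L(theta) = sum(dL_dK * K(theta)). A minimal standalone sketch of the check, using a plain RBF stand-in rather than the periodic kernel (all names here are local to the sketch, not part of the diff):

import numpy as np

X = np.random.randn(5, 1)
dL_dK = np.random.randn(5, 5)

def K_rbf(theta):
    # theta = [variance, lengthscale]; a stand-in kernel for the check
    var, ell = theta
    d2 = np.square(X - X.T)
    return var * np.exp(-0.5 * d2 / ell**2)

def L(theta):
    # the scalar the helpers differentiate: sum(dL_dK * K)
    return np.sum(dL_dK * K_rbf(theta))

def finite_diff(f, theta, eps=1e-6):
    g = np.zeros_like(theta)
    for i in range(theta.size):
        tp, tm = theta.copy(), theta.copy()
        tp[i] += eps; tm[i] -= eps
        g[i] = (f(tp) - f(tm)) / (2*eps)
    return g

theta = np.array([1.0, 0.7])
var, ell = theta
d2 = np.square(X - X.T)
K = K_rbf(theta)
analytic = np.array([np.sum(dL_dK * K / var),           # the dK_dvar pattern
                     np.sum(dL_dK * K * d2 / ell**3)])  # the dK_dlen pattern
assert np.allclose(finite_diff(L, theta), analytic, atol=1e-5)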
@@ -1,237 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors
from GPy.core.parameterization.param import Param

class PeriodicExponential(Kernpart):
    """
    Kernel of the periodic subspace (up to a given frequency) of an exponential (Matern 1/2) RKHS. Only defined for input_dim=1.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance of the Matern kernel
    :type variance: float
    :param lengthscale: the lengthscale of the Matern kernel
    :type lengthscale: np.ndarray of size (input_dim,)
    :param period: the period
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int
    :rtype: kernel object

    """

    def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi, name='periodic_exp'):
        super(PeriodicExponential, self).__init__(input_dim, name)
        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
        self.input_dim = input_dim
        if lengthscale is not None:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
        else:
            lengthscale = np.ones(1)
        self.lower, self.upper = lower, upper
        self.num_params = 3
        self.n_freq = n_freq
        self.n_basis = 2*n_freq
        self.variance = Param('variance', variance)
        self.lengthscale = Param('lengthscale', lengthscale)
        self.period = Param('period', period)
        self.parameters_changed()
        #self._set_params(np.hstack((variance,lengthscale,period)))

    def _cos(self,alpha,omega,phase):
        def f(x):
            return alpha*np.cos(omega*x+phase)
        return f

    @silence_errors
    def _cos_factorization(self,alpha,omega,phase):
        r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
        r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
        r = np.sqrt(r1**2 + r2**2)
        psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
        return r,omega[:,0:1], psi
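_cos_factorization is the harmonic addition theorem: within a row, a sum of cosines sharing one frequency, sum_j alpha_j cos(omega x + phi_j), collapses to a single r cos(omega x + psi). A quick numerical confirmation of that identity (standalone, numpy only; arctan2 covers the sign branches that the np.where above handles explicitly):

import numpy as np

alpha = np.array([[1.0, -0.5, 2.0]])   # amplitudes, one row
omega = np.array([[3.0, 3.0, 3.0]])    # shared frequency within the row
phase = np.array([[0.0, np.pi/4, 1.2]])

r1 = np.sum(alpha*np.cos(phase), axis=1)
r2 = np.sum(alpha*np.sin(phase), axis=1)
r = np.sqrt(r1**2 + r2**2)
psi = np.arctan2(r2, r1)

x = np.linspace(0, 2*np.pi, 7)
lhs = np.sum(alpha[:, :, None]*np.cos(omega[:, :, None]*x + phase[:, :, None]), axis=1).ravel()
rhs = r*np.cos(omega[0, 0]*x + psi)
assert np.allclose(lhs, rhs)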
    @silence_errors
    def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
        Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
        Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
        #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
        Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
        return Gint
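_int_computation evaluates integrals of products of cosines over [lower, upper] in closed form, via 2 cos A cos B = cos(A+B) + cos(A-B); the NaN/np.where switch picks the degenerate branch when omega1 == omega2. A check of the off-diagonal closed form against numerical quadrature (standalone sketch, numpy only):

import numpy as np

lower, upper = 0.0, 4*np.pi
w1, p1 = 2.0, 0.3
w2, p2 = 5.0, -1.1

# closed form used above (w1 != w2 branch), equal to the integral of
# 2*cos(w1 x + p1)*cos(w2 x + p2) over [lower, upper]
closed = (np.sin((w1+w2)*upper+p1+p2) - np.sin((w1+w2)*lower+p1+p2))/(w1+w2) \
       + (np.sin((w1-w2)*upper+p1-p2) - np.sin((w1-w2)*lower+p1-p2))/(w1-w2)

x = np.linspace(lower, upper, 200001)
quad = np.trapz(2*np.cos(w1*x+p1)*np.cos(w2*x+p2), x)
assert np.allclose(closed, quad, atol=1e-6)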
    #def _get_params(self):
    #    """return the value of the parameters."""
    #    return np.hstack((self.variance,self.lengthscale,self.period))

    def parameters_changed(self):
        """set the value of the parameters."""
        self.a = [1./self.lengthscale, 1.]
        self.b = [1]

        self.basis_alpha = np.ones((self.n_basis,))
        self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))[:,0]
        self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))

        self.G = self.Gram_matrix()
        self.Gi = np.linalg.inv(self.G)

    #def _get_param_names(self):
    #    """return parameter names."""
    #    return ['variance','lengthscale','period']

    def Gram_matrix(self):
        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
        Lo = np.column_stack((self.basis_omega,self.basis_omega))
        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
        r,omega,phi = self._cos_factorization(La,Lo,Lp)
        Gint = self._int_computation( r,omega,phi, r,omega,phi)
        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
        return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))

    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        if X2 is None:
            FX2 = FX
        else:
            FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
        np.add(mdot(FX,self.Gi,FX2.T), target,target)

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)

    @silence_errors
    def _param_grad_helper(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
        if X2 is None: X2 = X
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
        Lo = np.column_stack((self.basis_omega,self.basis_omega))
        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
        r,omega,phi = self._cos_factorization(La,Lo,Lp)
        Gint = self._int_computation( r,omega,phi, r,omega,phi)

        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]

        #dK_dvar
        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)

        #dK_dlen
        da_dlen = [-1./self.lengthscale**2,0.]
        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
        dGint_dlen = dGint_dlen + dGint_dlen.T
        dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)

        #dK_dper
        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
        dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)

        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
        #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
        #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
        r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T

        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
        dGint_dper = dGint_dper + dGint_dper.T

        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

        dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))

        dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)

        target[0] += np.sum(dK_dvar*dL_dK)
        target[1] += np.sum(dK_dlen*dL_dK)
        target[2] += np.sum(dK_dper*dL_dK)

    @silence_errors
    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters"""
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
        Lo = np.column_stack((self.basis_omega,self.basis_omega))
        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
        r,omega,phi = self._cos_factorization(La,Lo,Lp)
        Gint = self._int_computation( r,omega,phi, r,omega,phi)

        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]

        #dK_dvar
        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)

        #dK_dlen
        da_dlen = [-1./self.lengthscale**2,0.]
        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
        dGint_dlen = dGint_dlen + dGint_dlen.T
        dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)

        #dK_dper
        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)

        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
        r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T

        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
        dGint_dper = dGint_dper + dGint_dper.T

        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

        dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))

        dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)

        target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
        target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
        target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
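Both periodic kernels being removed here share one construction: project onto a finite cosine basis phi(x), form the Gram matrix G of RKHS inner products, and evaluate k(x, x') = phi(x)^T G^{-1} phi(x'). A toy standalone version of that pattern, with an arbitrary positive definite stand-in for the Matern-specific Gram matrix above:

import numpy as np

rng = np.random.RandomState(0)
n_basis, period = 6, 2*np.pi
omega = 2*np.pi/period * np.repeat(np.arange(1, n_basis//2 + 1), 2)
phase = np.tile([-np.pi/2, 0.0], n_basis//2)   # sin/cos pairs per frequency

def phi(x):
    # feature map: one column per basis function, x of shape (n, 1)
    return np.cos(omega[None, :]*x + phase[None, :])

# stand-in Gram matrix: any symmetric positive definite matrix works here
A = rng.randn(n_basis, n_basis)
G = np.dot(A, A.T) + n_basis*np.eye(n_basis)
Gi = np.linalg.inv(G)

X = np.linspace(0, 4*np.pi, 50)[:, None]
K = np.dot(phi(X), np.dot(Gi, phi(X).T))

# sanity checks: symmetric, positive semi-definite, and period-periodic
assert np.allclose(K, K.T)
assert np.linalg.eigvalsh(K).min() > -1e-10
assert np.allclose(K, np.dot(phi(X + period), np.dot(Gi, phi(X + period).T)))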
@@ -42,10 +42,6 @@ class Prod(Kern):
         self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
         self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])
 
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
-        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )
-
     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
         if X2 is None:
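The surviving update_gradients_full lines implement the product rule: with K = K1*K2 elementwise, each factor receives dL_dK scaled by the other factor's covariance. A numerical illustration of why that is correct, for the variance of one RBF factor (standalone sketch; the column slicing of the real Prod kernel is omitted for brevity):

import numpy as np

def rbf(X, var, ell):
    d2 = np.square(X - X.T)
    return var*np.exp(-0.5*d2/ell**2)

X = np.random.randn(6, 1)
dL_dK = np.random.randn(6, 6)
var1, ell1, var2, ell2 = 1.3, 0.8, 0.5, 2.0

K1, K2 = rbf(X, var1, ell1), rbf(X, var2, ell2)
# product rule: d(sum(dL_dK*K1*K2))/dvar1 == sum((dL_dK*K2) * dK1/dvar1)
analytic = np.sum((dL_dK*K2) * K1/var1)

eps = 1e-6
num = (np.sum(dL_dK*rbf(X, var1+eps, ell1)*K2)
       - np.sum(dL_dK*rbf(X, var1-eps, ell1)*K2))/(2*eps)
assert np.allclose(analytic, num, atol=1e-6)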
@@ -1,101 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kernpart import Kernpart
import numpy as np
import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)

class prod_orthogonal(Kernpart):
    """
    Computes the product of two kernels

    :param k1, k2: the kernels to multiply
    :type k1, k2: Kernpart
    :rtype: kernel object

    """
    def __init__(self,k1,k2):
        self.input_dim = k1.input_dim + k2.input_dim
        self.num_params = k1.num_params + k2.num_params
        self.name = k1.name + '<times>' + k2.name
        self.k1 = k1
        self.k2 = k2
        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._set_params(np.hstack((k1._get_params(),k2._get_params())))

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.k1._get_params(), self.k2._get_params()))

    def _set_params(self,x):
        """set the value of the parameters."""
        self.k1._set_params(x[:self.k1.num_params])
        self.k2._set_params(x[self.k1.num_params:])

    def _get_param_names(self):
        """return parameter names."""
        return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]

    def K(self,X,X2,target):
        self._K_computations(X,X2)
        target += self._K1 * self._K2

    def _param_grad_helper(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters."""
        self._K_computations(X,X2)
        if X2 is None:
            self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
            self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
        else:
            self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
            self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        target1 = np.zeros(X.shape[0])
        target2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,:self.k1.input_dim],target1)
        self.k2.Kdiag(X[:,self.k1.input_dim:],target2)
        target += target1 * target2

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,:self.k1.input_dim],K1)
        self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])

    def gradients_X(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        self._K_computations(X,X2)
        self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
        self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
        self.k2.Kdiag(X[:,self.k1.input_dim:],K2)

        self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
        self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)

    def _K_computations(self,X,X2):
        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
            self._X = X.copy()
            self._params = self._get_params().copy()
            if X2 is None:
                self._X2 = None
                self._K1 = np.zeros((X.shape[0],X.shape[0]))
                self._K2 = np.zeros((X.shape[0],X.shape[0]))
                self.k1.K(X[:,:self.k1.input_dim],None,self._K1)
                self.k2.K(X[:,self.k1.input_dim:],None,self._K2)
            else:
                self._X2 = X2.copy()
                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
                self.k1.K(X[:,:self.k1.input_dim],X2[:,:self.k1.input_dim],self._K1)
                self.k2.K(X[:,self.k1.input_dim:],X2[:,self.k1.input_dim:],self._K2)
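prod_orthogonal is the standard tensor-product construction: each factor kernel sees its own block of input columns and the covariances are multiplied elementwise. A standalone sketch of the same idea with two RBF factors on disjoint column blocks (plain numpy, not the Kernpart API):

import numpy as np

def rbf(X, X2, var=1.0, ell=1.0):
    d2 = np.sum((X[:, None, :] - X2[None, :, :])**2, axis=-1)
    return var * np.exp(-0.5 * d2 / ell**2)

rng = np.random.RandomState(1)
X = rng.randn(8, 3)            # first two columns for k1, last column for k2

K1 = rbf(X[:, :2], X[:, :2], ell=0.5)
K2 = rbf(X[:, 2:], X[:, 2:], ell=2.0)
K = K1 * K2                    # prod_orthogonal's K(), elementwise

# a product of valid covariances is a valid covariance (Schur product theorem)
assert np.linalg.eigvalsh(K).min() > -1e-10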
@@ -1,82 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


from kernpart import Kernpart
import numpy as np

class RationalQuadratic(Kernpart):
    """
    Rational quadratic kernel

    .. math::

       k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2 \ell^2} \\bigg)^{- \\alpha} \ \ \ \ \  \\text{ where  } r^2 = (x-y)^2

    :param input_dim: the number of input dimensions
    :type input_dim: int (input_dim=1 is the only value currently supported)
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscale: the lengthscale :math:`\ell`
    :type lengthscale: float
    :param power: the power :math:`\\alpha`
    :type power: float
    :rtype: Kernpart object

    """
    def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.):
        assert input_dim == 1, "For this kernel we assume input_dim=1"
        self.input_dim = input_dim
        self.num_params = 3
        self.name = 'rat_quad'
        self.variance = variance
        self.lengthscale = lengthscale
        self.power = power

    def _get_params(self):
        return np.hstack((self.variance,self.lengthscale,self.power))

    def _set_params(self,x):
        self.variance = x[0]
        self.lengthscale = x[1]
        self.power = x[2]

    def _get_param_names(self):
        return ['variance','lengthscale','power']

    def K(self,X,X2,target):
        if X2 is None: X2 = X
        dist2 = np.square((X-X2.T)/self.lengthscale)
        target += self.variance*(1 + dist2/2.)**(-self.power)

    def Kdiag(self,X,target):
        target += self.variance

    def _param_grad_helper(self,dL_dK,X,X2,target):
        if X2 is None: X2 = X
        dist2 = np.square((X-X2.T)/self.lengthscale)

        dvar = (1 + dist2/2.)**(-self.power)
        dl = self.power * self.variance * dist2 / self.lengthscale * (1 + dist2/2.)**(-self.power-1)
        dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)

        target[0] += np.sum(dvar*dL_dK)
        target[1] += np.sum(dl*dL_dK)
        target[2] += np.sum(dp*dL_dK)

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        target[0] += np.sum(dL_dKdiag)
        # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged

    def gradients_X(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        if X2 is None:
            dist2 = np.square((X-X.T)/self.lengthscale)
            dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2.)**(-self.power-1)
        else:
            dist2 = np.square((X-X2.T)/self.lengthscale)
            dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2.)**(-self.power-1)
        target += np.sum(dL_dK*dX,1)[:,np.newaxis]

    def dKdiag_dX(self,dL_dKdiag,X,target):
        pass
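The rational quadratic arises as an inverse-gamma scale mixture of RBF kernels, and in the parameterisation k = (1 + r^2/(2*alpha))^(-alpha) it converges to the RBF as alpha grows. Note that the class above uses (1 + r^2/2)^(-power), without the 1/alpha factor, so its power is a pure shape parameter; the sketch below uses the mixture form to show the limit (standalone, numpy only):

import numpy as np

X = np.linspace(-3, 3, 25)[:, None]
dist2 = np.square(X - X.T)   # lengthscale = 1 for simplicity

def rat_quad(alpha):
    # scale-mixture parameterisation, with alpha in the denominator
    return (1 + dist2/(2*alpha))**(-alpha)

rbf = np.exp(-0.5*dist2)
gaps = [np.abs(rat_quad(a) - rbf).max() for a in (1., 10., 1000.)]
assert gaps[0] > gaps[1] > gaps[2]   # the gap shrinks roughly like 1/alpha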
@@ -9,143 +9,76 @@ from ...util.linalg import tdot
 from ...util.misc import fast_array_equal, param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
+from stationary import Stationary

-class RBF(Kern):
+class RBF(Stationary):
     """
     Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:

     .. math::

-       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \  \\text{ where  } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
-
-    where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
-
-    :param input_dim: the number of input dimensions
-    :type input_dim: int
-    :param variance: the variance of the kernel
-    :type variance: float
-    :param lengthscale: the vector of lengthscale of the kernel
-    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
-    :type ARD: Boolean
-    :rtype: kernel object
-
-    .. Note: this object implements both the ARD and 'spherical' version of the function
+       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
     """

-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
-        super(RBF, self).__init__(input_dim, name)
-        self.input_dim = input_dim
-        self.ARD = ARD
-
-        if not ARD:
-            if lengthscale is not None:
-                lengthscale = np.asarray(lengthscale)
-                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
-            else:
-                lengthscale = np.ones(1)
-        else:
-            if lengthscale is not None:
-                lengthscale = np.asarray(lengthscale)
-                assert lengthscale.size == self.input_dim, "bad number of lengthscales"
-            else:
-                lengthscale = np.ones(self.input_dim)
-
-        self.variance = Param('variance', variance, Logexp())
-        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
-        self.lengthscale.add_observer(self, self.update_lengthscale)
-        self.update_lengthscale(self.lengthscale)
-
-        self.add_parameters(self.variance, self.lengthscale)
-        self.parameters_changed() # initializes cache
-
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='RBF'):
+        super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
         self.weave_options = {}

-    def update_lengthscale(self, l):
-        self.lengthscale2 = np.square(self.lengthscale)
+    def K_of_r(self, r):
+        return self.variance * np.exp(-0.5 * r**2)
+
+    def dK_dr(self, r):
+        return -r*self.K_of_r(r)
+
+    #---------------------------------------#
+    #             PSI statistics            #
+    #---------------------------------------#

     def parameters_changed(self):
         # reset cached results
-        self._X, self._X2 = np.empty(shape=(2, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

-    def K(self, X, X2=None):
-        self._K_computations(X, X2)
-        return self.variance * self._K_dvar
-
-    def Kdiag(self, X):
-        ret = np.ones(X.shape[0])
-        ret[:] = self.variance
-        return ret
-
-    def psi0(self, Z, posterior_variational):
-        mu = posterior_variational.mean
-        ret = np.empty(mu.shape[0], dtype=np.float64)
-        ret[:] = self.variance
-        return ret
+    def psi0(self, Z, variational_posterior):
+        return self.Kdiag(variational_posterior.mean)

-    def psi1(self, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def psi1(self, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         return self._psi1

-    def psi2(self, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def psi2(self, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         return self._psi2

-    def update_gradients_full(self, dL_dK, X):
-        self._K_computations(X, None)
-        self.variance.gradient = np.sum(self._K_dvar * dL_dK)
-        if self.ARD:
-            self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None)
-        else:
-            self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
-
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        #contributions from Kdiag
-        self.variance.gradient = np.sum(dL_dKdiag)
-        #from Knm
-        self._K_computations(X, Z)
-        self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
-        if self.ARD:
-            self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
-        else:
-            self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
-        #from Kmm
-        self._K_computations(Z, None)
-        self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
-        if self.ARD:
-            self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
-        else:
-            self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
-
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        #contributions from Kmm
+        self.update_gradients_full(dL_dKmm, Z)
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
+        l2 = self.lengthscale **2

         #contributions from psi0:
-        self.variance.gradient = np.sum(dL_dpsi0)
+        self.variance.gradient += np.sum(dL_dpsi0)

         #from psi1
         self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
         d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
         dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
         if not self.ARD:
-            self.lengthscale.gradient = dpsi1_dlength.sum()
+            self.lengthscale.gradient += dpsi1_dlength.sum()
         else:
-            self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
+            self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)

         #from psi2
         d_var = 2.*self._psi2 / self.variance
-        d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
+        d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * self._psi2_denom)
         self.variance.gradient += np.sum(dL_dpsi2 * d_var)
         dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
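psi1 here is the Gaussian expectation of the kernel between an uncertain input and an inducing point, psi1[n,m] = E[k(x_n, z_m)] with x_n ~ N(mu_n, diag(S_n)), for which the RBF has the closed form coded in _psi_computations. A Monte Carlo cross-check of that formula for a single (n, m) pair (standalone sketch, numpy only):

import numpy as np

rng = np.random.RandomState(0)
Q, var, ell = 2, 0.7, 1.3
mu = rng.randn(Q)        # mean of one uncertain input
S = rng.rand(Q) + 0.1    # its per-dimension variance
z = rng.randn(Q)         # one inducing input

# closed form, mirroring _psi_computations: denom = S/l2 + 1
l2 = ell**2
denom = S/l2 + 1.
psi1 = var*np.exp(-0.5*np.sum(np.square(z - mu)/(l2*denom) + np.log(denom)))

# Monte Carlo estimate of E[k(x, z)] with x ~ N(mu, diag(S))
x = mu + np.sqrt(S)*rng.randn(100000, Q)
k = var*np.exp(-0.5*np.sum(np.square(x - z), axis=1)/l2)
assert abs(k.mean() - psi1) < 1e-2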
@@ -154,27 +87,20 @@ class RBF(Kern):
         else:
             self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)

-        #from Kmm
-        self._K_computations(Z, None)
-        self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
-        if self.ARD:
-            self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
-        else:
-            self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
-
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
+        l2 = self.lengthscale **2

         #psi1
-        denominator = (self.lengthscale2 * (self._psi1_denom))
+        denominator = (l2 * (self._psi1_denom))
         dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
         grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)

         #psi2
-        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
-        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
+        term1 = self._psi2_Zdist / l2 # num_inducing, num_inducing, input_dim
+        term2 = self._psi2_mudist / self._psi2_denom / l2 # N, num_inducing, num_inducing, input_dim
         dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
         grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)

@@ -182,59 +108,26 @@ class RBF(Kern):

         return grad

-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
+        l2 = self.lengthscale **2
         #psi1
-        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
+        tmp = self._psi1[:, :, None] / l2 / self._psi1_denom
         grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
         grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
         #psi2
-        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
+        tmp = self._psi2[:, :, :, None] / l2 / self._psi2_denom
         grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
         grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)

         return grad_mu, grad_S

-    def gradients_X(self, dL_dK, X, X2=None):
-        #if self._X is None or X.base is not self._X.base or X2 is not None:
-        self._K_computations(X, X2)
-        if X2 is None:
-            _K_dist = 2*(X[:, None, :] - X[None, :, :])
-        else:
-            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
-        gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
-        return np.sum(gradients_X * dL_dK.T[:, :, None], 0)
-
-    def dKdiag_dX(self, dL_dKdiag, X):
-        return np.zeros(X.shape[0])
-
-    #---------------------------------------#
-    #             PSI statistics            #
-    #---------------------------------------#
-
     #---------------------------------------#
     #            Precomputations            #
     #---------------------------------------#

-    def _K_computations(self, X, X2):
-        #params = self._get_params()
-        if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)):
-            #self._X = X.copy()
-            #self._params_save = params.copy()
-            if X2 is None:
-                self._X2 = None
-                X = X / self.lengthscale
-                Xsquare = np.sum(np.square(X), 1)
-                self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
-            else:
-                self._X2 = X2.copy()
-                X = X / self.lengthscale
-                X2 = X2 / self.lengthscale
-                self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
-            self._K_dvar = np.exp(-0.5 * self._K_dist2)
-
     def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
         """
         A helper function for update_gradients_* methods

@@ -301,19 +194,20 @@ class RBF(Kern):

         if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
             # something's changed. recompute EVERYTHING
+            l2 = self.lengthscale **2

             # psi1
-            self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
+            self._psi1_denom = S[:, None, :] / l2 + 1.
             self._psi1_dist = Z[None, :, :] - mu[:, None, :]
-            self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
+            self._psi1_dist_sq = np.square(self._psi1_dist) / l2 / self._psi1_denom
             self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
             self._psi1 = self.variance * np.exp(self._psi1_exponent)

             # psi2
-            self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
+            self._psi2_denom = 2.*S[:, None, None, :] / l2 + 1. # N,M,M,Q
             self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
             # self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
-            # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
+            # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(l2*self._psi2_denom)
             # self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
             self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q

@@ -332,11 +226,11 @@ class RBF(Kern):
         psi2_Zdist_sq = self._psi2_Zdist_sq
         _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
         half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
-        variance_sq = float(np.square(self.variance))
+        variance_sq = np.float64(np.square(self.variance))
         if self.ARD:
-            lengthscale2 = self.lengthscale2
+            lengthscale2 = self.lengthscale **2
         else:
-            lengthscale2 = np.ones(input_dim) * self.lengthscale2
+            lengthscale2 = np.ones(input_dim) * self.lengthscale **2
         code = """
         double tmp;

@@ -382,3 +276,7 @@ class RBF(Kern):
             type_converters=weave.converters.blitz, **self.weave_options)

         return mudist, mudist_sq, psi2_exponent, psi2
+
+    def input_sensitivity(self):
+        if self.ARD: return 1./self.lengthscale
+        else: return (1./self.lengthscale).repeat(self.input_dim)
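The point of this hunk is the move onto a Stationary base class: subclasses now supply only K_of_r and dK_dr, and the base class turns pairwise scaled distances into covariances and gradients. A minimal standalone sketch of that division of labour (the real Stationary class in GPy carries much more machinery; this only illustrates the pattern):

import numpy as np

class StationarySketch(object):
    """Base: builds r = ||(x - x')/ell|| and defers the profile to subclasses."""
    def __init__(self, variance=1., lengthscale=1.):
        self.variance, self.lengthscale = variance, lengthscale

    def _r(self, X, X2):
        diff = (X[:, None, :] - X2[None, :, :]) / self.lengthscale
        return np.sqrt(np.sum(diff**2, axis=-1))

    def K(self, X, X2=None):
        return self.K_of_r(self._r(X, X if X2 is None else X2))

class RBFSketch(StationarySketch):
    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
    def dK_dr(self, r):
        return -r * self.K_of_r(r)

X = np.random.randn(10, 2)
k = RBFSketch(variance=0.5, lengthscale=2.0)
K = k.K(X)
assert np.allclose(np.diag(K), 0.5)   # Kdiag of a stationary kernel is the variance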
@@ -1,115 +0,0 @@
# Copyright (c) 2012, James Hensman and Andrew Gordon Wilson
# Licensed under the BSD 3-clause license (see LICENSE.txt)


from kernpart import Kernpart
import numpy as np
from ...core.parameterization import Param

class RBFCos(Kernpart):
    def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
        self.input_dim = input_dim
        self.name = 'rbfcos'
        if self.input_dim>10:
            print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs"
        self.ARD = ARD

        #set the default frequencies and bandwidths, appropriate num_params
        if ARD:
            self.num_params = 2*self.input_dim + 1
            if frequencies is not None:
                frequencies = np.asarray(frequencies)
                assert frequencies.size == self.input_dim, "bad number of frequencies"
            else:
                frequencies = np.ones(self.input_dim)
            if bandwidths is not None:
                bandwidths = np.asarray(bandwidths)
                assert bandwidths.size == self.input_dim, "bad number of bandwidths"
            else:
                bandwidths = np.ones(self.input_dim)
        else:
            self.num_params = 3
            if frequencies is not None:
                frequencies = np.asarray(frequencies)
                assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel"
            else:
                frequencies = np.ones(1)

            if bandwidths is not None:
                bandwidths = np.asarray(bandwidths)
                assert bandwidths.size == 1, "Exactly one bandwidth needed for non-ARD kernel"
            else:
                bandwidths = np.ones(1)

        self.variance = Param('variance', variance)
        self.frequencies = Param('frequencies', frequencies)
        self.bandwidths = Param('bandwidths', bandwidths)

        #initialise cache
        self._X, self._X2 = np.empty(shape=(2,1))

    # def _get_params(self):
    #     return np.hstack((self.variance,self.frequencies, self.bandwidths))

    # def _set_params(self,x):
    #     assert x.size==(self.num_params)
    #     if self.ARD:
    #         self.variance = x[0]
    #         self.frequencies = x[1:1+self.input_dim]
    #         self.bandwidths = x[1+self.input_dim:]
    #     else:
    #         self.variance, self.frequencies, self.bandwidths = x

    # def _get_param_names(self):
    #     if self.num_params == 3:
    #         return ['variance','frequency','bandwidth']
    #     else:
    #         return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)]

    def K(self,X,X2,target):
        self._K_computations(X,X2)
        target += self.variance*self._dvar

    def Kdiag(self,X,target):
        np.add(target,self.variance,target)

    def _param_grad_helper(self,dL_dK,X,X2,target):
        self._K_computations(X,X2)
        target[0] += np.sum(dL_dK*self._dvar)
        if self.ARD:
            for q in xrange(self.input_dim):
                target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self._dist[:,:,q]*self.frequencies[q])*self._dist[:,:,q])
                target[q+1+self.input_dim] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q])
        else:
            target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self._dist*self.frequencies)*self._dist,-1))
            target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2.sum(-1))

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        target[0] += np.sum(dL_dKdiag)

    def gradients_X(self,dL_dK,X,X2,target):
        #TODO!!!
        raise NotImplementedError

    def dKdiag_dX(self,dL_dKdiag,X,target):
        pass

    def parameters_changed(self):
        self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self._dist2*self.bandwidths,-1))
        self._cos_part = np.prod(np.cos(2.*np.pi*self._dist*self.frequencies),-1)
        self._dvar = self._rbf_part*self._cos_part

    def _K_computations(self,X,X2):
        if not (np.all(X==self._X) and np.all(X2==self._X2)):
            if X2 is None: X2 = X
            self._X = X.copy()
            self._X2 = X2.copy()

            #do the distances: this will be high memory for large input_dim
            #NB: we don't take the abs of the dist because cos is symmetric
            self._dist = X[:,None,:] - X2[None,:,:]
            self._dist2 = np.square(self._dist)

            #ensure the next section is computed:
            self._params = np.empty(self.num_params)
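RBFCos is an RBF envelope times a cosine, k(d) = sigma^2 exp(-2 pi^2 sum_q b_q d_q^2) prod_q cos(2 pi f_q d_q), which makes it a single-component spectral-mixture-style kernel: its spectral density is a Gaussian centred at the frequencies, so by Bochner's theorem it is a valid covariance. A standalone evaluation of that form with a symmetry/PSD sanity check (all names local to the sketch):

import numpy as np

def rbfcos(X, X2, var=1.0, freq=0.5, bw=0.05):
    d = X[:, None, :] - X2[None, :, :]
    rbf_part = np.exp(-2.*np.pi**2*np.sum(d**2*bw, -1))
    cos_part = np.prod(np.cos(2.*np.pi*d*freq), -1)
    return var*rbf_part*cos_part

X = np.linspace(0, 10, 40)[:, None]
K = rbfcos(X, X)
assert np.allclose(K, K.T)
assert np.linalg.eigvalsh(K).min() > -1e-8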
@@ -6,12 +6,68 @@ from kern import Kern
 import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 
-class Bias(Kern):
-    def __init__(self,input_dim,variance=1.,name=None):
-        super(Bias, self).__init__(input_dim, name)
-        self.variance = Param("variance", variance, Logexp())
-        self.add_parameter(self.variance)
+class Static(Kern):
+    def __init__(self, input_dim, variance, name):
+        super(Static, self).__init__(input_dim, name)
+        self.variance = Param('variance', variance, Logexp())
+        self.add_parameters(self.variance)
+
+    def Kdiag(self, X):
+        ret = np.empty((X.shape[0],), dtype=np.float64)
+        ret[:] = self.variance
+        return ret
+
+    def gradients_X(self, dL_dK, X, X2, target):
+        return np.zeros(X.shape)
+
+    def gradients_X_diag(self, dL_dKdiag, X, target):
+        return np.zeros(X.shape)
+
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        return np.zeros(Z.shape)
+
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        return np.zeros(mu.shape), np.zeros(S.shape)
+
+    def psi0(self, Z, mu, S):
+        return self.Kdiag(mu)
+
+    def psi1(self, Z, mu, S, target):
+        return self.K(mu, Z)
+
+    def psi2(self, Z, mu, S):
+        K = self.K(mu, Z)
+        return K[:,:,None]*K[:,None,:] # NB. more efficient implementations in inheriting classes
+
+
+class White(Static):
+    def __init__(self, input_dim, variance=1., name='white'):
+        super(White, self).__init__(input_dim, variance, name)
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            return np.eye(X.shape[0])*self.variance
+        else:
+            return np.zeros((X.shape[0], X2.shape[0]))
+
+    def psi2(self, Z, mu, S, target):
+        return np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_full(self, dL_dK, X):
+        self.variance.gradient = np.trace(dL_dK)
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = dL_dKdiag.sum()
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum()
+
+
+class Bias(Static):
+    def __init__(self, input_dim, variance=1., name='bias'):
+        super(Bias, self).__init__(input_dim, variance, name)
 
     def K(self, X, X2=None):
         shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
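The refactor above introduces a shared Static base for the constant-variance kernels: White contributes only on the diagonal and vanishes between two distinct input sets, while Bias is constant everywhere. A minimal sketch of the intended behaviour, assuming the classes are exported as GPy.kern.White and GPy.kern.Bias as the test changes later in this commit suggest:

    import numpy as np
    import GPy

    X = np.random.randn(5, 2)
    X2 = np.random.randn(3, 2)

    white = GPy.kern.White(2, variance=0.5)
    # White noise only contributes on the diagonal of K(X, X) ...
    assert np.allclose(white.K(X), 0.5 * np.eye(5))
    # ... and vanishes entirely between two distinct input sets.
    assert np.allclose(white.K(X, X2), 0.)

    bias = GPy.kern.Bias(2, variance=2.)
    # Bias is the same constant everywhere, regardless of the inputs.
    assert np.allclose(bias.K(X, X2), 2.)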
@@ -19,34 +75,12 @@ class Bias(Kern):
         ret[:] = self.variance
         return ret
 
-    def Kdiag(self,X):
-        ret = np.empty((X.shape[0],), dtype=np.float64)
-        ret[:] = self.variance
-        return ret
-
     def update_gradients_full(self, dL_dK, X, X2=None):
         self.variance.gradient = dL_dK.sum()
 
     def update_gradients_diag(self, dL_dKdiag, X):
         self.variance.gradient = dL_dKdiag.sum()
 
-    def gradients_X(self, dL_dK, X, X2, target):
-        return np.zeros(X.shape)
-
-    def gradients_X_diag(self,dL_dKdiag,X,target):
-        return np.zeros(X.shape)
-
-    #---------------------------------------#
-    #             PSI statistics            #
-    #---------------------------------------#
-
-    def psi0(self, Z, mu, S):
-        return self.Kdiag(mu)
-
-    def psi1(self, Z, mu, S, target):
-        return self.K(mu, S)
-
     def psi2(self, Z, mu, S, target):
         ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
         ret[:] = self.variance**2

@@ -55,8 +89,3 @@ class Bias(Kern):
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
-
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        return np.zeros(Z.shape)
-
-    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        return np.zeros(mu.shape), np.zeros(S.shape)
@@ -32,6 +32,16 @@ class Stationary(Kern):
         assert self.variance.size==1
         self.add_parameters(self.variance, self.lengthscale)
 
+    def K_of_r(self, r):
+        raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
+
+    def dK_dr(self, r):
+        raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
+
+    def K(self, X, X2=None):
+        r = self._scaled_dist(X, X2)
+        return self.K_of_r(r)
+
     def _dist(self, X, X2):
         if X2 is None:
             X2 = X
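With K, update_gradients_full and gradients_X now factored into the Stationary base, a new stationary kernel only has to supply K_of_r and dK_dr. A hedged sketch of the pattern, using a hypothetical gamma-exponential kernel (the class and its fixed exponent are illustrative, not part of this commit):

    import numpy as np
    # Hypothetical example: `Stationary` is the base class from the hunk above.
    class GammaExponential(Stationary):
        """k(r) = variance * exp(-r**1.5); the 1.5 exponent is illustrative."""
        def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='GammaExp'):
            super(GammaExponential, self).__init__(input_dim, variance, lengthscale, ARD, name)

        def K_of_r(self, r):
            return self.variance * np.exp(-r**1.5)

        def dK_dr(self, r):
            # chain rule: d/dr exp(-r**1.5) = -1.5 * r**0.5 * exp(-r**1.5)
            return -1.5 * np.sqrt(r) * self.K_of_r(r)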
@@ -50,11 +60,11 @@ class Stationary(Kern):
         self.lengthscale.gradient = 0.
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        K = self.K(X, X2)
-        self.variance.gradient = np.sum(K * dL_dK)/self.variance
+        r = self._scaled_dist(X, X2)
+        K = self.K_of_r(r)
 
         rinv = self._inv_dist(X, X2)
-        dL_dr = self.dK_dr(X, X2) * dL_dK
+        dL_dr = self.dK_dr(r) * dL_dK
         x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
 
         if self.ARD:
@@ -62,6 +72,8 @@ class Stationary(Kern):
         else:
             self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()
 
+        self.variance.gradient = np.sum(K * dL_dK)/self.variance
+
     def _inv_dist(self, X, X2=None):
         dist = self._scaled_dist(X, X2)
         if X2 is None:
@@ -72,7 +84,8 @@ class Stationary(Kern):
         return 1./np.where(dist != 0., dist, np.inf)
 
     def gradients_X(self, dL_dK, X, X2=None):
-        dL_dr = self.dK_dr(X, X2) * dL_dK
+        r = self._scaled_dist(X, X2)
+        dL_dr = self.dK_dr(r) * dL_dK
         invdist = self._inv_dist(X, X2)
         ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
         if X2 is None:
@@ -82,19 +95,18 @@ class Stationary(Kern):
     def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)
 
+    def input_sensitivity(self):
+        return np.ones(self.input_dim)/self.lengthscale
+
 class Exponential(Stationary):
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
         super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)
 
-    def K(self, X, X2=None):
-        dist = self._scaled_dist(X, X2)
-        return self.variance * np.exp(-0.5 * dist)
+    def K_of_r(self, r):
+        return self.variance * np.exp(-0.5 * r)
 
-    def dK_dr(self, X, X2):
-        return -0.5*self.K(X, X2)
+    def dK_dr(self, r):
+        return -0.5*self.K_of_r(r)
 
 class Matern32(Stationary):
     """
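The new input_sensitivity hook is what the rewritten ARD plotting code further down consumes: for a stationary kernel, the relevance of dimension q is taken to be 1/lengthscale_q, so shorter lengthscales mean more relevant inputs. A short usage sketch (Matern52 is a Stationary subclass in this commit, so it inherits the hook):

    import numpy as np
    import GPy

    # Three inputs with very different lengthscales.
    k = GPy.kern.Matern52(3, lengthscale=np.array([0.5, 2.0, 10.0]), ARD=True)

    sens = k.input_sensitivity()       # == 1/lengthscale, one entry per input
    print(np.argsort(sens)[::-1])      # most relevant dimension first: [0 1 2]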
@@ -109,13 +121,11 @@ class Matern32(Stationary):
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
         super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)
 
-    def K(self, X, X2=None):
-        dist = self._scaled_dist(X, X2)
-        return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
+    def K_of_r(self, r):
+        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
 
-    def dK_dr(self, X, X2):
-        dist = self._scaled_dist(X, X2)
-        return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist)
+    def dK_dr(self, r):
+        return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
 
     def Gram_matrix(self, F, F1, F2, lower, upper):
         """
@@ -152,13 +162,13 @@ class Matern52(Stationary):
 
     k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3} r^2) \exp(- \sqrt{5} r) \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input\_dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
     """
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'):
+        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name)
 
-    def K(self, X, X2=None):
-        r = self._scaled_dist(X, X2)
+    def K_of_r(self, r):
         return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
 
-    def dK_dr(self, X, X2):
-        r = self._scaled_dist(X, X2)
+    def dK_dr(self, r):
         return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
 
     def Gram_matrix(self,F,F1,F2,F3,lower,upper):
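The Matérn-5/2 derivative above is easy to mis-transcribe, so it is worth checking by hand. Differentiating k(r) = \sigma^2 (1 + \sqrt{5}r + \frac{5}{3}r^2) e^{-\sqrt{5}r} with respect to r gives

    \frac{dk}{dr}
      = \sigma^2\Big(\sqrt{5} + \tfrac{10}{3}r\Big)e^{-\sqrt{5}r}
        - \sqrt{5}\,\sigma^2\Big(1 + \sqrt{5}r + \tfrac{5}{3}r^2\Big)e^{-\sqrt{5}r}
      = \sigma^2\Big(\tfrac{10}{3}r - 5r - \tfrac{5\sqrt{5}}{3}r^2\Big)e^{-\sqrt{5}r},

which matches the expression in dK_dr term for term.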
@@ -193,19 +203,55 @@ class Matern52(Stationary):
         return(1./self.variance* (G_coef*G + orig + orig2))
 
 
 class ExpQuad(Stationary):
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
         super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
 
-    def K(self, X, X2=None):
-        r = self._scaled_dist(X, X2)
+    def K_of_r(self, r):
         return self.variance * np.exp(-0.5 * r**2)
 
-    def dK_dr(self, X, X2):
-        dist = self._scaled_dist(X, X2)
-        return -dist*self.K(X, X2)
+    def dK_dr(self, r):
+        return -r*self.K_of_r(r)
+
+class Cosine(Stationary):
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'):
+        super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name)
+
+    def K_of_r(self, r):
+        return self.variance * np.cos(r)
+
+    def dK_dr(self, r):
+        return -self.variance * np.sin(r)
+
+
+class RatQuad(Stationary):
+    """
+    Rational Quadratic Kernel
+
+    .. math::
+
+       k(r) = \sigma^2 \bigg( 1 + \frac{r^2}{2} \bigg)^{- \alpha}
+
+    """
+
+    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, name='RatQuad'):
+        super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
+        self.power = Param('power', power, Logexp())
+        self.add_parameters(self.power)
+
+    def K_of_r(self, r):
+        return self.variance*(1. + r**2/2.)**(-self.power)
+
+    def dK_dr(self, r):
+        return -self.variance*self.power*r*(1. + r**2/2.)**(-self.power - 1.)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        super(RatQuad, self).update_gradients_full(dL_dK, X, X2)
+        r = self._scaled_dist(X, X2)
+        r2 = r**2
+        dpow = -self.variance*2.**self.power*(r2 + 2.)**(-self.power)*np.log(0.5*(r2+2.))
+        self.power.gradient = np.sum(dL_dK*dpow)
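A quick sanity check for the two kernels added in this hunk is to evaluate them at a known scaled distance and compare against the closed forms. A hedged sketch, assuming they are exported as GPy.kern.Cosine and GPy.kern.RatQuad and that the default lengthscale is one:

    import numpy as np
    import GPy

    X = np.zeros((1, 1))
    X2 = np.ones((1, 1))   # unit separation, so r = 1 with a unit lengthscale

    cos_k = GPy.kern.Cosine(1, variance=2.)
    assert np.allclose(cos_k.K(X, X2), 2. * np.cos(1.))

    rq = GPy.kern.RatQuad(1, variance=1.5, power=3.)
    assert np.allclose(rq.K(X, X2), 1.5 * (1. + 0.5)**-3.)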
@@ -1,78 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-from kern import Kern
-import numpy as np
-from ...core.parameterization import Param
-from ...core.parameterization.transformations import Logexp
-
-class White(Kern):
-    """
-    White noise kernel.
-
-    :param input_dim: the number of input dimensions
-    :type input_dim: int
-    :param variance:
-    :type variance: float
-    """
-    def __init__(self,input_dim,variance=1., name='white'):
-        super(White, self).__init__(input_dim, name)
-        self.input_dim = input_dim
-        self.variance = Param('variance', variance, Logexp())
-        self.add_parameters(self.variance)
-
-    def K(self, X, X2=None):
-        if X2 is None:
-            return np.eye(X.shape[0])*self.variance
-        else:
-            return np.zeros((X.shape[0], X2.shape[0]))
-
-    def Kdiag(self,X):
-        ret = np.ones(X.shape[0])
-        ret[:] = self.variance
-        return ret
-
-    def update_gradients_full(self, dL_dK, X):
-        self.variance.gradient = np.trace(dL_dK)
-
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        self.variance.gradient = np.trace(dL_dKmm) + np.sum(dL_dKdiag)
-
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        raise NotImplementedError
-
-    def gradients_X(self,dL_dK,X,X2):
-        return np.zeros_like(X)
-
-    def psi0(self,Z,mu,S,target):
-        pass # target += self.variance
-
-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
-        pass # target += dL_dpsi0.sum()
-
-    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
-        pass
-
-    def psi1(self,Z,mu,S,target):
-        pass
-
-    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
-        pass
-
-    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
-        pass
-
-    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
-        pass
-
-    def psi2(self,Z,mu,S,target):
-        pass
-
-    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
-        pass
-
-    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
-        pass
-
-    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
-        pass
@@ -2,9 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-from scipy import stats, special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
+from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
 import link_functions
 from likelihood import Likelihood
@@ -8,9 +8,9 @@ from ..core import SparseGP
 from ..likelihoods import Gaussian
 from ..inference.optimization import SCG
 from ..util import linalg
-from ..core.parameterization.variational import Normal
+from ..core.parameterization.variational import NormalPosterior, NormalPrior
 
-class BayesianGPLVM(SparseGP, GPLVM):
+class BayesianGPLVM(SparseGP):
     """
     Bayesian Gaussian Process Latent Variable Model
@@ -25,11 +25,13 @@ class BayesianGPLVM(SparseGP, GPLVM):
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
         if X == None:
-            X = self.initialise_latent(init, input_dim, Y)
+            from ..util.initialization import initialize_latent
+            X = initialize_latent(init, input_dim, Y)
         self.init = init
 
         if X_variance is None:
-            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
+            X_variance = np.random.uniform(0,.1,X.shape)
 
         if Z is None:
             Z = np.random.permutation(X.copy())[:num_inducing]
@@ -40,10 +42,13 @@ class BayesianGPLVM(SparseGP, GPLVM):
 
         if likelihood is None:
             likelihood = Gaussian()
-        self.q = Normal(X, X_variance)
-        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
-        self.add_parameter(self.q, index=0)
-        #self.ensure_default_constraints()
+        self.variational_prior = NormalPrior()
+        X = NormalPosterior(X, X_variance)
+
+        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
+        self.add_parameter(self.X, index=0)
 
     def _getstate(self):
         """
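The constructor now wraps the latent means and variances in a NormalPosterior, which behaves like X but carries per-point variances, while a NormalPrior owns the KL penalty. A minimal sketch of that wiring outside the model class, assuming the API of the variational module this commit introduces:

    import numpy as np
    from GPy.core.parameterization.variational import NormalPosterior, NormalPrior

    mu = np.random.randn(100, 2)            # variational means of the latent X
    S = np.random.uniform(0, .1, mu.shape)  # per-point variational variances

    q_X = NormalPosterior(mu, S)   # stands in for X, but carries its variance
    prior = NormalPrior()          # supplies KL_divergence / update_gradients_KL

    kl = prior.KL_divergence(q_X)  # scalar penalty subtracted from the log marginal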
@@ -57,24 +62,15 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.init = state.pop()
         SparseGP._setstate(self, state)
 
-    def KL_divergence(self):
-        var_mean = np.square(self.X).sum()
-        var_S = np.sum(self.X_variance - np.log(self.X_variance))
-        return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
-
     def parameters_changed(self):
         super(BayesianGPLVM, self).parameters_changed()
-        self._log_marginal_likelihood -= self.KL_divergence()
-        dL_dmu, dL_dS = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)
-
-        # dL:
-        self.q.mean.gradient = dL_dmu
-        self.q.variance.gradient = dL_dS
+        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
 
-        # dKL:
-        self.q.mean.gradient -= self.X
-        self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
+        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
+
+        # update for the KL divergence
+        self.variational_prior.update_gradients_KL(self.X)
 
     def plot_latent(self, plot_inducing=True, *args, **kwargs):
         """
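The inlined KL_divergence deleted here (and now owned by NormalPrior) is the standard closed form for a factorised Gaussian posterior against a standard-normal prior:

    \mathrm{KL}\big(q(X)\,\|\,p(X)\big)
      = \frac{1}{2}\sum_{n=1}^{N}\sum_{q=1}^{Q}\Big(\mu_{nq}^2 + S_{nq} - \ln S_{nq} - 1\Big)

With var_mean = \sum \mu^2 and var_S = \sum (S - \ln S), this is exactly the removed 0.5 * (var_mean + var_S) - 0.5 * input_dim * num_data.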
@@ -147,6 +143,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
         """
         See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map
         """
+        import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import dim_reduction_plots
@@ -168,12 +165,12 @@ class BayesianGPLVMWithMissingData(BayesianGPLVM):
         dL_dmu, dL_dS = self.dL_dmuS()
 
         # dL:
-        self.q.mean.gradient = dL_dmu
-        self.q.variance.gradient = dL_dS
+        self.X.mean.gradient = dL_dmu
+        self.X.variance.gradient = dL_dS
 
         # dKL:
-        self.q.mean.gradient -= self.X
-        self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
+        self.X.mean.gradient -= self.X.mean
+        self.X.variance.gradient -= (1. - (1. / (self.X.variance))) * 0.5
 
 if __name__ == '__main__':
     import numpy as np
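The dKL update kept inline in the missing-data variant above follows from differentiating the KL term given earlier, coordinate-wise:

    \frac{\partial \mathrm{KL}}{\partial \mu_{nq}} = \mu_{nq},
    \qquad
    \frac{\partial \mathrm{KL}}{\partial S_{nq}} = \frac{1}{2}\Big(1 - \frac{1}{S_{nq}}\Big)

The code subtracts these from the likelihood gradients because the KL enters the objective with a minus sign.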
@@ -28,28 +28,20 @@ class GPLVM(GP):
         :type init: 'PCA'|'random'
         """
         if X is None:
-            X = self.initialise_latent(init, input_dim, Y)
+            from ..util.initialization import initialize_latent
+            X = initialize_latent(init, input_dim, Y)
         if kernel is None:
-            kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2))
+            kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
 
         likelihood = Gaussian()
 
         super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
-        self.X = Param('X', X)
+        self.X = Param('latent_mean', X)
         self.add_parameter(self.X, index=0)
 
-    def initialise_latent(self, init, input_dim, Y):
-        Xr = np.random.randn(Y.shape[0], input_dim)
-        if init == 'PCA':
-            PC = PCA(Y, input_dim)[0]
-            Xr[:PC.shape[0], :PC.shape[1]] = PC
-        else:
-            pass
-        return Xr
-
     def parameters_changed(self):
-        GP.parameters_changed(self)
-        self.X.gradient = self.kern.gradients_X(self.posterior.dL_dK, self.X)
+        super(GPLVM, self).parameters_changed()
+        self.X.gradient = self.kern.gradients_X(self._dL_dK, self.X, None)
 
     def _getstate(self):
         return GP._getstate(self)
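Both GPLVM constructors now delegate latent initialisation to a shared helper instead of the removed initialise_latent method. A hedged usage sketch, with the signature inferred from the call sites in this diff:

    import numpy as np
    from GPy.util.initialization import initialize_latent

    Y = np.random.randn(50, 12)          # 50 observations, 12 output dimensions
    # 'PCA' seeds the latent means from a principal-component projection of Y;
    # 'random' is the assumed fallback, matching the old initialise_latent method.
    X = initialize_latent('PCA', 2, Y)
    print(X.shape)                       # (50, 2)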
@@ -79,7 +71,8 @@ class GPLVM(GP):
         pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
 
     def plot_latent(self, *args, **kwargs):
-        return util.plot_latent.plot_latent(self, *args, **kwargs)
+        from ..plotting.matplot_dep import dim_reduction_plots
+        return dim_reduction_plots.plot_latent(self, *args, **kwargs)
 
     def plot_magnification(self, *args, **kwargs):
         return util.plot_latent.plot_magnification(self, *args, **kwargs)
@@ -7,6 +7,7 @@ from ..core import SparseGP
 from .. import likelihoods
 from .. import kern
 from ..inference.latent_function_inference import VarDTC
+from ..util.misc import param_to_array
 
 class SparseGPRegression(SparseGP):
     """
@@ -33,18 +34,18 @@ class SparseGPRegression(SparseGP):
 
         # kern defaults to rbf (plus white for stability)
         if kernel is None:
-            kernel = kern.rbf(input_dim)# + kern.white(input_dim, variance=1e-3)
+            kernel = kern.RBF(input_dim)# + kern.white(input_dim, variance=1e-3)
 
         # Z defaults to a subset of the data
         if Z is None:
             i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
-            Z = X[i].copy()
+            Z = param_to_array(X)[i].copy()
         else:
             assert Z.shape[1] == input_dim
 
         likelihood = likelihoods.Gaussian()
 
-        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC())
+        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
 
     def _getstate(self):
         return SparseGP._getstate(self)
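With X_variance dropped from this constructor, uncertain inputs are now the Bayesian GPLVM's job and SparseGPRegression handles the plain sparse case only. A hedged sketch of the usual call:

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (200, 1))
    Y = np.sin(X) + np.random.randn(200, 1) * 0.05

    # num_inducing points are drawn from X when Z is not given explicitly.
    m = GPy.models.SparseGPRegression(X, Y, num_inducing=10)
    m.optimize()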
@@ -9,8 +9,41 @@ from matplotlib.transforms import offset_copy
 from ...kern import Linear
 
 
+def add_bar_labels(fig, ax, bars, bottom=0):
+    transOffset = offset_copy(ax.transData, fig=fig,
+                              x=0., y=-2., units='points')
+    transOffsetUp = offset_copy(ax.transData, fig=fig,
+                                x=0., y=1., units='points')
+    for bar in bars:
+        for i, [patch, num] in enumerate(zip(bar.patches, np.arange(len(bar.patches)))):
+            if hasattr(bottom, "__len__") and len(bottom) == len(bar): b = bottom[i]
+            else: b = bottom
+            height = patch.get_height() + b
+            xi = patch.get_x() + patch.get_width() / 2.
+            va = 'top'
+            c = 'w'
+            t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center')
+            transform = transOffset
+            if patch.get_extents().height <= t.get_extents().height + 3:
+                va = 'bottom'
+                c = 'k'
+                transform = transOffsetUp
+            ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform)
+
+    ax.set_xticks([])
+
+
+def plot_bars(fig, ax, x, ard_params, color, name, bottom=0):
+    from ...util.misc import param_to_array
+    return ax.bar(left=x, height=param_to_array(ard_params), width=.8,
+                  bottom=bottom, align='center',
+                  color=color, edgecolor='k', linewidth=1.2,
+                  label=name.replace("_"," "))
+
 def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
-    """If an ARD kernel is present, plot a bar representation using matplotlib
+    """
+    If an ARD kernel is present, plot a bar representation using matplotlib
 
     :param fignum: figure number of the plot
     :param ax: matplotlib axis to plot on
@@ -24,50 +57,27 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
         ax = fig.add_subplot(111)
     else:
         fig = ax.figure
 
+    if title is None:
+        ax.set_title('ARD parameters, %s kernel' % kernel.name)
+    else:
+        ax.set_title(title)
+
     Tango.reset()
-    xticklabels = []
     bars = []
-    x0 = 0
-    #for p in kernel._parameters_:
-    p = kernel
-    c = Tango.nextMedium()
-    if hasattr(p, 'ARD') and p.ARD:
-        if title is None:
-            ax.set_title('ARD parameters, %s kernel' % p.name)
-        else:
-            ax.set_title(title)
-        if isinstance(p, Linear):
-            ard_params = p.variances
-        else:
-            ard_params = 1. / p.lengthscale
-        x = np.arange(x0, x0 + len(ard_params))
-        from ...util.misc import param_to_array
-        bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," ")))
-        xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
-        x0 += len(ard_params)
-    x = np.arange(x0)
-    transOffset = offset_copy(ax.transData, fig=fig,
-                              x=0., y=-2., units='points')
-    transOffsetUp = offset_copy(ax.transData, fig=fig,
-                                x=0., y=1., units='points')
-    for bar in bars:
-        for patch, num in zip(bar.patches, np.arange(len(bar.patches))):
-            height = patch.get_height()
-            xi = patch.get_x() + patch.get_width() / 2.
-            va = 'top'
-            c = 'w'
-            t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center')
-            transform = transOffset
-            if patch.get_extents().height <= t.get_extents().height + 3:
-                va = 'bottom'
-                c = 'k'
-                transform = transOffsetUp
-            ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform)
-    # for xi, t in zip(x, xticklabels):
-    #     ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center')
-    # ax.set_xticklabels(xticklabels, rotation=17)
-    ax.set_xticks([])
-    ax.set_xlim(-.5, x0 - .5)
+    ard_params = np.atleast_2d(kernel.input_sensitivity())
+    bottom = 0
+    x = np.arange(kernel.input_dim)
+
+    for i in range(ard_params.shape[-1]):
+        c = Tango.nextMedium()
+        bars.append(plot_bars(fig, ax, x, ard_params[:,i], c, kernel._parameters_[i].name, bottom=bottom))
+        bottom += ard_params[:,i]
+
+    ax.set_xlim(-.5, kernel.input_dim - .5)
+    add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[:,i])
+
     if legend:
         if title is '':
             mode = 'expand'
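The rewritten plot_ARD no longer special-cases Linear versus lengthscale-based kernels; it stacks whatever input_sensitivity() reports, one bar per input dimension and one colour per sub-kernel. A hedged usage sketch (the module path is assumed from this file's location):

    import numpy as np
    import GPy
    import matplotlib.pyplot as plt

    X = np.random.randn(100, 3)
    Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(100, 1)

    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(3, ARD=True))
    m.optimize()

    # One bar per input dimension; height ~ 1/lengthscale for stationary kernels.
    GPy.plotting.matplot_dep.kernel_plots.plot_ARD(m.kern)
    plt.show()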
@@ -58,7 +58,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         ax = fig.add_subplot(111)
 
     X, Y = param_to_array(model.X, model.Y)
-    if model.has_uncertain_inputs(): X_variance = model.X_variance
+    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): X_variance = model.X_variance
+
+    if hasattr(model, 'Z'): Z = param_to_array(model.Z)
 
     #work out what the inputs are for plotting (1D or 2D)
     fixed_dims = np.array([i for i,v in fixed_inputs])
@@ -68,8 +70,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     if len(free_dims) == 1:
 
         #define the frame on which to plot
-        resolution = resolution or 200
-        Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits)
+        Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution or 200)
         Xgrid = np.empty((Xnew.shape[0],model.input_dim))
         Xgrid[:,free_dims] = Xnew
         for i,v in fixed_inputs:
@@ -97,10 +98,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
 
         #add error bars for uncertain (if input uncertainty is being modelled)
-        if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
-            ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
-                        xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
-                        ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+        #if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
+        #    ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
+        #                xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
+        #                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
 
         #set the limits of the plot to some sensible values
@@ -112,7 +113,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         #add inducing inputs (if a sparse model is used)
         if hasattr(model,"Z"):
             #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
-            Zu = param_to_array(model.Z[:,free_dims])
+            Zu = Z[:,free_dims]
             z_height = ax.get_ylim()[0]
             ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
@@ -136,7 +137,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
             Y = Y
         else:
             m, _, _, _ = model.predict(Xgrid)
-            Y = model.data
+            Y = Y
         for d in which_data_ycols:
             m_d = m[:,d].reshape(resolution, resolution).T
             ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
@@ -152,7 +153,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         #add inducing inputs (if a sparse model is used)
         if hasattr(model,"Z"):
             #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
-            Zu = model.Z[:,free_dims]
+            Zu = Z[:,free_dims]
             ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
 
     else:
@@ -30,6 +30,11 @@ class Test(unittest.TestCase):
         self.assertListEqual(self.param_index[two].tolist(), [0,3])
         self.assertListEqual(self.param_index[one].tolist(), [1])
 
+    def test_shift_right(self):
+        self.param_index.shift_right(5, 2)
+        self.assertListEqual(self.param_index[three].tolist(), [2,4,9])
+        self.assertListEqual(self.param_index[two].tolist(), [0,7])
+        self.assertListEqual(self.param_index[one].tolist(), [3])
+
     def test_index_view(self):
         #=======================================================================
@@ -4,6 +4,7 @@
 import unittest
 import numpy as np
 import GPy
+import sys
 
 verbose = True
@@ -14,106 +15,223 @@ except ImportError:
     SYMPY_AVAILABLE=False
 
 
-class KernelTests(unittest.TestCase):
-    def test_kerneltie(self):
-        K = GPy.kern.rbf(5, ARD=True)
-        K.tie_params('.*[01]')
-        K.constrain_fixed('2')
-        X = np.random.rand(5,5)
-        Y = np.ones((5,1))
-        m = GPy.models.GPRegression(X,Y,K)
-        self.assertTrue(m.checkgrad())
-
-    def test_rbfkernel(self):
-        kern = GPy.kern.rbf(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_rbf_sympykernel(self):
-        if SYMPY_AVAILABLE:
-            kern = GPy.kern.rbf_sympy(5)
-            self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_eq_sympykernel(self):
-        if SYMPY_AVAILABLE:
-            kern = GPy.kern.eq_sympy(5, 3)
-            self.assertTrue(GPy.kern.kern_test(kern, output_ind=4, verbose=verbose))
-
-    def test_ode1_eqkernel(self):
-        if SYMPY_AVAILABLE:
-            kern = GPy.kern.ode1_eq(3)
-            self.assertTrue(GPy.kern.kern_test(kern, output_ind=1, verbose=verbose, X_positive=True))
-
-    def test_rbf_invkernel(self):
-        kern = GPy.kern.rbf_inv(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_Matern32kernel(self):
-        kern = GPy.kern.Matern32(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_Matern52kernel(self):
-        kern = GPy.kern.Matern52(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_linearkernel(self):
-        kern = GPy.kern.linear(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_periodic_exponentialkernel(self):
-        kern = GPy.kern.periodic_exponential(1)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_periodic_Matern32kernel(self):
-        kern = GPy.kern.periodic_Matern32(1)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_periodic_Matern52kernel(self):
-        kern = GPy.kern.periodic_Matern52(1)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_rational_quadratickernel(self):
-        kern = GPy.kern.rational_quadratic(1)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_gibbskernel(self):
-        kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_heterokernel(self):
-        kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp())
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_mlpkernel(self):
-        kern = GPy.kern.mlp(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_polykernel(self):
-        kern = GPy.kern.poly(5, degree=4)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_fixedkernel(self):
-        """
-        Fixed effect kernel test
-        """
-        X = np.random.rand(30, 4)
-        K = np.dot(X, X.T)
-        kernel = GPy.kern.fixed(4, K)
-        kern = GPy.kern.poly(5, degree=4)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    # def test_coregionalization(self):
-    #     X1 = np.random.rand(50,1)*8
-    #     X2 = np.random.rand(30,1)*5
-    #     index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
-    #     X = np.hstack((np.vstack((X1,X2)),index))
-    #     Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
-    #     Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
-    #     Y = np.vstack((Y1,Y2))
-    #     k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
-    #     k2 = GPy.kern.coregionalize(2,1)
-    #     kern = k1**k2
-    #     self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+class Kern_check_model(GPy.core.Model):
+    """
+    This is a dummy model class used as a base class for checking that the
+    gradients of a given kernel are implemented correctly. It enables
+    checkgrad() to be called independently on a kernel.
+    """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        GPy.core.Model.__init__(self, 'kernel_test_model')
+        if kernel==None:
+            kernel = GPy.kern.RBF(1)
+        if X is None:
+            X = np.random.randn(20, kernel.input_dim)
+        if dL_dK is None:
+            if X2 is None:
+                dL_dK = np.ones((X.shape[0], X.shape[0]))
+            else:
+                dL_dK = np.ones((X.shape[0], X2.shape[0]))
+
+        self.kernel = kernel
+        self.X = GPy.core.parameterization.Param('X',X)
+        self.X2 = X2
+        self.dL_dK = dL_dK
+
+    def is_positive_definite(self):
+        v = np.linalg.eig(self.kernel.K(self.X))[0]
+        if any(v<-10*sys.float_info.epsilon):
+            return False
+        else:
+            return True
+
+    def log_likelihood(self):
+        return np.sum(self.dL_dK*self.kernel.K(self.X, self.X2))
+
+
+class Kern_check_dK_dtheta(Kern_check_model):
+    """
+    This class allows gradient checks for the gradient of a kernel with
+    respect to parameters.
+    """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+        self.add_parameter(self.kernel)
+
+    def parameters_changed(self):
+        return self.kernel.update_gradients_full(self.dL_dK, self.X, self.X2)
+
+
+class Kern_check_dKdiag_dtheta(Kern_check_model):
+    """
+    This class allows gradient checks of the gradient of the diagonal of a
+    kernel with respect to the parameters.
+    """
+    def __init__(self, kernel=None, dL_dK=None, X=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        self.add_parameter(self.kernel)
+
+    def log_likelihood(self):
+        return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
+
+    def parameters_changed(self):
+        return self.kernel.update_gradients_diag(np.diag(self.dL_dK), self.X)
+
+
+class Kern_check_dK_dX(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to X."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+        self.add_parameter(self.X)
+
+    def parameters_changed(self):
+        self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
+
+
+class Kern_check_dKdiag_dX(Kern_check_dK_dX):
+    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_dK_dX.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+
+    def log_likelihood(self):
+        return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
+
+    def parameters_changed(self):
+        self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK, self.X)
+
+
+def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
+    """
+    This function runs on kernels to check the correctness of their
+    implementation. It checks that the covariance function is positive definite
+    for a randomly generated data set.
+
+    :param kern: the kernel to be tested.
+    :type kern: GPy.kern.Kernpart
+    :param X: X input values to test the covariance function.
+    :type X: ndarray
+    :param X2: X2 input values to test the covariance function.
+    :type X2: ndarray
+    """
+    pass_checks = True
+    if X==None:
+        X = np.random.randn(10, kern.input_dim)
+        if output_ind is not None:
+            X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
+    if X2==None:
+        X2 = np.random.randn(20, kern.input_dim)
+        if output_ind is not None:
+            X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
+
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt X.")
+    try:
+        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    return pass_checks
+
+
+class KernelTestsContinuous(unittest.TestCase):
+    def setUp(self):
+        self.X = np.random.randn(100,2)
+        self.X2 = np.random.randn(110,2)
+
+    def test_Matern32(self):
+        k = GPy.kern.Matern32(2)
+        self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
+
+    def test_Matern52(self):
+        k = GPy.kern.Matern52(2)
+        self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
+
+    #TODO: turn off grad checking wrt X for indexed kernels like coregionalize
 
 if __name__ == "__main__":
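The kern_test harness above replaces the long list of per-kernel test methods: any kernel implementing the Kern interface gets a positive-definiteness check plus numeric gradient checks with one call. A hedged sketch, assuming kern_test is importable from the test module:

    import numpy as np
    import GPy

    # Assumed: kern_test comes from the testing module shown above.
    k = GPy.kern.Matern52(2)
    X = np.random.randn(30, 2)
    X2 = np.random.randn(40, 2)

    # Runs positive-definiteness plus gradient checks wrt theta and X.
    assert kern_test(k, X=X, X2=X2, verbose=True)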
@@ -10,6 +10,7 @@ from functools import partial
 #np.random.seed(300)
 #np.random.seed(7)
 
+np.seterr(divide='raise')
 def dparam_partial(inst_func, *args):
     """
     If we have an instance method that needs to be called but that doesn't
@@ -149,9 +150,9 @@ class TestNoiseModels(object):
         noise_models = {"Student_t_default": {
                     "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
+                        "names": [".*t_noise"],
                         "vals": [self.var],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                         #"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
                     },
                     "laplace": True
@@ -159,66 +160,66 @@ class TestNoiseModels(object):
                 },
                 "Student_t_1_var": {
                     "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
+                        "names": [".*t_noise"],
                         "vals": [1.0],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                     },
                     "laplace": True
                 },
                 "Student_t_small_deg_free": {
                     "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
+                        "names": [".*t_noise"],
                         "vals": [self.var],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                     },
                     "laplace": True
                 },
                 "Student_t_small_var": {
                     "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
-                        "vals": [0.0001],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "names": [".*t_noise"],
+                        "vals": [0.001],
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                     },
                     "laplace": True
                 },
                 "Student_t_large_var": {
                     "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
+                        "names": [".*t_noise"],
                         "vals": [10.0],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                     },
                     "laplace": True
                 },
                 "Student_t_approx_gauss": {
                     "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
+                        "names": [".*t_noise"],
                         "vals": [self.var],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                     },
                     "laplace": True
                 },
                 "Student_t_log": {
                     "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
                     "grad_params": {
-                        "names": ["t_noise"],
+                        "names": [".*t_noise"],
                         "vals": [self.var],
-                        "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
+                        "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
                     },
                     "laplace": True
                 },
                 "Gaussian_default": {
                     "model": GPy.likelihoods.Gaussian(variance=self.var),
                     "grad_params": {
-                        "names": ["variance"],
+                        "names": [".*variance"],
                         "vals": [self.var],
-                        "constraints": [("variance", constrain_positive)]
+                        "constraints": [(".*variance", constrain_positive)]
                     },
                     "laplace": True,
-                    "ep": True
+                    "ep": False # FIXME: Should be True when we have it working again
                 },
                 #"Gaussian_log": {
                     #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
@@ -252,7 +253,7 @@ class TestNoiseModels(object):
                     "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
                     "laplace": True,
                     "Y": self.binary_Y,
-                    "ep": True
+                    "ep": False # FIXME: Should be True when we have it working again
                 },
                 #"Exponential_default": {
                     #"model": GPy.likelihoods.exponential(),
@@ -515,10 +516,10 @@ class TestNoiseModels(object):
     #Normalize
     Y = Y/Y.max()
     white_var = 1e-6
-    kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
     laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
     m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
-    m['white'].constrain_fixed(white_var)
+    m['.*white'].constrain_fixed(white_var)

     #Set constraints
     for constrain_param, constraint in constraints:
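The lowercase kernel constructors (GPy.kern.rbf, GPy.kern.white) are replaced throughout by the CamelCase classes (GPy.kern.RBF, GPy.kern.White). A short sketch of the new-style composition with a pinned white-noise jitter, mirroring the hunk above:

import GPy

kernel = GPy.kern.RBF(input_dim=1) + GPy.kern.White(input_dim=1)
# after building a model m with this kernel, the jitter can be fixed via
# regex indexing: m['.*white'].constrain_fixed(1e-6)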
@@ -541,9 +542,6 @@ class TestNoiseModels(object):
     #NOTE this test appears to be stochastic for some likelihoods (student t?)
     # appears to all be working in test mode right now...

-    if not m.checkgrad():
-        import ipdb; ipdb.set_trace() # XXX BREAKPOINT
-
     assert m.checkgrad(step=step)

     ###########
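checkgrad compares the model's analytic gradients against a finite-difference estimate; with the ipdb breakpoint removed, a failure now simply trips the assertion. Conceptually the check is central differencing per parameter; a minimal sketch of the idea, not GPy's internal implementation:

import numpy as np

def numeric_grad(f, theta, step=1e-6):
    # central finite differences, one parameter at a time
    g = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = step
        g[i] = (f(theta + e) - f(theta - e)) / (2.0 * step)
    return g

# the gradient passes when this agrees with the analytic gradient
# to within a small relative tolerance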
@@ -555,10 +553,10 @@ class TestNoiseModels(object):
     #Normalize
     Y = Y/Y.max()
     white_var = 1e-6
-    kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
     ep_inf = GPy.inference.latent_function_inference.EP()
     m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
-    m['white'].constrain_fixed(white_var)
+    m['.*white'].constrain_fixed(white_var)

     for param_num in range(len(param_names)):
         name = param_names[param_num]

@@ -634,26 +632,24 @@ class LaplaceTests(unittest.TestCase):
     Y = Y/Y.max()
     #Yc = Y.copy()
     #Yc[75:80] += 1
-    kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    kernel1 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
     #FIXME: Make sure you can copy kernels when params is fixed
     #kernel2 = kernel1.copy()
-    kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    kernel2 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])

     gauss_distr1 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
     exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
     m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
-    m1['white'].constrain_fixed(1e-6)
-    m1['variance'] = initial_var_guess
-    m1['variance'].constrain_bounded(1e-4, 10)
-    m1['rbf'].constrain_bounded(1e-4, 10)
+    m1['.*white'].constrain_fixed(1e-6)
+    m1['.*rbf.variance'] = initial_var_guess
+    m1['.*rbf.variance'].constrain_bounded(1e-4, 10)
     m1.randomize()

     gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
     laplace_inf = GPy.inference.latent_function_inference.Laplace()
     m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
-    m2['white'].constrain_fixed(1e-6)
-    m2['rbf'].constrain_bounded(1e-4, 10)
-    m2['variance'].constrain_bounded(1e-4, 10)
+    m2['.*white'].constrain_fixed(1e-6)
+    m2['.*rbf.variance'].constrain_bounded(1e-4, 10)
     m2.randomize()

     if debug:
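This fixture builds two models that differ only in inference method: m1 uses ExactGaussianInference, m2 the Laplace approximation. For a Gaussian likelihood the Laplace approximation is exact, so the two should recover matching marginal likelihoods. A sketch of the comparison the test is driving at (assuming numpy as np; tolerance chosen for illustration):

m1.optimize()
m2.optimize()
assert np.allclose(m1.log_likelihood(), m2.log_likelihood(), atol=1e-4)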
@@ -13,6 +13,7 @@ import classification
 import subarray_and_sorting
 import caching
 import diag
+import initialization

 try:
     import sympy

GPy/util/initialization.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+'''
+Created on 24 Feb 2014
+
+@author: maxz
+'''
+
+import numpy as np
+from linalg import PCA
+
+def initialize_latent(init, input_dim, Y):
+    Xr = np.random.randn(Y.shape[0], input_dim)
+    if init == 'PCA':
+        PC = PCA(Y, input_dim)[0]
+        Xr[:PC.shape[0], :PC.shape[1]] = PC
+    else:
+        pass
+    return Xr
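initialize_latent seeds a latent space of shape (N, input_dim): random normal entries by default, with the leading block overwritten by the first input_dim principal components of Y when init == 'PCA'. A usage sketch (import path assumed from the file's location under GPy/util):

import numpy as np
from GPy.util.initialization import initialize_latent

Y = np.random.randn(100, 5)           # 100 observations, 5 output dimensions
X0 = initialize_latent('PCA', 2, Y)   # (100, 2) latent starting point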