Broken whilst unlinking GP and sparse_GP, kern not being imported

This commit is contained in:
Alan Saul 2013-06-04 16:05:33 +01:00
parent 26b4cd6c4f
commit 1302c46ace
16 changed files with 328 additions and 318 deletions
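This commit moves GP and sparse_GP out of GPy.models and into GPy.core: each model switches from a flat `from GP import GP` style import to a package-relative `from ..core import ...`, replaces direct base-class `__init__` calls with `super(...)`, and re-runs `self._set_params(self._get_params())` once construction finishes. A minimal sketch of the pattern being applied throughout the diff below (`CoreBase` and `MyModel` are stand-in names, not GPy classes):

```python
# Hypothetical sketch of this commit's refactoring pattern; `CoreBase`
# and `MyModel` are illustrative names, not part of GPy.
from ..core import CoreBase           # was: from core_base import CoreBase

class MyModel(CoreBase):
    def __init__(self, X, likelihood, kernel, **kwargs):
        # was: CoreBase.__init__(self, X, likelihood, kernel, **kwargs)
        super(MyModel, self).__init__(X, likelihood, kernel, **kwargs)
        # re-sync cached kernel matrices with the final parameter vector
        self._set_params(self._get_params())
```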

View file

@@ -5,7 +5,7 @@ import numpy as np
import pylab as pb
import sys, pdb
from GPLVM import GPLVM
-from sparse_GP import sparse_GP
+from ..core import sparse_GP
from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
@@ -65,6 +65,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedABCD = []
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
+self._set_params(self._get_params())
@property
def oldps(self):
@@ -96,7 +97,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def _clipped(self, x):
return x # np.clip(x, -1e300, 1e300)
def _set_params(self, x, save_old=True, save_count=0):
# try:
x = self._clipped(x)

View file

@@ -7,7 +7,7 @@ from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
-from sparse_GP import sparse_GP
+from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
@@ -16,6 +16,9 @@ def backsub_both_sides(L,X):
class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, Z, X_variance=X_variance, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation

View file

@@ -1,288 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import linalg
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv, mdot, tdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP
class GP(model):
"""
Gaussian Process model for regression and EP
:param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
# parse arguments
self.X = X
assert len(self.X.shape) == 2
self.N, self.Q = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape
# here's some simple normalization for the inputs
if normalize_X:
self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self, 'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1, self.X.shape[1]))
self._Xstd = np.ones((1, self.X.shape[1]))
if not hasattr(self,'has_uncertain_inputs'):
self.has_uncertain_inputs = False
model.__init__(self)
def dL_dZ(self):
"""
TODO: one day we might like to learn Z by gradient methods?
"""
# FIXME: this doesn't live here.
return np.zeros_like(self.Z)
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
self.likelihood._set_params(p[self.kern.Nparam_transformed():])
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
# the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
#alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
else:
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
#return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
The log marginal likelihood of the GP.
For an EP model, it can be written as the log likelihood of a regression
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
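Spelled out for the Gaussian case (a sketch; the code folds the constant and, for EP, the site normalisers into `likelihood.Z`), the returned quantity is

```latex
\log p(Y \mid X, \theta)
  = -\tfrac{D}{2}\log\lvert K \rvert
    - \tfrac{1}{2}\operatorname{tr}\!\big(K^{-1} Y Y^{\top}\big)
    - \tfrac{ND}{2}\log 2\pi ,
```

where `-0.5 * self.D * self.K_logdet` is the determinant term and `_model_fit_term()` is the trace term.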
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
Note: we use the chain rule: dL_dtheta = dL_dK * dK_dtheta
"""
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
"""
Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
#KiKx = np.dot(self.Ki, Kx)
KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
import pdb; pdb.set_trace()  # was `debug_this`: a deliberate NameError used as a breakpoint
return mu, var
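The method implements the standard GP posterior equations. A small self-contained numpy sketch of the same quantities (this reimplements the maths, not GPy's API; the argument names are illustrative):

```python
import numpy as np

def raw_predict_sketch(K, Kx, Kxx_diag, Y):
    # K:        N x N training covariance (incl. noise), as in self.K
    # Kx:       N x Nnew cross-covariance, kern.K(Xnew, X).T
    # Kxx_diag: length-Nnew prior variances at the test inputs
    # Y:        N x D (normalised) targets
    KiKx = np.linalg.solve(K, Kx)           # K^-1 Kx (GPy uses dpotrs on the Cholesky factor)
    mu = np.dot(KiKx.T, Y)                  # posterior mean, Nnew x D
    var = Kxx_diag - np.sum(KiKx * Kx, 0)   # posterior marginal variances
    return mu, var[:, None]
```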
def predict(self, Xnew, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param which_parts: specifies which kernel parts to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
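A hedged usage sketch, assuming the post-refactor layout where `GP_regression` takes `(X, Y)` plus an optional kernel (the signatures here are assumptions, not confirmed by this diff):

```python
import numpy as np
import GPy

# Hypothetical 1-D regression data.
X = np.random.uniform(-3., 3., (50, 1))
Y = np.sin(X) + 0.05 * np.random.randn(50, 1)

m = GPy.models.GP_regression(X, Y, kernel=GPy.kern.rbf(1))
Xnew = np.linspace(-3., 3., 100)[:, None]
mean, var, lower, upper = m.predict(Xnew)  # mean, variance, 95% bounds
```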
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
:param samples: the number of a posteriori samples to plot
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour plot shows the mean predicted function.
- In higher dimensions, we have not implemented this yet. TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_parts.
"""
if which_data == 'all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict(Xnew, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v)[:, None]))
for i in range(samples):
pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
pb.xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
pb.ylim(ymin, ymax)
if hasattr(self, 'Z'):
pb.plot(self.Z, self.Z * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
pb.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20):
"""
TODO: Docstrings!
:param levels: for 2D plotting, the number of contour levels to use
"""
# TODO include samples
if which_data == 'all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:,d], lower[:,d], upper[:,d])
pb.plot(Xu[which_data], self.likelihood.data[which_data,d], 'kx', mew=1.5)
if self.has_uncertain_inputs:
pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
pb.xlim(xmin, xmax)
pb.ylim(ymin, ymax)
if hasattr(self, 'Z'):
Zu = self.Z * self._Xstd + self._Xmean
pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
# pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
Yf = self.likelihood.Y.flatten()
pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1])
if hasattr(self, 'Z'):
pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@@ -8,7 +8,7 @@ import sys, pdb
from .. import kern
from ..core import model
from ..util.linalg import pdinv, PCA
-from GP import GP
+from ..core import GP
from ..likelihoods import Gaussian
from .. import util
from GPy.util import plot_latent
@@ -32,7 +32,8 @@ class GPLVM(GP):
if kernel is None:
kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y)
-GP.__init__(self, X, likelihood, kernel, **kwargs)
+super(GPLVM, self).__init__(X, likelihood, kernel, **kwargs)
+self._set_params(self._get_params())
def initialise_latent(self, init, Q, Y):
if init == 'PCA':
@@ -63,4 +64,4 @@ class GPLVM(GP):
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
def plot_latent(self, *args, **kwargs):
-util.plot_latent.plot_latent(self, *args, **kwargs)
+util.plot_latent.plot_latent(self, *args, **kwargs)

View file

@@ -3,7 +3,7 @@
import numpy as np
-from GP import GP
+from ..core import GP
from .. import likelihoods
from .. import kern
@@ -31,4 +31,5 @@ class GP_regression(GP):
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
-GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
+super(GP_regression, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
+self._set_params(self._get_params())

View file

@@ -2,9 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
-from GP import GP
+#from GP import GP
+#from sparse_GP import sparse_GP
from GP_regression import GP_regression
-from sparse_GP import sparse_GP
from sparse_GP_regression import sparse_GP_regression
from GPLVM import GPLVM
from warped_GP import warpedGP

View file

@@ -7,7 +7,7 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
-from sparse_GP import sparse_GP
+from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
@@ -36,12 +36,12 @@ class generalized_FITC(sparse_GP):
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self.true_precision = likelihood.precision
-sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
+super(generalized_FITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
+self._set_params(self._get_params())
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)

View file

@@ -5,7 +5,7 @@ Created on 10 Apr 2013
'''
from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
-from GPy.models.sparse_GP import sparse_GP
+from GPy.core import sparse_GP
from GPy.util.linalg import PCA
from scipy import linalg
import numpy
@@ -23,7 +23,7 @@ class MRD(model):
:type likelihood_list: [GPy.likelihood] | [Y1..Yy]
:param names: names for different gplvm models
:type names: [str]
-:param Q: latent dimensionality (will raise
+:param Q: latent dimensionality (will raise
:type Q: int
:param initx: initialisation method for the latent space
:type initx: 'PCA'|'random'
@@ -77,6 +77,7 @@ class MRD(model):
self.MQ = self.M * self.Q
model.__init__(self) # @UndefinedVariable
+self._set_params(self._get_params())
@property
def X(self):
@@ -153,7 +154,7 @@ class MRD(model):
def _get_params(self):
"""
return parameter list containing private and shared parameters as follows:
=================================================================
| mu | S | Z || theta1 | theta2 | .. | thetaN |
=================================================================
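A sketch of the packing this layout implies, assuming each block is flattened and concatenated with `np.hstack` (the helper names and shapes here are hypothetical, mirroring how sparse_GP packs `Z` below):

```python
import numpy as np

# Hypothetical shapes: mu and S are N x Q variational parameters, Z is M x Q.
def pack_params(mu, S, Z, thetas):
    return np.hstack([mu.flatten(), S.flatten(), Z.flatten()] + list(thetas))

def unpack_params(p, N, Q, M):
    mu = p[:N * Q].reshape(N, Q)
    S = p[N * Q:2 * N * Q].reshape(N, Q)
    Z = p[2 * N * Q:2 * N * Q + M * Q].reshape(M, Q)
    thetas = p[2 * N * Q + M * Q:]   # remainder: per-model kernel parameters
    return mu, S, Z, thetas
```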

View file

@@ -1,284 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv
from ..util.plot import gpplot
from .. import kern
from GP import GP
from scipy import linalg
from ..likelihoods import Gaussian
class sparse_GP(GP):
"""
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (N x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = Z.shape[0]
self.likelihood = likelihood
if X_variance is None:
self.has_uncertain_inputs = False
else:
assert X_variance.shape == X.shape
self.has_uncertain_inputs = True
self.X_variance = X_variance
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
# normalize X uncertainty also
if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xstd)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None
def _computations(self):
# factor Kmm
self.Lm = jitchol(self.Kmm)
# The rather complex computations of self.A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
tmp = evecs * np.sqrt(clipped_evals)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
# TODO: make a switch to compute either psi1V first, or VV.T
self.psi1V = np.dot(self.psi1, self.likelihood.V)
# back-substitute C into psi1V
tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
tmp, info2 = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, info3 = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
# Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.D
tmp += self.D * np.eye(self.M)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
self.dL_dpsi2 = None
else:
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
else:
# subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
self.dL_dpsi2 = None
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedastic derivatives not implemented"
else:
# likelihood is not heteroscedastic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D + self.likelihood.Z
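For a Gaussian likelihood with certain inputs, these four terms plus `likelihood.Z` rearrange (per output column, with noise precision β) into the familiar variational sparse-GP lower bound, a sketch of which is

```latex
\log p(y) \;\ge\;
  \log \mathcal{N}\!\big(y \,\big|\, 0,\; K_{nm} K_{mm}^{-1} K_{mn} + \beta^{-1} I \big)
  \;-\; \tfrac{\beta}{2}\,\operatorname{tr}\!\big(K_{nn} - K_{nm} K_{mm}^{-1} K_{mn}\big),
```

with the trace term visible in `B` above: psi0 sums the diagonal of K_nn, and trace(A) the diagonal of the Nyström approximation.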
def _set_params(self, p):
self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(), GP._get_params(self)])
def _get_param_names(self):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
self.likelihood.restart() # TODO check consistency with pseudo_EP
if self.has_uncertain_inputs:
Lmi = chol_inv(self.Lm)
Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perform FITC-EP. # TODO: resolve potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
return dL_dZ
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi)
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
# assert which_parts=='all', "switching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]
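Tracing the Cholesky algebra for the certain-inputs, Gaussian case, the mean computed here matches the projected-process predictive mean (a sketch, with β the noise precision):

```latex
\mu_* \;=\; K_{*m}\,\big(K_{mm} + \beta\, K_{mn} K_{nm}\big)^{-1} \beta\, K_{mn} Y ,
```

which is exactly `np.dot(Kx.T, self.Cpsi1V)` with `Cpsi1V` precomputed in `_computations`.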
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param X_variance_new: The uncertainty in the prediction points
:type X_variance_new: np.ndarray, Nnew x self.Q
:param which_parts: specifies which kernel parts to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xstd ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
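A hedged end-to-end sketch of driving this class through `sparse_GP_regression` (constructor signature assumed from its `__init__` in the file below, not confirmed by this diff):

```python
import numpy as np
import GPy

# Hypothetical data: 200 noisy observations summarised by 10 inducing inputs.
X = np.random.uniform(-3., 3., (200, 1))
Y = np.sin(X) + 0.05 * np.random.randn(200, 1)
Z = np.random.uniform(-3., 3., (10, 1))

m = GPy.models.sparse_GP_regression(X, Y, Z=Z)
Xnew = np.linspace(-3., 3., 100)[:, None]
mean, var, lower, upper = m.predict(Xnew)
```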

View file

@@ -8,7 +8,7 @@ import sys, pdb
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
-from GPLVM import GPLVM
+from ..core import GPLVM
from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM):

View file

@@ -3,7 +3,7 @@
import numpy as np
-from sparse_GP import sparse_GP
+from ..core import sparse_GP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
@@ -43,4 +43,5 @@ class sparse_GP_regression(sparse_GP):
#likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
-sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X)
+super(sparse_GP_regression, self).__init__(X, likelihood, kernel, Z, normalize_X=normalize_X)
+self._set_params(self._get_params())

View file

@@ -9,7 +9,7 @@ from ..util.linalg import pdinv
from ..util.plot import gpplot
from ..util.warping_functions import *
from GP_regression import GP_regression
-from GP import GP
+from ..core import GP
from .. import likelihoods
from .. import kern
@@ -29,7 +29,8 @@ class warpedGP(GP):
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
-GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
+super(warpedGP, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
+self._set_params(self._get_params())
def _scale_data(self, Y):
self._Ymax = Y.max()