mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-27 14:25:16 +02:00
Merge branch 'updates' into devel
This commit is contained in:
commit
016b3a9965
15 changed files with 366 additions and 65 deletions
105
GPy/core/gp.py
105
GPy/core/gp.py
|
|
@ -106,6 +106,13 @@ class GP(Model):
|
|||
self.link_parameter(self.likelihood)
|
||||
self.posterior = None
|
||||
|
||||
# The predictive variable to be used to predict using the posterior object's
|
||||
# woodbury_vector and woodbury_inv is stored as self._predictive_variable.
|
||||
# This is usually just a link to self.X (full GP) or self.Z (sparse GP).
|
||||
# Make sure to name this variable and the predict functions will "just work"
|
||||
# as long as the posterior has the right woodbury entries.
|
||||
self._predictive_variable = self.X
|
||||
|
||||
|
||||
def set_XY(self, X=None, Y=None, trigger_update=True):
|
||||
"""
|
||||
|
|
@ -209,6 +216,7 @@ class GP(Model):
|
|||
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||
elif self.posterior.woodbury_inv.ndim == 3:
|
||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||
from ..util.linalg import mdot
|
||||
for i in range(var.shape[2]):
|
||||
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||
var = var
|
||||
|
|
@ -304,6 +312,103 @@ class GP(Model):
|
|||
return dmu_dX, dv_dX
|
||||
|
||||
|
||||
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
    """
    Compute the mean and (co)variance of the derivative of the GP posterior.

    Note that this is the mean and variance of the derivative, not the
    derivative of the mean and variance (see predictive_gradients for that).

    :param Xnew: The points at which to evaluate the jacobian.
    :type Xnew: np.ndarray (N* x Q)
    :param kern: The kernel to compute the jacobian for
        (defaults to ``self.kern``).
    :param boolean full_cov: whether to use ``kern.gradients_XX`` (full
        covariance across the N* points) or ``kern.gradients_XX_diag``.

    :returns: mean_jac, var_jac
    :rtype: mean_jac is np.ndarray (N*, Q, D) with D = ``self.output_dim``.
        var_jac is the result of one covariance computation per woodbury_inv
        slice: (N*, Q) for ``full_cov=False`` and (N*, N*, Q) for
        ``full_cov=True`` -- assuming the kernel's second-derivative methods
        return arrays of those shapes; TODO confirm against the kernel API.
        If ``self.posterior.woodbury_inv`` is 3-dimensional (one inverse per
        output), the per-output results are stacked along a new trailing
        axis, giving (N*, Q, D) or (N*, N*, Q, D).

    Note: We always return the sum in input_dim gradients, as the
    off-diagonals in the input_dim are not needed for further calculations.
    This is a compromise for increase in speed. Mathematically the jacobian
    would have another dimension in Q.
    """
    if kern is None:
        kern = self.kern

    # Mean of the jacobian: one kernel gradient per output dimension, weighted
    # by the corresponding column of the woodbury vector.
    mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
    for i in range(self.output_dim):
        mean_jac[:, :, i] = kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self._predictive_variable)

    # Gradient of the kernel w.r.t. Xnew, one slice per row of the
    # predictive variable.
    dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
    for i in range(self._predictive_variable.shape[0]):
        dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])

    def compute_cov_inner(wi):
        # Covariance of the jacobian for a single 2-D woodbury inverse `wi`.
        if full_cov:
            # full covariance gradients:
            dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
            var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
        else:
            dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
            var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
        return var_jac

    if self.posterior.woodbury_inv.ndim == 3:
        # One woodbury inverse per output dimension. Stack the per-output
        # results along a NEW trailing axis; concatenating along axis 0 would
        # glue them into (D*N*, ...) and lose the output dimension that
        # callers detect via `var_jac.ndim`.
        var_jac = np.stack(
            [compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
             for d in range(self.posterior.woodbury_inv.shape[2])],
            axis=-1)
    else:
        var_jac = compute_cov_inner(self.posterior.woodbury_inv)
    return mean_jac, var_jac
|
||||
|
||||
def predict_wishard_embedding(self, Xnew, kern=None):
    """
    Predict the wishard embedding G of the GP at the points Xnew.

    G is assembled from the jacobian returned by
    ``self.predict_jacobian(Xnew, kern, full_cov=False)``: the outer product
    (over the input dimension) of the jacobian mean, summed over outputs,
    plus a term built from the jacobian variance.

    :param array-like Xnew: The points at which to evaluate the embedding.
    :param :py:class:`~GPy.kern.Kern` kern: The kernel to use
        (defaults to ``self.kern``).

    Supplying only a part of the learning kernel gives insights into the
    density of that specific kernel part of the input function, e.g. how
    dense the linear part of a kernel is compared to the non-linear part.

    NOTE(review): the variance term here is the outer product of the
    variance array with itself (scaled by output_dim in the 2-D case),
    not the variance itself -- confirm this is the intended J_cov term.
    """
    kernel = self.kern if kern is None else kern

    jac_mean, jac_var = self.predict_jacobian(Xnew, kernel, full_cov=False)
    mean_outer = np.einsum('iqd,ipd->iqp', jac_mean, jac_mean)

    if jac_var.ndim != 3:
        # A single variance shared by all outputs: weight it by output_dim.
        var_outer = np.einsum('iq,ip->iqp', jac_var, jac_var)
        return mean_outer + self.output_dim*var_outer

    # Separate variance per output: the einsum already sums over outputs.
    var_outer = np.einsum('iqd,ipd->iqp', jac_var, jac_var)
    return mean_outer + var_outer
|
||||
|
||||
def predict_magnification(self, Xnew, kern=None):
    """
    Predict the magnification factor as

    sqrt(det(G))

    for each point N in Xnew, where G is the result of
    ``self.predict_wishard_embedding(Xnew, kern)``.

    :param Xnew: The points at which to evaluate the magnification.
    :param kern: The kernel to use, passed through unchanged to
        ``predict_wishard_embedding``.
    :returns: np.ndarray with one magnification value per row of Xnew.
    """
    from ..util.linalg import jitchol
    G = self.predict_wishard_embedding(Xnew, kern)
    # With a Cholesky factor L of G (presumably what jitchol returns, possibly
    # after adding jitter -- confirm against util.linalg), we have
    # det(G) = prod(diag(L))**2, hence
    # sqrt(det(G)) = prod(diag(L)) = exp(sum(log(diag(L)))).
    # The previous expression, 2*sqrt(exp(sum(log(diag(L))))), evaluated to
    # 2*det(G)**0.25 instead.
    return np.array([
        np.exp(np.sum(np.log(np.diag(jitchol(G[n, :, :])))))
        for n in range(Xnew.shape[0])
    ])
|
||||
|
||||
def posterior_samples_f(self,X,size=10, full_cov=True):
|
||||
"""
|
||||
Samples the posterior GP at the points X.
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
|
|||
also back from f to X. The inverse mapping is called g().
|
||||
"""
|
||||
def __init__(self, input_dim, output_dim, name='bijective_mapping'):
|
||||
super(Bijective_apping, self).__init__(name=name)
|
||||
super(Bijective_mapping, self).__init__(name=name)
|
||||
|
||||
def g(self, f):
|
||||
"""Inverse mapping from output domain of the function to the inputs."""
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ class SparseGP(GP):
|
|||
logger.info("Adding Z as parameter")
|
||||
self.link_parameter(self.Z, index=0)
|
||||
self.posterior = None
|
||||
self._predictive_variable = self.Z
|
||||
|
||||
|
||||
def has_uncertain_inputs(self):
|
||||
return isinstance(self.X, VariationalPosterior)
|
||||
|
|
@ -114,10 +116,10 @@ class SparseGP(GP):
|
|||
Make a prediction for the latent function values.
|
||||
|
||||
For certain inputs we give back a full_cov of shape NxN,
|
||||
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of,
|
||||
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is off,
|
||||
we take only the diagonal elements across N.
|
||||
|
||||
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
|
||||
|
||||
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
|
||||
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
|
||||
This holds both with and without missing data. For the missing-data SparseGP implementation, see :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.
|
||||
"""
|
||||
|
|
@ -125,7 +127,7 @@ class SparseGP(GP):
|
|||
if kern is None: kern = self.kern
|
||||
|
||||
if not isinstance(Xnew, VariationalPosterior):
|
||||
Kx = kern.K(self.Z, Xnew)
|
||||
Kx = kern.K(self._predictive_variable, Xnew)
|
||||
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
if full_cov:
|
||||
Kxx = kern.K(Xnew)
|
||||
|
|
@ -149,28 +151,28 @@ class SparseGP(GP):
|
|||
if self.mean_function is not None:
|
||||
mu += self.mean_function.f(Xnew)
|
||||
else:
|
||||
psi0_star = kern.psi0(self.Z, Xnew)
|
||||
psi1_star = kern.psi1(self.Z, Xnew)
|
||||
psi0_star = kern.psi0(self._predictive_variable, Xnew)
|
||||
psi1_star = kern.psi1(self._predictive_variable, Xnew)
|
||||
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
|
||||
la = self.posterior.woodbury_vector
|
||||
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
||||
|
||||
if full_cov:
|
||||
|
||||
if full_cov:
|
||||
var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
|
||||
di = np.diag_indices(la.shape[1])
|
||||
else:
|
||||
else:
|
||||
var = np.empty((Xnew.shape[0], la.shape[1]))
|
||||
|
||||
|
||||
for i in range(Xnew.shape[0]):
|
||||
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
||||
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
|
||||
psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
|
||||
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
||||
|
||||
var_ = mdot(la.T, tmp, la)
|
||||
p0 = psi0_star[i]
|
||||
t = np.atleast_3d(self.posterior.woodbury_inv)
|
||||
t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
|
||||
|
||||
|
||||
if full_cov:
|
||||
var_[di] += p0
|
||||
var_[di] += -t2
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class VerboseOptimization(object):
|
|||
seconds = time.time()-self.start
|
||||
#sys.stdout.write(" "*len(self.message))
|
||||
self.deltat += seconds
|
||||
if self.deltat > .2:
|
||||
if self.deltat > .3 or seconds < .3:
|
||||
self.print_out(seconds)
|
||||
self.deltat = 0
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue