mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-13 22:12:38 +02:00

Merge branch 'updates' into devel

This commit is contained in commit 016b3a9965.

15 changed files with 366 additions and 65 deletions
GPy/core/gp.py (105 changed lines)
@@ -106,6 +106,13 @@ class GP(Model):
         self.link_parameter(self.likelihood)
         self.posterior = None
 
+        # The predictive variable used for prediction through the posterior object's
+        # woodbury_vector and woodbury_inv is defined as _predictive_variable.
+        # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
+        # Name this variable and the predict functions will "just work",
+        # as long as the posterior has the right woodbury entries.
+        self._predictive_variable = self.X
+
     def set_XY(self, X=None, Y=None, trigger_update=True):
         """
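The convention described in the new comment is what keeps prediction generic: any model that fills in posterior.woodbury_vector, posterior.woodbury_inv and _predictive_variable shares one prediction path. A minimal sketch of that path (standalone, hypothetical helper; not part of this commit):

    import numpy as np

    def predict_from_posterior(kern, pred_var, posterior, Xnew):
        # Kx: cross-covariance between the predictive variable (self.X or self.Z) and Xnew
        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, posterior.woodbury_vector)             # posterior mean
        Kxx = kern.Kdiag(Xnew)                                   # prior variance at each test point
        var = Kxx - np.sum(Kx * np.dot(posterior.woodbury_inv, Kx), 0)
        return mu, var[:, None]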
@@ -209,6 +216,7 @@ class GP(Model):
             var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
         elif self.posterior.woodbury_inv.ndim == 3:
             var = np.empty((Kxx.shape[0], Kxx.shape[1], self.posterior.woodbury_inv.shape[2]))
+            from ..util.linalg import mdot
             for i in range(var.shape[2]):
                 var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
             var = var
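For reference, the GPy.util.linalg.mdot imported above chains matrix products left to right; a minimal sketch of the behaviour relied on here (the real helper also accepts nested arguments):

    import numpy as np

    def mdot(*args):
        # mdot(A, B, C) == A.dot(B).dot(C)
        out = args[0]
        for a in args[1:]:
            out = np.dot(out, a)
        return out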
@@ -304,6 +312,103 @@ class GP(Model):
         return dmu_dX, dv_dX
 
+    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
+        """
+        Compute the derivatives of the posterior of the GP.
+
+        Given a set of points at which to predict X* (size [N*,Q]), compute the
+        mean and variance of the derivative. The resulting arrays are sized:
+
+         dmu_dX* -- [N*, Q, D], where D is the number of outputs of this GP (usually one).
+           Note that this is the mean and variance of the derivative,
+           not the derivative of the mean and variance! (See predictive_gradients for that.)
+
+         dv_dX* -- [N*, Q], since all outputs have the same variance.
+           (Missing data is not implemented yet; once it is, there will be
+           one output variance per output dimension.)
+
+        :param Xnew: the points at which to get the predictive gradients
+        :type Xnew: np.ndarray (N* x self.input_dim)
+        :param kern: the kernel to compute the jacobian for
+        :param boolean full_cov: whether to return the full covariance of the jacobian
+
+        :returns: dmu_dX, dv_dX
+        :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q, (D))]
+
+        Note: we always sum over the input_dim gradients, as the off-diagonals
+        in input_dim are not needed for further calculations. This is a
+        compromise for speed; mathematically the jacobian would have another
+        dimension in Q.
+        """
+        if kern is None:
+            kern = self.kern
+
+        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
+        for i in range(self.output_dim):
+            mean_jac[:, :, i] = kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self._predictive_variable)
+
+        dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
+        for i in range(self._predictive_variable.shape[0]):
+            dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
+
+        def compute_cov_inner(wi):
+            if full_cov:
+                # full covariance gradients:
+                dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
+                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+            else:
+                dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
+                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+            return var_jac
+
+        if self.posterior.woodbury_inv.ndim == 3:
+            var_jac = []
+            for d in range(self.posterior.woodbury_inv.shape[2]):
+                var_jac.append(compute_cov_inner(self.posterior.woodbury_inv[:, :, d]))
+            var_jac = np.concatenate(var_jac)
+        else:
+            var_jac = compute_cov_inner(self.posterior.woodbury_inv)
+        return mean_jac, var_jac
+
+    def predict_wishard_embedding(self, Xnew, kern=None):
+        """
+        Predict the Wishart embedding G of the GP. This is the density of the
+        input of the GP defined by the probabilistic function mapping f:
+
+            G = J_mean.T * J_mean + output_dim * J_cov
+
+        :param array-like Xnew: the points at which to evaluate the magnification
+        :param :py:class:`~GPy.kern.Kern` kern: the kernel to use for the magnification
+
+        Supplying only a part of the learned kernel gives insight into the
+        density of that specific kernel part of the input function; e.g. one
+        can see how dense the linear part of a kernel is compared to the
+        non-linear part.
+        """
+        if kern is None:
+            kern = self.kern
+
+        mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
+        mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
+        if var_jac.ndim == 3:
+            Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac)
+            G = mumuT + Sigma
+        else:
+            Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac)
+            G = mumuT + self.output_dim*Sigma
+        return G
+
+    def predict_magnification(self, Xnew, kern=None):
+        """
+        Predict the magnification factor as
+
+            sqrt(det(G))
+
+        for each point n in Xnew.
+        """
+        from ..util.linalg import jitchol
+        G = self.predict_wishard_embedding(Xnew, kern)
+        return np.array([2*np.sqrt(np.exp(np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])])
+
     def posterior_samples_f(self, X, size=10, full_cov=True):
         """
         Samples the posterior GP at the points X.
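Taken together, the three new methods form a pipeline: posterior Jacobian, then Wishart embedding G = J_mean.T*J_mean + output_dim*J_cov, then magnification sqrt(det(G)). A hedged usage sketch (model setup abbreviated; shapes in the comments assume N*=5, Q=2, D=1):

    import numpy as np
    import GPy

    X = np.random.rand(20, 2)
    Y = np.sin(X[:, :1]) + 0.05*np.random.randn(20, 1)
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(2))
    m.optimize()

    Xnew = np.random.rand(5, 2)
    mean_jac, var_jac = m.predict_jacobian(Xnew, full_cov=False)   # [5,2,1], [5,2]
    G = m.predict_wishard_embedding(Xnew)                          # [5,2,2]
    mf = m.predict_magnification(Xnew)                             # [5]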
@@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
     also back from f to X. The inverse mapping is called g().
     """
     def __init__(self, input_dim, output_dim, name='bijective_mapping'):
-        super(Bijective_apping, self).__init__(name=name)
+        super(Bijective_mapping, self).__init__(name=name)
 
     def g(self, f):
         """Inverse mapping from output domain of the function to the inputs."""
@@ -59,6 +59,8 @@ class SparseGP(GP):
             logger.info("Adding Z as parameter")
             self.link_parameter(self.Z, index=0)
         self.posterior = None
+        self._predictive_variable = self.Z
+
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)
@@ -125,7 +127,7 @@ class SparseGP(GP):
         if kern is None: kern = self.kern
 
         if not isinstance(Xnew, VariationalPosterior):
-            Kx = kern.K(self.Z, Xnew)
+            Kx = kern.K(self._predictive_variable, Xnew)
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
             if full_cov:
                 Kxx = kern.K(Xnew)
@@ -149,8 +151,8 @@ class SparseGP(GP):
             if self.mean_function is not None:
                 mu += self.mean_function.f(Xnew)
         else:
-            psi0_star = kern.psi0(self.Z, Xnew)
-            psi1_star = kern.psi1(self.Z, Xnew)
+            psi0_star = kern.psi0(self._predictive_variable, Xnew)
+            psi1_star = kern.psi1(self._predictive_variable, Xnew)
             #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
             la = self.posterior.woodbury_vector
             mu = np.dot(psi1_star, la) # TODO: dimensions?
@@ -163,7 +165,7 @@ class SparseGP(GP):
 
             for i in range(Xnew.shape[0]):
                 _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
-                psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
+                psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
                 tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
 
                 var_ = mdot(la.T, tmp, la)
@@ -146,7 +146,7 @@ class VerboseOptimization(object):
         seconds = time.time()-self.start
         #sys.stdout.write(" "*len(self.message))
         self.deltat += seconds
-        if self.deltat > .2:
+        if self.deltat > .3 or seconds < .3:
             self.print_out(seconds)
             self.deltat = 0
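The loosened condition prints a status line once enough time has accumulated, and always during the first instants of a run, so short optimizations still produce output. A generic sketch of this kind of time-based print throttle (not this class's exact bookkeeping):

    import time

    class PrintThrottle(object):
        """Emit at most one status line per `interval` seconds."""
        def __init__(self, interval=0.3):
            self.interval = interval
            self.last = 0.0

        def emit(self, msg):
            now = time.time()
            if now - self.last > self.interval:
                print(msg)
                self.last = now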
@@ -100,6 +100,8 @@ class Kern(Parameterized):
         return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
     def gradients_X(self, dL_dK, X, X2):
         raise NotImplementedError
+    def gradients_XX(self, dL_dK, X, X2):
+        raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
     def gradients_X_diag(self, dL_dKdiag, X):
         raise NotImplementedError
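The contract of the new gradients_XX hook is the mixed second derivative of K with respect to both inputs. Independent of any particular kernel, a single entry can be cross-checked by central differences (a sketch; k is any kernel exposing K):

    import numpy as np

    def numeric_dK2(k, X, X2, i, j, q, eps=1e-5):
        # central-difference estimate of d^2 K[i,j] / dX[i,q] dX2[j,q]
        def K_shifted(dx, dx2):
            Xa, X2a = X.copy(), X2.copy()
            Xa[i, q] += dx
            X2a[j, q] += dx2
            return k.K(Xa, X2a)[i, j]
        return (K_shifted(eps, eps) - K_shifted(eps, -eps)
                - K_shifted(-eps, eps) + K_shifted(-eps, -eps)) / (4*eps**2)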
@@ -19,6 +19,8 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
         put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
         put_clean(dct, 'gradients_X', _slice_gradients_X)
+        put_clean(dct, 'gradients_XX', _slice_gradients_XX)
+        put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
         put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
 
         put_clean(dct, 'psi0', _slice_psi)
@@ -30,9 +32,12 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
 
 class _Slice_wrap(object):
-    def __init__(self, k, X, X2=None):
+    def __init__(self, k, X, X2=None, ret_shape=None):
         self.k = k
-        self.shape = X.shape
+        if ret_shape is None:
+            self.shape = X.shape
+        else:
+            self.shape = ret_shape
         assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
         if X2 is not None:
             assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
@@ -54,7 +59,10 @@ class _Slice_wrap(object):
     def handle_return_array(self, return_val):
         if self.ret:
             ret = np.zeros(self.shape)
-            ret[:, self.k.active_dims] = return_val
+            if len(self.shape) == 2:
+                ret[:, self.k.active_dims] = return_val
+            elif len(self.shape) == 3:
+                ret[:, :, self.k.active_dims] = return_val
             return ret
         return return_val
@@ -98,6 +106,19 @@ def _slice_gradients_X(f):
         return ret
     return wrap
 
+def _slice_gradients_XX(f):
+    @wraps(f)
+    def wrap(self, dL_dK, X, X2=None):
+        if X2 is None:
+            N, M = X.shape[0], X.shape[0]
+        else:
+            N, M = X.shape[0], X2.shape[0]
+        with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
+            #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
+            ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
+        return ret
+    return wrap
+
 def _slice_gradients_X_diag(f):
     @wraps(f)
     def wrap(self, dL_dKdiag, X):
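_Slice_wrap exists so a kernel only ever sees its active_dims columns; the new ret_shape argument lets the wrapper scatter an N x M x q' result back into the full N x M x Q array, which is what the 3-D branch in handle_return_array above does. The scatter step in isolation (hypothetical shapes):

    import numpy as np

    active_dims = [0, 2]              # kernel acts on 2 of Q=4 input columns
    ret = np.zeros((5, 6, 4))         # full N x M x Q result
    partial = np.ones((5, 6, 2))      # kernel's result on its active dims only
    ret[:, :, active_dims] = partial  # remaining columns stay zero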
@@ -100,6 +100,12 @@ class Linear(Kern):
         #return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
         return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
 
+    def gradients_XX(self, dL_dK, X, X2=None):
+        if X2 is None:
+            return 2*np.ones(X.shape)*self.variances
+        else:
+            return np.ones(X.shape)*self.variances
+
     def gradients_X_diag(self, dL_dKdiag, X):
         return 2.*self.variances*dL_dKdiag[:,None]*X
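For the linear kernel k(x, x') = sum_q sigma_q x_q x'_q, the mixed second derivative d2k/dx_q dx'_q is just sigma_q, hence the ones * variances above; the factor 2 in the X2-is-None branch accounts for X entering both arguments. A quick numeric check (a sketch with made-up numbers):

    import numpy as np

    variances = np.array([1.5, 0.5])
    x, x2 = np.array([0.3, -1.0]), np.array([2.0, 0.7])
    k = lambda a, b: np.sum(variances*a*b)

    q, eps = 0, 1e-4
    def shift(v, delta):
        w = v.copy(); w[q] += delta; return w
    d2 = (k(shift(x, eps), shift(x2, eps)) - k(shift(x, eps), shift(x2, -eps))
          - k(shift(x, -eps), shift(x2, eps)) + k(shift(x, -eps), shift(x2, -eps))) / (4*eps**2)
    print(d2, variances[q])  # both ~1.5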
@@ -31,6 +31,9 @@ class RBF(Stationary):
     def dK_dr(self, r):
         return -r*self.K_of_r(r)
 
+    def dK2_drdr(self, r):
+        return (r**2-1)*self.K_of_r(r)
+
     def __getstate__(self):
         dc = super(RBF, self).__getstate__()
         if self.useGPU:
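This follows directly from K(r) = sigma^2 exp(-r^2/2): dK/dr = -r K(r), and differentiating again gives d2K/dr2 = (r^2 - 1) K(r). A one-liner check (sketch, unit variance):

    import numpy as np

    K = lambda r: np.exp(-r**2/2.)
    r, eps = 0.8, 1e-4
    d2_numeric = (K(r+eps) - 2*K(r) + K(r-eps))/eps**2
    print(d2_numeric, (r**2 - 1)*K(r))  # agree closely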
@@ -15,7 +15,7 @@ from ...util.caching import Cache_this
 try:
     from . import stationary_cython
 except ImportError:
-    print('warning in sationary: failed to import cython module: falling back to numpy')
+    print('warning in stationary: failed to import cython module: falling back to numpy')
     config.set('cython', 'working', 'false')
@@ -78,6 +78,10 @@ class Stationary(Kern):
         raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
 
     @Cache_this(limit=20, ignore_args=())
+    def dK2_drdr(self, r):
+        raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
+
+    @Cache_this(limit=5, ignore_args=())
     def K(self, X, X2=None):
         """
         Kernel function applied on inputs X and X2.
@@ -94,6 +98,11 @@ class Stationary(Kern):
         #a convenience function, so we can cache dK_dr
         return self.dK_dr(self._scaled_dist(X, X2))
 
+    @Cache_this(limit=3, ignore_args=())
+    def dK2_drdr_via_X(self, X, X2):
+        #a convenience function, so we can cache dK2_drdr
+        return self.dK2_drdr(self._scaled_dist(X, X2))
+
     def _unscaled_dist(self, X, X2=None):
         """
         Compute the Euclidean distance between each row of X and X2, or between
@@ -201,6 +210,59 @@ class Stationary(Kern):
         else:
             return self._gradients_X_pure(dL_dK, X, X2)
 
+    def gradients_XX(self, dL_dK, X, X2=None):
+        """
+        Given the derivative of the objective wrt K (dL_dK), compute the
+        second derivative of K wrt X and X2:
+
+        .. math::
+
+            \frac{\partial^2 K}{\partial X \partial X2}
+
+        :returns: dL2_dXdX2 -- [NxMxQ] for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None);
+            that is, we return the second derivative in X2.
+        """
+        # The off-diagonals in Q are always zero; this should also be true for the Linear kernel.
+        # By the multivariate chain rule we can chain the second derivative through r:
+        # d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2
+        invdist = self._inv_dist(X, X2)
+        invdist2 = invdist**2
+
+        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
+        tmp1 = dL_dr * invdist
+
+        dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
+        tmp2 = dL_drdr * invdist2
+
+        l2 = np.ones(X.shape[1]) * self.lengthscale**2
+
+        if X2 is None:
+            X2 = X
+            tmp1 -= np.eye(X.shape[0])*self.variance
+        else:
+            tmp1[X==X2.T] -= self.variance
+
+        grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
+        #grad = np.empty(X.shape, dtype=np.float64)
+        for q in range(self.input_dim):
+            tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
+            grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
+            #grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
+            #np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
+            #np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
+        return grad
+
+    def gradients_XX_diag(self, dL_dK, X):
+        """
+        Given the derivative of the objective wrt K (dL_dK), compute the
+        diagonal of the second derivative of K wrt X:
+
+        .. math::
+
+            \frac{\partial^2 K}{\partial X \partial X}
+
+        :returns: dL2_dXdX -- [NxQ] for X [NxQ]
+        """
+        return np.ones(X.shape) * self.variance/self.lengthscale**2
+
     def _gradients_X_pure(self, dL_dK, X, X2=None):
         invdist = self._inv_dist(X, X2)
         dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
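The chain rule quoted in the code comment, d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr*dr_dX*dr_dX2, can be checked independently of the implementation. For K(r) = exp(-r^2/2) and r = ||x - x2||/l (a sketch with made-up points):

    import numpy as np

    l = 0.7
    K_of_r   = lambda r: np.exp(-r**2/2.)
    dK_dr    = lambda r: -r*K_of_r(r)
    d2K_drdr = lambda r: (r**2 - 1.)*K_of_r(r)

    x, x2 = np.array([0.2, 0.9]), np.array([1.1, 0.3])
    r = lambda a, b: np.linalg.norm(a - b)/l
    K = lambda a, b: K_of_r(r(a, b))

    q, eps = 0, 1e-4
    def shift(v, delta):
        w = v.copy(); w[q] += delta; return w
    lhs = (K(shift(x, eps), shift(x2, eps)) - K(shift(x, eps), shift(x2, -eps))
           - K(shift(x, -eps), shift(x2, eps)) + K(shift(x, -eps), shift(x2, -eps))) / (4*eps**2)

    rv = r(x, x2)
    d = x[q] - x2[q]
    dr_dx, dr_dx2 = d/(l**2*rv), -d/(l**2*rv)
    d2r_dxdx2 = -1./(l**2*rv) + d**2/(l**4*rv**3)
    rhs = dK_dr(rv)*d2r_dxdx2 + d2K_drdr(rv)*dr_dx*dr_dx2
    print(lhs, rhs)  # agree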
@@ -1,6 +1,6 @@
 from .bernoulli import Bernoulli
 from .exponential import Exponential
-from .gaussian import Gaussian
+from .gaussian import Gaussian, Heteroscedastic_Gaussian
 from .gamma import Gamma
 from .poisson import Poisson
 from .student_t import StudentT
@@ -137,6 +137,20 @@ class BayesianGPLVM(SparseGP_MPI):
                                                fignum, plot_inducing, legend,
                                                plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
 
+    def plot_magnification(self, labels=None, which_indices=None,
+                resolution=50, ax=None, marker='o', s=40,
+                fignum=None, legend=True,
+                plot_limits=None,
+                aspect='auto', updates=False, **kwargs):
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import dim_reduction_plots
+
+        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
+                                                      resolution, ax, marker, s,
+                                                      fignum, False, legend,
+                                                      plot_limits, aspect, updates, **kwargs)
+
     def do_test_latents(self, Y):
         """
         Compute the latent representation for a set of new points Y
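A hedged usage sketch for the new plotting entry point (assuming the standard constructor GPy.models.BayesianGPLVM(Y, input_dim); matplotlib must be imported beforehand to satisfy the assert):

    import numpy as np
    import matplotlib  # must be in sys.modules before the call
    import GPy

    Y = np.random.randn(40, 5)
    m = GPy.models.BayesianGPLVM(Y, 2)
    m.optimize(messages=False)
    m.plot_magnification()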
@@ -43,19 +43,19 @@ class GPLVM(GP):
         super(GPLVM, self).parameters_changed()
         self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
 
-    def jacobian(self,X):
-        J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
-        for i in range(self.output_dim):
-            J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
-        return J
+    #def jacobian(self,X):
+    #    J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
+    #    for i in range(self.output_dim):
+    #        J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
+    #    return J
 
-    def magnification(self,X):
-        target=np.zeros(X.shape[0])
-        #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
-        J = self.jacobian(X)
-        for i in range(X.shape[0]):
-            target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
-        return target
+    #def magnification(self,X):
+    #    target=np.zeros(X.shape[0])
+    #    #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
+    #    ## J = self.jacobian(X)
+    #    for i in range(X.shape[0]):
+    #        target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
+    #    return target
 
     def plot(self):
         assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
@@ -82,5 +82,17 @@ class GPLVM(GP):
                                                fignum, False, legend,
                                                plot_limits, aspect, updates, **kwargs)
 
-    def plot_magnification(self, *args, **kwargs):
-        return util.plot_latent.plot_magnification(self, *args, **kwargs)
+    def plot_magnification(self, labels=None, which_indices=None,
+                resolution=50, ax=None, marker='o', s=40,
+                fignum=None, legend=True,
+                plot_limits=None,
+                aspect='auto', updates=False, **kwargs):
+
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import dim_reduction_plots
+
+        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
+                                                      resolution, ax, marker, s,
+                                                      fignum, False, legend,
+                                                      plot_limits, aspect, updates, **kwargs)
@@ -114,7 +114,7 @@ def plot_latent(model, labels=None, which_indices=None,
 
     # create a function which computes the shading of latent space according to the output variance
     def plot_function(x):
-        Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
+        Xtest_full = np.zeros((x.shape[0], X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
         _, var = model.predict(Xtest_full, **predict_kwargs)
         var = var[:, :1]
@@ -202,6 +202,7 @@ def plot_latent(model, labels=None, which_indices=None,
 def plot_magnification(model, labels=None, which_indices=None,
                 resolution=60, ax=None, marker='o', s=40,
                 fignum=None, plot_inducing=False, legend=True,
+                plot_limits=None,
                 aspect='auto', updates=False):
     """
     :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@@ -217,17 +218,88 @@ def plot_magnification(model, labels=None, which_indices=None,
 
     input_1, input_2 = most_significant_input_dimensions(model, which_indices)
 
-    # first, plot the output variance as a function of the latent space
-    Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
-    Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
+    #fetch the data points X that we'd like to plot
+    X = model.X
+    if isinstance(X, VariationalPosterior):
+        X = X.mean
+    else:
+        X = X
+
+    if X.shape[0] > 1000:
+        print("Warning: subsampling X, as it has more samples than 1000. X.shape={!s}".format(X.shape))
+        subsample = np.random.choice(X.shape[0], size=1000, replace=False)
+        X = X[subsample]
+        labels = labels[subsample]
+    #=======================================================================
+    # <<<WORK IN PROGRESS>>>
+    # <<<DO NOT DELETE>>>
+    # plt.close('all')
+    # fig, ax = plt.subplots(1,1)
+    # from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
+    # import matplotlib.patches as mpatches
+    # i1, i2 = most_significant_input_dimensions(m, None)
+    # xmin, xmax = 100, -100
+    # ymin, ymax = 100, -100
+    # legend_handles = []
+    #
+    # X = m.X.mean[:, [i1, i2]]
+    # X = m.X.variance[:, [i1, i2]]
+    #
+    # xmin = X[:,0].min(); xmax = X[:,0].max()
+    # ymin = X[:,1].min(); ymax = X[:,1].max()
+    # range_ = [[xmin, xmax], [ymin, ymax]]
+    # ul = np.unique(labels)
+    #
+    # for i, l in enumerate(ul):
+    #     #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
+    #     #             green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
+    #     #             blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
+    #     #             alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
+    #     #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
+    #     cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
+    #     cmap._init()
+    #     #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
+    #     alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
+    #     cmap._lut[:, -1] = alphas
+    #     print l
+    #     x, y = X[labels==l].T
+    #
+    #     heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
+    #     #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
+    #
+    #     im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
+    #     legend_handles.append(mpatches.Patch(color=colors[i], label=l))
+    # ax.set_xlim(xmin, xmax)
+    # ax.set_ylim(ymin, ymax)
+    # plt.legend(legend_handles, [l.get_label() for l in legend_handles])
+    # plt.draw()
+    # plt.show()
+    #=======================================================================
+
+    #Create an IMshow controller that can re-plot the latent space shading at a good resolution
+    if plot_limits is None:
+        xmin, ymin = X[:, [input_1, input_2]].min(0)
+        xmax, ymax = X[:, [input_1, input_2]].max(0)
+        x_r, y_r = xmax-xmin, ymax-ymin
+        xmin -= .1*x_r
+        xmax += .1*x_r
+        ymin -= .1*y_r
+        ymax += .1*y_r
+    else:
+        try:
+            xmin, xmax, ymin, ymax = plot_limits
+        except (TypeError, ValueError) as e:
+            raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits))
 
     def plot_function(x):
+        Xtest_full = np.zeros((x.shape[0], X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
-        mf=model.magnification(Xtest_full)
+        mf = model.predict_magnification(Xtest_full)
         return mf
 
     view = ImshowController(ax, plot_function,
-                            tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]),
+                            (xmin, ymin, xmax, ymax),
                             resolution, aspect=aspect, interpolation='bilinear',
                             cmap=pb.cm.gray)
@@ -46,6 +46,8 @@ def offdiag_view(A, offset=0):
     return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1]))
 
 def _diag_ufunc(A,b,offset,func):
+    b = np.squeeze(b)
+    assert b.ndim <= 1, "only implemented for one dimensional arrays"
     dA = view(A, offset); func(dA,b,dA)
     return A
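The two added lines just normalise b before the in-place diagonal update. The underlying trick in plain numpy (a sketch; np.einsum('ii->i', A) gives a writable view of the diagonal, much like GPy's view helper does):

    import numpy as np

    A = np.arange(9.).reshape(3, 3)
    b = np.array([[10., 20., 30.]])   # 2-D input ...
    b = np.squeeze(b)                 # ... squeezed to 1-D, as the new assert expects
    dA = np.einsum('ii->i', A)        # writable view of A's diagonal
    np.add(dA, b, dA)                 # ufunc applied in place on the diagonal only
    print(A)                          # off-diagonal entries are untouched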