mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00
added sparsegp with missing data
This commit is contained in: parent e96d40b4cc, commit d0c563ff0a
19 changed files with 572 additions and 325 deletions
@@ -36,6 +36,8 @@ class ObservableArray(np.ndarray, Observable):
        # see InfoArray.__array_finalize__ for comments
        if obj is None: return
        self._observers_ = getattr(obj, '_observers_', None)

    def __array_wrap__(self, out_arr, context=None):
        return out_arr.view(np.ndarray)

    def _s_not_empty(self, s):
        # this checks whether there is something picked by this slice.
@ -68,11 +70,6 @@ class ObservableArray(np.ndarray, Observable):
|
|||
def copy(self, *args):
|
||||
return self.__copy__(*args)
|
||||
|
||||
def __ror__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ror__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
def __ilshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ilshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
|
|
@ -83,71 +80,19 @@ class ObservableArray(np.ndarray, Observable):
|
|||
self._notify_observers()
|
||||
return r
|
||||
|
||||
def __rrshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rrshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
def __ixor__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ixor__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
def __rxor__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rxor__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
|
||||
def __rdivmod__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rdivmod__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __radd__(self, *args, **kwargs):
|
||||
r = np.ndarray.__radd__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rdiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rdiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rtruediv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rtruediv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ipow__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ipow__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rmul__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rmul__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rpow__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rpow__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rsub__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rsub__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
|
||||
def __ifloordiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
|
|
@ -178,12 +123,6 @@ class ObservableArray(np.ndarray, Observable):
|
|||
return r
|
||||
|
||||
|
||||
def __rfloordiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __iand__(self, *args, **kwargs):
|
||||
r = np.ndarray.__iand__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
|
|
@ -208,8 +147,74 @@ class ObservableArray(np.ndarray, Observable):
|
|||
return r
|
||||
|
||||
|
||||
def __rshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
# def __rrshift__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rrshift__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __ror__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__ror__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rxor__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rxor__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
|
||||
# def __rdivmod__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rdivmod__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __radd__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__radd__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rdiv__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rdiv__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rtruediv__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rtruediv__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rshift__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rshift__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rmul__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rmul__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rpow__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rpow__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
||||
# def __rsub__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rsub__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
# def __rfloordiv__(self, *args, **kwargs):
|
||||
# r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
|
||||
# self._notify_observers()
|
||||
# return r
|
||||
|
||||
|
|
|
|||
|
|
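All of these overrides follow one pattern: delegate to the np.ndarray implementation, then ping the registered observers so dependent objects can react to the mutation. A minimal, self-contained sketch of that pattern (MiniObservableArray and echo are illustrative names, not GPy API):

import numpy as np

class MiniObservableArray(np.ndarray):
    def __new__(cls, arr):
        obj = np.asarray(arr).view(cls)
        obj._observers_ = []
        return obj
    def __array_finalize__(self, obj):
        # same idiom as in the hunk above
        if obj is None: return
        self._observers_ = getattr(obj, '_observers_', [])
    def _notify_observers(self):
        for callback in self._observers_:
            callback(self)
    def __iadd__(self, *args, **kwargs):
        r = np.ndarray.__iadd__(self, *args, **kwargs)   # same shape as every override above
        self._notify_observers()
        return r

def echo(arr):
    print 'array changed'

a = MiniObservableArray(np.zeros(3))
a._observers_.append(echo)
a += 1.   # the in-place op triggers echo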
@@ -80,8 +80,6 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
        self.constraints = getattr(obj, 'constraints', None)
        self.priors = getattr(obj, 'priors', None)

    #def __array_wrap__(self, out_arr, context=None):
    #    return out_arr.view(numpy.ndarray)
    #===========================================================================
    # Pickling operations
    #===========================================================================
@ -335,19 +333,20 @@ class ParamConcatenation(object):
|
|||
if len(params)==1: return params[0]
|
||||
return ParamConcatenation(params)
|
||||
def __setitem__(self, s, val, update=True):
|
||||
if isinstance(val, ParamConcatenation):
|
||||
val = val._vals()
|
||||
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
||||
vals = self._vals(); vals[s] = val; del val
|
||||
[numpy.place(p, ind[ps], vals[ps])# and p._notify_tied_parameters()
|
||||
[numpy.place(p, ind[ps], vals[ps]) and update and p._notify_parameters_changed()
|
||||
for p, ps in zip(self.params, self._param_slices_)]
|
||||
if update:
|
||||
self.params[0]._notify_parameters_changed()
|
||||
def _vals(self):
|
||||
return numpy.hstack([p._get_params() for p in self.params])
|
||||
#===========================================================================
|
||||
# parameter operations:
|
||||
#===========================================================================
|
||||
def update_all_params(self):
|
||||
self.params[0]._notify_parameters_changed()
|
||||
for p in self.params:
|
||||
p._notify_parameters_changed()
|
||||
|
||||
def constrain(self, constraint, warning=True):
|
||||
[param.constrain(constraint, update=False) for param in self.params]
|
||||
|
|
|
|||
|
|
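The __setitem__ above relies on numpy.place to scatter the edited flat values back into each constituent Param through its boolean slice mask; numpy.place fills the True positions in order (plain numpy, nothing GPy-specific):

import numpy as np
p = np.zeros(4)
mask = np.array([True, False, True, False])
np.place(p, mask, [1., 2.])
print p   # [ 1.  0.  2.  0.]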
@@ -2,12 +2,11 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, dtrtrs, dpotrs, dpotri
from ..util.linalg import mdot
from gp import GP
from parameterization.param import Param
from ..inference.latent_function_inference import varDTC
from GPy.inference.latent_function_inference import var_dtc
from .. import likelihoods
from GPy.util.misc import param_to_array

class SparseGP(GP):
    """
@@ -37,7 +36,7 @@ class SparseGP(GP):
        #pick a sensible inference method
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian):
                inference_method = varDTC.VarDTC()
                inference_method = var_dtc.VarDTC()
            else:
                #inference_method = ??
                raise NotImplementedError, "what to do what to do?"
@@ -3,12 +3,11 @@
import numpy as _np
default_seed = _np.random.seed(123344)

def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=1e4):
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
    """
    model for testing purposes. Samples from a GP with rbf kernel and learns
    the samples with a new kernel. Normally not for optimization, just model checking
    """
    from GPy.likelihoods.gaussian import Gaussian
    import GPy

    num_inputs = 13
@@ -36,12 +35,17 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
    # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)

    p = .3

    m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)

    if nan:
        m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
        m.Y[_np.random.binomial(1, p, size=Y.shape).astype(bool)] = _np.nan   # cast to bool: int 0/1 would be row indices
        m.parameters_changed()

    #===========================================================================
    # randomly obstruct data with percentage p
    p = .8
    Y_obstruct = Y.copy()
    Y_obstruct[_np.random.uniform(size=Y.shape) < p] = _np.nan
    #===========================================================================
    #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
    m.lengthscales = lengthscales
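One detail worth spelling out for the nan branch above: np.random.binomial returns an int 0/1 array, and indexing with integers selects whole rows instead of masking elements, so the draw has to be cast to bool first (a small demonstration of the difference):

import numpy as np
Y = np.arange(8.).reshape(4, 2)
draw = np.random.binomial(1, .5, size=Y.shape)
Y[draw.astype(bool)] = np.nan   # element-wise mask, as intended
# Y[draw] instead would be fancy indexing: it only ever picks rows 0 and 1 of Y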
@@ -276,6 +280,35 @@ def bgplvm_simulation(optimize=True, verbose=1,
        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
    return m

def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                                   plot=True, plot_sim=False,
                                   max_iters=2e4,
                                   ):
    from GPy import kern
    from GPy.models import BayesianGPLVM
    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData

    D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
    Y = Ylist[0]
    k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)

    inan = _np.random.binomial(1, .3, size=Y.shape).astype(bool)   # cast to bool for element-wise masking
    m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k)
    m.inference_method = VarDTCMissingData()
    m.Y[inan] = _np.nan
    m.parameters_changed()

    if optimize:
        print "Optimizing model:"
        m.optimize('bfgs', messages=verbose, max_iters=max_iters,
                   gtol=.05)
    if plot:
        m.q.plot("BGPLVM Latent Space 1D")
        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
    return m


def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
    from GPy import kern
    from GPy.models import MRD
@@ -361,7 +361,7 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
        kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
    else:
        kernel = GPy.kern.rbf(X.shape[1], ARD=1)
    kernel += GPy.kern.bias(X.shape[1])
    #kernel += GPy.kern.bias(X.shape[1])
    X_variance = np.ones(X.shape) * 0.5
    m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
    # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
@@ -434,10 +434,14 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti

    return m

def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True):
def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True, nan=False):
    """Run a 2D example of a sparse GP regression."""
    np.random.seed(1234)
    X = np.random.uniform(-3., 3., (num_samples, 2))
    Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05
    if nan:
        inan = np.random.binomial(1, .2, size=Y.shape).astype(bool)   # bool mask, not int indices
        Y[inan] = np.nan

    # construct kernel
    rbf = GPy.kern.rbf(2)
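With the new keyword the missing-data path can be exercised straight from this example; a usage sketch, assuming the function is imported from GPy.examples.regression as usual:

m = sparse_GP_regression_2D(num_samples=200, num_inducing=30, nan=True)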
@@ -0,0 +1,2 @@
import latent_function_inference
import optimization
@@ -26,6 +26,6 @@ etc.
from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace
expectation_propagation = 'foo' # TODO
from varDTC import VarDTC
from GPy.inference.latent_function_inference.var_dtc import VarDTC
from dtc import DTC
from fitc import FITC
@@ -216,9 +216,9 @@ class Laplace(object):
        """
        if not log_concave:
            #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
            # W[W<1e-6] = 1e-6
            W[W<1e-6] = 1e-6
            # NOTE: when setting a parameter inside parameters_changed it will always lead to closed update cycles!!!
            W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
            #W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
            # If the likelihood is non-log-concave, we want to say that there is a negative variance
            # to cause the posterior to become less certain than the prior and likelihood;
            # this is a property only held by non-log-concave likelihoods.
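Since setting items on an ObservableArray notifies observers, the update=False call was one way around the update cycle the NOTE warns about; an out-of-place clip is another way to sidestep __setitem__ entirely (a sketch, not the fix this commit chose):

import numpy as np
W = np.random.randn(5, 1)          # stand-in for the Laplace W vector
W = np.clip(W, 1e-6, np.inf)       # returns a fresh ndarray: no observer round-trip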
@@ -1,216 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
import numpy as np
from ...util.caching import Cacher
from ...util.misc import param_to_array
log_2_pi = np.log(2*np.pi)

class VarDTC(object):
    """
    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

    The function self.inference returns a Posterior object, which summarizes
    the posterior.

    For efficiency, we sometimes work with the Cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
    """
    def __init__(self):
        #self._YYTfactor_cache = caching.cache()
        self.const_jitter = 1e-6
        self.get_trYYT = Cacher(self._get_trYYT, 1)
        self.get_YYTfactor = Cacher(self._get_YYTfactor, 1)

    def _get_trYYT(self, Y):
        return param_to_array(np.sum(np.square(Y)))

    def _get_YYTfactor(self, Y):
        """
        find a matrix L which satisfies LL^T = YY^T.

        Note that L may have fewer columns than Y.
        """
        N, D = Y.shape
        if (N>=D):
            return param_to_array(Y)
        else:
            return jitchol(tdot(Y))

    def get_VVTfactor(self, Y, prec):
        return Y * prec # TODO: cache this, and make it efficient

    def inference(self, kern, X, X_variance, Z, likelihood, Y):

        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape

        #see whether we're using variational uncertain inputs
        uncertain_inputs = not (X_variance is None)

        #see whether we've got a different noise variance for each datum
        beta = 1./np.squeeze(likelihood.variance)
        het_noise = False
        if beta.size > 1:
            het_noise = True

        # kernel computations, using BGPLVM notation
        Kmm = kern.K(Z)
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X, X_variance)
            psi1 = kern.psi1(Z, X, X_variance)
            psi2 = kern.psi2(Z, X, X_variance)
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)

        #factor Kmm # TODO: cache?
        Lm = jitchol(Kmm)

        # The rather complex computations of A
        if uncertain_inputs:
            if het_noise:
                psi2_beta = (psi2 * (beta.flatten().reshape(num_data, 1, 1))).sum(0)
            else:
                psi2_beta = psi2.sum(0) * beta
            if 0:
                evals, evecs = linalg.eigh(psi2_beta)
                clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
                if not np.array_equal(evals, clipped_evals):
                    pass # print evals
                tmp = evecs * np.sqrt(clipped_evals)
                tmp = tmp.T
            # no backsubstitution because of bound explosion on tr(A) if not...
            LmInv, _ = dtrtri(Lm, lower=1)
            A = LmInv.dot(psi2_beta.dot(LmInv.T))
            #print A.sum()
        else:
            if het_noise:
                tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
            else:
                tmp = psi1 * (np.sqrt(beta))
            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp)

        # factor B
        B = np.eye(num_inducing) + A
        self.A = A
        LB = jitchol(B)

        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        VVT_factor = beta*param_to_array(Y)
        trYYT = self.get_trYYT(Y)

        psi1Vf = np.dot(psi1.T, VVT_factor)

        # back substitute C into psi1Vf
        tmp, info1 = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)

        # Compute dL_dKmm
        tmp = tdot(_LBi_Lmi_psi1Vf)
        data_fit = np.trace(tmp)
        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + tmp)
        tmp = -0.5 * DBi_plus_BiPBi
        tmp += -0.5 * B * output_dim
        tmp += output_dim * np.eye(num_inducing)
        dL_dKmm = backsub_both_sides(Lm, tmp)

        # Compute dL_dpsi
        dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
        dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
        dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)

        if het_noise:
            if uncertain_inputs:
                dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
            else:
                dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
                dL_dpsi2 = None
        else:
            dL_dpsi2 = beta * dL_dpsi2_beta
            if uncertain_inputs:
                # repeat for each of the N psi_2 matrices
                dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
            else:
                # subsume back into psi1 (==Kmn)
                dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
                dL_dpsi2 = None

        # the partial derivative vector for the likelihood
        if likelihood.size == 0:
            # save computation here.
            partial_for_likelihood = None
        elif het_noise:
            if uncertain_inputs:
                raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
            else:
                LBi = chol_inv(LB)
                Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
                _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)

                partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
                partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2

                partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2

                partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
                partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2

        else:
            # likelihood is not heteroscedastic
            partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
            partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
            partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)

        #compute log marginal likelihood
        if het_noise:
            lik_1 = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
            lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
        else:
            lik_1 = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(beta)) - 0.5 * beta * trYYT
            lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
        lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
        lik_4 = 0.5 * data_fit
        log_marginal = lik_1 + lik_2 + lik_3 + lik_4

        #put the gradients in the right places
        likelihood.update_gradients(partial_for_likelihood)

        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
            kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
        else:
            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1}
            kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in the loop?
        if VVT_factor.shape[1] == Y.shape[1]:
            woodbury_vector = Cpsi1Vf # == Cpsi1V
        else:
            print 'foobar'
            psi1V = np.dot(Y.T*beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = dpotri(LB, lower=1)[0]
        woodbury_inv = backsub_both_sides(Lm, np.eye(num_inducing) - Bi)

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

        return post, log_marginal, grad_dict
GPy/inference/latent_function_inference/var_dtc.py (new file, 413 lines)
@@ -0,0 +1,413 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, mdot
import numpy as np
from ...util.misc import param_to_array
log_2_pi = np.log(2*np.pi)

class VarDTC(object):
    """
    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

    The function self.inference returns a Posterior object, which summarizes
    the posterior.

    For efficiency, we sometimes work with the Cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
    """
    const_jitter = 1e-6
    def __init__(self):
        #self._YYTfactor_cache = caching.cache()
        from ...util.caching import Cacher
        self.get_trYYT = Cacher(self._get_trYYT, 1)
        self.get_YYTfactor = Cacher(self._get_YYTfactor, 1)

    def _get_trYYT(self, Y):
        return param_to_array(np.sum(np.square(Y)))

    def _get_YYTfactor(self, Y):
        """
        find a matrix L which satisfies LL^T = YY^T.

        Note that L may have fewer columns than Y.
        """
        N, D = Y.shape
        if (N>=D):
            return param_to_array(Y)
        else:
            return jitchol(tdot(Y))

    def get_VVTfactor(self, Y, prec):
        return Y * prec # TODO: cache this, and make it efficient

    def inference(self, kern, X, X_variance, Z, likelihood, Y):

        #see whether we're using variational uncertain inputs
        uncertain_inputs = not (X_variance is None)

        _, output_dim = Y.shape

        #see whether we've got a different noise variance for each datum
        beta = 1./np.squeeze(likelihood.variance)

        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        VVT_factor = beta*Y
        #VVT_factor = beta*Y
        trYYT = self.get_trYYT(Y)

        # do the inference:
        dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \
            psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = _do_inference_on(
                kern, X, X_variance, Z, likelihood,
                uncertain_inputs, output_dim,
                beta, VVT_factor, trYYT)

        likelihood.update_gradients(partial_for_likelihood)

        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
            kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
        else:
            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1}
            kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in the loop?
        if VVT_factor.shape[1] == Y.shape[1]:
            woodbury_vector = Cpsi1Vf # == Cpsi1V
        else:
            print 'foobar'
            psi1V = np.dot(Y.T*beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = -dpotri(LB, lower=1)[0]
        from ...util import diag
        diag.add(Bi, 1)

        woodbury_inv = backsub_both_sides(Lm, Bi)
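        # Assuming backsub_both_sides(L, X) computes L^{-T} X L^{-1} (consistent
        # with its use against Lm here), and with B = I + A, A = Lm^{-1} (beta*Psi2) Lm^{-T}:
        #     woodbury_inv = Lm^{-T} (I - B^{-1}) Lm^{-1}
        #                  = Kmm^{-1} - (Kmm + beta*Psi2)^{-1}
        # i.e. the correction term a sparse GP posterior applies to the test covariance.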
        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

        return post, log_marginal, grad_dict

class VarDTCMissingData(object):
    def __init__(self):
        from ...util.caching import Cacher
        self._Y = Cacher(self._subarray_computations, 1)
        pass

    def _subarray_computations(self, Y):
        inan = np.isnan(Y)
        has_none = inan.any()
        if has_none:
            from ...util.subarray_and_sorting import common_subarrays
            self._subarray_indices = []
            for v, ind in common_subarrays(inan, 1).iteritems():
                if not np.all(v):
                    v = ~np.array(v, dtype=bool)
                    ind = np.array(ind, dtype=int)
                    if ind.size == Y.shape[1]:
                        ind = slice(None)
                    self._subarray_indices.append([v, ind])
            Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
            traces = [(y**2).sum() for y in Ys]
            return Ys, traces
        else:
            self._subarray_indices = [[slice(None), slice(None)]]
            return [Y], [(Y**2).sum()]
    def inference(self, kern, X, X_variance, Z, likelihood, Y):
        Ys, traces = self._Y(Y)
        beta_all = 1./likelihood.variance
        uncertain_inputs = not (X_variance is None)
        het_noise = beta_all.size != 1

        import itertools
        num_inducing = Z.shape[0]

        dL_dpsi0_all = np.zeros(X.shape[0])
        dL_dpsi1_all = np.zeros((X.shape[0], num_inducing))
        if uncertain_inputs:
            dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing))

        partial_for_likelihood = 0
        LB_all = Cpsi1Vf_all = 0
        dL_dKmm = 0
        log_marginal = 0

        Kmm = kern.K(Z)
        #factor Kmm
        Lm = jitchol(Kmm)
        if uncertain_inputs: LmInv = dtrtri(Lm)

        # kernel computations, using BGPLVM notation
        psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)

        VVT_factor_all = np.empty(Y.shape)
        full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]

        for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
            if het_noise: beta = beta_all[ind]
            else: beta = beta_all[0]

            VVT_factor = (beta*y)
            VVT_factor_all[v, ind].flat = VVT_factor.flat
            output_dim = y.shape[1]

            psi0 = psi0_all[v]
            psi1 = psi1_all[v, :]
            if uncertain_inputs: psi2 = psi2_all[v, :]
            else: psi2 = None
            num_data = psi1.shape[0]

            if uncertain_inputs:
                if het_noise: psi2_beta = (psi2 * beta.flatten().reshape(num_data, 1, 1)).sum(0)
                else: psi2_beta = psi2.sum(0) * beta
                A = LmInv.dot(psi2_beta.dot(LmInv.T))
            else:
                if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
                else: tmp = psi1 * (np.sqrt(beta))
                tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
                A = tdot(tmp) #print A.sum()

            # factor B
            B = np.eye(num_inducing) + A
            LB = jitchol(B)

            psi1Vf = psi1.T.dot(VVT_factor)
            _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)

            if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf
            LB_all += LB

            # data fit and derivative of L w.r.t. Kmm
            delit = tdot(_LBi_Lmi_psi1Vf)
            data_fit = np.trace(delit)
            DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
            delit = -0.5 * DBi_plus_BiPBi
            delit += -0.5 * B * output_dim
            delit += output_dim * np.eye(num_inducing)
            dL_dKmm += backsub_both_sides(Lm, delit)

            # derivatives of L w.r.t. psi
            dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
                                                            VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
                                                            psi1, het_noise, uncertain_inputs)

            #import ipdb;ipdb.set_trace()
            dL_dpsi0_all[v] += dL_dpsi0
            dL_dpsi1_all[v, :] += dL_dpsi1
            if uncertain_inputs:
                dL_dpsi2_all[v, :] += dL_dpsi2

            # log marginal likelihood
            log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
                                                             psi0, A, LB, trYYT, data_fit)

            #put the gradients in the right places
            partial_for_likelihood += _compute_partial_for_likelihood(likelihood,
                                                                      het_noise, uncertain_inputs, LB,
                                                                      _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
                                                                      psi0, psi1, beta,
                                                                      data_fit, num_data, output_dim, trYYT)

        # gradients:
        likelihood.update_gradients(partial_for_likelihood)

        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all}
            kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
        else:
            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all}
            kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in the loop?
        if full_VVT_factor:
            woodbury_vector = Cpsi1Vf_all # == Cpsi1V
        else:
            print 'foobar'
            psi1V = np.dot(Y.T*beta_all, psi1_all).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB_all, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        Bi, _ = dpotri(LB_all, lower=1)
        symmetrify(Bi)
        Bi = -dpotri(LB_all, lower=1)[0]
        from ...util import diag
        diag.add(Bi, 1)

        woodbury_inv = backsub_both_sides(Lm, Bi)
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

        return post, log_marginal, grad_dict


def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm):
    # The rather complex computations of A
    if uncertain_inputs:
        if het_noise:
            psi2_beta = (psi2 * beta.flatten().reshape(num_data, 1, 1)).sum(0)
        else:
            psi2_beta = psi2.sum(0) * beta
        #if 0:
        #    evals, evecs = linalg.eigh(psi2_beta)
        #    clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
        #    if not np.array_equal(evals, clipped_evals):
        #        pass # print evals
        #    tmp = evecs * np.sqrt(clipped_evals)
        #    tmp = tmp.T
        # no backsubstitution because of bound explosion on tr(A) if not...
        LmInv = dtrtri(Lm)
        A = LmInv.dot(psi2_beta.dot(LmInv.T))
    else:
        if het_noise:
            tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
        else:
            tmp = psi1 * (np.sqrt(beta))
        tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
        A = tdot(tmp) #print A.sum()
    return A


def _compute_psi(kern, X, X_variance, Z, uncertain_inputs):
    if uncertain_inputs:
        psi0 = kern.psi0(Z, X, X_variance)
        psi1 = kern.psi1(Z, X, X_variance)
        psi2 = kern.psi2(Z, X, X_variance)
    else:
        psi0 = kern.Kdiag(X)
        psi1 = kern.K(X, Z)
        psi2 = None
    return psi0, psi1, psi2

def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs):
    Kmm = kern.K(Z)
    psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs)
    return Kmm, psi0, psi1, psi2

def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf):
    # Compute dL_dKmm
    delit = tdot(_LBi_Lmi_psi1Vf)
    data_fit = np.trace(delit)
    DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
    delit = -0.5 * DBi_plus_BiPBi
    delit += -0.5 * B * output_dim
    delit += output_dim * np.eye(num_inducing)
    dL_dKmm = backsub_both_sides(Lm, delit)
    return DBi_plus_BiPBi, data_fit, dL_dKmm

def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
    dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
    dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
    dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
    if het_noise:
        if uncertain_inputs:
            dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
        else:
            dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
            dL_dpsi2 = None
    else:
        dL_dpsi2 = beta * dL_dpsi2_beta
        if uncertain_inputs:
            # repeat for each of the N psi_2 matrices
            dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
        else:
            # subsume back into psi1 (==Kmn)
            dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
            dL_dpsi2 = None

    return dL_dpsi0, dL_dpsi1, dL_dpsi2


def _compute_psi1Vf(Lm, LB, psi1Vf):
    # back substitute C into psi1Vf
    tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
    _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
    tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
    Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
    return _LBi_Lmi_psi1Vf, Cpsi1Vf


def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
    # the partial derivative vector for the likelihood
    if likelihood.size == 0:
        # save computation here.
        partial_for_likelihood = None
    elif het_noise:
        if uncertain_inputs:
            raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
        else:
            from ...util.linalg import chol_inv
            LBi = chol_inv(LB)
            Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
            _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)

            partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
            partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2

            partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2

            partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
            partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2

    else:
        # likelihood is not heteroscedastic
        partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
        partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
        partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
    return partial_for_likelihood

def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit):
    #compute log marginal likelihood
    if het_noise:
        lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
        lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
    else:
        lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
        lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
    lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
    lik_4 = 0.5 * data_fit
    log_marginal = lik_1 + lik_2 + lik_3 + lik_4
    return log_marginal
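# Collecting the four terms above (homoscedastic branch, a sketch; here
# beta = 1/noise variance, N = num_data, D = output_dim, psi0 summed over the
# data, and C = LB^{-1} Lm^{-1} psi1Vf so that data_fit = tr(C C^T)):
#
#   log p(Y) >= -(N*D/2) log(2*pi/beta) - (beta/2) tr(Y Y^T)
#               - (D/2) (beta*sum(psi0) - tr(A))
#               - D*sum(log(diag(LB))) + (1/2) tr(C C^T)
#
# which is the standard variational DTC lower bound.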
def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT):
    het_noise = beta.size != 1
    num_inducing = Z.shape[0]
    num_data = X.shape[0]
    # kernel computations, using BGPLVM notation
    Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs)
    #factor Kmm # TODO: cache?
    Lm = jitchol(Kmm)
    A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm)
    # factor B
    B = np.eye(num_inducing) + A
    LB = jitchol(B)
    psi1Vf = np.dot(psi1.T, VVT_factor)
    _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf)
    # data fit and derivative of L w.r.t. Kmm
    DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim,
                                                         Lm, B, LB, _LBi_Lmi_psi1Vf)
    # derivatives of L w.r.t. psi
    dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
                                                    VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
                                                    psi1, het_noise, uncertain_inputs)
    # log marginal likelihood
    log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
                                                    psi0, A, LB, trYYT, data_fit)
    #put the gradients in the right places
    partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
                                                             het_noise, uncertain_inputs, LB,
                                                             _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
                                                             psi0, psi1, beta,
                                                             data_fit, num_data, output_dim, trYYT)
    return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood
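_get_YYTfactor above swaps Y for a square Cholesky factor whenever D > N, since only Y*Y.T enters the computations; a quick numpy check of the identity it relies on (np.linalg stand-ins for GPy's tdot/jitchol):

import numpy as np
Y = np.random.randn(3, 50)                  # N < D: the factor is 3x3, not 3x50
L = np.linalg.cholesky(Y.dot(Y.T))          # stand-in for jitchol(tdot(Y))
print np.allclose(L.dot(L.T), Y.dot(Y.T))   # True: L L^T = Y Y^T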
@@ -136,7 +136,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
    part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
    return kern(input_dim, [part])

def white(input_dim,variance=1.):
def white(input_dim,variance=1.,name='white'):
    """
    Construct a white kernel.

@@ -146,7 +146,7 @@ def white(input_dim,variance=1.):
    :type variance: float

    """
    part = parts.white.White(input_dim,variance)
    part = parts.white.White(input_dim,variance,name=name)
    return kern(input_dim, [part])

def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None):
@@ -60,7 +60,7 @@ class Linear(Kernpart):
        self._K_computations(X, None)

    def update_gradients_full(self, dL_dK, X):
        #self.variances.gradient[:] = 0
        self.variances.gradient[:] = 0
        self._param_grad_helper(dL_dK, X, None, self.variances.gradient)

    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
@@ -15,8 +15,8 @@ class White(Kernpart):
    :param variance:
    :type variance: float
    """
    def __init__(self,input_dim,variance=1.):
        super(White, self).__init__(input_dim, 'white')
    def __init__(self,input_dim,variance=1., name='white'):
        super(White, self).__init__(input_dim, name)
        self.input_dim = input_dim
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)
@@ -28,6 +28,7 @@ class GPRegression(GP):
            likelihood = likelihoods.Gaussian()

        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression')
        self.parameters_changed()

    def _getstate(self):
        return GP._getstate(self)
@@ -44,7 +44,7 @@ class GPLVM(GP):
            PC = PCA(Y, input_dim)[0]
            Xr[:PC.shape[0], :PC.shape[1]] = PC
        else:
            raise NotImplementedError
            pass
        return Xr

    def parameters_changed(self):
@@ -132,10 +132,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
    #predict on the frame and plot
    if plot_raw:
        m, _ = model._raw_predict(Xgrid, which_parts=which_parts)
        Y = model.likelihood.Y
        Y = model.Y
    else:
        m, _, _, _ = model.predict(Xgrid, which_parts=which_parts)
        Y = model.likelihood.data
        Y = model.data
    for d in which_data_ycols:
        m_d = m[:,d].reshape(resolution, resolution).T
        ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
@@ -30,7 +30,14 @@ class Cacher(object):

    def on_cache_changed(self, X):
        #print id(X)
        i = self.cached_inputs.index(X)
        Xbase = X
        while Xbase is not None:
            try:
                i = self.cached_inputs.index(Xbase)
                break
            except ValueError:
                Xbase = Xbase.base
                continue
        self.inputs_changed[i] = True
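The new loop in on_cache_changed walks the .base chain so that a changed view of a cached array invalidates the entry of the array it was sliced from; numpy exposes that chain directly:

import numpy as np
a = np.arange(6)
v = a[2:5]              # basic slicing yields a view
print v.base is a       # True: following .base recovers the cached original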
@@ -101,17 +101,17 @@ def jitchol(A, maxtries=5):


def dtrtri(L, lower=1):
    """
    Wrapper for lapack dtrtri function
    Inverse of L

    :param L: Triangular Matrix L
    :param lower: is matrix lower (true) or upper (false)
    :returns: Li, info
    """
    L = force_F_ordered(L)
    return lapack.dtrtri(L, lower=lower)
# def dtrtri(L, lower=1):
#     """
#     Wrapper for lapack dtrtri function
#     Inverse of L
#
#     :param L: Triangular Matrix L
#     :param lower: is matrix lower (true) or upper (false)
#     :returns: Li, info
#     """
#     L = force_F_ordered(L)
#     return lapack.dtrtri(L, lower=lower)

def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
    """
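The dtrtri wrapper is commented out in this commit, but the underlying LAPACK routine stays reachable through scipy; a minimal usage sketch of the raw call:

import numpy as np
from scipy.linalg import lapack
L = np.array([[4., 0.], [2., 3.]])
Li, info = lapack.dtrtri(np.asfortranarray(L), lower=1)   # F-order, as force_F_ordered did
print info == 0 and np.allclose(Li.dot(L), np.eye(2))     # True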
@@ -17,7 +17,7 @@ def common_subarrays(X, axis=0):

    :param :class:`np.ndarray` X: 2d array to check for common subarrays in
    :param int axis: axis to apply subarray detection over.
        When the index is 0, compare rows, columns, otherwise.
        When the index is 0, compare rows -- columns, otherwise.

    Examples:
    =========
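common_subarrays(inan, 1) is what lets VarDTCMissingData above group output columns sharing one missing-data pattern, so each group is factorized once; the grouping idea in plain numpy (illustrative, not GPy's implementation):

import numpy as np
from collections import defaultdict
Y = np.array([[1., np.nan, 3.],
              [4., np.nan, 6.],
              [np.nan, 8., np.nan]])
groups = defaultdict(list)
for j in range(Y.shape[1]):
    groups[tuple(np.isnan(Y[:, j]))].append(j)
print dict(groups)   # columns 0 and 2 share a pattern and land in one group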