Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Nicolo Fusi 2013-05-20 19:22:18 +01:00
commit b59253fe01
12 changed files with 291 additions and 121 deletions

View file

@ -66,7 +66,7 @@ class model(parameterised):
# check constraints are okay # check constraints are okay
if isinstance(what, (priors.gamma, priors.log_Gaussian)): if isinstance(what, (priors.gamma, priors.inverse_gamma, priors.log_Gaussian)):
constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive'] constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive']
if len(constrained_positive_indices): if len(constrained_positive_indices):
constrained_positive_indices = np.hstack(constrained_positive_indices) constrained_positive_indices = np.hstack(constrained_positive_indices)

View file

@ -26,7 +26,6 @@ class Gaussian(prior):
:param mu: mean :param mu: mean
:param sigma: standard deviation :param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
@ -144,7 +143,6 @@ def gamma_from_EV(E,V):
b = E/V b = E/V
return gamma(a,b) return gamma(a,b)
class gamma(prior): class gamma(prior):
""" """
Implementation of the Gamma probability function, coupled with random variables. Implementation of the Gamma probability function, coupled with random variables.
@ -155,7 +153,6 @@ class gamma(prior):
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
def __init__(self,a,b): def __init__(self,a,b):
self.a = float(a) self.a = float(a)
self.b = float(b) self.b = float(b)
@ -183,3 +180,30 @@ class gamma(prior):
def rvs(self,n): def rvs(self,n):
return np.random.gamma(scale=1./self.b,shape=self.a,size=n) return np.random.gamma(scale=1./self.b,shape=self.a,size=n)
class inverse_gamma(prior):
"""
Implementation of the inverse-Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
def __init__(self,a,b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a*np.log(b)
def __str__(self):
return "iGa("+str(np.round(self.a))+', '+str(np.round(self.b))+')'
def lnpdf(self,x):
return self.constant - (self.a+1)*np.log(x) - self.b/x
def lnpdf_grad(self,x):
return -(self.a+1.)/x + self.b/x**2
def rvs(self,n):
return 1./np.random.gamma(scale=1./self.b,shape=self.a,size=n)

View file

@ -39,8 +39,8 @@ class logexp(transformation):
return '(+ve)' return '(+ve)'
class logexp_clipped(transformation): class logexp_clipped(transformation):
max_bound = 1e300 max_bound = 1e250
min_bound = 1e-10 min_bound = 1e-9
log_max_bound = np.log(max_bound) log_max_bound = np.log(max_bound)
log_min_bound = np.log(min_bound) log_min_bound = np.log(min_bound)
def __init__(self, lower=1e-6): def __init__(self, lower=1e-6):
@ -49,11 +49,13 @@ class logexp_clipped(transformation):
def f(self, x): def f(self, x):
exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound)) exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
f = np.log(1. + exp) f = np.log(1. + exp)
if np.isnan(f).any():
import ipdb;ipdb.set_trace()
return f return f
def finv(self, f): def finv(self, f):
return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.) return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.)
def gradfactor(self, f): def gradfactor(self, f):
ef = np.exp(f) ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
gf = (ef - 1.) / ef gf = (ef - 1.) / ef
return np.where(f < self.lower, 0, gf) return np.where(f < self.lower, 0, gf)
def initialize(self, f): def initialize(self, f):

View file

@ -273,8 +273,8 @@ def bgplvm_simulation(optimize='scg',
pylab.figure(); pylab.axis(); m.kern.plot_ARD() pylab.figure(); pylab.axis(); m.kern.plot_ARD()
return m return m
def mrd_simulation(plot_sim=False): def mrd_simulation(optimize=True, plot_sim=False):
D1, D2, D3, N, M, Q = 150, 250, 300, 700, 3, 7 D1, D2, D3, N, M, Q = 150, 250, 30, 300, 3, 7
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd from GPy.models import mrd
@ -292,6 +292,13 @@ def mrd_simulation(plot_sim=False):
m.constrain('variance|noise', logexp_clipped()) m.constrain('variance|noise', logexp_clipped())
m.ensure_default_constraints() m.ensure_default_constraints()
# DEBUG
np.seterr("raise")
if optimize:
print "Optimizing Model:"
m.optimize('scg', messages=1, max_iters=3e3)
return m return m
def brendan_faces(): def brendan_faces():
@ -306,9 +313,7 @@ def brendan_faces():
m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4) m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4)
# optimize # optimize
# m.constrain_fixed('white', 1e-2) m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
# m.constrain_bounded('noise', 1e-6, 10)
m.constrain('rbf', GPy.core.transformations.logexp_clipped())
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=10000) m.optimize('scg', messages=1, max_f_eval=10000)

View file

@ -39,8 +39,10 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
function_eval number of fn evaluations function_eval number of fn evaluations
status: string describing convergence status status: string describing convergence status
""" """
print " SCG"
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters))) if display:
print " SCG"
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters)))
if xtol is None: if xtol is None:
xtol = 1e-6 xtol = 1e-6
@ -85,8 +87,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
# Increase effective curvature and evaluate step size alpha. # Increase effective curvature and evaluate step size alpha.
delta = theta + beta * kappa delta = theta + beta * kappa
if delta <= 0: if delta <= 0:
if display:
print ""
delta = beta * kappa delta = beta * kappa
beta = beta - theta / kappa beta = beta - theta / kappa

View file

@ -13,24 +13,30 @@ from numpy.linalg.linalg import LinAlgError
import itertools import itertools
from matplotlib.colors import colorConverter from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams from matplotlib.figure import SubplotParams
from GPy.inference.optimization import SCG
class Bayesian_GPLVM(sparse_GP, GPLVM): class Bayesian_GPLVM(sparse_GP, GPLVM):
""" """
Bayesian Gaussian Process Latent Variable Model Bayesian Gaussian Process Latent Variable Model
:param Y: observed data :param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray :type Y: np.ndarray| GPy.likelihood instance
:param Q: latent dimensionality :param Q: latent dimensionality
:type Q: int :type Q: int
:param init: initialisation method for the latent space :param init: initialisation method for the latent space
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, def __init__(self, likelihood_or_Y, Q, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=10, _debug=False, Z=None, kernel=None, oldpsave=10, _debug=False,
**kwargs): **kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
if X == None: if X == None:
X = self.initialise_latent(init, Q, Y) X = self.initialise_latent(init, Q, likelihood.Y)
if X_variance is None: if X_variance is None:
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
@ -56,7 +62,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedpsiKmm = [] self._savedpsiKmm = []
self._savedABCD = [] self._savedABCD = []
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs) sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
@property @property
def oldps(self): def oldps(self):
@ -171,9 +177,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self) self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)) return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
def _log_likelihood_normal_gradients(self):
Si, _, _, _ = pdinv(self.X_variance)
def plot_latent(self, which_indices=None, *args, **kwargs): def plot_latent(self, which_indices=None, *args, **kwargs):
if which_indices is None: if which_indices is None:
@ -187,16 +190,46 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
return ax return ax
def do_test_latents(self, Y):
"""
Compute the latent representation for a set of new points Y
Notes:
This will only work with a univariate Gaussian likelihood (for now)
"""
assert not self.likelihood.is_heteroscedastic
N_test = Y.shape[0]
Q = self.Z.shape[1]
means = np.zeros((N_test,Q))
covars = np.zeros((N_test,Q))
dpsi0 = - 0.5 * self.D * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None,:,:] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision*Y
dpsi1 = np.dot(self.Cpsi1V,V.T)
start = np.zeros(self.Q*2)
for n,dpsi1_n in enumerate(dpsi1.T[:,:,None]):
args = (self.kern,self.Z,dpsi0,dpsi1_n,dpsi2)
xopt,fopt,neval,status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display = False)
mu,log_S = xopt.reshape(2,1,-1)
means[n] = mu[0].copy()
covars[n] = np.exp(log_S[0]).copy()
return means, covars
def plot_X_1d(self, fig=None, axes=None, fig_num="LVM mu S 1d", colors=None): def plot_X_1d(self, fig=None, axes=None, fig_num="LVM mu S 1d", colors=None):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
-if fig is given, create Q subplots in fig and plot in these -if fig is given, create Q subplots in fig and plot in these
-if axes is given plot Q 1D latent space plots of X into each `axis` -if axes is given plot Q 1D latent space plots of X into each `axis`
-if neither fig nor axes is given create a figure with fig_num and plot in there -if neither fig nor axes is given create a figure with fig_num and plot in there
colors: colors:
colors of different latent space dimensions Q colors of different latent space dimensions Q
""" """
import pylab import pylab
@ -486,3 +519,63 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion) cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7 return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7
def latent_cost_and_grad(mu_S, kern,Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points
(negative log-likelihood: should be minimised!)
"""
mu,log_S = mu_S.reshape(2,1,-1)
S = np.exp(log_S)
psi0 = kern.psi0(Z,mu,S)
psi1 = kern.psi1(Z,mu,S)
psi2 = kern.psi2(Z,mu,S)
lik = dL_dpsi0*psi0 + np.dot(dL_dpsi1.flatten(),psi1.flatten()) + np.dot(dL_dpsi2.flatten(),psi2.flatten()) - 0.5*np.sum(np.square(mu) + S) + 0.5*np.sum(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0,Z,mu,S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1,Z,mu,S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2,Z,mu,S)
dmu = mu0 + mu1 + mu2 - mu
#dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S*(S0 + S1 + S2 -0.5) + .5
return -lik,-np.hstack((dmu.flatten(),dlnS.flatten()))
def latent_cost(mu_S, kern,Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
This is the same as latent_cost_and_grad but only for the objective
"""
mu,log_S = mu_S.reshape(2,1,-1)
S = np.exp(log_S)
psi0 = kern.psi0(Z,mu,S)
psi1 = kern.psi1(Z,mu,S)
psi2 = kern.psi2(Z,mu,S)
lik = dL_dpsi0*psi0 + np.dot(dL_dpsi1.flatten(),psi1.flatten()) + np.dot(dL_dpsi2.flatten(),psi2.flatten()) - 0.5*np.sum(np.square(mu) + S) + 0.5*np.sum(log_S)
return -float(lik)
def latent_grad(mu_S, kern,Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
This is the same as latent_cost_and_grad but only for the grad
"""
mu,log_S = mu_S.reshape(2,1,-1)
S = np.exp(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0,Z,mu,S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1,Z,mu,S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2,Z,mu,S)
dmu = mu0 + mu1 + mu2 - mu
#dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S*(S0 + S1 + S2 -0.5) + .5
return -np.hstack((dmu.flatten(),dlnS.flatten()))

View file

@ -15,11 +15,11 @@ import pylab
class MRD(model): class MRD(model):
""" """
Do MRD on given Datasets in Ylist. Do MRD on given Datasets in Ylist.
All Ys in Ylist are in [N x Dn], where Dn can be different per Yn, All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn,
N must be shared across datasets though. N must be shared across datasets though.
:param Ylist...: observed datasets :param likelihood_list...: likelihoods of observed datasets
:type Ylist: [np.ndarray] :type likelihood_list: [GPy.likelihood]
:param names: names for different gplvm models :param names: names for different gplvm models
:type names: [str] :type names: [str]
:param Q: latent dimensionality (will raise :param Q: latent dimensionality (will raise
@ -41,8 +41,9 @@ class MRD(model):
:param kernel: :param kernel:
kernel to use kernel to use
""" """
#TODO allow different kernels for different outputs
def __init__(self, *Ylist, **kwargs): #def __init__(self, *Ylist, **kwargs):
def __init__(self, *likelihood_list, **kwargs):
if kwargs.has_key("_debug"): if kwargs.has_key("_debug"):
self._debug = kwargs['_debug'] self._debug = kwargs['_debug']
del kwargs['_debug'] del kwargs['_debug']
@ -52,7 +53,7 @@ class MRD(model):
self.names = kwargs['names'] self.names = kwargs['names']
del kwargs['names'] del kwargs['names']
else: else:
self.names = ["{}".format(i + 1) for i in range(len(Ylist))] self.names = ["{}".format(i + 1) for i in range(len(likelihood_list))]
if kwargs.has_key('kernel'): if kwargs.has_key('kernel'):
kernel = kwargs['kernel'] kernel = kwargs['kernel']
k = lambda: kernel.copy() k = lambda: kernel.copy()
@ -80,9 +81,10 @@ class MRD(model):
self.M = 10 self.M = 10
self._init = True self._init = True
X = self._init_X(initx, Ylist) X = self._init_X(initx, likelihood_list)
Z = self._init_Z(initz, X) Z = self._init_Z(initz, X)
self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), X=X, Z=Z, M=self.M, **kwargs) for Y in Ylist] self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), X=X, Z=Z, M=self.M, **kwargs) for Y in likelihood_list]
del self._init del self._init
self.gref = self.bgplvms[0] self.gref = self.bgplvms[0]
@ -126,11 +128,11 @@ class MRD(model):
if not self._init: if not self._init:
raise AttributeError("bgplvm list not initialized") raise AttributeError("bgplvm list not initialized")
@property @property
def Ylist(self): def likelihood_list(self):
return [g.likelihood.Y for g in self.bgplvms] return [g.likelihood.Y for g in self.bgplvms]
@Ylist.setter @likelihood_list.setter
def Ylist(self, Ylist): def likelihood_list(self, likelihood_list):
for g, Y in itertools.izip(self.bgplvms, Ylist): for g, Y in itertools.izip(self.bgplvms, likelihood_list):
g.likelihood.Y = Y g.likelihood.Y = Y
@property @property
@ -152,7 +154,7 @@ class MRD(model):
def randomize(self, initx='concat', initz='permute', *args, **kw): def randomize(self, initx='concat', initz='permute', *args, **kw):
super(MRD, self).randomize(*args, **kw) super(MRD, self).randomize(*args, **kw)
self._init_X(initx, self.Ylist) self._init_X(initx, self.likelihood_list)
self._init_Z(initz, self.X) self._init_Z(initz, self.X)
def _get_param_names(self): def _get_param_names(self):
@ -225,6 +227,10 @@ class MRD(model):
# g._computations() # g._computations()
def update_likelihood_approximation(self):#TODO: object oriented vs script base
for bgplvm in self.bgplvms:
bgplvm.update_likelihood_approximation()
def log_likelihood(self): def log_likelihood(self):
ll = -self.gref.KL_divergence() ll = -self.gref.KL_divergence()
for g in self.bgplvms: for g in self.bgplvms:
@ -246,17 +252,18 @@ class MRD(model):
partial=g.partial_for_likelihood)]) \ partial=g.partial_for_likelihood)]) \
for g in self.bgplvms]))) for g in self.bgplvms])))
def _init_X(self, init='PCA', Ylist=None): def _init_X(self, init='PCA', likelihood_list=None):
if Ylist is None: if likelihood_list is None:
Ylist = self.Ylist likelihood_list = self.likelihood_list
if init in "PCA_single": if init in "PCA_single":
X = numpy.zeros((Ylist[0].shape[0], self.Q)) X = numpy.zeros((likelihood_list[0].Y.shape[0], self.Q))
for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(Ylist)), Ylist): for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(likelihood_list)), likelihood_list):
X[:, qs] = PCA(Y, len(qs))[0] X[:, qs] = PCA(Y.Y, len(qs))[0]
elif init in "PCA_concat": elif init in "PCA_concat":
X = PCA(numpy.hstack(Ylist), self.Q)[0] X = PCA(numpy.hstack([l.Y for l in likelihood_list]), self.Q)[0]
#X = PCA(numpy.hstack(likelihood_list), self.Q)[0]
else: # init == 'random': else: # init == 'random':
X = numpy.random.randn(Ylist[0].shape[0], self.Q) X = numpy.random.randn(likelihood_list[0].Y.shape[0], self.Q)
self.X = X self.X = X
return X return X
@ -294,8 +301,8 @@ class MRD(model):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X)) fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
return fig return fig
def plot_predict(self, fig_num="MRD Predictions", axes=None): def plot_predict(self, fig_num="MRD Predictions", axes=None, **kwargs):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0])) fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0],**kwargs))
return fig return fig
def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs): def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs):

View file

@ -3,11 +3,12 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides,chol_inv
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from GP import GP from GP import GP
from scipy import linalg from scipy import linalg
from ..likelihoods import Gaussian
class sparse_GP(GP): class sparse_GP(GP):
""" """
@ -16,9 +17,9 @@ class sparse_GP(GP):
:param X: inputs :param X: inputs
:type X: np.ndarray (N x Q) :type X: np.ndarray (N x Q)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP) :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel/covariance function. See link kernels :param kernel : the kernel (covariance function). See link kernels
:type kernel: a GPy kernel :type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance) :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x Q) | None :type X_variance: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
@ -30,8 +31,6 @@ class sparse_GP(GP):
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
# self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
# self.auto_scale_factor = False
self.Z = Z self.Z = Z
self.M = Z.shape[0] self.M = Z.shape[0]
self.likelihood = likelihood self.likelihood = likelihood
@ -63,49 +62,29 @@ class sparse_GP(GP):
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
# sf = self.scale_factor
# sf2 = sf ** 2
# factor Kmm # factor Kmm
self.Lm = jitchol(self.Kmm) self.Lm = jitchol(self.Kmm)
# The rather complex computations of self.A # The rather complex computations of self.A
if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs:
assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
# psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues"
tmp = evecs * np.sqrt(clipped_evals)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
else: else:
# tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf) psi2_beta = self.psi2.sum(0) * self.likelihood.precision
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N))) evals, evecs = linalg.eigh(psi2_beta)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
self.A = tdot(tmp) tmp = evecs * np.sqrt(clipped_evals)
else: else:
if self.has_uncertain_inputs: if self.likelihood.is_heteroscedastic:
# psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0) tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision)).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues"
tmp = evecs * np.sqrt(clipped_evals)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
else: else:
# tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
# factor B # factor B
# self.B = np.eye(self.M) / sf2 + self.A
self.B = np.eye(self.M) + self.A self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B) self.LB = jitchol(self.B)
@ -121,8 +100,6 @@ class sparse_GP(GP):
# Compute dL_dKmm # Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V) tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp) self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp)
# tmp = -0.5 * self.DBi_plus_BiPBi / sf2
# tmp += -0.5 * self.B * sf2 * self.D
tmp = -0.5 * self.DBi_plus_BiPBi tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.D tmp += -0.5 * self.B * self.D
tmp += self.D * np.eye(self.M) tmp += self.D * np.eye(self.M)
@ -132,9 +109,10 @@ class sparse_GP(GP):
self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten() self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T) self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision[:, None, None] * dL_dpsi2_beta[None, :, :] self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else: else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N)) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
self.dL_dpsi2 = None self.dL_dpsi2 = None
@ -158,7 +136,6 @@ class sparse_GP(GP):
else: else:
# likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
# self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
@ -166,16 +143,12 @@ class sparse_GP(GP):
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
# sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
# C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D return A + B + C + D
@ -185,14 +158,6 @@ class sparse_GP(GP):
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
# if self.auto_scale_factor:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
# if self.auto_scale_factor:
# if self.likelihood.is_heteroscedastic:
# self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
# else:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
# self.scale_factor = 100.
self._computations() self._computations()
def _get_params(self): def _get_params(self):
@ -205,16 +170,22 @@ class sparse_GP(GP):
""" """
Approximates a non-gaussian likelihood using Expectation Propagation Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required: For a Gaussian likelihood, no iteration is required:
this function does nothing this function does nothing
""" """
if self.has_uncertain_inputs: if not isinstance(self.likelihood,Gaussian): #Updates not needed for Gaussian likelihood
raise NotImplementedError, "EP approximation not implemented for uncertain inputs" self.likelihood.restart() #TODO check consistency with pseudo_EP
else: if self.has_uncertain_inputs:
self.likelihood.fit_DTC(self.Kmm, self.psi1) Lmi = chol_inv(self.Lm)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) Kmmi = tdot(Lmi.T)
self._set_params(self._get_params()) # update the GP diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2,Kmmi)])
self.likelihood.fit_FITC(self.Kmm,self.psi1,diag_tr_psi2Kmmi) #This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
#raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood))) return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
@ -246,20 +217,33 @@ class sparse_GP(GP):
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X) dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
return dL_dZ return dL_dZ
def _raw_predict(self, Xnew, which_parts='all', full_cov=False): def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization""" """Internal helper function for making predictions, does not account for normalization"""
Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi) symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) if X_variance_new is None:
mu = np.dot(Kx.T, self.Cpsi1V) # / self.scale_factor) Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
if full_cov: mu = np.dot(Kx.T, self.Cpsi1V)
Kxx = self.kern.K(Xnew, which_parts=which_parts) if full_cov:
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else: else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)#, which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z,Xnew,X_variance_new)
psi2 = self.kern.psi2(self.Z,Xnew,X_variance_new)
var = Kxx - np.sum(np.sum(psi2*Kmmi_LmiBLmi[None,:,:],1),1)
return mu, var[:, None] return mu, var[:, None]

View file

@ -21,8 +21,9 @@ class MRDTests(unittest.TestCase):
K = k.K(X) K = k.K(X)
Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M) m = GPy.models.MRD(*likelihood_list, Q=Q, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -5,6 +5,8 @@ import GPy
import scipy.sparse import scipy.sparse
import scipy.io import scipy.io
import cPickle as pickle import cPickle as pickle
import urllib2 as url
data_path = os.path.join(os.path.dirname(__file__), 'datasets') data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000 default_seed = 10000
@ -15,6 +17,25 @@ def sample_class(f):
c = np.where(c, 1, -1) c = np.where(c, 1, -1)
return c return c
def fetch_dataset(resource, save_name = None, save_file = True, messages = True):
if messages:
print "Downloading resource: " , resource, " ... "
response = url.urlopen(resource)
# TODO: Some error checking...
# ...
html = response.read()
response.close()
if save_file:
# TODO: Check if already exists...
# ...
with open(save_name, "w") as text_file:
text_file.write("%s"%html)
if messages:
print "Done!"
return html
def della_gatta_TRP63_gene_expression(gene_number=None): def della_gatta_TRP63_gene_expression(gene_number=None):
mat_data = scipy.io.loadmat(os.path.join(data_path, 'DellaGattadata.mat')) mat_data = scipy.io.loadmat(os.path.join(data_path, 'DellaGattadata.mat'))
X = np.double(mat_data['timepoints']) X = np.double(mat_data['timepoints'])

View file

@ -236,7 +236,7 @@ def tdot(*args, **kwargs):
else: else:
return tdot_numpy(*args,**kwargs) return tdot_numpy(*args,**kwargs)
def DSYR(A,x,alpha=1.): def DSYR_blas(A,x,alpha=1.):
""" """
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
@ -258,6 +258,26 @@ def DSYR(A,x,alpha=1.):
x_, byref(INCX), A_, byref(LDA)) x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True) symmetrify(A,upper=True)
def DSYR_numpy(A,x,alpha=1.):
"""
Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T)
Arguments
---------
:param A: Symmetric NxN np.array
:param x: Nx1 np.array
:param alpha: scalar
"""
A += alpha*np.dot(x[:,None],x[None,:])
def DSYR(*args, **kwargs):
if _blas_available:
return DSYR_blas(*args,**kwargs)
else:
return DSYR_numpy(*args,**kwargs)
def symmetrify(A,upper=False): def symmetrify(A,upper=False):
""" """
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper

13
GPy/util/mocap_fetch.py Normal file
View file

@ -0,0 +1,13 @@
import GPy
import urllib2
# TODO...
class mocap_fetch(base_url = 'http://mocap.cs.cmu.edu:8080/subjects/', skel_store_dir = './', motion_store_dir = './'):
def __init__(self):
self.base_url = base_url
self.store_dir = store_dir
self.motion_dict = []
def fetch_motions(self, motion_dict = None):
response = urllib2.urlopen(...)
html = response.read()