Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in:
mzwiessele 2015-07-29 10:48:27 +02:00
commit d6defa6645
9 changed files with 92 additions and 92 deletions

View file

@ -244,7 +244,7 @@ class GP(Model):
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
@ -261,7 +261,7 @@ class GP(Model):
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
def predictive_gradients(self, Xnew):
"""
@ -331,7 +331,7 @@ class GP(Model):
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
Ysim = self.likelihood.samples(fsim, Y_metadata)
Ysim = self.likelihood.samples(fsim, Y_metadata=Y_metadata)
return Ysim
def plot_f(self, plot_limits=None, which_data_rows='all',
@ -473,16 +473,16 @@ class GP(Model):
self.inference_method.on_optimization_end()
raise
def infer_newX(self, Y_new, optimize=True, ):
def infer_newX(self, Y_new, optimize=True):
"""
Infer the distribution of X for the new observed data *Y_new*.
Infer X for the new observed data *Y_new*.
:param Y_new: the new observed data for inference
:type Y_new: numpy.ndarray
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the posterior estimation of X and the model that optimize X
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` and numpy.ndarray, :class:`~GPy.core.model.Model`)
"""
from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize)

View file

@ -522,16 +522,9 @@ class DGPLVM(Prior):
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __new__(cls, sigma2, lbl, x_shape):
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
@ -843,7 +836,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
import pdb
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
@ -909,8 +902,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
@ -933,8 +926,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)

View file

@ -217,9 +217,8 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
elif model_type == 'FITC':
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 3.
if optimize:
m.pseudo_EM()
m.optimize()
if plot:
m.plot()

View file

@ -215,6 +215,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
return m
def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
"""Simulate some data drawn from a matern covariance and a periodic exponential for use in MRD demos."""
Q_signal = 4
import GPy
import numpy as np
@ -254,6 +255,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
return slist, [S1, S2, S3], Ylist
def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
"""Simulate some data drawn from sine and cosine for use in demos of MRD"""
_np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
@ -402,7 +404,8 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy.models import MRD
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim)
# Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True)
@ -585,6 +588,7 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
return m
def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
"""Interactive visualisation of the Stick Man data from Ohio State University with the Bayesian GPLVM."""
from GPy.models import BayesianGPLVM
from matplotlib import pyplot as plt
import numpy as np
@ -613,7 +617,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
fig.canvas.draw()
fig.canvas.show()
# Canvas.show doesn't work on OSX.
#fig.canvas.show()
raw_input('Press enter to finish')
return m

View file

@ -27,12 +27,19 @@ def infer_newX(model, Y_new, optimize=True, init='L2'):
class InferenceX(Model):
"""
The class for inference of new X with given new Y. (do_test_latent)
The model class for inference of new X with given new Y. (replacing the "do_test_latent" in Bayesian GPLVM)
It is a tiny inference model created from the original GP model. The kernel, likelihood (only Gaussian is supported at the moment)
and posterior distribution are taken from the original model.
For Regression models and GPLVM, a point estimate of the latent variable X will be inferred.
For Bayesian GPLVM, the variational posterior of X will be inferred.
X is inferred through a gradient optimization of the inference model.
:param model: the GPy model used in inference
:type model: GPy.core.Model
:param Y: the new observed data for inference
:type Y: numpy.ndarray
:param init: the distance metric of Y for initializing X with the nearest neighbour.
:type init: 'L2', 'NCC' and 'rand'
"""
def __init__(self, model, Y, name='inferenceX', init='L2'):
if np.isnan(Y).any() or getattr(model, 'missing_data', False):

View file

@ -139,10 +139,6 @@ class Laplace(LatentFunctionInference):
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat
#self.Ki_fhat = Ki_fhat
#self.K = K.copy()
#Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
@ -298,6 +294,11 @@ class Laplace(LatentFunctionInference):
else:
dL_dthetaL = np.zeros(likelihood.size)
#Cache some things for speedy LOO
self.Ki_W_i = Ki_W_i
self.K = K
self.W = W
self.f_hat = f_hat
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):

View file

@ -298,13 +298,8 @@ class Likelihood(Parameterized):
return self.conditional_mean(f)*p
scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError("implement this function to make predictions")
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
"""
Approximation to the predictive variance: V(Y_star)
@ -608,23 +603,30 @@ class Likelihood(Parameterized):
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
"""
pred_mean = self.predictive_mean(mu, var, Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
try:
pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
except NotImplementedError:
print "Finding predictive mean and variance via sampling rather than quadrature"
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
pred_mean = np.mean(ss_y, axis=1)[:, None]
pred_var = np.var(ss_y, axis=1)[:, None]
return pred_mean, pred_var
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!!
N_samp = 500
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
#ss_y = self.samples(s, Y_metadata, samples=100)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
#ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
return pred_quantiles
def samples(self, gp, Y_metadata=None, samples=1):
"""

View file

@ -9,13 +9,14 @@ from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..likelihoods import Gaussian
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
"""
The Variational Gaussian Approximation revisited implementation for regression
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
@ -25,44 +26,29 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel=None):
Model.__init__(self,'Variational GP classification')
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
Model.__init__(self,'Variational GP')
if likelihood is None:
likelihood = Gaussian()
# accept the construction arguments
self.X = ObsAr(X)
if kernel is None:
kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
self.kern = kernel
self.link_parameter(self.kern)
self.Y = Y
self.num_data, self.input_dim = self.X.shape
self.Y_metadata = Y_metadata
self.alpha = Param('alpha', np.zeros(self.num_data))
self.kern = kernel
self.likelihood = likelihood
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
self.Ysign = np.where(Y==1, 1, -1).flatten()
def log_likelihood(self):
"""
Marginal log likelihood evaluation
"""
return self._log_lik
def likelihood_quadrature(self, m, v):
"""
Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
"""
# assume probit for now.
X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
p = stats.norm.cdf(X)
N = stats.norm.pdf(X)
F = np.log(p).dot(self.gh_w)
NoverP = N/p
dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
return F, dF_dm, dF_dv
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
@ -71,13 +57,14 @@ class GPVariationalGaussianApproximation(Model):
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma)
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
@ -86,12 +73,12 @@ class GPVariationalGaussianApproximation(Model):
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def predict(self, Xnew):
def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
@ -105,4 +92,4 @@ class GPVariationalGaussianApproximation(Model):
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
return mu, var.reshape(-1,1)

View file

@ -107,11 +107,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
upper = m + 2*np.sqrt(v)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=meta)
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
for d in which_data_ycols:
@ -120,7 +122,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples)
Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
print Ysim.shape
print Xnew.shape
for yi in Ysim.T:
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
@ -185,10 +189,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
m, _ = model._raw_predict(Xgrid, **predict_kw)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta, **predict_kw)
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)