diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 898c7b58..5ca86d9e 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -244,7 +244,7 @@ class GP(Model):
             mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
 
         # now push through likelihood
-        mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
+        mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
         return mean, var
 
     def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
@@ -261,7 +261,7 @@ class GP(Model):
         m, v = self._raw_predict(X, full_cov=False)
         if self.normalizer is not None:
             m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
-        return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
+        return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
 
     def predictive_gradients(self, Xnew):
         """
@@ -331,7 +331,7 @@ class GP(Model):
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
         fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
-        Ysim = self.likelihood.samples(fsim, Y_metadata)
+        Ysim = self.likelihood.samples(fsim, Y_metadata=Y_metadata)
         return Ysim
 
     def plot_f(self, plot_limits=None, which_data_rows='all',
@@ -473,16 +473,16 @@ class GP(Model):
             self.inference_method.on_optimization_end()
             raise
 
-    def infer_newX(self, Y_new, optimize=True, ):
+    def infer_newX(self, Y_new, optimize=True):
         """
-        Infer the distribution of X for the new observed data *Y_new*.
+        Infer X for the new observed data *Y_new*.
 
         :param Y_new: the new observed data for inference
         :type Y_new: numpy.ndarray
         :param optimize: whether to optimize the location of new X (True by default)
         :type optimize: boolean
         :return: a tuple containing the posterior estimation of X and the model that optimize X
-        :rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
+        :rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` and numpy.ndarray, :class:`~GPy.core.model.Model`)
         """
         from ..inference.latent_function_inference.inferenceX import infer_newX
         return infer_newX(self, Y_new, optimize=optimize)
diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index c748f0df..83c83dfd 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -522,16 +522,9 @@ class DGPLVM(Prior):
 
     """
     domain = _REAL
-    # _instances = []
-    # def __new__(cls, mu, sigma): # Singleton:
-    #     if cls._instances:
-    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
-    #         for instance in cls._instances:
-    #             if instance().mu == mu and instance().sigma == sigma:
-    #                 return instance()
-    #     o = super(Prior, cls).__new__(cls, mu, sigma)
-    #     cls._instances.append(weakref.ref(o))
-    #     return cls._instances[-1]()
+
+    def __new__(cls, sigma2, lbl, x_shape):
+        return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
 
     def __init__(self, sigma2, lbl, x_shape):
         self.sigma2 = sigma2
@@ -843,7 +836,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
 
     # Calculating beta and Bi for Sb
     def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
-        import pdb
+        # import pdb
         # pdb.set_trace()
         B_i = np.zeros((self.classnum, self.dim))
         Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
@@ -909,8 +902,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
         Sw = self.compute_Sw(cls, M_i)
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
-        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
+        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
         return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
 
     # This function calculates derivative of the log of prior function
@@ -933,8 +926,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
         # Calculating inverse of Sb and its transpose and minus
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
-        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
+        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
         Sb_inv_N_trans = np.transpose(Sb_inv_N)
         Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
         Sw_trans = np.transpose(Sw)
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index d4518f24..3aa022e0 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -217,9 +217,8 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
     elif model_type == 'FITC':
         m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
         m['.*len'] = 3.
-
     if optimize:
-        m.pseudo_EM()
+        m.optimize()
 
     if plot:
         m.plot()
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index cccb2496..745c2c24 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -215,6 +215,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
     return m
 
 def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
+    """Simulate some data drawn from a matern covariance and a periodic exponential for use in MRD demos."""
     Q_signal = 4
     import GPy
     import numpy as np
@@ -254,6 +255,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
     return slist, [S1, S2, S3], Ylist
 
 def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
+    """Simulate some data drawn from sine and cosine for use in demos of MRD"""
    _np.random.seed(1234)
 
     x = _np.linspace(0, 4 * _np.pi, N)[:, None]
@@ -402,7 +404,8 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
     from GPy.models import MRD
 
     D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
-    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
+    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim)
+    # Ylist = [Ylist[0]]
 
     k = kern.Linear(Q, ARD=True)
 
@@ -585,6 +588,7 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
     return m
 
 def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
+    """Interactive visualisation of the Stick Man data from Ohio State University with the Bayesian GPLVM."""
     from GPy.models import BayesianGPLVM
     from matplotlib import pyplot as plt
     import numpy as np
@@ -613,7 +617,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
         data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
         dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
         fig.canvas.draw()
-        fig.canvas.show()
+        # Canvas.show doesn't work on OSX.
+        #fig.canvas.show()
         raw_input('Press enter to finish')
     return m
 
diff --git a/GPy/inference/latent_function_inference/inferenceX.py b/GPy/inference/latent_function_inference/inferenceX.py
index a8ed2d09..19013b06 100644
--- a/GPy/inference/latent_function_inference/inferenceX.py
+++ b/GPy/inference/latent_function_inference/inferenceX.py
@@ -27,12 +27,19 @@ def infer_newX(model, Y_new, optimize=True, init='L2'):
 
 class InferenceX(Model):
     """
-    The class for inference of new X with given new Y. (do_test_latent)
+    The model class for inference of new X with given new Y. (replacing the "do_test_latent" in Bayesian GPLVM)
+    It is a tiny inference model created from the original GP model. The kernel, likelihood (only Gaussian is supported at the moment)
+    and posterior distribution are taken from the original model.
+    For Regression models and GPLVM, a point estimate of the latent variable X will be inferred.
+    For Bayesian GPLVM, the variational posterior of X will be inferred.
+    X is inferred through a gradient optimization of the inference model.
 
     :param model: the GPy model used in inference
     :type model: GPy.core.Model
     :param Y: the new observed data for inference
     :type Y: numpy.ndarray
+    :param init: the distance metric of Y for initializing X with the nearest neighbour.
+    :type init: 'L2', 'NCC' and 'rand'
     """
     def __init__(self, model, Y, name='inferenceX', init='L2'):
         if np.isnan(Y).any() or getattr(model, 'missing_data', False):
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index aefc82ac..902d5fff 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -139,10 +139,6 @@ class Laplace(LatentFunctionInference):
 
         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
 
-        self.f_hat = f_hat
-        #self.Ki_fhat = Ki_fhat
-        #self.K = K.copy()
-
         #Compute hessian and other variables at mode
         log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
 
@@ -298,6 +294,11 @@ class Laplace(LatentFunctionInference):
         else:
             dL_dthetaL = np.zeros(likelihood.size)
 
+        #Cache some things for speedy LOO
+        self.Ki_W_i = Ki_W_i
+        self.K = K
+        self.W = W
+        self.f_hat = f_hat
         return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
 
     def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index d41468ab..67a6a3a3 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -298,13 +298,8 @@ class Likelihood(Parameterized):
             return self.conditional_mean(f)*p
         scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
-
         return mean
 
-    def _conditional_mean(self, f):
-        """Quadrature calculation of the conditional mean: E(Y_star|f)"""
-        raise NotImplementedError("implement this function to make predictions")
-
     def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
         """
         Approximation to the predictive variance: V(Y_star)
@@ -608,23 +603,30 @@ class Likelihood(Parameterized):
         :param full_cov: whether to use the full covariance or just the diagonal
         :type full_cov: Boolean
         """
-
-        pred_mean = self.predictive_mean(mu, var, Y_metadata)
-        pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
+        try:
+            pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
+            pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
+        except NotImplementedError:
+            print "Finding predictive mean and variance via sampling rather than quadrature"
+            Nf_samp = 300
+            Ny_samp = 1
+            s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
+            ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
+            pred_mean = np.mean(ss_y, axis=1)[:, None]
+            pred_var = np.var(ss_y, axis=1)[:, None]
         return pred_mean, pred_var
 
     def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
         #compute the quantiles by sampling!!!
-        N_samp = 500
-        s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
-        #ss_f = s.flatten()
-        #ss_y = self.samples(ss_f, Y_metadata)
-        #ss_y = self.samples(s, Y_metadata, samples=100)
-        ss_y = self.samples(s, Y_metadata)
-        #ss_y = ss_y.reshape(mu.shape[0], N_samp)
+        Nf_samp = 300
+        Ny_samp = 1
+        s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
+        ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
+        #ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
 
-        return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
+        pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
+        return pred_quantiles
 
     def samples(self, gp, Y_metadata=None, samples=1):
         """
diff --git a/GPy/models/gp_var_gauss.py b/GPy/models/gp_var_gauss.py
index cd688360..729b6bb8 100644
--- a/GPy/models/gp_var_gauss.py
+++ b/GPy/models/gp_var_gauss.py
@@ -9,13 +9,14 @@ from ..core.parameterization import ObsAr
 from .. import kern
 from ..core.parameterization.param import Param
 from ..util.linalg import pdinv
+from ..likelihoods import Gaussian
 
 log_2_pi = np.log(2*np.pi)
 
 
 class GPVariationalGaussianApproximation(Model):
     """
-    The Variational Gaussian Approximation revisited implementation for regression
+    The Variational Gaussian Approximation revisited
 
     @article{Opper:2009,
         title = {The Variational Gaussian Approximation Revisited},
@@ -25,44 +26,29 @@ class GPVariationalGaussianApproximation(Model):
         pages = {786--792},
     }
     """
-    def __init__(self, X, Y, kernel=None):
-        Model.__init__(self,'Variational GP classification')
+    def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
+        Model.__init__(self,'Variational GP')
+        if likelihood is None:
+            likelihood = Gaussian()
 
         # accept the construction arguments
         self.X = ObsAr(X)
-        if kernel is None:
-            kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
-        self.kern = kernel
-        self.link_parameter(self.kern)
 
+        self.Y = Y
         self.num_data, self.input_dim = self.X.shape
+        self.Y_metadata = Y_metadata
 
-        self.alpha = Param('alpha', np.zeros(self.num_data))
+        self.kern = kernel
+        self.likelihood = likelihood
+        self.link_parameter(self.kern)
+        self.link_parameter(self.likelihood)
+
+        self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
         self.beta = Param('beta', np.ones(self.num_data))
         self.link_parameter(self.alpha)
         self.link_parameter(self.beta)
 
-        self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
-        self.Ysign = np.where(Y==1, 1, -1).flatten()
 
     def log_likelihood(self):
-        """
-        Marginal log likelihood evaluation
-        """
         return self._log_lik
 
-    def likelihood_quadrature(self, m, v):
-        """
-        Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
-        """
-        # assume probit for now.
-        X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
-        p = stats.norm.cdf(X)
-        N = stats.norm.pdf(X)
-        F = np.log(p).dot(self.gh_w)
-        NoverP = N/p
-        dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
-        dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
-        return F, dF_dm, dF_dv
-
     def parameters_changed(self):
         K = self.kern.K(self.X)
         m = K.dot(self.alpha)
@@ -71,13 +57,14 @@ class GPVariationalGaussianApproximation(Model):
         A = np.eye(self.num_data) + BKB
         Ai, LA, _, Alogdet = pdinv(A)
         Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
-        var = np.diag(Sigma)
+        var = np.diag(Sigma).reshape(-1,1)
 
-        F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
+        F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
+        self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
         dF_da = np.dot(K, dF_dm)
         SigmaB = Sigma*self.beta
-        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
-        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
+        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
+        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
         dKL_da = m
         A_A2 = Ai - Ai.dot(Ai)
         dKL_db = np.diag(np.dot(KB.T, A_A2))
@@ -86,12 +73,12 @@ class GPVariationalGaussianApproximation(Model):
         self.beta.gradient = dF_db - dKL_db
 
         # K-gradients
-        dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
+        dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
         tmp = Ai*self.beta[:, None]/self.beta[None, :]
-        dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
+        dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
         self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
 
-    def predict(self, Xnew):
+    def _raw_predict(self, Xnew):
         """
         Predict the function(s) at the new point(s) Xnew.
 
@@ -105,4 +92,4 @@ class GPVariationalGaussianApproximation(Model):
         Kxx = self.kern.Kdiag(Xnew)
         var = Kxx - np.sum(WiKux*Kux, 0)
 
-        return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
+        return mu, var.reshape(-1,1)
diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py
index 77c42825..9f841372 100644
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@@ -107,11 +107,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
             upper = m + 2*np.sqrt(v)
         else:
             if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
-                meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
-            else:
-                meta = None
-            m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta, **predict_kw)
-            lower, upper = model.predict_quantiles(Xgrid, Y_metadata=meta)
+                extra_data = Xgrid[:,-1:].astype(np.int)
+                if Y_metadata is None:
+                    Y_metadata = {'output_index': extra_data}
+                else:
+                    Y_metadata['output_index'] = extra_data
+            m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
+            lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
 
         for d in which_data_ycols:
 
@@ -120,7 +122,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         #optionally plot some samples
         if samples: #NOTE not tested with fixed_inputs
-            Ysim = model.posterior_samples(Xgrid, samples)
+            Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
+            print Ysim.shape
+            print Xnew.shape
             for yi in Ysim.T:
                 plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
                 #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
@@ -185,10 +189,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
             m, _ = model._raw_predict(Xgrid, **predict_kw)
         else:
             if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
-                meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
-            else:
-                meta = None
-            m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta, **predict_kw)
+                extra_data = Xgrid[:,-1:].astype(np.int)
+                if Y_metadata is None:
+                    Y_metadata = {'output_index': extra_data}
+                else:
+                    Y_metadata['output_index'] = extra_data
+            m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
         for d in which_data_ycols:
             m_d = m[:,d].reshape(resolution, resolution).T
             plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
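
For reference, below is a minimal standalone sketch of the Monte Carlo fallback that Likelihood.predictive_values and Likelihood.predictive_quantiles above now share: draw Nf_samp latent-function values per test point from N(mu, var), push them through the likelihood, and take empirical moments and percentiles over the sample axis. The helper name sampled_quantiles and the probit-Bernoulli push-through (standing in for likelihood.samples) are illustrative assumptions, not part of this patch.

import numpy as np
from scipy.stats import norm

def sampled_quantiles(mu, var, quantiles=(2.5, 97.5), Nf_samp=300):
    # mu, var: (N, 1) arrays of latent-function means and variances at the test points
    # draw Nf_samp latent function samples per test point
    s = np.random.randn(mu.shape[0], Nf_samp) * np.sqrt(var) + mu
    # push the latent samples through an assumed probit-Bernoulli likelihood
    # (in the patch this step is likelihood.samples(s, Y_metadata, samples=Ny_samp))
    ss_y = np.random.binomial(1, norm.cdf(s))
    # empirical mean, variance and percentiles over the sample axis
    pred_mean = np.mean(ss_y, axis=1)[:, None]
    pred_var = np.var(ss_y, axis=1)[:, None]
    pred_quantiles = [np.percentile(ss_y, q, axis=1)[:, None] for q in quantiles]
    return pred_mean, pred_var, pred_quantiles

mu, var = np.zeros((5, 1)), np.ones((5, 1))
mean, variance, (lower, upper) = sampled_quantiles(mu, var)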