From 70c44b2cdd95e81b5b675724bc2e797399b0a413 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 18 Jul 2013 18:49:26 +0100 Subject: [PATCH] Multioutput is working --- GPy/core/gp.py | 35 +- GPy/core/gp_base.py | 29 +- GPy/core/model.py | 2 +- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/ep.py | 125 +----- GPy/likelihoods/ep_mixed_noise.py | 372 ++++++++++++++++++ GPy/likelihoods/noise_model_constructors.py | 13 + GPy/likelihoods/noise_models/__init__.py | 1 + .../noise_models/exponential_noise.py | 68 ++++ .../noise_models/gaussian_noise.py | 2 +- .../noise_models/gp_transformations.py | 12 + .../noise_models/noise_distributions.py | 2 +- GPy/models/__init__.py | 1 + GPy/models/gp_classification.py | 5 +- GPy/models/gp_multioutput.py | 56 +++ 15 files changed, 598 insertions(+), 126 deletions(-) create mode 100644 GPy/likelihoods/ep_mixed_noise.py create mode 100644 GPy/likelihoods/noise_models/exponential_noise.py create mode 100644 GPy/models/gp_multioutput.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 8d52a984..607fdc5b 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -7,7 +7,7 @@ import pylab as pb from .. import kern from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs #from ..util.plot import gpplot, Tango -from ..likelihoods import EP +from ..likelihoods import EP,EP_Mixed_Noise from gp_base import GPBase class GP(GPBase): @@ -151,5 +151,36 @@ class GP(GPBase): # now push through likelihood mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov) - + return mean, var, _025pm, _975pm + + def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False): + """ + Predict the function(s) at the new point(s) Xnew. + Arguments + --------- + :param Xnew: The points at which to make a prediction + :type Xnew: np.ndarray, Nnew x self.input_dim + :param which_parts: specifies which outputs kernel(s) to use in prediction + :type which_parts: ('all', list of bools) + :param full_cov: whether to return the folll covariance matrix, or just the diagonal + :type full_cov: bool + :rtype: posterior mean, a Numpy array, Nnew x self.input_dim + :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise + :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim + + + If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. + This is to allow for different normalizations of the output dimensions. + + """ + assert isinstance(self.likelihood,EP_Mixed_Noise) + index = np.ones_like(Xnew)*output + Xnew = np.hstack((Xnew,index)) + + # normalize X values + Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale + mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) + + # now push through likelihood + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) return mean, var, _025pm, _975pm diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index b82f3298..609fc500 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -3,6 +3,7 @@ from .. import kern from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D import pylab as pb from GPy.core.model import Model +from GPy.likelihoods.ep_mixed_noise import EP_Mixed_Noise class GPBase(Model): """ @@ -91,7 +92,7 @@ class GPBase(Model): else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None): + def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None): """ TODO: Docstrings! @@ -106,7 +107,7 @@ class GPBase(Model): fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - if self.X.shape[1] == 1: + if self.X.shape[1] == 1 and not isinstance(self.likelihood,EP_Mixed_Noise): Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now @@ -120,7 +121,7 @@ class GPBase(Model): ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2: # FIXME + elif self.X.shape[1] == 2 and not isinstance(self.likelihood,EP_Mixed_Noise): # FIXME resolution = resolution or 50 Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) @@ -132,5 +133,27 @@ class GPBase(Model): ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) + elif self.X.shape[1] == 2 and isinstance(self.likelihood,EP_Mixed_Noise): + Xu = self.X[self.X[:,-1]==output,:] + Xu = self.X * self._Xscale + self._Xoffset + Xu = self.X[self.X[:,-1]==output ,0:1] + + Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) + m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output) + for d in range(m.shape[1]): + gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) + #ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) + ax.plot(Xu[which_data], self.likelihood.data[self.likelihood.index==output][:,None], 'kx', mew=1.5) + ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) + ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + + else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + + + + + diff --git a/GPy/core/model.py b/GPy/core/model.py index 05375b2a..f67adc1c 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -480,7 +480,7 @@ class Model(Parameterised): :type optimzer: string TODO: valid strings? """ - assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods" + assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods" ll_change = epsilon + 1. iteration = 0 last_ll = -np.inf diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 3e6a28d3..55f437b1 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,4 +1,5 @@ from ep import EP +from ep_mixed_noise import EP_Mixed_Noise from gaussian import Gaussian from noise_model_constructors import * # TODO: from Laplace import Laplace diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index aaa03938..c9f23839 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -24,18 +24,9 @@ class EP(likelihood): #Initial values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde) - #TODO restore self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) - #_gp = self.noise_model.gp_link.transf(self.data) - #_mean = self.noise_model._mean(_gp) - #_variance = self.noise_model._variance(_gp) - #self.tau_tilde = 1./_variance - #self.tau_tilde[_variance== 0] = 1. - #self.v_tilde = _mean*self.tau_tilde - - #initial values for the GP variables self.Y = np.zeros((self.N,1)) self.covariance_matrix = np.eye(self.N) @@ -47,17 +38,16 @@ class EP(likelihood): self.trYYT = 0. def restart(self): - #FIXME self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) - #self.Y = np.zeros((self.N,1)) - #self.covariance_matrix = np.eye(self.N) - #self.precision = np.ones(self.N)[:,None] - #self.Z = 0 - #self.YYT = None - #self.V = self.precision * self.Y - #self.VVT_factor = self.V - #self.trYYT = 0. + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. def predictive_values(self,mu,var,full_cov): if full_cov: @@ -95,8 +85,6 @@ class EP(likelihood): self.VVT_factor = self.V self.trYYT = np.trace(self.YYT) - #a = kjkjkjkj - def fit_full(self,K): """ The expectation-propagation algorithm. @@ -136,103 +124,15 @@ class EP(likelihood): self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] #Marginal moments self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) - - - #DELETE - """ - import pylab as pb - from scipy import stats - import scipy as sp - import gp_transformations - from constructors import * - - gp_link = gp_transformations.Log_ex_1() - distribution = poisson(gp_link=gp_link) - gp = np.linspace(-3,50,100) - #distribution = binomial() - #gp = np.linspace(-3,3,100) - - y = self._transf_data[i] - tau_ = self.tau_[i] - v_ = self.v_[i] - sigma2_ = np.sqrt(1./tau_) - mu_ = v_/tau_ - - gaussian = stats.norm.pdf(gp,loc=mu_,scale=np.sqrt(sigma2_)) - non_gaussian = np.array([distribution._mass(gp_i,y) for gp_i in gp]) - prod = np.array([distribution._product(gp_i,y,mu_,np.sqrt(sigma2_)) for gp_i in gp]) - my_Z_hat,my_mu_hat,my_sigma2_hat = distribution.moments_match(y,tau_,v_) - proxy = stats.norm.pdf(gp,loc=my_mu_hat,scale=np.sqrt(my_sigma2_hat)) - - - new_sigma2_tilde = 1./self.tau_tilde[i] - new_mu_tilde = self.v_tilde[i]/self.tau_tilde[i] - new_Z_tilde = self.Z_hat[i]*np.sqrt(2*np.pi)*np.sqrt(sigma2_+new_sigma2_tilde)*np.exp(.5*(mu_-new_mu_tilde)**2/(sigma2_+new_sigma2_tilde)) - bad_gaussian = stats.norm.pdf(gp,self.v_tilde[i]/self.tau_tilde[i],np.sqrt(1./self.tau_tilde[i])) - new_gaussian = stats.norm.pdf(gp,new_mu_tilde,np.sqrt(new_sigma2_tilde))*new_Z_tilde - #new_gaussian = stats.norm.pdf(gp,_mu_tilde,np.sqrt(_sigma2_tilde))*_Z_tilde - - _sigma2_tilde = 1./(1./(my_sigma2_hat) - 1./sigma2_) - _mu_tilde = (my_mu_hat/my_sigma2_hat - mu_/sigma2_)*_sigma2_tilde - _Z_tilde = my_Z_hat*np.sqrt(2*np.pi)*np.sqrt(sigma2_+_sigma2_tilde)*np.exp(.5*(mu_ - _mu_tilde)**2/(sigma2_ + _sigma2_tilde)) - - fig1 = pb.figure(figsize=(15,5)) - ax1 = fig1.add_subplot(131) - ax1.grid(True) - #pb.plot(gp,bad_gaussian,'b--',linewidth=1.5) - #pb.plot(gp,non_gaussian,'b-',linewidth=1.5) - pb.plot(gp,new_gaussian,'r--',linewidth=1.5) - pb.title('Likelihood: $p(y_i|f_i)$',fontsize=22) - - ax2 = fig1.add_subplot(132) - ax2.grid(True) - pb.plot(gp,gaussian,'b-',linewidth=1.5) - pb.title('Cavity distribution: $q_{-i}(f_i)$',fontsize=22) - - ax3 = fig1.add_subplot(133) - ax3.grid(True) - pb.plot(gp,prod,'b--',linewidth=1.5) - - pb.plot(gp,proxy*my_Z_hat,'r-',linewidth=1.5) - - pb.title('Approximation: $\mathcal{N}(f_i|\hat{\mu}_i,\hat{\sigma}_i^2) \hat{Z}_i$',fontsize=22) - pb.legend(('Exact','Approximation'),frameon=False) - - print 'i',i - print 'v/tau _tilde', self.v_tilde[i], self.tau_tilde[i] - print 'v/tau _', self.v_[i], self.tau_[i] - print 'Z/mu/sigma2 _hat', self.Z_hat[i], mu_hat[i], sigma2_hat[i] - pb.plot(gp,new_gaussian*gaussian,'k-') - - a = kj - break - """ - #DELETE - - - - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) #FIXME - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) #FIXME + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) self.tau_tilde[i] += Delta_tau self.v_tilde[i] += Delta_v - - #new_tau = self.delta/self.eta*(1./sigma2_hat[i] - self.tau_[i]) - #new_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.v_[i]) - #Delta_tau = new_tau - self.tau_tilde[i] - #Delta_v = new_v - self.v_tilde[i] - #self.tau_tilde[i] += Delta_tau - #self.v_tilde[i] += Delta_v - #Posterior distribution parameters update DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) mu = np.dot(Sigma,self.v_tilde) self.iterations += 1 - - - - #Sigma recomptutation with Cholesky decompositon Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K @@ -245,11 +145,6 @@ class EP(likelihood): self.np1.append(self.tau_tilde.copy()) self.np2.append(self.v_tilde.copy()) - ##DELETE - #pb.vlines(mu[i],0,max(prod)) - #break - #DELETE - return self._compute_GP_variables() def fit_DTC(self, Kmm, Kmn): diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py new file mode 100644 index 00000000..24c5498e --- /dev/null +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -0,0 +1,372 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs +from likelihood import likelihood + +class EP_Mixed_Noise(likelihood): + def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]): + """ + Expectation Propagation + + Arguments + --------- + epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + noise_model : a likelihood function (see likelihood_functions.py) + """ + assert len(data_list) == len(noise_model_list) + self.noise_model_list = noise_model_list + n_list = [data.size for data in data_list] + n_models = len(data_list) + self.n_params = [noise_model._get_params().size for noise_model in noise_model_list] + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(n_models),n_list)]) + self.epsilon = epsilon + self.eta, self.delta = power_ep + self.data = np.vstack(data_list) + self.N, self.output_dim = self.data.shape + self.is_heteroscedastic = True + self.Nparams = 0#FIXME + self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)]) + #TODO non-gaussian index + + #Initial values - Likelihood approximation parameters: + #p(y|f) = t(f|tau_tilde,v_tilde) + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + + #initial values for the GP variables + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. + + def restart(self): + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. + + def predictive_values(self,mu,var,full_cov,noise_model): + if full_cov: + raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" + #_mu = [] + #_var = [] + #_q1 = [] + #_q2 = [] + #for m,v,o in zip(mu,var,output.flatten()): + # a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v) + # _mu.append(a) + # _var.append(b) + # _q1.append(c) + # _q2.append(d) + #return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2) + return self.noise_model_list[noise_model].predictive_values(mu,var) + + def _get_params(self): + return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list]) + + def _get_param_names(self): + names = [] + for noise_model in self.noise_model_list: + names += noise_model._get_param_names() + return names + + def _set_params(self,p): + cs_params = np.cumsum([0]+self.n_params) + for i in range(len(self.n_params)): + self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]]) + + def _gradients(self,partial): + #NOTE this is not tested + return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list]) + + def _compute_GP_variables(self): + #Variables to be called from GP + mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model + sigma_sum = 1./self.tau_ + 1./self.tau_tilde + mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 + self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep + + self.Y = mu_tilde[:,None] + self.YYT = np.dot(self.Y,self.Y.T) + self.covariance_matrix = np.diag(1./self.tau_tilde) + self.precision = self.tau_tilde[:,None] + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = np.trace(self.YYT) + + def fit_full(self,K): + """ + The expectation-propagation algorithm. + For nomenclature see Rasmussen & Williams 2006. + """ + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) + mu = np.zeros(self.N) + Sigma = K.copy() + + """ + Initial values - Cavity distribution parameters: + q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = self.epsilon + 1. + epsilon_np2 = self.epsilon + 1. + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + self.tau_tilde[i] += Delta_tau + self.v_tilde[i] += Delta_v + #Posterior distribution parameters update + DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) + mu = np.dot(Sigma,self.v_tilde) + self.iterations += 1 + #Sigma recomptutation with Cholesky decompositon + Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K + B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K + L = jitchol(B) + V,info = dtrtrs(L,Sroot_tilde_K,lower=1) + Sigma = K - np.dot(V.T,V) + mu = np.dot(Sigma,self.v_tilde) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() + + def fit_DTC(self, Kmm, Kmn): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see ... 2013. + """ + num_inducing = Kmm.shape[0] + + #TODO: this doesn't work with uncertain inputs! + + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = Qnn = Knm*Kmmi*Kmn + """ + KmnKnm = np.dot(Kmn,Kmn.T) + Lm = jitchol(Kmm) + Lmi = chol_inv(Lm) + Kmmi = np.dot(Lmi.T,Lmi) + KmmiKmn = np.dot(Kmmi,Kmn) + Qnn_diag = np.sum(Kmn*KmmiKmn,-2) + LLT0 = Kmm.copy() + + #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) + #KmnKnm = np.dot(Kmn, Kmn.T) + #KmmiKmn = np.dot(Kmmi,Kmn) + #Qnn_diag = np.sum(Kmn*KmmiKmn,-2) + #LLT0 = Kmm.copy() + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*Gamma + """ + mu = np.zeros(self.N) + LLT = Kmm.copy() + Sigma_diag = Qnn_diag.copy() + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + np1 = [self.tau_tilde.copy()] + np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + self.tau_tilde[i] += Delta_tau + self.v_tilde[i] += Delta_v + #Posterior distribution parameters update + DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau + L = jitchol(LLT) + #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau)) + V,info = dtrtrs(L,Kmn,lower=1) + Sigma_diag = np.sum(V*V,-2) + si = np.sum(V.T*V[:,i],-1) + mu += (Delta_v-Delta_tau*mu[i])*si + self.iterations += 1 + #Sigma recomputation with Cholesky decompositon + LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T) + L = jitchol(LLT) + V,info = dtrtrs(L,Kmn,lower=1) + V2,info = dtrtrs(L.T,V,lower=0) + Sigma_diag = np.sum(V*V,-2) + Knmv_tilde = np.dot(Kmn,self.v_tilde) + mu = np.dot(V2.T,Knmv_tilde) + epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N + np1.append(self.tau_tilde.copy()) + np2.append(self.v_tilde.copy()) + + self._compute_GP_variables() + + def fit_FITC(self, Kmm, Kmn, Knn_diag): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see Naish-Guzman and Holden, 2008. + """ + num_inducing = Kmm.shape[0] + + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn + """ + Lm = jitchol(Kmm) + Lmi = chol_inv(Lm) + Kmmi = np.dot(Lmi.T,Lmi) + P0 = Kmn.T + KmnKnm = np.dot(P0.T, P0) + KmmiKmn = np.dot(Kmmi,P0.T) + Qnn_diag = np.sum(P0.T*KmmiKmn,-2) + Diag0 = Knn_diag - Qnn_diag + R0 = jitchol(Kmmi).T + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*Gamma + """ + self.w = np.zeros(self.N) + self.Gamma = np.zeros(num_inducing) + mu = np.zeros(self.N) + P = P0.copy() + R = R0.copy() + Diag = Diag0.copy() + Sigma_diag = Knn_diag + RPT0 = np.dot(R0,P0.T) + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + self.tau_tilde[i] += Delta_tau + self.v_tilde[i] += Delta_v + #Posterior distribution parameters update + dtd1 = Delta_tau*Diag[i] + 1. + dii = Diag[i] + Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 + pi_ = P[i,:].reshape(1,num_inducing) + P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ + Rp_i = np.dot(R,pi_.T) + RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) + R = jitchol(RTR).T + self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1 + self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + mu = self.w + np.dot(P,self.Gamma) + self.iterations += 1 + #Sigma recomptutation with Cholesky decompositon + Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) + Diag = Diag0 * Iplus_Dprod_i + P = Iplus_Dprod_i[:,None] * P0 + safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0) + L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) + R,info = dtrtrs(L,R0,lower=1) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + self.w = Diag * self.v_tilde + self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) + mu = self.w + np.dot(P,self.Gamma) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index cc205c6d..01f15f71 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -22,6 +22,19 @@ def binomial(gp_link=None): analytical_variance = False return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance) +def exponential(gp_link=None): + """ + Construct a binomial likelihood + + :param gp_link: a GPy gp_link function + """ + if gp_link is None: + gp_link = noise_models.gp_transformations.Identity() + + analytical_mean = False + analytical_variance = False + return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance) + def gaussian(gp_link=None,variance=1.): """ Construct a gaussian likelihood diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py index 65a94e1e..b47702a7 100644 --- a/GPy/likelihoods/noise_models/__init__.py +++ b/GPy/likelihoods/noise_models/__init__.py @@ -1,5 +1,6 @@ import noise_distributions import binomial_noise +import exponential_noise import gaussian_noise import gamma_noise import poisson_noise diff --git a/GPy/likelihoods/noise_models/exponential_noise.py b/GPy/likelihoods/noise_models/exponential_noise.py new file mode 100644 index 00000000..e72b8c22 --- /dev/null +++ b/GPy/likelihoods/noise_models/exponential_noise.py @@ -0,0 +1,68 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from noise_distributions import NoiseDistribution + +class Exponential(NoiseDistribution): + """ + Gamma likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! + $$ + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + return Y + + def _mass(self,gp,obs): + """ + Mass (or density) function + """ + return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp) + + def _nlog_mass(self,gp,obs): + """ + Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted + """ + return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp)) + + def _dnlog_mass_dgp(self,gp,obs): + return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp) + + def _d2nlog_mass_dgp2(self,gp,obs): + fgp = self.gp_link.transf(gp) + return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp) + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _dmean_dgp(self,gp): + return self.gp_link.dtransf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.gp_link.d2transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp)**2 + + def _dvariance_dgp(self,gp): + return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp) + + def _d2variance_dgp2(self,gp): + return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp)) diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index 40db423c..398ed32a 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -20,7 +20,7 @@ class Gaussian(NoiseDistribution): super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance) def _get_params(self): - return self.variance + return np.array([self.variance]) def _get_param_names(self): return ['noise_model_variance'] diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py index b81e88e1..5cb08e8e 100644 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -97,3 +97,15 @@ class Log_ex_1(GPTransformation): def d2transf_df2(self,f): aux = np.exp(f)/(1.+np.exp(f)) return aux*(1.-aux) + +class Reciprocal(GPTransformation): + def transf(sefl,f): + return 1./f + + def dtransf_df(self,f): + return -1./f**2 + + def d2transf_df2(self,f): + return 2./f**3 + + diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index bc4d89d6..9d4eedfb 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -359,7 +359,7 @@ class NoiseDistribution(object): """ return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) - def predictive_values(self,mu,var,sample=True,sample_size=5000): + def predictive_values(self,mu,var): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction :param mu: mean of the latent variable diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 885372a1..093456b6 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -11,3 +11,4 @@ from gplvm import GPLVM from warped_gp import WarpedGP from bayesian_gplvm import BayesianGPLVM from mrd import MRD +from gp_multioutput import GPMultioutput diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index d1cf2e00..73d492fe 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -31,9 +31,8 @@ class GPClassification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - #distribution = GPy.likelihoods.binomial_likelihood.Binomial(link=link) - distribution = likelihoods.binomial() - likelihood = likelihoods.EP(Y, distribution) + noise_model = likelihoods.binomial() + likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py new file mode 100644 index 00000000..72427f43 --- /dev/null +++ b/GPy/models/gp_multioutput.py @@ -0,0 +1,56 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from ..core import GP +from .. import likelihoods +from .. import kern + + +import pylab as pb + +class GPMultioutput(GP): + """ + Multiple output Gaussian process + + This is a thin wrapper around the models.GP class, with a set of sensible defaults + + :param X_list: input observations + :param Y_list: observed values + :param L_list: a GPy likelihood, defaults to Binomial with probit link_function + :param kernel: a GPy kernel, defaults to rbf + :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_X: False|True + :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_Y: False|True + + .. Note:: Multiple independent outputs are allowed using columns of Y + + """ + + def __init__(self,X_list,Y_list=None,likelihood=None,kernel=None,normalize_X=False,normalize_Y=False,W=1): + + if likelihood is None: + noise_model_list = [likelihoods.gaussian(variance=1.) for Y in Y_list] + likelihood = likelihoods.EP_Mixed_Noise(Y_list, noise_model_list) + + elif Y_list is not None: + if not all(np.vstack(Y_list).flatten() == likelihood.data.flatten()): + raise Warning, 'likelihood.data and Y_list values are different.' + + X = np.hstack([np.vstack(X_list),likelihood.index]) + + if kernel is None: + original_dim = X.shape[1]-1 + kernel = kern.rbf(original_dim) + kern.white(original_dim) + + mkernel = kernel.prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W + + #kern1 = kern.rbf(1) + kern.white(1) + #kern2 = kern.coregionalise(2,1) + #kern3 = kern1.prod(kern2,tensor=True) + + + GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) + self.ensure_default_constraints()