From 8c9d9e7fec94fe17d3155beba55a8e9284d9af64 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Tue, 18 Jun 2013 15:01:47 +0100
Subject: [PATCH 01/49] working on the Poisson likelihood

---
 GPy/likelihoods/likelihood_functions.py | 30 +++++++++++++++++-------
 GPy/likelihoods/link_functions.py       | 31 ++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 7b9b8982..7397fb94 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -31,16 +31,20 @@ class LikelihoodFunction(object):
     def _product(self,gp,obs,mu,sigma):
         return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)

-    def _nlog_product(self,gp,obs,mu,sigma):
-        return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
+    def _log_product_scaled(self,gp,obs,mu,sigma):
+        return -.5*(gp-mu)**2/sigma**2 + self._log_distribution_scaled(gp,obs)
+
+    def _log_product_scaled_dgp(self,gp,obs,mu,sigma):
+        return -(gp -mu)/sigma**2 + self._log_distribution_scaled_dgp(gp,obs)

     def _locate(self,obs,mu,sigma):
         """
         Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
         """
-        golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
-        golden_B = np.array([np.log(obs),mu]).max() #Upper limit
-        return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
+        lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME
+        upper = np.array([np.log(obs),mu]).max() #Upper limit #FIXME
+        #return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
+        return sp.optimize.brent(self._nlog_product, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product

     def _moments_match_numerical(self,obs,tau,v):
         """
@@ -87,7 +91,7 @@ class Binomial(LikelihoodFunction):
     def _distribution(self,gp,obs):
         pass

-    def _log_distribution(self,gp,obs):
+    def _log_distribution_scaled(self,gp,obs):
         pass

     def _preprocess_values(self,Y):
@@ -152,8 +156,18 @@ class Poisson(LikelihoodFunction):
     def _distribution(self,gp,obs):
         return stats.poisson.pmf(obs,self.link.inv_transf(gp))

-    def _log_distribution(self,gp,obs):
-        return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
+    def _log_distribution_scaled(self,gp,obs):
+        """
+        Logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
+        """
+        return -self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
+
+    def _log_distribution_scaled_dgp(self,gp,obs):
+        return -self.link.inv_transf_df(gp) + obs * self.link.log_inv_transf_df(gp)
+
+    def _log_distribution_scaled_d2gp2(self,gp,obs):
+        return -self.link.inv_transf_df(gp) + obs * self.link.log_inv_transf_df(gp)
+

     def predictive_values(self,mu,var):
         """
diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py
index 3b9a55b2..3338c042 100644
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@@ -21,7 +21,7 @@ class LinkFunction(object):

 class Probit(LinkFunction):
     """
-    Probit link function: Squashes a likelihood between 0 and 1
+    Probit link function
     """
     def transf(self,mu):
         pass
@@ -31,3 +31,32 @@ class Probit(LinkFunction):

     def log_inv_transf(self,f):
         pass
+
+class Log(LinkFunction):
+    """
+    Logarithm link function
+    """
+
+    def transf(self,mu):
+        return np.log(mu)
+
+    def inv_transf(self,f):
+        return np.exp(f)
+
+    def log_inv_transf(self,f):
+        return f
+
+    def inv_transf_df(self,f):
+        return np.exp(f)
+
+    def log_inv_transf_df(self,f):
+        return 1
+
+    def inv_transf_df(self,f):
+        return np.exp(f)
+
+    def log_inv_transf_df(self,f):
+        return 1
+
+
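For reference, `_log_distribution_scaled` above drops the constant -log(obs!) term of the Poisson log-mass, and the Log link gives inv_transf(f) = exp(f) and log_inv_transf(f) = f. A minimal standalone check of that identity against scipy (illustrative only, not part of any commit in this series):

    import numpy as np
    from scipy import stats, special

    f, y = 0.7, 3                               # arbitrary latent value and observed count
    scaled = -np.exp(f) + y * f                 # -inv_transf(f) + y * log_inv_transf(f)
    exact = stats.poisson.logpmf(y, np.exp(f))  # includes the constant -log(y!) term
    assert np.isclose(scaled - special.gammaln(y + 1), exact)
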
From c0bb304f4f3ed26a0c36a34bc900386c5c0faffd Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Fri, 21 Jun 2013 16:00:12 +0100
Subject: [PATCH 02/49] Some cool stuff for EP

---
 GPy/likelihoods/likelihood_functions.py | 167 +++++++++++++++++-------
 GPy/likelihoods/link_functions.py       |  70 +++++++---
 GPy/util/univariate_Gaussian.py         |  11 ++
 3 files changed, 181 insertions(+), 67 deletions(-)

diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 7397fb94..8464ec99 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -3,7 +3,7 @@


 import numpy as np
-from scipy import stats
+from scipy import stats,special
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
@@ -29,49 +29,83 @@ class LikelihoodFunction(object):
         return Y

     def _product(self,gp,obs,mu,sigma):
-        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
-
-    def _log_product_scaled(self,gp,obs,mu,sigma):
-        return -.5*(gp-mu)**2/sigma**2 + self._log_distribution_scaled(gp,obs)
-
-    def _log_product_scaled_dgp(self,gp,obs,mu,sigma):
-        return -(gp -mu)/sigma**2 + self._log_distribution_scaled_dgp(gp,obs)
-
-    def _locate(self,obs,mu,sigma):
         """
-        Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
+        Product between the cavity distribution and a likelihood factor
+        """
+        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
+
+    def _nlog_product_scaled(self,gp,obs,mu,sigma):
+        """
+        Negative log-product between the cavity distribution and a likelihood factor
+        """
+        return .5*(gp-mu)**2/sigma**2 + self._nlog_mass_scaled(gp,obs)
+
+    def _dlog_product_dgp(self,gp,obs,mu,sigma):
+        """
+        Derivative wrt gp of the log-product between the cavity distribution and a likelihood factor
+        """
+        return -(gp - mu)/sigma**2 + self._dlog_mass_dgp(gp,obs)
+
+    def _d2log_product_dgp2(self,gp,obs,mu,sigma):
+        """
+        Second derivative wrt gp of the log-product between the cavity distribution and a likelihood factor
+        """
+        return -1./sigma**2 + self._d2log_mass_dgp2(gp,obs)
+
+    #def _dlog_product_dobs(self,obs,gp):
+    #    return self._dlog_mass_dobs(obs,gp)
+
+    #def _d2log_product_dobs2(self,obs,gp):
+    #    return self._d2log_mass_dobs2(obs,gp)
+
+    #def _d2log_product_dcross(self,gp,obs):
+
+    def _gradient_log_product(self,x,mu,sigma):
+        return np.array((self._dlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dlog_mass_dobs(obs=x[1],gp=x[0])))
+
+    def _hessian_log_product(self,x,mu,sigma):
+        cross_derivative = self._d2log_mass_dcross(gp=x[0],obs=x[1])
+        return np.array((self._d2log_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2log_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)
+
+
+    def _product_mode(self,obs,mu,sigma):
+        """
+        Brent's method to find the mode in the _product function (cavity x likelihood factor)
         """
         lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME
-        upper = np.array([np.log(obs),mu]).max() #Upper limit #FIXME
-        #return sp.optimize.golden(self._nlog_product, 
args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product - return sp.optimize.brent(self._nlog_product, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product + upper = 2*np.array([np.log(obs),mu]).max() #Upper limit #FIXME + print lower,upper + return sp.optimize.brent(self._nlog_product_scaled, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product def _moments_match_numerical(self,obs,tau,v): """ - Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input. + Lapace approximation to calculate the moments mumerically. """ mu = v/tau - sigma = np.sqrt(1./tau) - opt = self._locate(obs,mu,sigma) - width = 3./np.log(max(obs,2)) - A = opt - width #Grid's lower limit - B = opt + width #Grid's Upper limit - K = 10*int(np.log(max(obs,150))) #Number of points in the grid - h = (B-A)/K # length of the intervals - grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) - x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier - _aux1 = self._product(A,obs,mu,sigma) - _aux2 = self._product(B,obs,mu,sigma) - _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma) - _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma) - zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged - first = zeroth*x - second = first*x - Z_hat = sum(zeroth)*h/3 # Zero-th moment - mu_hat = sum(first)*h/(3*Z_hat) # First moment - m2 = sum(second)*h/(3*Z_hat) # Second moment - sigma2_hat = m2 - mu_hat**2 # Second central moment - return float(Z_hat), float(mu_hat), float(sigma2_hat) + mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau)) + sigma2_hat = 1./(tau - self._d2log_mass_dgp2(mu_hat,obs)) + Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat) + return Z_hat,mu_hat,sigma2_hat + + def _nlog_joint_posterior_scaled(x,mu,sigma): + """ + x = np.array([gp,obs]) + """ + return self._product(x[0],x[1],mu,sigma) + + def _gradient_log_joint_posterior(x,mu,sigma): + return self._dlog_product_dgp(x[0],x[1],mu,sigma) + self._dlog_mass_dgp(gp,obs), + + def _predictive_values_numerical(self,mu,var): + """ + Lapace approximation to calculate the predictive values. 
+ """ + mu = mu.flatten() + var = var.flatten() + tranf_mu = self.link.transf(mu) + mu_hat = [self._product_mode(t_i,m_i,np.sqrt(v_i)) for t_i,mu_i,v_i in zip(transf_mu,mu,var)] + sigma2_hat = [1./(1./var - self._d2log_mass_dgp2(m_i,t_i)) for m_i,t_i in zip(mu_hat,transf_mu)] + class Binomial(LikelihoodFunction): """ @@ -88,10 +122,10 @@ class Binomial(LikelihoodFunction): link = self._analytical super(Binomial, self).__init__(link) - def _distribution(self,gp,obs): + def _mass(self,gp,obs): pass - def _log_distribution_scaled(self,gp,obs): + def _nlog_mass_scaled(self,gp,obs): pass def _preprocess_values(self,Y): @@ -123,7 +157,7 @@ class Binomial(LikelihoodFunction): sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) return Z_hat, mu_hat, sigma2_hat - def predictive_values(self,mu,var): + def _predictive_values_analytical(self,mu,var): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction :param mu: mean of the latent variable @@ -153,28 +187,65 @@ class Poisson(LikelihoodFunction): link = link_functions.Log() super(Poisson, self).__init__(link) - def _distribution(self,gp,obs): + def _mass(self,gp,obs): + """ + Mass (or density) function + """ return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - def _log_distribution_scaled(self,gp,obs): + def _nlog_mass_scaled(self,gp,obs): """ - Logarithm of the un-normalized distribution: factors that are not a function of gp are omitted + Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted """ - return -self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp) + return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) - def _log_distribution_scaled_dgp(self,gp,obs): - return -self.link.inv_transf_df(gp) + obs * self.link.log_inv_transf_df(gp) + def _dlog_mass_dgp(self,gp,obs): + return self.link.dinv_transf_df(gp) * (obs/self.link.inv_transf(gp) - 1) - def _log_distribution_scaled_d2gp2(self,gp,obs): - return -self.link.inv_transf_df(gp) + obs * self.link.log_inv_transf_df(gp) + def _d2log_mass_dgp2(self,gp,obs): + d2_df = self.link.d2inv_transf_df2(gp) + inv_transf = self.link.inv_transf(gp) + return obs * ( d2_df/inv_transf - (self.link.dinv_transf_df(gp)/inv_transf)**2 ) - d2_df + def _dlog_mass_dobs(self,obs,gp): + return np.log(self.link.inv_transf(gp)) - special.psi(obs+1) + + def _d2log_mass_dobs2(self,obs,gp=None): + return -special.polygamma(1,obs) + + def _d2log_mass_dcross(self,obs,gp): + return self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) def predictive_values(self,mu,var): """ Compute mean, and conficence interval (percentiles 5 and 95) of the prediction """ - mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location) + mean = self.link.transf(mu) tmp = stats.poisson.ppf(np.array([.025,.975]),mean) p_025 = tmp[:,0] p_975 = tmp[:,1] return mean,np.nan*mean,p_025,p_975 # better variance here TODO + + """ + simpson approximation + width = 3./np.log(max(obs,2)) + A = opt - width #Grid's lower limit + B = opt + width #Grid's Upper limit + K = 10*int(np.log(max(obs,150))) #Number of points in the grid + h = (B-A)/K # length of the intervals + grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) + x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier + _aux1 = self._product(A,obs,mu,sigma) + _aux2 = self._product(B,obs,mu,sigma) + _aux3 = 
4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
+    _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
+    zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
+    first = zeroth*x
+    second = first*x
+    Z_hat = sum(zeroth)*h/3 # Zero-th moment
+    mu_hat = sum(first)*h/(3*Z_hat) # First moment
+    m2 = sum(second)*h/(3*Z_hat) # Second moment
+    sigma2_hat = m2 - mu_hat**2 # Second central moment
+    return float(Z_hat), float(mu_hat), float(sigma2_hat)
+    """
+
diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py
index 3338c042..2f25ae3a 100644
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@@ -7,7 +7,7 @@ from scipy import stats
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
-from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf

 class LinkFunction(object):
     """
@@ -19,44 +19,76 @@ class LinkFunction(object):
     def __init__(self):
         pass

-class Probit(LinkFunction):
+class Identity(LinkFunction):
     """
-    Probit link function
+    $$
+    g(f) = f
+    $$
     """
     def transf(self,mu):
-        pass
+        return mu

     def inv_transf(self,f):
-        pass
+        return f

-    def log_inv_transf(self,f):
-        pass
+    def dinv_transf_df(self,f):
+        return 1.
+
+    def d2inv_transf_df2(self,f):
+        return 0
+
+
+class Probit(LinkFunction):
+    """
+    $$
+    g(f) = \\Phi^{-1} (mu)
+    $$
+    """
+    def transf(self,mu):
+        return inv_std_norm_cdf(mu)
+
+    def inv_transf(self,f):
+        return std_norm_cdf(f)
+
+    def dinv_transf_df(self,f):
+        return std_norm_pdf(f)
+
+    def d2inv_transf_df2(self,f):
+        return -f * std_norm_pdf(f)

 class Log(LinkFunction):
     """
-    Logarithm link function
+    $$
+    g(f) = \log(\mu)
+    $$
     """
-
     def transf(self,mu):
         return np.log(mu)

     def inv_transf(self,f):
         return np.exp(f)

-    def log_inv_transf(self,f):
-        return f
-
-    def inv_transf_df(self,f):
+    def dinv_transf_df(self,f):
         return np.exp(f)

-    def log_inv_transf_df(self,f):
-        return 1
-
-    def inv_transf_df(self,f):
+    def d2inv_transf_df2(self,f):
         return np.exp(f)

-    def log_inv_transf_df(self,f):
-        return 1
+
+class Log_ex_1(LinkFunction):
+    """
+    $$
+    g(f) = \log(\exp(\mu) - 1)
+    $$
+    """
+    def transf(self,mu):
+        return np.log(np.exp(mu) - 1)
+
+    def inv_transf(self,f):
+        return np.log(np.exp(f)+1)
+
+    def dinv_transf_df(self,f):
+        return np.exp(f)/(1.+np.exp(f))
+
+    def d2inv_transf_df2(self,f):
+        aux = np.exp(f)/(1.+np.exp(f))
+        return aux*(1.-aux)

diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py
index 28946894..5a5880d5 100644
--- a/GPy/util/univariate_Gaussian.py
+++ b/GPy/util/univariate_Gaussian.py
@@ -32,4 +32,15 @@ def std_norm_cdf(x):
     x = float(x)
     return weave.inline(code,arg_names=['x'],support_code=support_code)

+def inv_std_norm_cdf(x):
+    """
+    Inverse cumulative standard Gaussian distribution
+    Based on Winitzki, S. (2008)
+    """
+    z = 2*x -1
+    ln1z2 = np.log(1-z**2)
+    a = 8*(np.pi -3)/(3*np.pi*(4-np.pi))
+    b = 2/(np.pi * a) + ln1z2/2
+    inv_erf = np.sign(z) * np.sqrt( np.sqrt(b**2 - ln1z2/a) - b )
+    return np.sqrt(2) * inv_erf
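The inv_std_norm_cdf added here trades exactness for a cheap closed form. A standalone sketch comparing it against scipy's exact inverse CDF (illustrative only; the function body is copied from the patch above, rewritten without weave):

    import numpy as np
    from scipy import stats

    def inv_std_norm_cdf(x):
        z = 2*x - 1
        ln1z2 = np.log(1 - z**2)
        a = 8*(np.pi - 3)/(3*np.pi*(4 - np.pi))
        b = 2/(np.pi*a) + ln1z2/2
        inv_erf = np.sign(z) * np.sqrt(np.sqrt(b**2 - ln1z2/a) - b)
        return np.sqrt(2) * inv_erf

    x = np.linspace(0.01, 0.99, 99)
    err = np.abs(inv_std_norm_cdf(x) - stats.norm.ppf(x))
    print(err.max())  # small absolute error in the central range, growing towards the tails
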
From 7a3eb369be7ae4162db0c208d30aa684b05c8c60 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Mon, 24 Jun 2013 00:54:50 +0100
Subject: [PATCH 03/49] predictive values, new method

---
 GPy/likelihoods/likelihood_functions.py | 157 +++++++++++------------
 1 file changed, 74 insertions(+), 83 deletions(-)

diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 8464ec99..24b4f9cb 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -38,35 +38,21 @@ class LikelihoodFunction(object):
         """
         Negative log-product between the cavity distribution and a likelihood factor
         """
-        return .5*(gp-mu)**2/sigma**2 + self._nlog_mass_scaled(gp,obs)
+        return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)

-    def _dlog_product_dgp(self,gp,obs,mu,sigma):
+    def _dnlog_product_dgp(self,gp,obs,mu,sigma):
         """
         Derivative wrt gp of the log-product between the cavity distribution and a likelihood factor
         """
-        return -(gp - mu)/sigma**2 + self._dlog_mass_dgp(gp,obs)
+        #return -(gp - mu)/sigma**2 + self._dlog_mass_dgp(gp,obs)
+        return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)

-    def _d2log_product_dgp2(self,gp,obs,mu,sigma):
+    def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
         """
         Second derivative wrt gp of the log-product between the cavity distribution and a likelihood factor
         """
-        return -1./sigma**2 + self._d2log_mass_dgp2(gp,obs)
+        #return -1./sigma**2 + self._d2log_mass_dgp2(gp,obs)
+        return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)

     def _product_mode(self,obs,mu,sigma):
         """
         Brent's method to find the mode in the _product function (cavity x likelihood factor)
         """
         lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME
         upper = 2*np.array([np.log(obs),mu]).max() #Upper limit #FIXME
-        print lower,upper
         return sp.optimize.brent(self._nlog_product_scaled, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product

     def _moments_match_numerical(self,obs,tau,v):
         """
         Lapace approximation to calculate the moments mumerically.
         """
         mu = v/tau
         mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
-        sigma2_hat = 1./(tau - self._d2log_mass_dgp2(mu_hat,obs))
+        #sigma2_hat = 1./(tau - self._d2log_mass_dgp2(mu_hat,obs))
+        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
         Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
         return Z_hat,mu_hat,sigma2_hat

-    def _nlog_joint_posterior_scaled(x,mu,sigma):
+    def _nlog_joint_predictive_scaled(self,x,mu,sigma): #TODO not needed
         """
         x = np.array([gp,obs])
         """
-        return self._product(x[0],x[1],mu,sigma)
+        return self._nlog_product_scaled(x[0],x[1],mu,sigma)

-    def 
_gradient_log_joint_posterior(x,mu,sigma): - return self._dlog_product_dgp(x[0],x[1],mu,sigma) + self._dlog_mass_dgp(gp,obs), + def _gradient_nlog_joint_predictive(self,x,mu,sigma): #TODO not needed + return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0]))) - def _predictive_values_numerical(self,mu,var): + def _hessian_nlog_joint_predictive(self,x,mu,sigma): #TODO not needed + cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1]) + return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2) + + def _joint_predictive_mode(self,mu,sigma): + return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) + + def predictive_values(self,mu,var): """ - Lapace approximation to calculate the predictive values. + Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction """ - mu = mu.flatten() - var = var.flatten() - tranf_mu = self.link.transf(mu) - mu_hat = [self._product_mode(t_i,m_i,np.sqrt(v_i)) for t_i,mu_i,v_i in zip(transf_mu,mu,var)] - sigma2_hat = [1./(1./var - self._d2log_mass_dgp2(m_i,t_i)) for m_i,t_i in zip(mu_hat,transf_mu)] - + if isinstance(mu,float): + mu = [mu] + var = [var] + pred_mean = [] + pred_var = [] + pred_025 = [] + pred_975 = [] + for m,s in zip(mu,np.sqrt(var)): + sample_points = [m - i*s for i in range(-3,4)] + _mean = 0 + _var = 0 + _025 = 0 + _975 = 0 + for q_i in sample_points: + _mean += self.link.inv_transf(q_i) + _var += self._variance(q_i) + _025 += self._percentile(.025,q_i) + _975 += self._percentile(.975,q_i) + pred_mean.append(_mean/len(sample_points)) + pred_var.append(_var/len(sample_points)) + pred_025.append(_025/len(sample_points)) + pred_975.append(_975/len(sample_points)) + pred_mean = np.array(pred_mean)[:,None] + pred_var = np.array(pred_var)[:,None] + pred_025 = np.array(pred_025)[:,None] + pred_975 = np.array(pred_975)[:,None] + return pred_mean, pred_var, pred_025, pred_975 class Binomial(LikelihoodFunction): """ @@ -125,7 +139,7 @@ class Binomial(LikelihoodFunction): def _mass(self,gp,obs): pass - def _nlog_mass_scaled(self,gp,obs): + def _nlog_mass(self,gp,obs): pass def _preprocess_values(self,Y): @@ -157,7 +171,7 @@ class Binomial(LikelihoodFunction): sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) return Z_hat, mu_hat, sigma2_hat - def _predictive_values_analytical(self,mu,var): + def predictive_values(self,mu,var): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction :param mu: mean of the latent variable @@ -193,59 +207,36 @@ class Poisson(LikelihoodFunction): """ return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - def _nlog_mass_scaled(self,gp,obs): + def _variance(self,gp): + return self.link.inv_transf(gp) + + def _percentile(self,x,gp,*args): #TODO *args + return stats.poisson.ppf(x,self.link.inv_transf(gp)) + + def _nlog_mass(self,gp,obs): """ Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted """ - return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) - def _dlog_mass_dgp(self,gp,obs): - return self.link.dinv_transf_df(gp) * (obs/self.link.inv_transf(gp) - 1) 
+ def _dnlog_mass_dgp(self,gp,obs): + #return self.link.dinv_transf_df(gp) * (obs/self.link.inv_transf(gp) - 1) + return self.link.dinv_transf_df(gp) * (1. - obs/self.link.inv_transf(gp)) - def _d2log_mass_dgp2(self,gp,obs): + def _d2nlog_mass_dgp2(self,gp,obs): d2_df = self.link.d2inv_transf_df2(gp) inv_transf = self.link.inv_transf(gp) - return obs * ( d2_df/inv_transf - (self.link.dinv_transf_df(gp)/inv_transf)**2 ) - d2_df + #return obs * ( d2_df/inv_transf - (self.link.dinv_transf_df(gp)/inv_transf)**2 ) - d2_df + return obs * ((self.link.dinv_transf_df(gp)/inv_transf)**2 - d2_df/inv_transf) + d2_df - def _dlog_mass_dobs(self,obs,gp): - return np.log(self.link.inv_transf(gp)) - special.psi(obs+1) + def _dnlog_mass_dobs(self,obs,gp): #TODO not needed + #return np.log(self.link.inv_transf(gp)) - special.psi(obs+1) + return special.psi(obs+1) - np.log(self.link.inv_transf(gp)) - def _d2log_mass_dobs2(self,obs,gp=None): - return -special.polygamma(1,obs) - - def _d2log_mass_dcross(self,obs,gp): - return self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) - - def predictive_values(self,mu,var): - """ - Compute mean, and conficence interval (percentiles 5 and 95) of the prediction - """ - mean = self.link.transf(mu) - tmp = stats.poisson.ppf(np.array([.025,.975]),mean) - p_025 = tmp[:,0] - p_975 = tmp[:,1] - return mean,np.nan*mean,p_025,p_975 # better variance here TODO - - """ - simpson approximation - width = 3./np.log(max(obs,2)) - A = opt - width #Grid's lower limit - B = opt + width #Grid's Upper limit - K = 10*int(np.log(max(obs,150))) #Number of points in the grid - h = (B-A)/K # length of the intervals - grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) - x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier - _aux1 = self._product(A,obs,mu,sigma) - _aux2 = self._product(B,obs,mu,sigma) - _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma) - _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma) - zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged - first = zeroth*x - second = first*x - Z_hat = sum(zeroth)*h/3 # Zero-th moment - mu_hat = sum(first)*h/(3*Z_hat) # First moment - m2 = sum(second)*h/(3*Z_hat) # Second moment - sigma2_hat = m2 - mu_hat**2 # Second central moment - return float(Z_hat), float(mu_hat), float(sigma2_hat) - """ + def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed + #return -special.polygamma(1,obs) + return special.polygamma(1,obs) + def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed + #return self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) + return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) From e2ebfe522e612c4ca04c607531ea7fbbdfbc9c4c Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 24 Jun 2013 18:15:16 +0100 Subject: [PATCH 04/49] more changes --- GPy/likelihoods/likelihood_functions.py | 44 +++++++++++++++++-------- GPy/likelihoods/link_functions.py | 6 ++++ 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 24b4f9cb..f355bfc5 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -56,11 +56,13 @@ class LikelihoodFunction(object): def _product_mode(self,obs,mu,sigma): """ - Brent's method to find the mode in the _product function (cavity x likelihood factor) + Newton's CG method to find the 
mode in the _product function (cavity x likelihood factor) """ - lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME - upper = 2*np.array([np.log(obs),mu]).max() #Upper limit #FIXME - return sp.optimize.brent(self._nlog_product_scaled, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product + #lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME + #upper = 2*np.array([np.log(obs),mu]).max() #Upper limit #FIXME + #return sp.optimize.brent(self._nlog_product_scaled, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product + return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma)) + def _moments_match_numerical(self,obs,tau,v): """ @@ -73,6 +75,18 @@ class LikelihoodFunction(object): Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat) return Z_hat,mu_hat,sigma2_hat + def _nlog_predictive_mean_scaled(self,gp,mu,sigma): + return .5*((gp-mu)/sigma)**2 - np.log(self.link.inv_transf(gp)) + + def _dnlog_predictive_mean_dgp(self,gp,mu,sigma): + return (gp - mu)/sigma**2 - self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) + + def _d2nlog_predictive_mean_dgp2(self,gp,mu,sigma): #TODO mu is not necessary + return 1/sigma**2 - (self.link.d2inv_transf_df2(gp) - self.link.dinv_transf_df(gp))/self.link.inv_transf(gp) + + def _predictive_mean(self,mu,sigma): + return sp.optimize.fmin_ncg(self._nlog_predictive_mean_scaled,x0=mu,fprime=self._dnlog_predictive_mean_dgp,fhess=self._d2nlog_predictive_mean_dgp2,args=(mu,sigma)) + def _nlog_joint_predictive_scaled(self,x,mu,sigma): #TODO not needed """ x = np.array([gp,obs]) @@ -87,7 +101,7 @@ class LikelihoodFunction(object): return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2) def _joint_predictive_mode(self,mu,sigma): - return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) + return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.inv_transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) def predictive_values(self,mu,var): """ @@ -100,27 +114,29 @@ class LikelihoodFunction(object): pred_var = [] pred_025 = [] pred_975 = [] + weights = np.diff([0]+[stats.norm.cdf(-2.5+i,0,1) for i in range(6)] + [1]) for m,s in zip(mu,np.sqrt(var)): sample_points = [m - i*s for i in range(-3,4)] _mean = 0 _var = 0 _025 = 0 _975 = 0 - for q_i in sample_points: - _mean += self.link.inv_transf(q_i) - _var += self._variance(q_i) - _025 += self._percentile(.025,q_i) - _975 += self._percentile(.975,q_i) - pred_mean.append(_mean/len(sample_points)) - pred_var.append(_var/len(sample_points)) - pred_025.append(_025/len(sample_points)) - pred_975.append(_975/len(sample_points)) + for q_i,w_i in zip(sample_points,weights): + _mean += w_i*self.link.inv_transf(q_i) + _var += w_i*self._variance(q_i) + _025 += w_i*self._percentile(.025,q_i) + _975 += w_i*self._percentile(.975,q_i) + pred_mean.append(_mean) + pred_var.append(_var) + pred_025.append(_025) + pred_975.append(_975) pred_mean = np.array(pred_mean)[:,None] pred_var = np.array(pred_var)[:,None] pred_025 = np.array(pred_025)[:,None] pred_975 = 
np.array(pred_975)[:,None] return pred_mean, pred_var, pred_025, pred_975 + class Binomial(LikelihoodFunction): """ Probit likelihood diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 2f25ae3a..a6434bfb 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -81,9 +81,15 @@ class Log_ex_1(LinkFunction): $$ """ def transf(self,mu): + """ + function: output space -> latent space + """ return np.log(np.exp(mu) - 1) def inv_transf(self,f): + """ + function: latent space -> output space + """ return np.log(np.exp(f)+1) def dinv_transf_df(self,f): From da108cc6d1e0f48f4fa100240f85e6b86bead559 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 25 Jun 2013 18:20:09 +0100 Subject: [PATCH 05/49] predictive mean done --- GPy/likelihoods/likelihood_functions.py | 142 +++++++++++++++++++----- 1 file changed, 114 insertions(+), 28 deletions(-) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index f355bfc5..ad03cead 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -58,9 +58,6 @@ class LikelihoodFunction(object): """ Newton's CG method to find the mode in the _product function (cavity x likelihood factor) """ - #lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME - #upper = 2*np.array([np.log(obs),mu]).max() #Upper limit #FIXME - #return sp.optimize.brent(self._nlog_product_scaled, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma)) @@ -70,11 +67,11 @@ class LikelihoodFunction(object): """ mu = v/tau mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau)) - #sigma2_hat = 1./(tau - self._d2log_mass_dgp2(mu_hat,obs)) sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs)) Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat) return Z_hat,mu_hat,sigma2_hat + """ def _nlog_predictive_mean_scaled(self,gp,mu,sigma): return .5*((gp-mu)/sigma)**2 - np.log(self.link.inv_transf(gp)) @@ -86,6 +83,7 @@ class LikelihoodFunction(object): def _predictive_mean(self,mu,sigma): return sp.optimize.fmin_ncg(self._nlog_predictive_mean_scaled,x0=mu,fprime=self._dnlog_predictive_mean_dgp,fhess=self._d2nlog_predictive_mean_dgp2,args=(mu,sigma)) + """ def _nlog_joint_predictive_scaled(self,x,mu,sigma): #TODO not needed """ @@ -107,34 +105,91 @@ class LikelihoodFunction(object): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction """ - if isinstance(mu,float): + if isinstance(mu,float) or isinstance(mu,int): mu = [mu] var = [var] pred_mean = [] pred_var = [] - pred_025 = [] - pred_975 = [] - weights = np.diff([0]+[stats.norm.cdf(-2.5+i,0,1) for i in range(6)] + [1]) + q1 = [] + q3 = [] + y_range = range(0,250) #TODO fix this range + marginal_proxy = np.zeros(len(y_range)) #TODO fixed 7? 
for m,s in zip(mu,np.sqrt(var)): - sample_points = [m - i*s for i in range(-3,4)] - _mean = 0 - _var = 0 - _025 = 0 - _975 = 0 - for q_i,w_i in zip(sample_points,weights): - _mean += w_i*self.link.inv_transf(q_i) - _var += w_i*self._variance(q_i) - _025 += w_i*self._percentile(.025,q_i) - _975 += w_i*self._percentile(.975,q_i) - pred_mean.append(_mean) - pred_var.append(_var) - pred_025.append(_025) - pred_975.append(_975) + for g in [m + step*s for step in range(-3,4)]: + mp = [] + for y in y_range:#*np.int(self.link.inv_transf(mu))): #TODO fix this range + mp.append(self._product(g,y,m,s)) + marginal_proxy += mp + cumulative = np.cumsum(marginal_proxy)/np.sum(marginal_proxy) + q1.append(cumulative[cumulative<=.025].size) #What if not start in y=0 + q3.append(cumulative[cumulative<=.975].size) + pred_mean = np.array(pred_mean)[:,None] pred_var = np.array(pred_var)[:,None] - pred_025 = np.array(pred_025)[:,None] - pred_975 = np.array(pred_975)[:,None] - return pred_mean, pred_var, pred_025, pred_975 + q1 = np.array(q1)[:,None] + q3 = np.array(q3)[:,None] + pred_mean = np.zeros(q1.shape) #TODO erase me + pred_var = np.zeros(q1.shape) #TODO erase me + return pred_mean, pred_var, q1, q3 + + def _nlog_conditional_mean_scaled(self,gp,mu,sigma): + """ + E(Y_star) = E( E(Y_star|f_star) ) + """ + return ((gp - mu)/sigma)**2 - np.log(self._mean(gp)) + + def _dnlog_conditional_mean_dgp(self,gp,mu,sigma): + return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp) + + def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma): + return 1./sigma**2 - (self._dmean_dgp(gp)/self._mean(gp))**2 + self._d2mean_dgp2(gp)/self._mean(gp) + + def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma): + """ + E( V(Y_star|f_star) ) + """ + return ((gp - mu)/sigma)**2 - np.log(self._variance(gp)) + + def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): + return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) + + def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): + return 1./sigma**2 - (self._dvariance_dgp(gp)/self._variance(gp))**2 + self._d2variance_dgp2(gp)/self._variance(gp) + + def _nlog_var_conditional_mean_scaled(self,gp,mu,sigma,predictive_mean): + """ + V( E(Y_star|f_star) ) + """ + return ((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)-predictive_mean) + + def _dnlog_var_conditional_mean_dgp(self,gp,mu,sigma,predictive_mean): + return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/(self._mean(gp)-predictive_mean) + + def _d2nlog_var_conditional_mean_dgp2(self,gp,mu,sigma,predictive_mean): + return 1./sigma**2 - 2*( (self._dmean_dgp(gp)/(self._mean(gp)-predictive_mean))**2 + self._d2mean_dgp2(gp)/(self._variance(gp)-predictive_mean) ) + + def _predictive_mean(self,gp,mu,sigma): + """ + Laplace approximation to the predictive mean + """ + maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma)) + mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(gp,mu,sigma))*sigma) + return mean + + def _predictive_variance(self,gp,mu,sigma,predictive_mean): + """ + Laplace approximation to the predictive variance + ------------------------------------------------ + E(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + """ + maximum_1 = 
sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma)) + exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum_1,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(gp,mu,sigma))*sigma) + + #(self._mean(mu)-predictive_mean)**2 + maximum_2 = sp.optimize.fmin_ncg(self._nlog_var_conditional_mean_scaled,x0=self._variance(mu),fprime=self._dnlog_var_conditional_mean_dgp,fhess=self._d2nlog_var_conditional_mean_dgp2,args=(mu,sigma,predictive_mean)) + var_exp = np.exp(-self._nlog_var_conditional_mean_scaled(maximum_2,mu,sigma))/(np.sqrt(self._d2nlog_var_conditional_mean_dgp2(gp,mu,sigma))*sigma) + + return exp_var + var_exp class Binomial(LikelihoodFunction): @@ -223,9 +278,6 @@ class Poisson(LikelihoodFunction): """ return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - def _variance(self,gp): - return self.link.inv_transf(gp) - def _percentile(self,x,gp,*args): #TODO *args return stats.poisson.ppf(x,self.link.inv_transf(gp)) @@ -256,3 +308,37 @@ class Poisson(LikelihoodFunction): def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed #return self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + def _variance(self,gp): + return self.link.inv_transf(gp) + + def _dmean_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.link.dinv_transf_df(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + def _variance(self,gp): + return self.link.inv_transf(gp) + + def _dvariance_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2variance_dgp2(self,gp): + return self.link.dinv_transf_df(gp) + + + + From efa782c636ddedbc06ccdc61550de354d5176793 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 27 Jun 2013 01:03:32 +0100 Subject: [PATCH 06/49] massive changes --- GPy/likelihoods/__init__.py | 2 + GPy/likelihoods/binomial_likelihood.py | 85 ++++ GPy/likelihoods/likelihood_functions.py | 501 ++++++++++++------------ GPy/likelihoods/poisson_likelihood.py | 91 +++++ 4 files changed, 428 insertions(+), 251 deletions(-) create mode 100644 GPy/likelihoods/binomial_likelihood.py create mode 100644 GPy/likelihoods/poisson_likelihood.py diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 99e88b6d..e058de79 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -2,3 +2,5 @@ from ep import EP from gaussian import Gaussian # TODO: from Laplace import Laplace import likelihood_functions as functions +import binomial_likelihood +import poisson_likelihood diff --git a/GPy/likelihoods/binomial_likelihood.py b/GPy/likelihoods/binomial_likelihood.py new file mode 100644 index 00000000..420a9607 --- /dev/null +++ b/GPy/likelihoods/binomial_likelihood.py @@ -0,0 +1,85 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +import pylab as pb +from ..util.plot import gpplot +from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import link_functions +from likelihood_functions import LikelihoodFunction + +class Binomial(LikelihoodFunction): + """ + Probit likelihood + Y is expected to take values in {-1,1} 
+    -----
+    $$
+    L(x) = \\Phi (Y_i*f_i)
+    $$
+    """
+    def __init__(self,link=None):
+        self.discrete = True
+        self.support_limits = (0,1)
+        self._analytical = link_functions.Probit
+        if not link:
+            link = self._analytical
+        super(Binomial, self).__init__(link)
+
+    def _mass(self,gp,obs):
+        pass
+
+    def _nlog_mass(self,gp,obs):
+        pass
+
+    def _preprocess_values(self,Y):
+        """
+        Check if the values of the observations correspond to the values
+        assumed by the likelihood function.
+
+        ..Note:: Binary classification algorithm works better with classes {-1,1}
+        """
+        Y_prep = Y.copy()
+        Y1 = Y[Y.flatten()==1].size
+        Y2 = Y[Y.flatten()==0].size
+        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
+        Y_prep[Y.flatten() == 0] = -1
+        return Y_prep
+
+    def _moments_match_analytical(self,data_i,tau_i,v_i):
+        """
+        Moments match of the marginal approximation in EP algorithm
+
+        :param i: number of observation (int)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: mean/variance of the cavity distribution (float)
+        """
+        z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
+        Z_hat = std_norm_cdf(z)
+        phi = std_norm_pdf(z)
+        mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
+        sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
+        return Z_hat, mu_hat, sigma2_hat
+
+    def _predictive_mean_analytical(self,mu,sigma):
+        return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
+
+    def predictive_values(self,mu,var):
+        """
+        Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction
+        :param mu: mean of the latent variable
+        :param var: variance of the latent variable
+        """
+        mu = mu.flatten()
+        var = var.flatten()
+        #mean = stats.norm.cdf(mu/np.sqrt(1+var))
+        mean = self._predictive_mean_analytical(mu,np.sqrt(var))
+        norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
+        norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
+        #p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
+        #p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
+        p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var))
+        p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var))
+        return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index ad03cead..cb0be86a 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -10,6 +10,7 @@ from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 import link_functions

+
 class LikelihoodFunction(object):
     """
     Likelihood class for doing Expectation propagation
@@ -20,50 +21,89 @@ class LikelihoodFunction(object):
     def __init__(self,link):
         if link == self._analytical:
             self.moments_match = self._moments_match_analytical
+            self.predictive_mean = self._predictive_mean_analytical
         else:
             assert isinstance(link,link_functions.LinkFunction)
             self.link = link
             self.moments_match = self._moments_match_numerical
+            self.predictive_mean = self._predictive_mean_numerical

     def _preprocess_values(self,Y):
+        """
+        In case it is needed, this function assesses the output values or makes any pertinent transformation on them.
+
+        :param Y: observed output (Nx1 numpy.ndarray)
+        """
         return Y

     def _product(self,gp,obs,mu,sigma):
         """
-        Product between the cavity distribution and a likelihood factor
+        Product between the cavity distribution and a likelihood factor.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
         return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)

     def _nlog_product_scaled(self,gp,obs,mu,sigma):
         """
-        Negative log-product between the cavity distribution and a likelihood factor
+        Negative log-product between the cavity distribution and a likelihood factor.
+        ..Note:: The constant term in the Gaussian distribution is ignored.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
         """
         return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)

     def _dnlog_product_dgp(self,gp,obs,mu,sigma):
         """
-        Derivative wrt gp of the log-product between the cavity distribution and a likelihood factor
+        Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
         """
         return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)

     def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
         """
-        Second derivative wrt gp of the log-product between the cavity distribution and a likelihood factor
+        Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
         """
         return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)

     def _product_mode(self,obs,mu,sigma):
         """
-        Newton's CG method to find the mode in the _product function (cavity x likelihood factor)
+        Newton's CG method to find the mode in _product (cavity x likelihood factor).
+
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
         """
         return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma))

+    def _moments_match_analytical(self,obs,tau,v):
+        """
+        If available, this function computes the moments analytically.
+        """
+        pass

     def _moments_match_numerical(self,obs,tau,v):
         """
-        Lapace approximation to calculate the moments mumerically.
+        Laplace approximation to calculate the moments. 
+ + :param obs: observed output + :param tau: cavity distribution 1st natural parameter (precision) + :param v: cavity distribution 2nd natural paramenter (mu*precision) """ mu = v/tau mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau)) @@ -71,37 +111,216 @@ class LikelihoodFunction(object): Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat) return Z_hat,mu_hat,sigma2_hat - """ - def _nlog_predictive_mean_scaled(self,gp,mu,sigma): - return .5*((gp-mu)/sigma)**2 - np.log(self.link.inv_transf(gp)) - - def _dnlog_predictive_mean_dgp(self,gp,mu,sigma): - return (gp - mu)/sigma**2 - self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) - - def _d2nlog_predictive_mean_dgp2(self,gp,mu,sigma): #TODO mu is not necessary - return 1/sigma**2 - (self.link.d2inv_transf_df2(gp) - self.link.dinv_transf_df(gp))/self.link.inv_transf(gp) - - def _predictive_mean(self,mu,sigma): - return sp.optimize.fmin_ncg(self._nlog_predictive_mean_scaled,x0=mu,fprime=self._dnlog_predictive_mean_dgp,fhess=self._d2nlog_predictive_mean_dgp2,args=(mu,sigma)) - """ - - def _nlog_joint_predictive_scaled(self,x,mu,sigma): #TODO not needed + def _nlog_conditional_mean_scaled(self,gp,mu,sigma): """ - x = np.array([gp,obs]) + Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star)) + """ + return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp)) + + def _dnlog_conditional_mean_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_conditional_mean_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp) + + def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_conditional_mean_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2 + + def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E(V(Y_star|f_star)) + """ + return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp)) + + def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) + + def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. 
+ + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2 + + def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E( E(Y_star|f_star)**2 ) + """ + return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)) + + def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp) + + def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 ) + + def _predictive_mean_analytical(self,mu,sigma): + """ + If available, this function computes the predictive mean analytically. + """ + pass + + def _predictive_mean_numerical(self,mu,sigma): + """ + Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma)) + mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) + """ + pb.figure() + x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) + f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) + pb.plot(x,f,'b-') + sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma) + f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) + k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) + pb.plot(x,f2*mean,'r-') + pb.vlines(maximum,0,f.max()) + """ + return mean + + def _predictive_mean_sq(self,mu,sigma): + """ + Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma)) + mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) + return mean_squared + + def predictive_variance(self,mu,sigma,predictive_mean=None): + """ + Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + 
:param predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
+        """
+        # E( V(Y_star|f_star) )
+        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma))
+        exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)
+
+        pb.figure()
+        x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
+        f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
+        pb.plot(x,f,'b-')
+        sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
+        f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
+        k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
+        pb.plot(x,f2*exp_var,'r--')
+        pb.vlines(maximum,0,f.max())
+
+        #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
+        exp_exp2 = self._predictive_mean_sq(mu,sigma)
+        if predictive_mean is None:
+            predictive_mean = self.predictive_mean(mu,sigma)
+        var_exp = exp_exp2 - predictive_mean**2
+        return exp_var + var_exp
+
+    def _nlog_joint_predictive_scaled(self,x,mu,sigma):
+        """
+        Negative logarithm of the joint predictive distribution (latent variable and output).
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        """
+        return self._nlog_product_scaled(x[0],x[1],mu,sigma)
+
+    def _gradient_nlog_joint_predictive(self,x,mu,sigma):
+        """
+        Gradient of _nlog_joint_predictive_scaled.
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        ..Note: Only available when the output is continuous
+        """
+        assert not self.discrete, "Gradient not available for discrete outputs."
+        return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))
+
+    def _hessian_nlog_joint_predictive(self,x,mu,sigma):
+        """
+        Hessian of _nlog_joint_predictive_scaled.
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        ..Note: Only available when the output is continuous
+        """
+        assert not self.discrete, "Hessian not available for discrete outputs."
+        cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
+        return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)
+
+    def _joint_predictive_mode(self,mu,sigma):
+        """
+        Negative logarithm of the joint predictive distribution (latent variable and output). 
+ + :param x: tuple (latent variable,output) + :param mu: latent variable's predictive mean + :param sigma: latent variable's predictive standard deviation + """ return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.inv_transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) - def predictive_values(self,mu,var): + def predictive_values(self,mu,var,sample=True,sample_size=5000): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction """ @@ -112,233 +331,13 @@ class LikelihoodFunction(object): pred_var = [] q1 = [] q3 = [] - y_range = range(0,250) #TODO fix this range - marginal_proxy = np.zeros(len(y_range)) #TODO fixed 7? for m,s in zip(mu,np.sqrt(var)): - for g in [m + step*s for step in range(-3,4)]: - mp = [] - for y in y_range:#*np.int(self.link.inv_transf(mu))): #TODO fix this range - mp.append(self._product(g,y,m,s)) - marginal_proxy += mp - cumulative = np.cumsum(marginal_proxy)/np.sum(marginal_proxy) - q1.append(cumulative[cumulative<=.025].size) #What if not start in y=0 - q3.append(cumulative[cumulative<=.975].size) - + pred_mean.append(self.predictive_mean(m,s)) + pred_var.append(self.predictive_variance(m,s,pred_mean[-1])) + q1.append(self.predictive_mean(stats.norm.ppf(.025,m,s**2),s)) + q3.append(self.predictive_mean(stats.norm.ppf(.975,m,s**2),s)) pred_mean = np.array(pred_mean)[:,None] pred_var = np.array(pred_var)[:,None] q1 = np.array(q1)[:,None] q3 = np.array(q3)[:,None] - pred_mean = np.zeros(q1.shape) #TODO erase me - pred_var = np.zeros(q1.shape) #TODO erase me return pred_mean, pred_var, q1, q3 - - def _nlog_conditional_mean_scaled(self,gp,mu,sigma): - """ - E(Y_star) = E( E(Y_star|f_star) ) - """ - return ((gp - mu)/sigma)**2 - np.log(self._mean(gp)) - - def _dnlog_conditional_mean_dgp(self,gp,mu,sigma): - return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp) - - def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma): - return 1./sigma**2 - (self._dmean_dgp(gp)/self._mean(gp))**2 + self._d2mean_dgp2(gp)/self._mean(gp) - - def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma): - """ - E( V(Y_star|f_star) ) - """ - return ((gp - mu)/sigma)**2 - np.log(self._variance(gp)) - - def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): - return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) - - def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): - return 1./sigma**2 - (self._dvariance_dgp(gp)/self._variance(gp))**2 + self._d2variance_dgp2(gp)/self._variance(gp) - - def _nlog_var_conditional_mean_scaled(self,gp,mu,sigma,predictive_mean): - """ - V( E(Y_star|f_star) ) - """ - return ((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)-predictive_mean) - - def _dnlog_var_conditional_mean_dgp(self,gp,mu,sigma,predictive_mean): - return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/(self._mean(gp)-predictive_mean) - - def _d2nlog_var_conditional_mean_dgp2(self,gp,mu,sigma,predictive_mean): - return 1./sigma**2 - 2*( (self._dmean_dgp(gp)/(self._mean(gp)-predictive_mean))**2 + self._d2mean_dgp2(gp)/(self._variance(gp)-predictive_mean) ) - - def _predictive_mean(self,gp,mu,sigma): - """ - Laplace approximation to the predictive mean - """ - maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma)) - mean = 
np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(gp,mu,sigma))*sigma) - return mean - - def _predictive_variance(self,gp,mu,sigma,predictive_mean): - """ - Laplace approximation to the predictive variance - ------------------------------------------------ - E(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) - """ - maximum_1 = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma)) - exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum_1,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(gp,mu,sigma))*sigma) - - #(self._mean(mu)-predictive_mean)**2 - maximum_2 = sp.optimize.fmin_ncg(self._nlog_var_conditional_mean_scaled,x0=self._variance(mu),fprime=self._dnlog_var_conditional_mean_dgp,fhess=self._d2nlog_var_conditional_mean_dgp2,args=(mu,sigma,predictive_mean)) - var_exp = np.exp(-self._nlog_var_conditional_mean_scaled(maximum_2,mu,sigma))/(np.sqrt(self._d2nlog_var_conditional_mean_dgp2(gp,mu,sigma))*sigma) - - return exp_var + var_exp - - -class Binomial(LikelihoodFunction): - """ - Probit likelihood - Y is expected to take values in {-1,1} - ----- - $$ - L(x) = \\Phi (Y_i*f_i) - $$ - """ - def __init__(self,link=None): - self._analytical = link_functions.Probit - if not link: - link = self._analytical - super(Binomial, self).__init__(link) - - def _mass(self,gp,obs): - pass - - def _nlog_mass(self,gp,obs): - pass - - def _preprocess_values(self,Y): - """ - Check if the values of the observations correspond to the values - assumed by the likelihood function. - - ..Note:: Binary classification algorithm works better with classes {-1,1} - """ - Y_prep = Y.copy() - Y1 = Y[Y.flatten()==1].size - Y2 = Y[Y.flatten()==0].size - assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.' - Y_prep[Y.flatten() == 0] = -1 - return Y_prep - - def _moments_match_analytical(self,data_i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = std_norm_cdf(z) - phi = std_norm_pdf(z) - mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) - return Z_hat, mu_hat, sigma2_hat - - def predictive_values(self,mu,var): - """ - Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction - :param mu: mean of the latent variable - :param var: variance of the latent variable - """ - mu = mu.flatten() - var = var.flatten() - mean = stats.norm.cdf(mu/np.sqrt(1+var)) - norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)] - norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)] - p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var)) - p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) - return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var - -class Poisson(LikelihoodFunction): - """ - Poisson likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! 
- $$ - """ - def __init__(self,link=None): - self._analytical = None - if not link: - link = link_functions.Log() - super(Poisson, self).__init__(link) - - def _mass(self,gp,obs): - """ - Mass (or density) function - """ - return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - - def _percentile(self,x,gp,*args): #TODO *args - return stats.poisson.ppf(x,self.link.inv_transf(gp)) - - def _nlog_mass(self,gp,obs): - """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted - """ - return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) - - def _dnlog_mass_dgp(self,gp,obs): - #return self.link.dinv_transf_df(gp) * (obs/self.link.inv_transf(gp) - 1) - return self.link.dinv_transf_df(gp) * (1. - obs/self.link.inv_transf(gp)) - - def _d2nlog_mass_dgp2(self,gp,obs): - d2_df = self.link.d2inv_transf_df2(gp) - inv_transf = self.link.inv_transf(gp) - #return obs * ( d2_df/inv_transf - (self.link.dinv_transf_df(gp)/inv_transf)**2 ) - d2_df - return obs * ((self.link.dinv_transf_df(gp)/inv_transf)**2 - d2_df/inv_transf) + d2_df - - def _dnlog_mass_dobs(self,obs,gp): #TODO not needed - #return np.log(self.link.inv_transf(gp)) - special.psi(obs+1) - return special.psi(obs+1) - np.log(self.link.inv_transf(gp)) - - def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed - #return -special.polygamma(1,obs) - return special.polygamma(1,obs) - - def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed - #return self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) - return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) - - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.link.inv_transf(gp) - - def _variance(self,gp): - return self.link.inv_transf(gp) - - def _dmean_dgp(self,gp): - return self.link.dinv_transf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.link.dinv_transf_df(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - return self.link.inv_transf(gp) - - def _variance(self,gp): - return self.link.inv_transf(gp) - - def _dvariance_dgp(self,gp): - return self.link.dinv_transf_df(gp) - - def _d2variance_dgp2(self,gp): - return self.link.dinv_transf_df(gp) - - - - diff --git a/GPy/likelihoods/poisson_likelihood.py b/GPy/likelihoods/poisson_likelihood.py new file mode 100644 index 00000000..9466d4de --- /dev/null +++ b/GPy/likelihoods/poisson_likelihood.py @@ -0,0 +1,91 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +import pylab as pb +from ..util.plot import gpplot +from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import link_functions +from likelihood_functions import LikelihoodFunction + +class Poisson(LikelihoodFunction): + """ + Poisson likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! 
+ $$ + """ + def __init__(self,link=None): + self.discrete = True + self.support_limits = (0,np.inf) + self._analytical = None + if not link: + link = link_functions.Log() + super(Poisson, self).__init__(link) + + def _mass(self,gp,obs): + """ + Mass (or density) function + """ + return stats.poisson.pmf(obs,self.link.inv_transf(gp)) + + def _percentile(self,x,gp,*args): #TODO *args + return stats.poisson.ppf(x,self.link.inv_transf(gp)) + + def _nlog_mass(self,gp,obs): + """ + Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted + """ + return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) + + def _dnlog_mass_dgp(self,gp,obs): + return self.link.dinv_transf_df(gp) * (1. - obs/self.link.inv_transf(gp)) + + def _d2nlog_mass_dgp2(self,gp,obs): + d2_df = self.link.d2inv_transf_df2(gp) + inv_transf = self.link.inv_transf(gp) + return obs * ((self.link.dinv_transf_df(gp)/inv_transf)**2 - d2_df/inv_transf) + d2_df + + def _dnlog_mass_dobs(self,obs,gp): #TODO not needed + return special.psi(obs+1) - np.log(self.link.inv_transf(gp)) + + def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed + return special.polygamma(1,obs) + + def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed + return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + def _variance(self,gp): + return self.link.inv_transf(gp) + + def _dmean_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + def _variance(self,gp): + return self.link.inv_transf(gp) + + def _dvariance_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2variance_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp) From 7361d311c143e2057299297cda33fd0c18f488e0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sun, 30 Jun 2013 23:36:37 +0100 Subject: [PATCH 07/49] further corrections --- GPy/likelihoods/binomial_likelihood.py | 62 ++++++++++++++++++++----- GPy/likelihoods/likelihood_functions.py | 11 +++-- GPy/likelihoods/poisson_likelihood.py | 18 ++++--- GPy/models/gp_classification.py | 3 +- 4 files changed, 68 insertions(+), 26 deletions(-) diff --git a/GPy/likelihoods/binomial_likelihood.py b/GPy/likelihoods/binomial_likelihood.py index 420a9607..15b8067e 100644 --- a/GPy/likelihoods/binomial_likelihood.py +++ b/GPy/likelihoods/binomial_likelihood.py @@ -23,17 +23,15 @@ class Binomial(LikelihoodFunction): def __init__(self,link=None): self.discrete = True self.support_limits = (0,1) - self._analytical = link_functions.Probit + if not link: - link = self._analytical + link = link_functions.Probit + if isinstance(link,link_functions.Probit): + self.analytical_moments = True + else: + self.analytical_moments = False super(Binomial, self).__init__(link) - def _mass(self,gp,obs): - pass - - def _nlog_mass(self,gp,obs): - pass - def _preprocess_values(self,Y): """ Check if the values of the observations correspond to the values @@ -66,12 +64,51 @@ class Binomial(LikelihoodFunction): def _predictive_mean_analytical(self,mu,sigma): return stats.norm.cdf(mu/np.sqrt(1+sigma**2)) - def predictive_values(self,mu,var): + def _mass(self,gp,obs): + #NOTE obs must be in {0,1} + p = self.link.inv_transf(gp) + return p**obs * (1.-p)**(1.-obs) + + def _nlog_mass(self,gp,obs): + p = 
self.link.inv_transf(gp) + return obs*np.log(p) + (1.-obs)*np.log(1-p) + + def _dnlog_mass_dgp(self,gp,obs): + p = self.link.inv_transf(gp) + dp = self.link.dinv_transf_df(gp) + return obs/p * dp - (1.-obs)/(1.-p) * dp + + def _d2nlog_mass_dgp2(self,gp,obs): + p = self.link.inv_transf(gp) + return (obs/p + (1.-obs)/(1.-p))*self.lind.d2inv_transf_df(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.link.dinv_transf_df(gp) + + def _mean(self,gp): """ - Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction - :param mu: mean of the latent variable - :param var: variance of the latent variable + Mass (or density) function """ + return self.link.inv_transf(gp) + + def _dmean_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + p = self.link.inv_transf(gp) + return p*(1-p) + + def _dvariance_dgp(self,gp): + return self.link.dinv_transf_df(gp)*(1. - 2.*self.link.inv_transf(gp)) + + def _d2variance_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp)*(1. - 2.*self.link.inv_transf(gp)) - 2*self.link.dinv_transf_df(gp)**2 + + """ + def predictive_values(self,mu,var): #TODO remove mu = mu.flatten() var = var.flatten() #mean = stats.norm.cdf(mu/np.sqrt(1+var)) @@ -83,3 +120,4 @@ class Binomial(LikelihoodFunction): p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var)) p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var)) return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var + """ diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index cb0be86a..9b132d72 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -19,12 +19,13 @@ class LikelihoodFunction(object): ..Note:: Y values allowed depend on the LikelihoodFunction used """ def __init__(self,link): - if link == self._analytical: + #assert isinstance(link,link_functions.LinkFunction), "link is not a valid LinkFunction."#FIXME + self.link = link + + if self.analytical_moments: self.moments_match = self._moments_match_analytical self.predictive_mean = self._predictive_mean_analytical else: - assert isinstance(link,link_functions.LinkFunction) - self.link = link self.moments_match = self._moments_match_numerical self.predictive_mean = self._predictive_mean_numerical @@ -258,6 +259,7 @@ class LikelihoodFunction(object): maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma)) exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma) + """ pb.figure() x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) @@ -267,6 +269,7 @@ class LikelihoodFunction(object): k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) pb.plot(x,f2*exp_var,'r--') pb.vlines(maximum,0,f.max()) + """ #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 ) exp_exp2 = self._predictive_mean_sq(mu,sigma) @@ -323,6 +326,8 @@ class LikelihoodFunction(object): def predictive_values(self,mu,var,sample=True,sample_size=5000): """ Compute mean, variance 
and conficence interval (percentiles 5 and 95) of the prediction + :param mu: mean of the latent variable + :param var: variance of the latent variable """ if isinstance(mu,float) or isinstance(mu,int): mu = [mu] diff --git a/GPy/likelihoods/poisson_likelihood.py b/GPy/likelihoods/poisson_likelihood.py index 9466d4de..fba89331 100644 --- a/GPy/likelihoods/poisson_likelihood.py +++ b/GPy/likelihoods/poisson_likelihood.py @@ -23,9 +23,8 @@ class Poisson(LikelihoodFunction): def __init__(self,link=None): self.discrete = True self.support_limits = (0,np.inf) - self._analytical = None - if not link: - link = link_functions.Log() + + self.analytical_moments = False super(Poisson, self).__init__(link) def _mass(self,gp,obs): @@ -34,15 +33,14 @@ class Poisson(LikelihoodFunction): """ return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - def _percentile(self,x,gp,*args): #TODO *args - return stats.poisson.ppf(x,self.link.inv_transf(gp)) - def _nlog_mass(self,gp,obs): """ Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted """ return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) + #def _preprocess_values(self,Y): #TODO + def _dnlog_mass_dgp(self,gp,obs): return self.link.dinv_transf_df(gp) * (1. - obs/self.link.inv_transf(gp)) @@ -66,8 +64,8 @@ class Poisson(LikelihoodFunction): """ return self.link.inv_transf(gp) - def _variance(self,gp): - return self.link.inv_transf(gp) + #def _variance(self,gp): + # return self.link.inv_transf(gp) def _dmean_dgp(self,gp): return self.link.dinv_transf_df(gp) @@ -81,8 +79,8 @@ class Poisson(LikelihoodFunction): """ return self.link.inv_transf(gp) - def _variance(self,gp): - return self.link.inv_transf(gp) + #def _variance(self,gp): + # return self.link.inv_transf(gp) def _dvariance_dgp(self,gp): return self.link.dinv_transf_df(gp) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index c6012988..2e0d9c4a 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -31,7 +31,8 @@ class GPClassification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - distribution = likelihoods.likelihood_functions.Binomial() + #distribution = GPy.likelihoods.binomial_likelihood.Binomial(link=link) + distribution = likelihoods.binomial_likelihood.Binomial() likelihood = likelihoods.EP(Y, distribution) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): From 405444246259850b2fc3ca39a391d4fd7fa56ce5 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 1 Jul 2013 20:31:16 +0100 Subject: [PATCH 08/49] more files --- GPy/examples/regression.py | 6 +- GPy/likelihoods/__init__.py | 5 +- GPy/likelihoods/binomial_likelihood.py | 123 --------- GPy/likelihoods/constructors.py | 42 +++ GPy/likelihoods/likelihood_functions.py | 348 ------------------------ GPy/likelihoods/link_functions.py | 100 ------- GPy/likelihoods/poisson_likelihood.py | 89 ------ GPy/models/gp_classification.py | 2 +- 8 files changed, 48 insertions(+), 667 deletions(-) delete mode 100644 GPy/likelihoods/binomial_likelihood.py create mode 100644 GPy/likelihoods/constructors.py delete mode 100644 GPy/likelihoods/likelihood_functions.py delete mode 100644 GPy/likelihoods/link_functions.py delete mode 100644 GPy/likelihoods/poisson_likelihood.py diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 21b435e7..452167ce 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -83,7 +83,7 @@ def 
coregionalisation_toy2(optim_iters=100): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) - k2 = GPy.kern.Coregionalise(2,1) + k2 = GPy.kern.coregionalise(2,1) k = k1.prod(k2,tensor=True) m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) @@ -114,7 +114,7 @@ def coregionalisation_toy(optim_iters=100): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) - k2 = GPy.kern.Coregionalise(2,2) + k2 = GPy.kern.coregionalise(2,2) k = k1.prod(k2,tensor=True) m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) @@ -149,7 +149,7 @@ def coregionalisation_sparse(optim_iters=100): Z = np.hstack((np.random.rand(num_inducing,1)*8,np.random.randint(0,2,num_inducing)[:,None])) k1 = GPy.kern.rbf(1) - k2 = GPy.kern.Coregionalise(2,2) + k2 = GPy.kern.coregionalise(2,2) k = k1.prod(k2,tensor=True) + GPy.kern.white(2,0.001) m = GPy.models.SparseGPRegression(X,Y,kernel=k,Z=Z) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index e058de79..4932bd40 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,6 +1,5 @@ from ep import EP from gaussian import Gaussian # TODO: from Laplace import Laplace -import likelihood_functions as functions -import binomial_likelihood -import poisson_likelihood +from constructors import * + diff --git a/GPy/likelihoods/binomial_likelihood.py b/GPy/likelihoods/binomial_likelihood.py deleted file mode 100644 index 15b8067e..00000000 --- a/GPy/likelihoods/binomial_likelihood.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats,special -import scipy as sp -import pylab as pb -from ..util.plot import gpplot -from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions -from likelihood_functions import LikelihoodFunction - -class Binomial(LikelihoodFunction): - """ - Probit likelihood - Y is expected to take values in {-1,1} - ----- - $$ - L(x) = \\Phi (Y_i*f_i) - $$ - """ - def __init__(self,link=None): - self.discrete = True - self.support_limits = (0,1) - - if not link: - link = link_functions.Probit - if isinstance(link,link_functions.Probit): - self.analytical_moments = True - else: - self.analytical_moments = False - super(Binomial, self).__init__(link) - - def _preprocess_values(self,Y): - """ - Check if the values of the observations correspond to the values - assumed by the likelihood function. - - ..Note:: Binary classification algorithm works better with classes {-1,1} - """ - Y_prep = Y.copy() - Y1 = Y[Y.flatten()==1].size - Y2 = Y[Y.flatten()==0].size - assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.' 
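The {0,1} check above matters because the EP update formulas in this class assume the {-1,1} coding, where the probit factor is Phi(y_i*f_i). A minimal standalone sketch of the relabelling that follows (the helper name is hypothetical):

    import numpy as np

    def to_pm1(Y):
        # Accept only {0,1} labels, then recode 0 as -1 so that
        # p(y|f) = Phi(y*f) can be used directly in the EP updates.
        Y = np.asarray(Y, dtype=float).copy()
        assert np.all((Y == 0) | (Y == 1)), 'outputs must be in {0,1}'
        Y[Y == 0] = -1.
        return Y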
- Y_prep[Y.flatten() == 0] = -1 - return Y_prep - - def _moments_match_analytical(self,data_i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = std_norm_cdf(z) - phi = std_norm_pdf(z) - mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) - return Z_hat, mu_hat, sigma2_hat - - def _predictive_mean_analytical(self,mu,sigma): - return stats.norm.cdf(mu/np.sqrt(1+sigma**2)) - - def _mass(self,gp,obs): - #NOTE obs must be in {0,1} - p = self.link.inv_transf(gp) - return p**obs * (1.-p)**(1.-obs) - - def _nlog_mass(self,gp,obs): - p = self.link.inv_transf(gp) - return obs*np.log(p) + (1.-obs)*np.log(1-p) - - def _dnlog_mass_dgp(self,gp,obs): - p = self.link.inv_transf(gp) - dp = self.link.dinv_transf_df(gp) - return obs/p * dp - (1.-obs)/(1.-p) * dp - - def _d2nlog_mass_dgp2(self,gp,obs): - p = self.link.inv_transf(gp) - return (obs/p + (1.-obs)/(1.-p))*self.lind.d2inv_transf_df(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.link.dinv_transf_df(gp) - - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.link.inv_transf(gp) - - def _dmean_dgp(self,gp): - return self.link.dinv_transf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - p = self.link.inv_transf(gp) - return p*(1-p) - - def _dvariance_dgp(self,gp): - return self.link.dinv_transf_df(gp)*(1. - 2.*self.link.inv_transf(gp)) - - def _d2variance_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp)*(1. - 2.*self.link.inv_transf(gp)) - 2*self.link.dinv_transf_df(gp)**2 - - """ - def predictive_values(self,mu,var): #TODO remove - mu = mu.flatten() - var = var.flatten() - #mean = stats.norm.cdf(mu/np.sqrt(1+var)) - mean = self._predictive_mean_analytical(mu,np.sqrt(var)) - norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)] - norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)] - #p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var)) - #p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) - p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var)) - p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var)) - return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var - """ diff --git a/GPy/likelihoods/constructors.py b/GPy/likelihoods/constructors.py new file mode 100644 index 00000000..0b995894 --- /dev/null +++ b/GPy/likelihoods/constructors.py @@ -0,0 +1,42 @@ +# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from likelihood_functions import LikelihoodFunction +import noise_models +import link_functions + +def binomial(link=None): + """ + Construct a binomial likelihood + + :param link: a GPy link function + """ + #self.discrete = True + #self.support_limits = (0,1) + + if link is None: + link = link_functions.Probit() + else: + assert isinstance(link,link_functions.LinkFunction), 'link function is not valid.' 
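With the link validated as above, the intended call pattern for these constructors is simply (a sketch; it assumes the constructor names are re-exported through GPy.likelihoods by the __init__.py change above):

    import GPy

    binomial_noise = GPy.likelihoods.binomial()   # default Probit link, analytical moments
    poisson_noise = GPy.likelihoods.poisson()     # default Log_ex_1 link, numerical moments
    # the noise model is then wrapped by EP, as in gp_classification.py:
    # likelihood = GPy.likelihoods.EP(Y, binomial_noise)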
+ + if isinstance(link,link_functions.Probit): + analytical_moments = True + else: + analytical_moments = False + return noise_models.binomial_likelihood.Binomial(link,analytical_moments) + + +def poisson(link=None): + """ + Construct a Poisson likelihood + + :param link: a GPy link function + """ + if link is None: + link = link_functions.Log_ex_1() + else: + assert isinstance(link,link_functions.LinkFunction), 'link function is not valid.' + #assert isinstance(link,link_functions.LinkFunction), 'link function is not valid.' + analytical_moments = False + return noise_models.poisson_likelihood.Poisson(link,analytical_moments) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py deleted file mode 100644 index 9b132d72..00000000 --- a/GPy/likelihoods/likelihood_functions.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats,special -import scipy as sp -import pylab as pb -from ..util.plot import gpplot -from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions - - -class LikelihoodFunction(object): - """ - Likelihood class for doing Expectation propagation - - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the LikelihoodFunction used - """ - def __init__(self,link): - #assert isinstance(link,link_functions.LinkFunction), "link is not a valid LinkFunction."#FIXME - self.link = link - - if self.analytical_moments: - self.moments_match = self._moments_match_analytical - self.predictive_mean = self._predictive_mean_analytical - else: - self.moments_match = self._moments_match_numerical - self.predictive_mean = self._predictive_mean_numerical - - def _preprocess_values(self,Y): - """ - In case it is needed, this function assess the output values or makes any pertinent transformation on them. - - :param Y: observed output (Nx1 numpy.darray) - """ - return Y - - def _product(self,gp,obs,mu,sigma): - """ - Product between the cavity distribution and a likelihood factor. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs) - - def _nlog_product_scaled(self,gp,obs,mu,sigma): - """ - Negative log-product between the cavity distribution and a likelihood factor. - ..Note:: The constant term in the Gaussian distribution is ignored. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs) - - def _dnlog_product_dgp(self,gp,obs,mu,sigma): - """ - Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs) - - def _d2nlog_product_dgp2(self,gp,obs,mu,sigma): - """ - Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor. 
- - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs) - - def _product_mode(self,obs,mu,sigma): - """ - Newton's CG method to find the mode in _product (cavity x likelihood factor). - - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma)) - - def _moments_match_analytical(self,obs,tau,v): - """ - If available, this function computes the moments analytically. - """ - pass - - def _moments_match_numerical(self,obs,tau,v): - """ - Lapace approximation to calculate the moments. - - :param obs: observed output - :param tau: cavity distribution 1st natural parameter (precision) - :param v: cavity distribution 2nd natural paramenter (mu*precision) - """ - mu = v/tau - mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau)) - sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs)) - Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat) - return Z_hat,mu_hat,sigma2_hat - - def _nlog_conditional_mean_scaled(self,gp,mu,sigma): - """ - Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - ..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star)) - """ - return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp)) - - def _dnlog_conditional_mean_dgp(self,gp,mu,sigma): - """ - Derivative of _nlog_conditional_mean_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp) - - def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma): - """ - Second derivative of _nlog_conditional_mean_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2 - - def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma): - """ - Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - ..Note:: This function helps computing E(V(Y_star|f_star)) - """ - return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp)) - - def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): - """ - Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) - - def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): - """ - Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. 
- - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2 - - def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma): - """ - Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - ..Note:: This function helps computing E( E(Y_star|f_star)**2 ) - """ - return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)) - - def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma): - """ - Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp) - - def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma): - """ - Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 ) - - def _predictive_mean_analytical(self,mu,sigma): - """ - If available, this function computes the predictive mean analytically. - """ - pass - - def _predictive_mean_numerical(self,mu,sigma): - """ - Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma)) - mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) - """ - pb.figure() - x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) - f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) - pb.plot(x,f,'b-') - sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma) - f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) - k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) - pb.plot(x,f2*mean,'r-') - pb.vlines(maximum,0,f.max()) - """ - return mean - - def _predictive_mean_sq(self,mu,sigma): - """ - Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - """ - maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma)) - mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) - return mean_squared - - def predictive_variance(self,mu,sigma,predictive_mean=None): - """ - Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - 
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called. - """ - # E( V(Y_star|f_star) ) - maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma)) - exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma) - - """ - pb.figure() - x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) - f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) - pb.plot(x,f,'b-') - sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma) - f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) - k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) - pb.plot(x,f2*exp_var,'r--') - pb.vlines(maximum,0,f.max()) - """ - - #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 ) - exp_exp2 = self._predictive_mean_sq(mu,sigma) - if predictive_mean is None: - predictive_mean = self.predictive_mean(mu,sigma) - var_exp = exp_exp2 - predictive_mean**2 - return exp_var + var_exp - - def _nlog_joint_predictive_scaled(self,x,mu,sigma): - """ - Negative logarithm of the joint predictive distribution (latent variable and output). - - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - """ - return self._nlog_product_scaled(x[0],x[1],mu,sigma) - - def _gradient_nlog_joint_predictive(self,x,mu,sigma): - """ - Gradient of _nlog_joint_predictive_scaled. - - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - ..Note: Only avilable when the output is continuous - """ - assert not self.discrete, "Gradient not available for discrete outputs." - return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0]))) - - def _hessian_nlog_joint_predictive(self,x,mu,sigma): - """ - Hessian of _nlog_joint_predictive_scaled. - - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - ..Note: Only avilable when the output is continuous - """ - assert not self.discrete, "Hessian not available for discrete outputs." - cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1]) - return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2) - - def _joint_predictive_mode(self,mu,sigma): - """ - Negative logarithm of the joint predictive distribution (latent variable and output). 
- - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - """ - return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.inv_transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) - - def predictive_values(self,mu,var,sample=True,sample_size=5000): - """ - Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction - :param mu: mean of the latent variable - :param var: variance of the latent variable - """ - if isinstance(mu,float) or isinstance(mu,int): - mu = [mu] - var = [var] - pred_mean = [] - pred_var = [] - q1 = [] - q3 = [] - for m,s in zip(mu,np.sqrt(var)): - pred_mean.append(self.predictive_mean(m,s)) - pred_var.append(self.predictive_variance(m,s,pred_mean[-1])) - q1.append(self.predictive_mean(stats.norm.ppf(.025,m,s**2),s)) - q3.append(self.predictive_mean(stats.norm.ppf(.975,m,s**2),s)) - pred_mean = np.array(pred_mean)[:,None] - pred_var = np.array(pred_var)[:,None] - q1 = np.array(q1)[:,None] - q3 = np.array(q3)[:,None] - return pred_mean, pred_var, q1, q3 diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py deleted file mode 100644 index a6434bfb..00000000 --- a/GPy/likelihoods/link_functions.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats -import scipy as sp -import pylab as pb -from ..util.plot import gpplot -from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf - -class LinkFunction(object): - """ - Link function class for doing non-Gaussian likelihoods approximation - - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood_function used - """ - def __init__(self): - pass - -class Identity(LinkFunction): - """ - $$ - g(f) = f - $$ - """ - def transf(self,mu): - return mu - - def inv_transf(self,f): - return f - - def dinv_transf_df(self,f): - return 1. 
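Every link in this file has to keep inv_transf, dinv_transf_df and d2inv_transf_df2 mutually consistent. A quick finite-difference check, sketched here for the softplus form used by Log_ex_1 further down:

    import numpy as np

    def softplus(f):    # inv_transf of Log_ex_1: log(exp(f) + 1)
        return np.log(np.exp(f) + 1.)

    def dsoftplus(f):   # dinv_transf_df: the logistic sigmoid
        return np.exp(f) / (1. + np.exp(f))

    def d2softplus(f):  # d2inv_transf_df2: sigmoid * (1 - sigmoid)
        s = np.exp(f) / (1. + np.exp(f))
        return s * (1. - s)

    f = np.linspace(-3., 3., 7)
    eps = 1e-5
    assert np.allclose((softplus(f + eps) - softplus(f - eps)) / (2 * eps), dsoftplus(f), atol=1e-6)
    assert np.allclose((dsoftplus(f + eps) - dsoftplus(f - eps)) / (2 * eps), d2softplus(f), atol=1e-6)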
- - def d2inv_transf_df2(self,f): - return 0 - - -class Probit(LinkFunction): - """ - $$ - g(f) = \\Phi^{-1} (mu) - $$ - """ - def transf(self,mu): - return inv_std_norm_cdf(mu) - - def inv_transf(self,f): - return std_norm_cdf(f) - - def dinv_transf_df(self,f): - return std_norm_pdf(f) - - def d2inv_transf_df2(self,f): - return -f * std_norm_pdf(f) - -class Log(LinkFunction): - """ - $$ - g(f) = \log(\mu) - $$ - """ - def transf(self,mu): - return np.log(mu) - - def inv_transf(self,f): - return np.exp(f) - - def dinv_transf_df(self,f): - return np.exp(f) - - def d2inv_transf_df2(self,f): - return np.exp(f) - -class Log_ex_1(LinkFunction): - """ - $$ - g(f) = \log(\exp(\mu) - 1) - $$ - """ - def transf(self,mu): - """ - function: output space -> latent space - """ - return np.log(np.exp(mu) - 1) - - def inv_transf(self,f): - """ - function: latent space -> output space - """ - return np.log(np.exp(f)+1) - - def dinv_transf_df(self,f): - return np.exp(f)/(1.+np.exp(f)) - - def d2inv_transf_df2(self,f): - aux = np.exp(f)/(1.+np.exp(f)) - return aux*(1.-aux) diff --git a/GPy/likelihoods/poisson_likelihood.py b/GPy/likelihoods/poisson_likelihood.py deleted file mode 100644 index fba89331..00000000 --- a/GPy/likelihoods/poisson_likelihood.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats,special -import scipy as sp -import pylab as pb -from ..util.plot import gpplot -from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions -from likelihood_functions import LikelihoodFunction - -class Poisson(LikelihoodFunction): - """ - Poisson likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! - $$ - """ - def __init__(self,link=None): - self.discrete = True - self.support_limits = (0,np.inf) - - self.analytical_moments = False - super(Poisson, self).__init__(link) - - def _mass(self,gp,obs): - """ - Mass (or density) function - """ - return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - - def _nlog_mass(self,gp,obs): - """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted - """ - return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) - - #def _preprocess_values(self,Y): #TODO - - def _dnlog_mass_dgp(self,gp,obs): - return self.link.dinv_transf_df(gp) * (1. 
- obs/self.link.inv_transf(gp)) - - def _d2nlog_mass_dgp2(self,gp,obs): - d2_df = self.link.d2inv_transf_df2(gp) - inv_transf = self.link.inv_transf(gp) - return obs * ((self.link.dinv_transf_df(gp)/inv_transf)**2 - d2_df/inv_transf) + d2_df - - def _dnlog_mass_dobs(self,obs,gp): #TODO not needed - return special.psi(obs+1) - np.log(self.link.inv_transf(gp)) - - def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed - return special.polygamma(1,obs) - - def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed - return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) - - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.link.inv_transf(gp) - - #def _variance(self,gp): - # return self.link.inv_transf(gp) - - def _dmean_dgp(self,gp): - return self.link.dinv_transf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - return self.link.inv_transf(gp) - - #def _variance(self,gp): - # return self.link.inv_transf(gp) - - def _dvariance_dgp(self,gp): - return self.link.dinv_transf_df(gp) - - def _d2variance_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 2e0d9c4a..d1cf2e00 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -32,7 +32,7 @@ class GPClassification(GP): if likelihood is None: #distribution = GPy.likelihoods.binomial_likelihood.Binomial(link=link) - distribution = likelihoods.binomial_likelihood.Binomial() + distribution = likelihoods.binomial() likelihood = likelihoods.EP(Y, distribution) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): From b856c60d30c7fe33dfd4935b27b4bf42356558d5 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 2 Jul 2013 18:18:11 +0100 Subject: [PATCH 09/49] massive changes --- GPy/likelihoods/ep.py | 125 ++++++- GPy/likelihoods/noise_models/__init__.py | 4 + .../noise_models/binomial_likelihood.py | 111 ++++++ .../noise_models/likelihood_functions.py | 349 ++++++++++++++++++ .../noise_models/link_functions.py | 99 +++++ .../noise_models/poisson_likelihood.py | 92 +++++ 6 files changed, 770 insertions(+), 10 deletions(-) create mode 100644 GPy/likelihoods/noise_models/__init__.py create mode 100644 GPy/likelihoods/noise_models/binomial_likelihood.py create mode 100644 GPy/likelihoods/noise_models/likelihood_functions.py create mode 100644 GPy/likelihoods/noise_models/link_functions.py create mode 100644 GPy/likelihoods/noise_models/poisson_likelihood.py diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index 94f760e9..7e90755e 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -24,9 +24,18 @@ class EP(likelihood): #Initial values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde) + #TODO restore self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) + #_gp = self.LikelihoodFunction.link.transf(self.data) + #_mean = self.LikelihoodFunction._mean(_gp) + #_variance = self.LikelihoodFunction._variance(_gp) + #self.tau_tilde = 1./_variance + #self.tau_tilde[_variance== 0] = 1. + #self.v_tilde = _mean*self.tau_tilde + + #initial values for the GP variables self.Y = np.zeros((self.N,1)) self.covariance_matrix = np.eye(self.N) @@ -38,16 +47,17 @@ class EP(likelihood): self.trYYT = 0. 
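For reference, the bookkeeping that fit_full performs on these arrays, written out for a single site (a sketch of the loop below; moments_match stands for the noise model's moment-matching routine, eta is the power-EP factor and delta the damping factor):

    import numpy as np

    def ep_site_update(i, Sigma, mu, tau_tilde, v_tilde, y, moments_match, eta=1., delta=1.):
        # cavity: remove (a fraction eta of) site i from the posterior marginal
        tau_cav = 1. / Sigma[i, i] - eta * tau_tilde[i]
        v_cav = mu[i] / Sigma[i, i] - eta * v_tilde[i]
        # tilted moments: cavity Gaussian times the exact likelihood factor
        Z_hat, mu_hat, sigma2_hat = moments_match(y[i], tau_cav, v_cav)
        # damped site update in natural parameters
        dtau = delta / eta * (1. / sigma2_hat - 1. / Sigma[i, i])
        dv = delta / eta * (mu_hat / sigma2_hat - mu[i] / Sigma[i, i])
        tau_tilde[i] += dtau
        v_tilde[i] += dv
        # rank-one posterior update, as DSYR does below:
        # Sigma <- Sigma - dtau / (1 + dtau * Sigma_ii) * s s^T
        s = Sigma[:, i].copy()
        Sigma -= (dtau / (1. + dtau * Sigma[i, i])) * np.outer(s, s)
        mu[:] = np.dot(Sigma, v_tilde)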
def restart(self): + #FIXME self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) - self.Y = np.zeros((self.N,1)) - self.covariance_matrix = np.eye(self.N) - self.precision = np.ones(self.N)[:,None] - self.Z = 0 - self.YYT = None - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = 0. + #self.Y = np.zeros((self.N,1)) + #self.covariance_matrix = np.eye(self.N) + #self.precision = np.ones(self.N)[:,None] + #self.Z = 0 + #self.YYT = None + #self.V = self.precision * self.Y + #self.VVT_factor = self.V + #self.trYYT = 0. def predictive_values(self,mu,var,full_cov): if full_cov: @@ -78,6 +88,8 @@ class EP(likelihood): self.VVT_factor = self.V self.trYYT = np.trace(self.YYT) + #a = kjkjkjkj + def fit_full(self,K): """ The expectation-propagation algorithm. @@ -117,15 +129,103 @@ class EP(likelihood): self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] #Marginal moments self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + + + #DELETE + """ + import pylab as pb + from scipy import stats + import scipy as sp + import link_functions + from constructors import * + + link = link_functions.Log_ex_1() + distribution = poisson(link=link) + gp = np.linspace(-3,50,100) + #distribution = binomial() + #gp = np.linspace(-3,3,100) + + y = self._transf_data[i] + tau_ = self.tau_[i] + v_ = self.v_[i] + sigma2_ = np.sqrt(1./tau_) + mu_ = v_/tau_ + + gaussian = stats.norm.pdf(gp,loc=mu_,scale=np.sqrt(sigma2_)) + non_gaussian = np.array([distribution._mass(gp_i,y) for gp_i in gp]) + prod = np.array([distribution._product(gp_i,y,mu_,np.sqrt(sigma2_)) for gp_i in gp]) + my_Z_hat,my_mu_hat,my_sigma2_hat = distribution.moments_match(y,tau_,v_) + proxy = stats.norm.pdf(gp,loc=my_mu_hat,scale=np.sqrt(my_sigma2_hat)) + + + new_sigma2_tilde = 1./self.tau_tilde[i] + new_mu_tilde = self.v_tilde[i]/self.tau_tilde[i] + new_Z_tilde = self.Z_hat[i]*np.sqrt(2*np.pi)*np.sqrt(sigma2_+new_sigma2_tilde)*np.exp(.5*(mu_-new_mu_tilde)**2/(sigma2_+new_sigma2_tilde)) + bad_gaussian = stats.norm.pdf(gp,self.v_tilde[i]/self.tau_tilde[i],np.sqrt(1./self.tau_tilde[i])) + new_gaussian = stats.norm.pdf(gp,new_mu_tilde,np.sqrt(new_sigma2_tilde))*new_Z_tilde + #new_gaussian = stats.norm.pdf(gp,_mu_tilde,np.sqrt(_sigma2_tilde))*_Z_tilde + + _sigma2_tilde = 1./(1./(my_sigma2_hat) - 1./sigma2_) + _mu_tilde = (my_mu_hat/my_sigma2_hat - mu_/sigma2_)*_sigma2_tilde + _Z_tilde = my_Z_hat*np.sqrt(2*np.pi)*np.sqrt(sigma2_+_sigma2_tilde)*np.exp(.5*(mu_ - _mu_tilde)**2/(sigma2_ + _sigma2_tilde)) + + fig1 = pb.figure(figsize=(15,5)) + ax1 = fig1.add_subplot(131) + ax1.grid(True) + #pb.plot(gp,bad_gaussian,'b--',linewidth=1.5) + #pb.plot(gp,non_gaussian,'b-',linewidth=1.5) + pb.plot(gp,new_gaussian,'r--',linewidth=1.5) + pb.title('Likelihood: $p(y_i|f_i)$',fontsize=22) + + ax2 = fig1.add_subplot(132) + ax2.grid(True) + pb.plot(gp,gaussian,'b-',linewidth=1.5) + pb.title('Cavity distribution: $q_{-i}(f_i)$',fontsize=22) + + ax3 = fig1.add_subplot(133) + ax3.grid(True) + pb.plot(gp,prod,'b--',linewidth=1.5) + + pb.plot(gp,proxy*my_Z_hat,'r-',linewidth=1.5) + + pb.title('Approximation: $\mathcal{N}(f_i|\hat{\mu}_i,\hat{\sigma}_i^2) \hat{Z}_i$',fontsize=22) + pb.legend(('Exact','Approximation'),frameon=False) + + print 'i',i + print 'v/tau _tilde', self.v_tilde[i], self.tau_tilde[i] + print 'v/tau _', self.v_[i], self.tau_[i] + print 'Z/mu/sigma2 _hat', self.Z_hat[i], mu_hat[i], sigma2_hat[i] + pb.plot(gp,new_gaussian*gaussian,'k-') + + a = kj + break 
+ """ + #DELETE + + + + #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) #FIXME + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) #FIXME self.tau_tilde[i] += Delta_tau self.v_tilde[i] += Delta_v + + #new_tau = self.delta/self.eta*(1./sigma2_hat[i] - self.tau_[i]) + #new_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.v_[i]) + #Delta_tau = new_tau - self.tau_tilde[i] + #Delta_v = new_v - self.v_tilde[i] + #self.tau_tilde[i] += Delta_tau + #self.v_tilde[i] += Delta_v + #Posterior distribution parameters update DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) mu = np.dot(Sigma,self.v_tilde) self.iterations += 1 + + + + #Sigma recomptutation with Cholesky decompositon Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K @@ -138,6 +238,11 @@ class EP(likelihood): self.np1.append(self.tau_tilde.copy()) self.np2.append(self.v_tilde.copy()) + ##DELETE + #pb.vlines(mu[i],0,max(prod)) + #break + #DELETE + return self._compute_GP_variables() def fit_DTC(self, Kmm, Kmn): diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py new file mode 100644 index 00000000..40282402 --- /dev/null +++ b/GPy/likelihoods/noise_models/__init__.py @@ -0,0 +1,4 @@ +import likelihood_functions +import binomial_likelihood +import poisson_likelihood +import link_functions diff --git a/GPy/likelihoods/noise_models/binomial_likelihood.py b/GPy/likelihoods/noise_models/binomial_likelihood.py new file mode 100644 index 00000000..d23dd2f7 --- /dev/null +++ b/GPy/likelihoods/noise_models/binomial_likelihood.py @@ -0,0 +1,111 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import link_functions +from likelihood_functions import NoiseModel + +class Binomial(NoiseModel): + """ + Probit likelihood + Y is expected to take values in {-1,1} + ----- + $$ + L(x) = \\Phi (Y_i*f_i) + $$ + """ + def __init__(self,link=None,analytical_moments=False): + super(Binomial, self).__init__(link,analytical_moments) + + def _preprocess_values(self,Y): + """ + Check if the values of the observations correspond to the values + assumed by the likelihood function. + + ..Note:: Binary classification algorithm works better with classes {-1,1} + """ + Y_prep = Y.copy() + Y1 = Y[Y.flatten()==1].size + Y2 = Y[Y.flatten()==0].size + assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.' 
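The closed-form probit moments implemented below in _moments_match_analytical can be cross-checked against brute-force quadrature; a self-contained sketch with made-up cavity parameters (y is the preprocessed label in {-1,1}):

    import numpy as np
    from scipy import stats

    y, tau, v = 1., 2., .5                      # label and cavity natural parameters
    mu_c, s2_c = v / tau, 1. / tau              # cavity mean and variance

    # closed forms, as in _moments_match_analytical
    z = y * v / np.sqrt(tau**2 + tau)
    Z = stats.norm.cdf(z)
    phi = stats.norm.pdf(z)
    mu_hat = v / tau + y * phi / (Z * np.sqrt(tau**2 + tau))
    s2_hat = 1. / tau - (phi / ((tau**2 + tau) * Z)) * (z + phi / Z)

    # quadrature over the tilted distribution N(f; mu_c, s2_c) * Phi(y*f)
    f = np.linspace(mu_c - 10., mu_c + 10., 20001)
    w = stats.norm.pdf(f, mu_c, np.sqrt(s2_c)) * stats.norm.cdf(y * f)
    Z_q = np.trapz(w, f)
    mu_q = np.trapz(f * w, f) / Z_q
    s2_q = np.trapz((f - mu_q)**2 * w, f) / Z_q
    assert np.allclose([Z, mu_hat, s2_hat], [Z_q, mu_q, s2_q], atol=1e-5)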
+ Y_prep[Y.flatten() == 0] = -1 + return Y_prep + + def _moments_match_analytical(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) + Z_hat = std_norm_cdf(z) + phi = std_norm_pdf(z) + mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) + sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + return Z_hat, mu_hat, sigma2_hat + + def _predictive_mean_analytical(self,mu,sigma): + return stats.norm.cdf(mu/np.sqrt(1+sigma**2)) + + def _mass(self,gp,obs): + #NOTE obs must be in {0,1} + p = self.link.inv_transf(gp) + return p**obs * (1.-p)**(1.-obs) + + def _nlog_mass(self,gp,obs): + p = self.link.inv_transf(gp) + return obs*np.log(p) + (1.-obs)*np.log(1-p) + + def _dnlog_mass_dgp(self,gp,obs): + p = self.link.inv_transf(gp) + dp = self.link.dinv_transf_df(gp) + return obs/p * dp - (1.-obs)/(1.-p) * dp + + def _d2nlog_mass_dgp2(self,gp,obs): + p = self.link.inv_transf(gp) + return (obs/p + (1.-obs)/(1.-p))*self.link.d2inv_transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.link.dinv_transf_df(gp) + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + def _dmean_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + p = self.link.inv_transf(gp) + return p*(1-p) + + def _dvariance_dgp(self,gp): + return self.link.dinv_transf_df(gp)*(1. - 2.*self.link.inv_transf(gp)) + + def _d2variance_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp)*(1. 
- 2.*self.link.inv_transf(gp)) - 2*self.link.dinv_transf_df(gp)**2 + + """ + def predictive_values(self,mu,var): #TODO remove + mu = mu.flatten() + var = var.flatten() + #mean = stats.norm.cdf(mu/np.sqrt(1+var)) + mean = self._predictive_mean_analytical(mu,np.sqrt(var)) + norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)] + norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)] + #p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var)) + #p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) + p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var)) + p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var)) + return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var + """ diff --git a/GPy/likelihoods/noise_models/likelihood_functions.py b/GPy/likelihoods/noise_models/likelihood_functions.py new file mode 100644 index 00000000..5376e8e7 --- /dev/null +++ b/GPy/likelihoods/noise_models/likelihood_functions.py @@ -0,0 +1,349 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +import pylab as pb +from GPy.util.plot import gpplot +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import link_functions + + +class NoiseModel(object): + """ + Likelihood class for doing Expectation propagation + + :param Y: observed output (Nx1 numpy.darray) + ..Note:: Y values allowed depend on the LikelihoodFunction used + """ + def __init__(self,link,analytical_moments=False): + #assert isinstance(link,link_functions.LinkFunction), "link is not a valid LinkFunction."#FIXME + self.link = link + self.analytical_moments = analytical_moments + if self.analytical_moments: + self.moments_match = self._moments_match_analytical + self.predictive_mean = self._predictive_mean_analytical + else: + self.moments_match = self._moments_match_numerical + self.predictive_mean = self._predictive_mean_numerical + + def _preprocess_values(self,Y): + """ + In case it is needed, this function assess the output values or makes any pertinent transformation on them. + + :param Y: observed output (Nx1 numpy.darray) + """ + return Y + + def _product(self,gp,obs,mu,sigma): + """ + Product between the cavity distribution and a likelihood factor. + + :param gp: latent variable + :param obs: observed output + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs) + + def _nlog_product_scaled(self,gp,obs,mu,sigma): + """ + Negative log-product between the cavity distribution and a likelihood factor. + ..Note:: The constant term in the Gaussian distribution is ignored. + + :param gp: latent variable + :param obs: observed output + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs) + + def _dnlog_product_dgp(self,gp,obs,mu,sigma): + """ + Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor. + + :param gp: latent variable + :param obs: observed output + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs) + + def _d2nlog_product_dgp2(self,gp,obs,mu,sigma): + """ + Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor. 
+ + :param gp: latent variable + :param obs: observed output + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs) + + def _product_mode(self,obs,mu,sigma): + """ + Newton's CG method to find the mode in _product (cavity x likelihood factor). + + :param obs: observed output + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma)) + + def _moments_match_analytical(self,obs,tau,v): + """ + If available, this function computes the moments analytically. + """ + pass + + def _moments_match_numerical(self,obs,tau,v): + """ + Lapace approximation to calculate the moments. + + :param obs: observed output + :param tau: cavity distribution 1st natural parameter (precision) + :param v: cavity distribution 2nd natural paramenter (mu*precision) + """ + mu = v/tau + mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau)) + sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs)) + Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat) + return Z_hat,mu_hat,sigma2_hat + + def _nlog_conditional_mean_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star)) + """ + return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp)) + + def _dnlog_conditional_mean_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_conditional_mean_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp) + + def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_conditional_mean_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2 + + def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E(V(Y_star|f_star)) + """ + return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp)) + + def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) + + def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. 
+ + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2 + + def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E( E(Y_star|f_star)**2 ) + """ + return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)) + + def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp) + + def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 ) + + def _predictive_mean_analytical(self,mu,sigma): + """ + If available, this function computes the predictive mean analytically. + """ + pass + + def _predictive_mean_numerical(self,mu,sigma): + """ + Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma)) + mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) + """ + + pb.figure() + x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) + f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) + pb.plot(x,f,'b-') + sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma) + f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) + k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) + pb.plot(x,f2*mean,'r-') + pb.vlines(maximum,0,f.max()) + """ + return mean + + def _predictive_mean_sq(self,mu,sigma): + """ + Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma)) + mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) + return mean_squared + + def predictive_variance(self,mu,sigma,predictive_mean=None): + """ + Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + 
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
+        """
+        # E( V(Y_star|f_star) )
+        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma))
+        exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)
+
+        """
+        pb.figure()
+        x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
+        f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
+        pb.plot(x,f,'b-')
+        sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
+        f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
+        k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
+        pb.plot(x,f2*exp_var,'r--')
+        pb.vlines(maximum,0,f.max())
+        """
+
+        #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
+        exp_exp2 = self._predictive_mean_sq(mu,sigma)
+        if predictive_mean is None:
+            predictive_mean = self.predictive_mean(mu,sigma)
+        var_exp = exp_exp2 - predictive_mean**2
+        return exp_var + var_exp
+
+    def _nlog_joint_predictive_scaled(self,x,mu,sigma):
+        """
+        Negative logarithm of the joint predictive distribution (latent variable and output).
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        """
+        return self._nlog_product_scaled(x[0],x[1],mu,sigma)
+
+    def _gradient_nlog_joint_predictive(self,x,mu,sigma):
+        """
+        Gradient of _nlog_joint_predictive_scaled.
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        ..Note:: Only available when the output is continuous
+        """
+        assert not self.discrete, "Gradient not available for discrete outputs."
+        return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))
+
+    def _hessian_nlog_joint_predictive(self,x,mu,sigma):
+        """
+        Hessian of _nlog_joint_predictive_scaled.
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        ..Note:: Only available when the output is continuous
+        """
+        assert not self.discrete, "Hessian not available for discrete outputs."
+        cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
+        return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)
+
+    def _joint_predictive_mode(self,mu,sigma):
+        """
+        Newton's CG method to find the mode of the joint predictive distribution (latent variable and output).
+
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        """
+        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.inv_transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma))
+
+    def predictive_values(self,mu,var,sample=True,sample_size=5000):
+        """
+        Compute the mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction
+        :param mu: mean of the latent variable
+        :param var: variance of the latent variable
+        """
+        if isinstance(mu,float) or isinstance(mu,int):
+            mu = [mu]
+            var = [var]
+        pred_mean = []
+        pred_var = []
+        q1 = []
+        q3 = []
+        for m,s in zip(mu,np.sqrt(var)):
+            pred_mean.append(self.predictive_mean(m,s))
+            pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
+            q1.append(self.predictive_mean(stats.norm.ppf(.025,m,s**2),s))
+            q3.append(self.predictive_mean(stats.norm.ppf(.975,m,s**2),s))
+        pred_mean = np.vstack(pred_mean)
+        pred_var = np.vstack(pred_var)
+        q1 = np.vstack(q1)
+        q3 = np.vstack(q3)
+        return pred_mean, pred_var, q1, q3
diff --git a/GPy/likelihoods/noise_models/link_functions.py b/GPy/likelihoods/noise_models/link_functions.py
new file mode 100644
index 00000000..b0cdcd49
--- /dev/null
+++ b/GPy/likelihoods/noise_models/link_functions.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats
+import scipy as sp
+import pylab as pb
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
+
+class LinkFunction(object):
+    """
+    Link function class for doing non-Gaussian likelihoods approximation
+
+    :param Y: observed output (Nx1 numpy.darray)
+    ..Note:: Y values allowed depend on the likelihood_function used
+    """
+    def __init__(self):
+        pass
+
+class Identity(LinkFunction):
+    """
+    $$
+    g(f) = f
+    $$
+    """
+    def transf(self,mu):
+        return mu
+
+    def inv_transf(self,f):
+        return f
+
+    def dinv_transf_df(self,f):
+        return 1.
+ + def d2inv_transf_df2(self,f): + return 0 + + +class Probit(LinkFunction): + """ + $$ + g(f) = \\Phi^{-1} (mu) + $$ + """ + def transf(self,mu): + return inv_std_norm_cdf(mu) + + def inv_transf(self,f): + return std_norm_cdf(f) + + def dinv_transf_df(self,f): + return std_norm_pdf(f) + + def d2inv_transf_df2(self,f): + return -f * std_norm_pdf(f) + +class Log(LinkFunction): + """ + $$ + g(f) = \log(\mu) + $$ + """ + def transf(self,mu): + return np.log(mu) + + def inv_transf(self,f): + return np.exp(f) + + def dinv_transf_df(self,f): + return np.exp(f) + + def d2inv_transf_df2(self,f): + return np.exp(f) + +class Log_ex_1(LinkFunction): + """ + $$ + g(f) = \log(\exp(\mu) - 1) + $$ + """ + def transf(self,mu): + """ + function: output space -> latent space + """ + return np.log(np.exp(mu) - 1) + + def inv_transf(self,f): + """ + function: latent space -> output space + """ + return np.log(1.+np.exp(f)) + + def dinv_transf_df(self,f): + return np.exp(f)/(1.+np.exp(f)) + + def d2inv_transf_df2(self,f): + aux = np.exp(f)/(1.+np.exp(f)) + return aux*(1.-aux) diff --git a/GPy/likelihoods/noise_models/poisson_likelihood.py b/GPy/likelihoods/noise_models/poisson_likelihood.py new file mode 100644 index 00000000..86a2df2a --- /dev/null +++ b/GPy/likelihoods/noise_models/poisson_likelihood.py @@ -0,0 +1,92 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +#import pylab as pb +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import link_functions +from likelihood_functions import NoiseModel + +class Poisson(NoiseModel): + """ + Poisson likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! + $$ + """ + def __init__(self,link=None,analytical_moments=False): + #self.discrete = True + #self.support_limits = (0,np.inf) + + #self.analytical_moments = False + super(Poisson, self).__init__(link,analytical_moments) + + def _preprocess_values(self,Y): #TODO + self.scale = .5*Y.max() + self.shift = Y.mean() + return (Y - self.shift)/self.scale + + def _mass(self,gp,obs): + """ + Mass (or density) function + """ + obs = obs*self.scale + self.shift + return stats.poisson.pmf(obs,self.link.inv_transf(gp)) + + def _nlog_mass(self,gp,obs): + """ + Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted + """ + return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) + + def _dnlog_mass_dgp(self,gp,obs): + return self.link.dinv_transf_df(gp) * (1. 
- obs/self.link.inv_transf(gp)) + + def _d2nlog_mass_dgp2(self,gp,obs): + d2_df = self.link.d2inv_transf_df2(gp) + inv_transf = self.link.inv_transf(gp) + return obs * ((self.link.dinv_transf_df(gp)/inv_transf)**2 - d2_df/inv_transf) + d2_df + + def _dnlog_mass_dobs(self,obs,gp): #TODO not needed + return special.psi(obs+1) - np.log(self.link.inv_transf(gp)) + + def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed + return special.polygamma(1,obs) + + def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed + return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + #def _variance(self,gp): + # return self.link.inv_transf(gp) + + def _dmean_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.link.inv_transf(gp) + + #def _variance(self,gp): + # return self.link.inv_transf(gp) + + def _dvariance_dgp(self,gp): + return self.link.dinv_transf_df(gp) + + def _d2variance_dgp2(self,gp): + return self.link.d2inv_transf_df2(gp) From b3fd843d338a18b651bceb831c861fd9f1cefd88 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 8 Jul 2013 18:33:44 +0100 Subject: [PATCH 10/49] confidence interval fixed --- .../noise_models/likelihood_functions.py | 16 ++++++++++++++-- .../noise_models/poisson_likelihood.py | 8 ++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/GPy/likelihoods/noise_models/likelihood_functions.py b/GPy/likelihoods/noise_models/likelihood_functions.py index 5376e8e7..dd597521 100644 --- a/GPy/likelihoods/noise_models/likelihood_functions.py +++ b/GPy/likelihoods/noise_models/likelihood_functions.py @@ -279,6 +279,18 @@ class NoiseModel(object): var_exp = exp_exp2 - predictive_mean**2 return exp_var + var_exp + def _predictive_percentiles(self,p,mu,sigma): + """ + Percentiles of the predictive distribution + + :parm p: lower tail probability + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. + """ + qf = stats.norm.ppf(p,mu,sigma) + return self.link.inv_transf(qf) + def _nlog_joint_predictive_scaled(self,x,mu,sigma): """ Negative logarithm of the joint predictive distribution (latent variable and output). 
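A note on the _predictive_percentiles helper just added: because the inverse link is monotonic, output-space percentiles can be obtained by pushing latent-space Gaussian percentiles through the transformation, since P(g(F) <= g(q_p)) = P(F <= q_p) = p; the remaining hunk below rewires predictive_values to use it. The following standalone snippet is an illustrative sketch only (not part of these patches) that checks this property against sampling, using the softplus inverse link:

import numpy as np
from scipy import stats

mu, sigma, p = 0.4, 0.8, 0.975
qf = stats.norm.ppf(p, mu, sigma)    # latent-space percentile
q_out = np.log(1. + np.exp(qf))      # mapped through the softplus inverse link

# Monte Carlo check: transform latent samples, then take the empirical quantile
rng = np.random.default_rng(0)
samples = np.log(1. + np.exp(rng.normal(mu, sigma, 200000)))
assert abs(np.quantile(samples, p) - q_out) < 2e-2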
@@ -340,8 +352,8 @@ class NoiseModel(object): for m,s in zip(mu,np.sqrt(var)): pred_mean.append(self.predictive_mean(m,s)) pred_var.append(self.predictive_variance(m,s,pred_mean[-1])) - q1.append(self.predictive_mean(stats.norm.ppf(.025,m,s**2),s)) - q3.append(self.predictive_mean(stats.norm.ppf(.975,m,s**2),s)) + q1.append(self._predictive_percentiles(.025,m,s)) + q3.append(self._predictive_percentiles(.975,m,s)) pred_mean = np.vstack(pred_mean) pred_var = np.vstack(pred_var) q1 = np.vstack(q1) diff --git a/GPy/likelihoods/noise_models/poisson_likelihood.py b/GPy/likelihoods/noise_models/poisson_likelihood.py index 86a2df2a..cdae29e3 100644 --- a/GPy/likelihoods/noise_models/poisson_likelihood.py +++ b/GPy/likelihoods/noise_models/poisson_likelihood.py @@ -27,15 +27,15 @@ class Poisson(NoiseModel): super(Poisson, self).__init__(link,analytical_moments) def _preprocess_values(self,Y): #TODO - self.scale = .5*Y.max() - self.shift = Y.mean() - return (Y - self.shift)/self.scale + #self.scale = .5*Y.max() + #self.shift = Y.mean() + return Y #(Y - self.shift)/self.scale def _mass(self,gp,obs): """ Mass (or density) function """ - obs = obs*self.scale + self.shift + #obs = obs*self.scale + self.shift return stats.poisson.pmf(obs,self.link.inv_transf(gp)) def _nlog_mass(self,gp,obs): From 06ffb884abd7b1eb135f0b474f90c981ccc4c9c4 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 9 Jul 2013 17:54:56 +0100 Subject: [PATCH 11/49] files re-organized --- GPy/likelihoods/__init__.py | 2 +- GPy/likelihoods/constructors.py | 42 --------- GPy/likelihoods/ep.py | 28 +++--- GPy/likelihoods/noise_model_constructors.py | 42 +++++++++ GPy/likelihoods/noise_models/__init__.py | 8 +- ...nomial_likelihood.py => binomial_noise.py} | 34 +++---- .../noise_models/gaussian_noise.py | 89 +++++++++++++++++++ ...ink_functions.py => gp_transformations.py} | 56 ++++++------ ...od_functions.py => noise_distributions.py} | 14 +-- ...poisson_likelihood.py => poisson_noise.py} | 43 +++++---- 10 files changed, 223 insertions(+), 135 deletions(-) delete mode 100644 GPy/likelihoods/constructors.py create mode 100644 GPy/likelihoods/noise_model_constructors.py rename GPy/likelihoods/noise_models/{binomial_likelihood.py => binomial_noise.py} (76%) create mode 100644 GPy/likelihoods/noise_models/gaussian_noise.py rename GPy/likelihoods/noise_models/{link_functions.py => gp_transformations.py} (59%) rename GPy/likelihoods/noise_models/{likelihood_functions.py => noise_distributions.py} (97%) rename GPy/likelihoods/noise_models/{poisson_likelihood.py => poisson_noise.py} (58%) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 4932bd40..3e6a28d3 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,5 +1,5 @@ from ep import EP from gaussian import Gaussian +from noise_model_constructors import * # TODO: from Laplace import Laplace -from constructors import * diff --git a/GPy/likelihoods/constructors.py b/GPy/likelihoods/constructors.py deleted file mode 100644 index 0b995894..00000000 --- a/GPy/likelihoods/constructors.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2013, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - -import numpy as np -from likelihood_functions import LikelihoodFunction -import noise_models -import link_functions - -def binomial(link=None): - """ - Construct a binomial likelihood - - :param link: a GPy link function - """ - #self.discrete = True - #self.support_limits = (0,1) - - if link is None: - link = link_functions.Probit() - else: - assert isinstance(link,link_functions.LinkFunction), 'link function is not valid.' - - if isinstance(link,link_functions.Probit): - analytical_moments = True - else: - analytical_moments = False - return noise_models.binomial_likelihood.Binomial(link,analytical_moments) - - -def poisson(link=None): - """ - Construct a Poisson likelihood - - :param link: a GPy link function - """ - if link is None: - link = link_functions.Log_ex_1() - else: - assert isinstance(link,link_functions.LinkFunction), 'link function is not valid.' - #assert isinstance(link,link_functions.LinkFunction), 'link function is not valid.' - analytical_moments = False - return noise_models.poisson_likelihood.Poisson(link,analytical_moments) diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index 7e90755e..717bfcb7 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -4,23 +4,23 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs from likelihood import likelihood class EP(likelihood): - def __init__(self,data,LikelihoodFunction,epsilon=1e-3,power_ep=[1.,1.]): + def __init__(self,data,noise_model,epsilon=1e-3,power_ep=[1.,1.]): """ Expectation Propagation Arguments --------- epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - LikelihoodFunction : a likelihood function (see likelihood_functions.py) + noise_model : a likelihood function (see likelihood_functions.py) """ - self.LikelihoodFunction = LikelihoodFunction + self.noise_model = noise_model self.epsilon = epsilon self.eta, self.delta = power_ep self.data = data self.N, self.output_dim = self.data.shape self.is_heteroscedastic = True self.Nparams = 0 - self._transf_data = self.LikelihoodFunction._preprocess_values(data) + self._transf_data = self.noise_model._preprocess_values(data) #Initial values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde) @@ -28,9 +28,9 @@ class EP(likelihood): self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) - #_gp = self.LikelihoodFunction.link.transf(self.data) - #_mean = self.LikelihoodFunction._mean(_gp) - #_variance = self.LikelihoodFunction._variance(_gp) + #_gp = self.noise_model.gp_link.transf(self.data) + #_mean = self.noise_model._mean(_gp) + #_variance = self.noise_model._variance(_gp) #self.tau_tilde = 1./_variance #self.tau_tilde[_variance== 0] = 1. 
#self.v_tilde = _mean*self.tau_tilde @@ -62,7 +62,7 @@ class EP(likelihood): def predictive_values(self,mu,var,full_cov): if full_cov: raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" - return self.LikelihoodFunction.predictive_values(mu,var) + return self.noise_model.predictive_values(mu,var) def _get_params(self): return np.zeros(0) @@ -128,7 +128,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #DELETE @@ -136,11 +136,11 @@ class EP(likelihood): import pylab as pb from scipy import stats import scipy as sp - import link_functions + import gp_transformations from constructors import * - link = link_functions.Log_ex_1() - distribution = poisson(link=link) + gp_link = gp_transformations.Log_ex_1() + distribution = poisson(gp_link=gp_link) gp = np.linspace(-3,50,100) #distribution = binomial() #gp = np.linspace(-3,3,100) @@ -311,7 +311,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) @@ -406,7 +406,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py new file mode 100644 index 00000000..4267fc32 --- /dev/null +++ b/GPy/likelihoods/noise_model_constructors.py @@ -0,0 +1,42 @@ +# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import noise_models +#from likelihood_functions import LikelihoodFunction +#import gp_transformations + +def binomial(gp_link=None): + """ + Construct a binomial likelihood + + :param gp_link: a GPy gp_link function + """ + #self.discrete = True + #self.support_limits = (0,1) + + if gp_link is None: + gp_link = noise_models.gp_transformations.Probit() + else: + assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' 
+ + if isinstance(gp_link,noise_models.gp_transformations.Probit): + analytical_moments = True + else: + analytical_moments = False + return noise_models.binomial_noise.Binomial(gp_link,analytical_moments) + + +def poisson(gp_link=None): + """ + Construct a Poisson likelihood + + :param gp_link: a GPy gp_link function + """ + if gp_link is None: + gp_link = noise_models.gp_transformations.Log_ex_1() + else: + assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' + #assert isinstance(gp_link,gp_transformations.GPTransformation), 'gp_link function is not valid.' + analytical_moments = False + return noise_models.poisson_noise.Poisson(gp_link,analytical_moments) diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py index 40282402..c5fc66b0 100644 --- a/GPy/likelihoods/noise_models/__init__.py +++ b/GPy/likelihoods/noise_models/__init__.py @@ -1,4 +1,4 @@ -import likelihood_functions -import binomial_likelihood -import poisson_likelihood -import link_functions +import noise_distributions +import binomial_noise +import poisson_noise +import gp_transformations diff --git a/GPy/likelihoods/noise_models/binomial_likelihood.py b/GPy/likelihoods/noise_models/binomial_noise.py similarity index 76% rename from GPy/likelihoods/noise_models/binomial_likelihood.py rename to GPy/likelihoods/noise_models/binomial_noise.py index d23dd2f7..77fde5eb 100644 --- a/GPy/likelihoods/noise_models/binomial_likelihood.py +++ b/GPy/likelihoods/noise_models/binomial_noise.py @@ -5,10 +5,10 @@ import numpy as np from scipy import stats,special import scipy as sp from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions -from likelihood_functions import NoiseModel +import gp_transformations +from noise_distributions import NoiseDistribution -class Binomial(NoiseModel): +class Binomial(NoiseDistribution): """ Probit likelihood Y is expected to take values in {-1,1} @@ -17,8 +17,8 @@ class Binomial(NoiseModel): L(x) = \\Phi (Y_i*f_i) $$ """ - def __init__(self,link=None,analytical_moments=False): - super(Binomial, self).__init__(link,analytical_moments) + def __init__(self,gp_link=None,analytical_moments=False): + super(Binomial, self).__init__(gp_link,analytical_moments) def _preprocess_values(self,Y): """ @@ -54,46 +54,46 @@ class Binomial(NoiseModel): def _mass(self,gp,obs): #NOTE obs must be in {0,1} - p = self.link.inv_transf(gp) + p = self.gp_link.transf(gp) return p**obs * (1.-p)**(1.-obs) def _nlog_mass(self,gp,obs): - p = self.link.inv_transf(gp) + p = self.gp_link.transf(gp) return obs*np.log(p) + (1.-obs)*np.log(1-p) def _dnlog_mass_dgp(self,gp,obs): - p = self.link.inv_transf(gp) - dp = self.link.dinv_transf_df(gp) + p = self.gp_link.transf(gp) + dp = self.gp_link.dtransf_df(gp) return obs/p * dp - (1.-obs)/(1.-p) * dp def _d2nlog_mass_dgp2(self,gp,obs): - p = self.link.inv_transf(gp) - return (obs/p + (1.-obs)/(1.-p))*self.link.d2inv_transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.link.dinv_transf_df(gp) + p = self.gp_link.transf(gp) + return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp) def _mean(self,gp): """ Mass (or density) function """ - return self.link.inv_transf(gp) + return self.gp_link.transf(gp) def _dmean_dgp(self,gp): - return self.link.dinv_transf_df(gp) + return self.gp_link.dtransf_df(gp) def _d2mean_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp) + return self.gp_link.d2transf_df2(gp) 
def _variance(self,gp): """ Mass (or density) function """ - p = self.link.inv_transf(gp) + p = self.gp_link.transf(gp) return p*(1-p) def _dvariance_dgp(self,gp): - return self.link.dinv_transf_df(gp)*(1. - 2.*self.link.inv_transf(gp)) + return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp)) def _d2variance_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp)*(1. - 2.*self.link.inv_transf(gp)) - 2*self.link.dinv_transf_df(gp)**2 + return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2 """ def predictive_values(self,mu,var): #TODO remove diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py new file mode 100644 index 00000000..a77becb2 --- /dev/null +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -0,0 +1,89 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from noise_distributions import NoiseDistribution + +class Gaussian(NoiseDistribution): + """ + Gaussian likelihood + + :param mean: mean value of the Gaussian distribution + :param variance: mean value of the Gaussian distribution + """ + def __init__(self,gp_link=None,analytical_moments=False,mean=0,variance=1.): + self.mean = mean + self.variance = variance + super(Gaussian, self).__init__(gp_link,analytical_moments) + + def _preprocess_values(self,Y): + """ + Check if the values of the observations correspond to the values + assumed by the likelihood function. + """ + return Y + + def _moments_match_analytical(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + sigma2_hat = 1./(1./self.variance + tau_i) + mu_hat = sigma2_hat*(self.mean/self.variance + v_i) + Z_hat = np.sqrt(2*np.pi*sigma2_hat)*np.exp(-.5*(self.mean - v_i/tau_i)**2/(self.variance + 1./tau_i)) #TODO check + return Z_hat, mu_hat, sigma2_hat + + def _predictive_mean_analytical(self,mu,sigma): + new_sigma2 = 1./(1./self.variance + 1./sigma**2) + return new_sigma2*(mu/sigma + self.mean/self.variance) + + def _mass(self,gp,obs): + p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) + return std_norm_pdf(p) + + def _nlog_mass(self,gp,obs): + p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) + return .5*np.log(2*np.pi*self.variance) + .5*(p-self.mean)**2/self.variance + + def _dnlog_mass_dgp(self,gp,obs): + p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) + dp = self.gp_link.dtransf_df(gp) + return (p - self.mean)/self.variance * dp + + def _d2nlog_mass_dgp2(self,gp,obs): + p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) + dp = self.gp_link.dtransf_df(gp) + d2p = self.gp_link.d2transf_df2(gp) + return dp**2/self.variance + (p - self.mean)/self.variance * d2p + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _dmean_dgp(self,gp): + return self.gp_link.dtransf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.gp_link.d2transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + p = self.gp_link.transf(gp) + return p*(1-p) + + def _dvariance_dgp(self,gp): + return 
self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp)) + + def _d2variance_dgp2(self,gp): + return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2 diff --git a/GPy/likelihoods/noise_models/link_functions.py b/GPy/likelihoods/noise_models/gp_transformations.py similarity index 59% rename from GPy/likelihoods/noise_models/link_functions.py rename to GPy/likelihoods/noise_models/gp_transformations.py index b0cdcd49..b81e88e1 100644 --- a/GPy/likelihoods/noise_models/link_functions.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -8,7 +8,7 @@ import scipy as sp import pylab as pb from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf -class LinkFunction(object): +class GPTransformation(object): """ Link function class for doing non-Gaussian likelihoods approximation @@ -18,82 +18,82 @@ class LinkFunction(object): def __init__(self): pass -class Identity(LinkFunction): +class Identity(GPTransformation): """ $$ g(f) = f $$ """ - def transf(self,mu): - return mu + #def transf(self,mu): + # return mu - def inv_transf(self,f): + def transf(self,f): return f - def dinv_transf_df(self,f): + def dtransf_df(self,f): return 1. - def d2inv_transf_df2(self,f): + def d2transf_df2(self,f): return 0 -class Probit(LinkFunction): +class Probit(GPTransformation): """ $$ g(f) = \\Phi^{-1} (mu) $$ """ - def transf(self,mu): - return inv_std_norm_cdf(mu) + #def transf(self,mu): + # return inv_std_norm_cdf(mu) - def inv_transf(self,f): + def transf(self,f): return std_norm_cdf(f) - def dinv_transf_df(self,f): + def dtransf_df(self,f): return std_norm_pdf(f) - def d2inv_transf_df2(self,f): + def d2transf_df2(self,f): return -f * std_norm_pdf(f) -class Log(LinkFunction): +class Log(GPTransformation): """ $$ g(f) = \log(\mu) $$ """ - def transf(self,mu): - return np.log(mu) + #def transf(self,mu): + # return np.log(mu) - def inv_transf(self,f): + def transf(self,f): return np.exp(f) - def dinv_transf_df(self,f): + def dtransf_df(self,f): return np.exp(f) - def d2inv_transf_df2(self,f): + def d2transf_df2(self,f): return np.exp(f) -class Log_ex_1(LinkFunction): +class Log_ex_1(GPTransformation): """ $$ g(f) = \log(\exp(\mu) - 1) $$ """ - def transf(self,mu): - """ - function: output space -> latent space - """ - return np.log(np.exp(mu) - 1) + #def transf(self,mu): + # """ + # function: output space -> latent space + # """ + # return np.log(np.exp(mu) - 1) - def inv_transf(self,f): + def transf(self,f): """ function: latent space -> output space """ return np.log(1.+np.exp(f)) - def dinv_transf_df(self,f): + def dtransf_df(self,f): return np.exp(f)/(1.+np.exp(f)) - def d2inv_transf_df2(self,f): + def d2transf_df2(self,f): aux = np.exp(f)/(1.+np.exp(f)) return aux*(1.-aux) diff --git a/GPy/likelihoods/noise_models/likelihood_functions.py b/GPy/likelihoods/noise_models/noise_distributions.py similarity index 97% rename from GPy/likelihoods/noise_models/likelihood_functions.py rename to GPy/likelihoods/noise_models/noise_distributions.py index dd597521..0dc9e03c 100644 --- a/GPy/likelihoods/noise_models/likelihood_functions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -8,19 +8,19 @@ import scipy as sp import pylab as pb from GPy.util.plot import gpplot from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions +import gp_transformations -class NoiseModel(object): +class NoiseDistribution(object): """ Likelihood class for doing Expectation propagation :param Y: observed 
output (Nx1 numpy.darray) ..Note:: Y values allowed depend on the LikelihoodFunction used """ - def __init__(self,link,analytical_moments=False): - #assert isinstance(link,link_functions.LinkFunction), "link is not a valid LinkFunction."#FIXME - self.link = link + def __init__(self,gp_link,analytical_moments=False): + #assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME + self.gp_link = gp_link self.analytical_moments = analytical_moments if self.analytical_moments: self.moments_match = self._moments_match_analytical @@ -289,7 +289,7 @@ class NoiseModel(object): :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. """ qf = stats.norm.ppf(p,mu,sigma) - return self.link.inv_transf(qf) + return self.gp_link.transf(qf) def _nlog_joint_predictive_scaled(self,x,mu,sigma): """ @@ -334,7 +334,7 @@ class NoiseModel(object): :param mu: latent variable's predictive mean :param sigma: latent variable's predictive standard deviation """ - return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.link.inv_transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) + return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma)) def predictive_values(self,mu,var,sample=True,sample_size=5000): """ diff --git a/GPy/likelihoods/noise_models/poisson_likelihood.py b/GPy/likelihoods/noise_models/poisson_noise.py similarity index 58% rename from GPy/likelihoods/noise_models/poisson_likelihood.py rename to GPy/likelihoods/noise_models/poisson_noise.py index cdae29e3..e90b3ce8 100644 --- a/GPy/likelihoods/noise_models/poisson_likelihood.py +++ b/GPy/likelihoods/noise_models/poisson_noise.py @@ -5,12 +5,11 @@ import numpy as np from scipy import stats,special import scipy as sp -#import pylab as pb from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions -from likelihood_functions import NoiseModel +import gp_transformations +from noise_distributions import NoiseDistribution -class Poisson(NoiseModel): +class Poisson(NoiseDistribution): """ Poisson likelihood Y is expected to take values in {0,1,2,...} @@ -19,12 +18,12 @@ class Poisson(NoiseModel): L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! $$ """ - def __init__(self,link=None,analytical_moments=False): + def __init__(self,gp_link=None,analytical_moments=False): #self.discrete = True #self.support_limits = (0,np.inf) #self.analytical_moments = False - super(Poisson, self).__init__(link,analytical_moments) + super(Poisson, self).__init__(gp_link,analytical_moments) def _preprocess_values(self,Y): #TODO #self.scale = .5*Y.max() @@ -36,57 +35,57 @@ class Poisson(NoiseModel): Mass (or density) function """ #obs = obs*self.scale + self.shift - return stats.poisson.pmf(obs,self.link.inv_transf(gp)) + return stats.poisson.pmf(obs,self.gp_link.transf(gp)) def _nlog_mass(self,gp,obs): """ Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted """ - return self.link.inv_transf(gp) - obs * np.log(self.link.inv_transf(gp)) + np.log(special.gamma(obs+1)) + return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1)) def _dnlog_mass_dgp(self,gp,obs): - return self.link.dinv_transf_df(gp) * (1. 
- obs/self.link.inv_transf(gp)) + return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp)) def _d2nlog_mass_dgp2(self,gp,obs): - d2_df = self.link.d2inv_transf_df2(gp) - inv_transf = self.link.inv_transf(gp) - return obs * ((self.link.dinv_transf_df(gp)/inv_transf)**2 - d2_df/inv_transf) + d2_df + d2_df = self.gp_link.d2transf_df2(gp) + transf = self.gp_link.transf(gp) + return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df def _dnlog_mass_dobs(self,obs,gp): #TODO not needed - return special.psi(obs+1) - np.log(self.link.inv_transf(gp)) + return special.psi(obs+1) - np.log(self.gp_link.transf(gp)) def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed return special.polygamma(1,obs) def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed - return -self.link.dinv_transf_df(gp)/self.link.inv_transf(gp) + return -self.gp_link.dtransf_df(gp)/self.gp_link.transf(gp) def _mean(self,gp): """ Mass (or density) function """ - return self.link.inv_transf(gp) + return self.gp_link.transf(gp) #def _variance(self,gp): - # return self.link.inv_transf(gp) + # return self.gp_link.transf(gp) def _dmean_dgp(self,gp): - return self.link.dinv_transf_df(gp) + return self.gp_link.dtransf_df(gp) def _d2mean_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp) + return self.gp_link.d2transf_df2(gp) def _variance(self,gp): """ Mass (or density) function """ - return self.link.inv_transf(gp) + return self.gp_link.transf(gp) #def _variance(self,gp): - # return self.link.inv_transf(gp) + # return self.gp_link.transf(gp) def _dvariance_dgp(self,gp): - return self.link.dinv_transf_df(gp) + return self.gp_link.dtransf_df(gp) def _d2variance_dgp2(self,gp): - return self.link.d2inv_transf_df2(gp) + return self.gp_link.d2transf_df2(gp) From 68e37e8684703fe26f71ef693547b525c84c9421 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 10 Jul 2013 19:33:43 +0100 Subject: [PATCH 12/49] The next step is to optimize the noise models' parameters --- GPy/likelihoods/ep.py | 12 +++-- GPy/likelihoods/noise_model_constructors.py | 40 ++++++++++------ GPy/likelihoods/noise_models/__init__.py | 1 + .../noise_models/binomial_noise.py | 4 +- .../noise_models/gaussian_noise.py | 48 ++++++++++--------- .../noise_models/noise_distributions.py | 28 +++++++++-- GPy/likelihoods/noise_models/poisson_noise.py | 6 +-- 7 files changed, 90 insertions(+), 49 deletions(-) diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index 717bfcb7..50df599a 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -65,11 +65,17 @@ class EP(likelihood): return self.noise_model.predictive_values(mu,var) def _get_params(self): - return np.zeros(0) + #return np.zeros(0) + return self.noise_model._get_params() + def _get_param_names(self): - return [] + #return [] + return self.noise_model._get_param_names() + def _set_params(self,p): - pass # TODO: the EP likelihood might want to take some parameters... + #pass # TODO: the EP likelihood might want to take some parameters... + self.noise_model._set_params(p) + def _gradients(self,partial): return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... 
diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index 4267fc32..f64fb235 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -3,8 +3,6 @@ import numpy as np import noise_models -#from likelihood_functions import LikelihoodFunction -#import gp_transformations def binomial(gp_link=None): """ @@ -12,20 +10,32 @@ def binomial(gp_link=None): :param gp_link: a GPy gp_link function """ - #self.discrete = True - #self.support_limits = (0,1) - if gp_link is None: gp_link = noise_models.gp_transformations.Probit() - else: - assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' + #else: + # assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' if isinstance(gp_link,noise_models.gp_transformations.Probit): - analytical_moments = True + analytical_mean = True else: - analytical_moments = False - return noise_models.binomial_noise.Binomial(gp_link,analytical_moments) + analytical_mean = False + analytical_variance = False + return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance) +def gaussian(gp_link=None,variance=1.): + """ + Construct a gaussian likelihood + + :param gp_link: a GPy gp_link function + """ + if gp_link is None: + gp_link = noise_models.gp_transformations.Identity() + #else: + # assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' + + analytical_mean = True + analytical_variance = True + return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance) def poisson(gp_link=None): """ @@ -35,8 +45,8 @@ def poisson(gp_link=None): """ if gp_link is None: gp_link = noise_models.gp_transformations.Log_ex_1() - else: - assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' - #assert isinstance(gp_link,gp_transformations.GPTransformation), 'gp_link function is not valid.' - analytical_moments = False - return noise_models.poisson_noise.Poisson(gp_link,analytical_moments) + #else: + # assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' 
+ analytical_mean = False + analytical_variance = False + return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance) diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py index c5fc66b0..b16f8fbd 100644 --- a/GPy/likelihoods/noise_models/__init__.py +++ b/GPy/likelihoods/noise_models/__init__.py @@ -1,4 +1,5 @@ import noise_distributions import binomial_noise +import gaussian_noise import poisson_noise import gp_transformations diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/binomial_noise.py index 77fde5eb..200625d9 100644 --- a/GPy/likelihoods/noise_models/binomial_noise.py +++ b/GPy/likelihoods/noise_models/binomial_noise.py @@ -17,8 +17,8 @@ class Binomial(NoiseDistribution): L(x) = \\Phi (Y_i*f_i) $$ """ - def __init__(self,gp_link=None,analytical_moments=False): - super(Binomial, self).__init__(gp_link,analytical_moments) + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance) def _preprocess_values(self,Y): """ diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index a77becb2..389363a3 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -15,10 +15,18 @@ class Gaussian(NoiseDistribution): :param mean: mean value of the Gaussian distribution :param variance: mean value of the Gaussian distribution """ - def __init__(self,gp_link=None,analytical_moments=False,mean=0,variance=1.): - self.mean = mean + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.): self.variance = variance - super(Gaussian, self).__init__(gp_link,analytical_moments) + super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _get_params(self): + return self.variance + + def _get_param_names(self): + return ['noise_model_variance'] + + def _set_params(self,p): + self.variance = p def _preprocess_values(self,Y): """ @@ -36,32 +44,29 @@ class Gaussian(NoiseDistribution): :param v_i: mean/variance of the cavity distribution (float) """ sigma2_hat = 1./(1./self.variance + tau_i) - mu_hat = sigma2_hat*(self.mean/self.variance + v_i) - Z_hat = np.sqrt(2*np.pi*sigma2_hat)*np.exp(-.5*(self.mean - v_i/tau_i)**2/(self.variance + 1./tau_i)) #TODO check + mu_hat = sigma2_hat*(data_i/self.variance + v_i) + sum_var = self.variance + 1./tau_i + Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var) return Z_hat, mu_hat, sigma2_hat def _predictive_mean_analytical(self,mu,sigma): - new_sigma2 = 1./(1./self.variance + 1./sigma**2) - return new_sigma2*(mu/sigma + self.mean/self.variance) + new_sigma2 = self.predictive_variance(mu,sigma) + return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance) + + def _predictive_variance_analytical(self,mu,sigma,*args): #TODO *args? 
+ return 1./(1./self.variance + 1./sigma**2) def _mass(self,gp,obs): - p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) - return std_norm_pdf(p) + return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) def _nlog_mass(self,gp,obs): - p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) - return .5*np.log(2*np.pi*self.variance) + .5*(p-self.mean)**2/self.variance + return .5*((self.gp_link.transf(gp)-obs)**2/np.sqrt(self.variance) + np.log(2*np.pi*self.variance)) def _dnlog_mass_dgp(self,gp,obs): - p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) - dp = self.gp_link.dtransf_df(gp) - return (p - self.mean)/self.variance * dp + return (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) * self.gp_link.dtransf_df(gp) def _d2nlog_mass_dgp2(self,gp,obs): - p = (self.gp_link.transf(gp)-self.mean)/np.sqrt(self.variance) - dp = self.gp_link.dtransf_df(gp) - d2p = self.gp_link.d2transf_df2(gp) - return dp**2/self.variance + (p - self.mean)/self.variance * d2p + return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance def _mean(self,gp): """ @@ -79,11 +84,10 @@ class Gaussian(NoiseDistribution): """ Mass (or density) function """ - p = self.gp_link.transf(gp) - return p*(1-p) + return self.variance def _dvariance_dgp(self,gp): - return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp)) + return 0 def _d2variance_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2 + return 0 diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 0dc9e03c..45f92950 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -18,16 +18,30 @@ class NoiseDistribution(object): :param Y: observed output (Nx1 numpy.darray) ..Note:: Y values allowed depend on the LikelihoodFunction used """ - def __init__(self,gp_link,analytical_moments=False): + def __init__(self,gp_link,analytical_mean=False,analytical_variance=False): #assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME self.gp_link = gp_link - self.analytical_moments = analytical_moments - if self.analytical_moments: + self.analytical_mean = analytical_mean + self.analytical_variance = analytical_variance + if self.analytical_mean: self.moments_match = self._moments_match_analytical self.predictive_mean = self._predictive_mean_analytical else: self.moments_match = self._moments_match_numerical self.predictive_mean = self._predictive_mean_numerical + if self.analytical_variance: + self.predictive_variance = self._predictive_variance_analytical + else: + self.predictive_variance = self._predictive_variance_numerical + + def _get_params(self): + return np.zeros(0) + + def _get_param_names(self): + return [] + + def _set_params(self,p): + pass def _preprocess_values(self,Y): """ @@ -214,6 +228,12 @@ class NoiseDistribution(object): """ pass + def _predictive_variance_analytical(self,mu,sigma): + """ + If available, this function computes the predictive variance analytically. 
+ """ + pass + def _predictive_mean_numerical(self,mu,sigma): """ Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) @@ -248,7 +268,7 @@ class NoiseDistribution(object): mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) return mean_squared - def predictive_variance(self,mu,sigma,predictive_mean=None): + def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None): """ Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) diff --git a/GPy/likelihoods/noise_models/poisson_noise.py b/GPy/likelihoods/noise_models/poisson_noise.py index e90b3ce8..f7dd93b9 100644 --- a/GPy/likelihoods/noise_models/poisson_noise.py +++ b/GPy/likelihoods/noise_models/poisson_noise.py @@ -18,12 +18,12 @@ class Poisson(NoiseDistribution): L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! $$ """ - def __init__(self,gp_link=None,analytical_moments=False): + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): #self.discrete = True #self.support_limits = (0,np.inf) - #self.analytical_moments = False - super(Poisson, self).__init__(gp_link,analytical_moments) + #self.analytical_mean = False + super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance) def _preprocess_values(self,Y): #TODO #self.scale = .5*Y.max() From 5b433a3f73504604240b4425b699987d53a9408e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 15 Jul 2013 18:09:26 +0100 Subject: [PATCH 13/49] random changes --- GPy/core/gp.py | 7 ++++++- GPy/likelihoods/ep.py | 3 ++- GPy/likelihoods/noise_models/gaussian_noise.py | 11 ++++++++--- GPy/likelihoods/noise_models/noise_distributions.py | 3 +++ 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5172d9e7..8d52a984 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -98,7 +98,12 @@ class GP(GPBase): Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta """ - return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) + #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) + if not isinstance(self.likelihood,EP): + tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) + else: + tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) + return tmp def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False): """ diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index 50df599a..aaa03938 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -77,7 +77,8 @@ class EP(likelihood): self.noise_model._set_params(p) def _gradients(self,partial): - return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... + #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... 
+ return self.noise_model._gradients(partial) def _compute_GP_variables(self): #Variables to be called from GP diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index 389363a3..40db423c 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -28,6 +28,10 @@ class Gaussian(NoiseDistribution): def _set_params(self,p): self.variance = p + def _gradients(self,partial): + return np.zeros(1) + #return np.sum(partial) + def _preprocess_values(self,Y): """ Check if the values of the observations correspond to the values @@ -57,13 +61,14 @@ class Gaussian(NoiseDistribution): return 1./(1./self.variance + 1./sigma**2) def _mass(self,gp,obs): - return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) + #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) + return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)) #FIXME def _nlog_mass(self,gp,obs): - return .5*((self.gp_link.transf(gp)-obs)**2/np.sqrt(self.variance) + np.log(2*np.pi*self.variance)) + return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance)) def _dnlog_mass_dgp(self,gp,obs): - return (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) * self.gp_link.dtransf_df(gp) + return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp) def _d2nlog_mass_dgp2(self,gp,obs): return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 45f92950..bc4d89d6 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -43,6 +43,9 @@ class NoiseDistribution(object): def _set_params(self,p): pass + def _gradients(self,partial): + return np.zeros(0) + def _preprocess_values(self,Y): """ In case it is needed, this function assess the output values or makes any pertinent transformation on them. From ddf64629ae9977c13569c6e608d5f7d45862929c Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 16 Jul 2013 18:38:13 +0100 Subject: [PATCH 14/49] gamma noise added --- GPy/likelihoods/noise_model_constructors.py | 20 +++++- GPy/likelihoods/noise_models/__init__.py | 1 + .../noise_models/binomial_noise.py | 2 +- GPy/likelihoods/noise_models/gamma_noise.py | 71 +++++++++++++++++++ GPy/likelihoods/noise_models/poisson_noise.py | 6 -- 5 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 GPy/likelihoods/noise_models/gamma_noise.py diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index f64fb235..cc205c6d 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -27,14 +27,15 @@ def gaussian(gp_link=None,variance=1.): Construct a gaussian likelihood :param gp_link: a GPy gp_link function + :param variance: scalar """ if gp_link is None: gp_link = noise_models.gp_transformations.Identity() #else: # assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.' 
-    analytical_mean = True
-    analytical_variance = True
+    analytical_mean = False
+    analytical_variance = False
     return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)
 
 def poisson(gp_link=None):
@@ -50,3 +51,18 @@ def poisson(gp_link=None):
     analytical_mean = False
     analytical_variance = False
     return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)
+
+def gamma(gp_link=None,beta=1.):
+    """
+    Construct a Gamma likelihood
+
+    :param gp_link: a GPy gp_link function
+    :param beta: scalar
+    """
+    if gp_link is None:
+        gp_link = noise_models.gp_transformations.Log_ex_1()
+    analytical_mean = False
+    analytical_variance = False
+    return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
+
+
diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py
index b16f8fbd..65a94e1e 100644
--- a/GPy/likelihoods/noise_models/__init__.py
+++ b/GPy/likelihoods/noise_models/__init__.py
@@ -1,5 +1,6 @@
 import noise_distributions
 import binomial_noise
 import gaussian_noise
+import gamma_noise
 import poisson_noise
 import gp_transformations
diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/binomial_noise.py
index 77fde5eb..e47d9211 100644
--- a/GPy/likelihoods/noise_models/binomial_noise.py
+++ b/GPy/likelihoods/noise_models/binomial_noise.py
@@ -87,7 +87,7 @@ class Binomial(NoiseDistribution):
         Mass (or density) function
         """
         p = self.gp_link.transf(gp)
-        return p*(1-p)
+        return p*(1.-p)
 
     def _dvariance_dgp(self,gp):
         return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
diff --git a/GPy/likelihoods/noise_models/gamma_noise.py b/GPy/likelihoods/noise_models/gamma_noise.py
new file mode 100644
index 00000000..6bf0dd7b
--- /dev/null
+++ b/GPy/likelihoods/noise_models/gamma_noise.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+from noise_distributions import NoiseDistribution
+
+class Gamma(NoiseDistribution):
+    """
+    Gamma likelihood
+    Y is expected to take positive real values
+    -----
+    $$
+    L(x) = \beta**\alpha * Y_i**(\alpha - 1.) * \exp(-\beta*Y_i) / \Gamma(\alpha), with \alpha = g(f_i)*\beta
+    $$
+    """
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
+        self.beta = beta
+        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
+
+    def _preprocess_values(self,Y):
+        return Y
+
+    def _mass(self,gp,obs):
+        """
+        Mass (or density) function
+        """
+        #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
+        alpha = self.gp_link.transf(gp)*self.beta
+        return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
+
+    def _nlog_mass(self,gp,obs):
+        """
+        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
+        """
+        alpha = self.gp_link.transf(gp)*self.beta
+        return (1.
- alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
+
+    def _dnlog_mass_dgp(self,gp,obs):
+        return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
+
+    def _d2nlog_mass_dgp2(self,gp,obs):
+        return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
+
+    def _mean(self,gp):
+        """
+        Expected value of the Gamma distribution
+        """
+        return self.gp_link.transf(gp)
+
+    def _dmean_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)
+
+    def _d2mean_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)
+
+    def _variance(self,gp):
+        """
+        Variance of the Gamma distribution
+        """
+        return self.gp_link.transf(gp)/self.beta
+
+    def _dvariance_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)/self.beta
+
+    def _d2variance_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)/self.beta
diff --git a/GPy/likelihoods/noise_models/poisson_noise.py b/GPy/likelihoods/noise_models/poisson_noise.py
index f7dd93b9..e4ce90d3 100644
--- a/GPy/likelihoods/noise_models/poisson_noise.py
+++ b/GPy/likelihoods/noise_models/poisson_noise.py
@@ -66,9 +66,6 @@ class Poisson(NoiseDistribution):
         """
         return self.gp_link.transf(gp)
 
-    #def _variance(self,gp):
-    #    return self.gp_link.transf(gp)
-
     def _dmean_dgp(self,gp):
         return self.gp_link.dtransf_df(gp)
 
@@ -81,9 +78,6 @@ class Poisson(NoiseDistribution):
         """
         return self.gp_link.transf(gp)
 
-    #def _variance(self,gp):
-    #    return self.gp_link.transf(gp)
-
     def _dvariance_dgp(self,gp):
         return self.gp_link.dtransf_df(gp)
 

From 70c44b2cdd95e81b5b675724bc2e797399b0a413 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Thu, 18 Jul 2013 18:49:26 +0100
Subject: [PATCH 15/49] Multioutput is working

---
 GPy/core/gp.py                              |  35 +-
 GPy/core/gp_base.py                         |  29 +-
 GPy/core/model.py                           |   2 +-
 GPy/likelihoods/__init__.py                 |   1 +
 GPy/likelihoods/ep.py                       | 125 +-----
 GPy/likelihoods/ep_mixed_noise.py           | 372 ++++++++++++++++++
 GPy/likelihoods/noise_model_constructors.py |  13 +
 GPy/likelihoods/noise_models/__init__.py    |   1 +
 .../noise_models/exponential_noise.py       |  68 ++++
 .../noise_models/gaussian_noise.py          |   2 +-
 .../noise_models/gp_transformations.py      |  12 +
 .../noise_models/noise_distributions.py     |   2 +-
 GPy/models/__init__.py                      |   1 +
 GPy/models/gp_classification.py             |   5 +-
 GPy/models/gp_multioutput.py                |  56 +++
 15 files changed, 598 insertions(+), 126 deletions(-)
 create mode 100644 GPy/likelihoods/ep_mixed_noise.py
 create mode 100644 GPy/likelihoods/noise_models/exponential_noise.py
 create mode 100644 GPy/models/gp_multioutput.py

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 8d52a984..607fdc5b 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -7,7 +7,7 @@ import pylab as pb
 from .. import kern
 from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
 #from ..util.plot import gpplot, Tango
-from ..likelihoods import EP
+from ..likelihoods import EP,EP_Mixed_Noise
 from gp_base import GPBase
 
 class GP(GPBase):
@@ -151,5 +151,36 @@ class GP(GPBase):
 
         # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
-
+
         return mean, var, _025pm, _975pm
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        Predict the function(s) at the new point(s) Xnew.
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+
+        If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
+        This is to allow for different normalizations of the output dimensions.
+
+        """
+        assert isinstance(self.likelihood,EP_Mixed_Noise)
+        index = np.ones_like(Xnew)*output
+        Xnew = np.hstack((Xnew,index))
+
+        # normalize X values
+        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+        # now push through likelihood
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
+        return mean, var, _025pm, _975pm
diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index b82f3298..609fc500 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -3,6 +3,7 @@ from .. import kern
 from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
 import pylab as pb
 from GPy.core.model import Model
+from GPy.likelihoods.ep_mixed_noise import EP_Mixed_Noise
 
 class GPBase(Model):
     """
@@ -91,7 +92,7 @@ class GPBase(Model):
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
-    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None):
+    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None):
        """
        TODO: Docstrings!
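
For reference, a minimal sketch of how the per-output prediction added in this
patch might be called once a multioutput model exists (hypothetical usage, not
part of the patch; `m` is assumed to be a fitted GPy model whose likelihood is
an EP_Mixed_Noise instance):

    import numpy as np

    Xnew = np.linspace(0., 1., 50)[:, None]  # new inputs, without the output-index column
    # predict_single_output appends the index column internally and pushes the
    # GP posterior through the noise model of the requested output.
    mean, var, lower, upper = m.predict_single_output(Xnew, output=1)
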
@@ -106,7 +107,7 @@ class GPBase(Model): fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - if self.X.shape[1] == 1: + if self.X.shape[1] == 1 and not isinstance(self.likelihood,EP_Mixed_Noise): Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now @@ -120,7 +121,7 @@ class GPBase(Model): ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2: # FIXME + elif self.X.shape[1] == 2 and not isinstance(self.likelihood,EP_Mixed_Noise): # FIXME resolution = resolution or 50 Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) @@ -132,5 +133,27 @@ class GPBase(Model): ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) + elif self.X.shape[1] == 2 and isinstance(self.likelihood,EP_Mixed_Noise): + Xu = self.X[self.X[:,-1]==output,:] + Xu = self.X * self._Xscale + self._Xoffset + Xu = self.X[self.X[:,-1]==output ,0:1] + + Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) + m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output) + for d in range(m.shape[1]): + gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) + #ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) + ax.plot(Xu[which_data], self.likelihood.data[self.likelihood.index==output][:,None], 'kx', mew=1.5) + ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) + ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + + else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + + + + + diff --git a/GPy/core/model.py b/GPy/core/model.py index 05375b2a..f67adc1c 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -480,7 +480,7 @@ class Model(Parameterised): :type optimzer: string TODO: valid strings? """ - assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods" + assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods" ll_change = epsilon + 1. iteration = 0 last_ll = -np.inf diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 3e6a28d3..55f437b1 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,4 +1,5 @@ from ep import EP +from ep_mixed_noise import EP_Mixed_Noise from gaussian import Gaussian from noise_model_constructors import * # TODO: from Laplace import Laplace diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index aaa03938..c9f23839 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -24,18 +24,9 @@ class EP(likelihood): #Initial values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde) - #TODO restore self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) - #_gp = self.noise_model.gp_link.transf(self.data) - #_mean = self.noise_model._mean(_gp) - #_variance = self.noise_model._variance(_gp) - #self.tau_tilde = 1./_variance - #self.tau_tilde[_variance== 0] = 1. - #self.v_tilde = _mean*self.tau_tilde - - #initial values for the GP variables self.Y = np.zeros((self.N,1)) self.covariance_matrix = np.eye(self.N) @@ -47,17 +38,16 @@ class EP(likelihood): self.trYYT = 0. 
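
As a reference for the EP updates in this file, the site parameters initialised
above are natural parameters of the per-datum Gaussian factors; a minimal
standalone sketch of the moment conversion used throughout (assuming nonzero
precisions; the helper name is illustrative only):

    import numpy as np

    def site_moments(tau_tilde, v_tilde):
        # Moment form of an EP site t_i(f) = N(f | mu_tilde, sigma2_tilde),
        # given the natural parameters tau_tilde (precision) and
        # v_tilde (precision times mean).
        sigma2_tilde = 1. / tau_tilde
        mu_tilde = v_tilde * sigma2_tilde
        return mu_tilde, sigma2_tilde

    mu_t, s2_t = site_moments(np.array([2.]), np.array([1.]))  # gives (0.5, 0.5)
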
def restart(self): - #FIXME self.tau_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N) - #self.Y = np.zeros((self.N,1)) - #self.covariance_matrix = np.eye(self.N) - #self.precision = np.ones(self.N)[:,None] - #self.Z = 0 - #self.YYT = None - #self.V = self.precision * self.Y - #self.VVT_factor = self.V - #self.trYYT = 0. + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. def predictive_values(self,mu,var,full_cov): if full_cov: @@ -95,8 +85,6 @@ class EP(likelihood): self.VVT_factor = self.V self.trYYT = np.trace(self.YYT) - #a = kjkjkjkj - def fit_full(self,K): """ The expectation-propagation algorithm. @@ -136,103 +124,15 @@ class EP(likelihood): self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] #Marginal moments self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) - - - #DELETE - """ - import pylab as pb - from scipy import stats - import scipy as sp - import gp_transformations - from constructors import * - - gp_link = gp_transformations.Log_ex_1() - distribution = poisson(gp_link=gp_link) - gp = np.linspace(-3,50,100) - #distribution = binomial() - #gp = np.linspace(-3,3,100) - - y = self._transf_data[i] - tau_ = self.tau_[i] - v_ = self.v_[i] - sigma2_ = np.sqrt(1./tau_) - mu_ = v_/tau_ - - gaussian = stats.norm.pdf(gp,loc=mu_,scale=np.sqrt(sigma2_)) - non_gaussian = np.array([distribution._mass(gp_i,y) for gp_i in gp]) - prod = np.array([distribution._product(gp_i,y,mu_,np.sqrt(sigma2_)) for gp_i in gp]) - my_Z_hat,my_mu_hat,my_sigma2_hat = distribution.moments_match(y,tau_,v_) - proxy = stats.norm.pdf(gp,loc=my_mu_hat,scale=np.sqrt(my_sigma2_hat)) - - - new_sigma2_tilde = 1./self.tau_tilde[i] - new_mu_tilde = self.v_tilde[i]/self.tau_tilde[i] - new_Z_tilde = self.Z_hat[i]*np.sqrt(2*np.pi)*np.sqrt(sigma2_+new_sigma2_tilde)*np.exp(.5*(mu_-new_mu_tilde)**2/(sigma2_+new_sigma2_tilde)) - bad_gaussian = stats.norm.pdf(gp,self.v_tilde[i]/self.tau_tilde[i],np.sqrt(1./self.tau_tilde[i])) - new_gaussian = stats.norm.pdf(gp,new_mu_tilde,np.sqrt(new_sigma2_tilde))*new_Z_tilde - #new_gaussian = stats.norm.pdf(gp,_mu_tilde,np.sqrt(_sigma2_tilde))*_Z_tilde - - _sigma2_tilde = 1./(1./(my_sigma2_hat) - 1./sigma2_) - _mu_tilde = (my_mu_hat/my_sigma2_hat - mu_/sigma2_)*_sigma2_tilde - _Z_tilde = my_Z_hat*np.sqrt(2*np.pi)*np.sqrt(sigma2_+_sigma2_tilde)*np.exp(.5*(mu_ - _mu_tilde)**2/(sigma2_ + _sigma2_tilde)) - - fig1 = pb.figure(figsize=(15,5)) - ax1 = fig1.add_subplot(131) - ax1.grid(True) - #pb.plot(gp,bad_gaussian,'b--',linewidth=1.5) - #pb.plot(gp,non_gaussian,'b-',linewidth=1.5) - pb.plot(gp,new_gaussian,'r--',linewidth=1.5) - pb.title('Likelihood: $p(y_i|f_i)$',fontsize=22) - - ax2 = fig1.add_subplot(132) - ax2.grid(True) - pb.plot(gp,gaussian,'b-',linewidth=1.5) - pb.title('Cavity distribution: $q_{-i}(f_i)$',fontsize=22) - - ax3 = fig1.add_subplot(133) - ax3.grid(True) - pb.plot(gp,prod,'b--',linewidth=1.5) - - pb.plot(gp,proxy*my_Z_hat,'r-',linewidth=1.5) - - pb.title('Approximation: $\mathcal{N}(f_i|\hat{\mu}_i,\hat{\sigma}_i^2) \hat{Z}_i$',fontsize=22) - pb.legend(('Exact','Approximation'),frameon=False) - - print 'i',i - print 'v/tau _tilde', self.v_tilde[i], self.tau_tilde[i] - print 'v/tau _', self.v_[i], self.tau_[i] - print 'Z/mu/sigma2 _hat', self.Z_hat[i], mu_hat[i], sigma2_hat[i] - pb.plot(gp,new_gaussian*gaussian,'k-') - - a = 
kj - break - """ - #DELETE - - - - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) #FIXME - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) #FIXME + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) self.tau_tilde[i] += Delta_tau self.v_tilde[i] += Delta_v - - #new_tau = self.delta/self.eta*(1./sigma2_hat[i] - self.tau_[i]) - #new_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.v_[i]) - #Delta_tau = new_tau - self.tau_tilde[i] - #Delta_v = new_v - self.v_tilde[i] - #self.tau_tilde[i] += Delta_tau - #self.v_tilde[i] += Delta_v - #Posterior distribution parameters update DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) mu = np.dot(Sigma,self.v_tilde) self.iterations += 1 - - - - #Sigma recomptutation with Cholesky decompositon Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K @@ -245,11 +145,6 @@ class EP(likelihood): self.np1.append(self.tau_tilde.copy()) self.np2.append(self.v_tilde.copy()) - ##DELETE - #pb.vlines(mu[i],0,max(prod)) - #break - #DELETE - return self._compute_GP_variables() def fit_DTC(self, Kmm, Kmn): diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py new file mode 100644 index 00000000..24c5498e --- /dev/null +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -0,0 +1,372 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs +from likelihood import likelihood + +class EP_Mixed_Noise(likelihood): + def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]): + """ + Expectation Propagation + + Arguments + --------- + epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + noise_model : a likelihood function (see likelihood_functions.py) + """ + assert len(data_list) == len(noise_model_list) + self.noise_model_list = noise_model_list + n_list = [data.size for data in data_list] + n_models = len(data_list) + self.n_params = [noise_model._get_params().size for noise_model in noise_model_list] + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(n_models),n_list)]) + self.epsilon = epsilon + self.eta, self.delta = power_ep + self.data = np.vstack(data_list) + self.N, self.output_dim = self.data.shape + self.is_heteroscedastic = True + self.Nparams = 0#FIXME + self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)]) + #TODO non-gaussian index + + #Initial values - Likelihood approximation parameters: + #p(y|f) = t(f|tau_tilde,v_tilde) + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + + #initial values for the GP variables + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. 
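
To make the bookkeeping above concrete, a small standalone sketch of how the
stacked index assigns each observation to its noise model (toy sizes, not taken
from the patch):

    import numpy as np

    n_list = [3, 2]  # e.g. two outputs with 3 and 2 observations
    index = np.vstack([np.repeat(i, n)[:, None] for i, n in zip(range(len(n_list)), n_list)])
    # index.flatten() -> [0, 0, 0, 1, 1]: observation k is matched with
    # noise_model_list[index[k]] inside the EP loops below.
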
+ + def restart(self): + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. + + def predictive_values(self,mu,var,full_cov,noise_model): + if full_cov: + raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" + #_mu = [] + #_var = [] + #_q1 = [] + #_q2 = [] + #for m,v,o in zip(mu,var,output.flatten()): + # a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v) + # _mu.append(a) + # _var.append(b) + # _q1.append(c) + # _q2.append(d) + #return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2) + return self.noise_model_list[noise_model].predictive_values(mu,var) + + def _get_params(self): + return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list]) + + def _get_param_names(self): + names = [] + for noise_model in self.noise_model_list: + names += noise_model._get_param_names() + return names + + def _set_params(self,p): + cs_params = np.cumsum([0]+self.n_params) + for i in range(len(self.n_params)): + self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]]) + + def _gradients(self,partial): + #NOTE this is not tested + return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list]) + + def _compute_GP_variables(self): + #Variables to be called from GP + mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model + sigma_sum = 1./self.tau_ + 1./self.tau_tilde + mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 + self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep + + self.Y = mu_tilde[:,None] + self.YYT = np.dot(self.Y,self.Y.T) + self.covariance_matrix = np.diag(1./self.tau_tilde) + self.precision = self.tau_tilde[:,None] + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = np.trace(self.YYT) + + def fit_full(self,K): + """ + The expectation-propagation algorithm. + For nomenclature see Rasmussen & Williams 2006. + """ + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) + mu = np.zeros(self.N) + Sigma = K.copy() + + """ + Initial values - Cavity distribution parameters: + q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = self.epsilon + 1. + epsilon_np2 = self.epsilon + 1. 
+        self.iterations = 0
+        self.np1 = [self.tau_tilde.copy()]
+        self.np2 = [self.v_tilde.copy()]
+        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
+            update_order = np.random.permutation(self.N)
+            for i in update_order:
+                #Cavity distribution parameters
+                self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
+                self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
+                #Marginal moments
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                #Site parameters update
+                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
+                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
+                #Posterior distribution parameters update
+                DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
+                mu = np.dot(Sigma,self.v_tilde)
+                self.iterations += 1
+            #Sigma recomputation with Cholesky decomposition
+            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
+            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
+            L = jitchol(B)
+            V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
+            Sigma = K - np.dot(V.T,V)
+            mu = np.dot(Sigma,self.v_tilde)
+            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
+            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
+            self.np1.append(self.tau_tilde.copy())
+            self.np2.append(self.v_tilde.copy())
+
+        return self._compute_GP_variables()
+
+    def fit_DTC(self, Kmm, Kmn):
+        """
+        The expectation-propagation algorithm with sparse pseudo-input.
+        For nomenclature see ... 2013.
+        """
+        num_inducing = Kmm.shape[0]
+
+        #TODO: this doesn't work with uncertain inputs!
+
+        """
+        Prior approximation parameters:
+        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
+        Sigma0 = Qnn = Knm*Kmmi*Kmn
+        """
+        KmnKnm = np.dot(Kmn,Kmn.T)
+        Lm = jitchol(Kmm)
+        Lmi = chol_inv(Lm)
+        Kmmi = np.dot(Lmi.T,Lmi)
+        KmmiKmn = np.dot(Kmmi,Kmn)
+        Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
+        LLT0 = Kmm.copy()
+
+        #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
+        #KmnKnm = np.dot(Kmn, Kmn.T)
+        #KmmiKmn = np.dot(Kmmi,Kmn)
+        #Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
+        #LLT0 = Kmm.copy()
+
+        """
+        Posterior approximation: q(f|y) = N(f| mu, Sigma)
+        Sigma = Diag + P*R.T*R*P.T + K
+        mu = w + P*Gamma
+        """
+        mu = np.zeros(self.N)
+        LLT = Kmm.copy()
+        Sigma_diag = Qnn_diag.copy()
+
+        """
+        Initial values - Cavity distribution parameters:
+        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
+        sigma_ = 1./tau_
+        mu_ = v_/tau_
+        """
+        self.tau_ = np.empty(self.N,dtype=float)
+        self.v_ = np.empty(self.N,dtype=float)
+
+        #Initial values - Marginal moments
+        z = np.empty(self.N,dtype=float)
+        self.Z_hat = np.empty(self.N,dtype=float)
+        phi = np.empty(self.N,dtype=float)
+        mu_hat = np.empty(self.N,dtype=float)
+        sigma2_hat = np.empty(self.N,dtype=float)
+
+        #Approximation
+        epsilon_np1 = 1
+        epsilon_np2 = 1
+        self.iterations = 0
+        np1 = [self.tau_tilde.copy()]
+        np2 = [self.v_tilde.copy()]
+        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
+            update_order = np.random.permutation(self.N)
+            for i in update_order:
+                #Cavity distribution parameters
+                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
+                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
+                #Marginal moments
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                #Site parameters update
+                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] -
1./Sigma_diag[i])
+                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
+                #Posterior distribution parameters update
+                DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
+                L = jitchol(LLT)
+                #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
+                V,info = dtrtrs(L,Kmn,lower=1)
+                Sigma_diag = np.sum(V*V,-2)
+                si = np.sum(V.T*V[:,i],-1)
+                mu += (Delta_v-Delta_tau*mu[i])*si
+                self.iterations += 1
+            #Sigma recomputation with Cholesky decomposition
+            LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
+            L = jitchol(LLT)
+            V,info = dtrtrs(L,Kmn,lower=1)
+            V2,info = dtrtrs(L.T,V,lower=0)
+            Sigma_diag = np.sum(V*V,-2)
+            Knmv_tilde = np.dot(Kmn,self.v_tilde)
+            mu = np.dot(V2.T,Knmv_tilde)
+            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
+            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
+            np1.append(self.tau_tilde.copy())
+            np2.append(self.v_tilde.copy())
+
+        self._compute_GP_variables()
+
+    def fit_FITC(self, Kmm, Kmn, Knn_diag):
+        """
+        The expectation-propagation algorithm with sparse pseudo-input.
+        For nomenclature see Naish-Guzman and Holden, 2008.
+        """
+        num_inducing = Kmm.shape[0]
+
+        """
+        Prior approximation parameters:
+        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
+        Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
+        """
+        Lm = jitchol(Kmm)
+        Lmi = chol_inv(Lm)
+        Kmmi = np.dot(Lmi.T,Lmi)
+        P0 = Kmn.T
+        KmnKnm = np.dot(P0.T, P0)
+        KmmiKmn = np.dot(Kmmi,P0.T)
+        Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
+        Diag0 = Knn_diag - Qnn_diag
+        R0 = jitchol(Kmmi).T
+
+        """
+        Posterior approximation: q(f|y) = N(f| mu, Sigma)
+        Sigma = Diag + P*R.T*R*P.T + K
+        mu = w + P*Gamma
+        """
+        self.w = np.zeros(self.N)
+        self.Gamma = np.zeros(num_inducing)
+        mu = np.zeros(self.N)
+        P = P0.copy()
+        R = R0.copy()
+        Diag = Diag0.copy()
+        Sigma_diag = Knn_diag
+        RPT0 = np.dot(R0,P0.T)
+
+        """
+        Initial values - Cavity distribution parameters:
+        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
+        sigma_ = 1./tau_
+        mu_ = v_/tau_
+        """
+        self.tau_ = np.empty(self.N,dtype=float)
+        self.v_ = np.empty(self.N,dtype=float)
+
+        #Initial values - Marginal moments
+        z = np.empty(self.N,dtype=float)
+        self.Z_hat = np.empty(self.N,dtype=float)
+        phi = np.empty(self.N,dtype=float)
+        mu_hat = np.empty(self.N,dtype=float)
+        sigma2_hat = np.empty(self.N,dtype=float)
+
+        #Approximation
+        epsilon_np1 = 1
+        epsilon_np2 = 1
+        self.iterations = 0
+        self.np1 = [self.tau_tilde.copy()]
+        self.np2 = [self.v_tilde.copy()]
+        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
+            update_order = np.random.permutation(self.N)
+            for i in update_order:
+                #Cavity distribution parameters
+                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
+                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
+                #Marginal moments
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                #Site parameters update
+                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
+                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
+                #Posterior distribution parameters update
+                dtd1 = Delta_tau*Diag[i] + 1.
+            dii = Diag[i]
+            Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
+            pi_ = P[i,:].reshape(1,num_inducing)
+            P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
+            Rp_i = np.dot(R,pi_.T)
+            RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
+            R = jitchol(RTR).T
+            self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
+            self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
+            RPT = np.dot(R,P.T)
+            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
+            mu = self.w + np.dot(P,self.Gamma)
+            self.iterations += 1
+        #Sigma recomputation with Cholesky decomposition
+        Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
+        Diag = Diag0 * Iplus_Dprod_i
+        P = Iplus_Dprod_i[:,None] * P0
+        safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
+        L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
+        R,info = dtrtrs(L,R0,lower=1)
+        RPT = np.dot(R,P.T)
+        Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
+        self.w = Diag * self.v_tilde
+        self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
+        mu = self.w + np.dot(P,self.Gamma)
+        epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
+        epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
+        self.np1.append(self.tau_tilde.copy())
+        self.np2.append(self.v_tilde.copy())
+
+        return self._compute_GP_variables()
diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py
index cc205c6d..01f15f71 100644
--- a/GPy/likelihoods/noise_model_constructors.py
+++ b/GPy/likelihoods/noise_model_constructors.py
@@ -22,6 +22,19 @@ def binomial(gp_link=None):
     analytical_variance = False
     return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
 
+def exponential(gp_link=None):
+    """
+    Construct an Exponential likelihood
+
+    :param gp_link: a GPy gp_link function
+    """
+    if gp_link is None:
+        gp_link = noise_models.gp_transformations.Identity()
+
+    analytical_mean = False
+    analytical_variance = False
+    return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)
+
 def gaussian(gp_link=None,variance=1.):
     """
     Construct a gaussian likelihood
diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py
index 65a94e1e..b47702a7 100644
--- a/GPy/likelihoods/noise_models/__init__.py
+++ b/GPy/likelihoods/noise_models/__init__.py
@@ -1,5 +1,6 @@
 import noise_distributions
 import binomial_noise
+import exponential_noise
 import gaussian_noise
 import gamma_noise
 import poisson_noise
diff --git a/GPy/likelihoods/noise_models/exponential_noise.py b/GPy/likelihoods/noise_models/exponential_noise.py
new file mode 100644
index 00000000..e72b8c22
--- /dev/null
+++ b/GPy/likelihoods/noise_models/exponential_noise.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+from noise_distributions import NoiseDistribution
+
+class Exponential(NoiseDistribution):
+    """
+    Exponential likelihood
+    Y is expected to take values in (0,+inf)
+    -----
+    $$
+    p(y_i|f_i) = \frac{1}{\lambda(f_i)} e^{-y_i/\lambda(f_i)}, \quad \lambda(f_i) = E[y_i]
+    $$
+    """
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
+        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
+
+    def _preprocess_values(self,Y):
+        return Y
+
+    def _mass(self,gp,obs):
+        """
+        Mass (or density) function
+        """
+        return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
+
+    def _nlog_mass(self,gp,obs):
+        """
+        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
+        """
+        return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
+
+    def _dnlog_mass_dgp(self,gp,obs):
+        return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
+
+    def _d2nlog_mass_dgp2(self,gp,obs):
+        fgp = self.gp_link.transf(gp)
+        return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
+
+    def _mean(self,gp):
+        """
+        Expected value of the Exponential distribution
+        """
+        return self.gp_link.transf(gp)
+
+    def _dmean_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)
+
+    def _d2mean_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)
+
+    def _variance(self,gp):
+        """
+        Variance of the Exponential distribution
+        """
+        return self.gp_link.transf(gp)**2
+
+    def _dvariance_dgp(self,gp):
+        return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)
+
+    def _d2variance_dgp2(self,gp):
+        return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py
index 40db423c..398ed32a 100644
--- a/GPy/likelihoods/noise_models/gaussian_noise.py
+++ b/GPy/likelihoods/noise_models/gaussian_noise.py
@@ -20,7 +20,7 @@ class Gaussian(NoiseDistribution):
         super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
 
     def _get_params(self):
-        return self.variance
+        return np.array([self.variance])
 
     def _get_param_names(self):
         return ['noise_model_variance']
diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py
index b81e88e1..5cb08e8e 100644
--- a/GPy/likelihoods/noise_models/gp_transformations.py
+++ b/GPy/likelihoods/noise_models/gp_transformations.py
@@ -97,3 +97,15 @@ class Log_ex_1(GPTransformation):
     def d2transf_df2(self,f):
         aux = np.exp(f)/(1.+np.exp(f))
         return aux*(1.-aux)
+
+class Reciprocal(GPTransformation):
+    def transf(self,f):
+        return 1./f
+
+    def dtransf_df(self,f):
+        return -1./f**2
+
+    def d2transf_df2(self,f):
+        return 2./f**3
+
+
diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py
index bc4d89d6..9d4eedfb 100644
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@@ -359,7 +359,7 @@ class NoiseDistribution(object):
         """
         return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma))
 
-    def predictive_values(self,mu,var,sample=True,sample_size=5000):
+    def predictive_values(self,mu,var):
         """
         Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
         :param mu: mean of the latent variable
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 885372a1..093456b6 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -11,3 +11,4 @@ from gplvm import GPLVM
 from warped_gp import
WarpedGP from bayesian_gplvm import BayesianGPLVM from mrd import MRD +from gp_multioutput import GPMultioutput diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index d1cf2e00..73d492fe 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -31,9 +31,8 @@ class GPClassification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - #distribution = GPy.likelihoods.binomial_likelihood.Binomial(link=link) - distribution = likelihoods.binomial() - likelihood = likelihoods.EP(Y, distribution) + noise_model = likelihoods.binomial() + likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py new file mode 100644 index 00000000..72427f43 --- /dev/null +++ b/GPy/models/gp_multioutput.py @@ -0,0 +1,56 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from ..core import GP +from .. import likelihoods +from .. import kern + + +import pylab as pb + +class GPMultioutput(GP): + """ + Multiple output Gaussian process + + This is a thin wrapper around the models.GP class, with a set of sensible defaults + + :param X_list: input observations + :param Y_list: observed values + :param L_list: a GPy likelihood, defaults to Binomial with probit link_function + :param kernel: a GPy kernel, defaults to rbf + :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_X: False|True + :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_Y: False|True + + .. Note:: Multiple independent outputs are allowed using columns of Y + + """ + + def __init__(self,X_list,Y_list=None,likelihood=None,kernel=None,normalize_X=False,normalize_Y=False,W=1): + + if likelihood is None: + noise_model_list = [likelihoods.gaussian(variance=1.) for Y in Y_list] + likelihood = likelihoods.EP_Mixed_Noise(Y_list, noise_model_list) + + elif Y_list is not None: + if not all(np.vstack(Y_list).flatten() == likelihood.data.flatten()): + raise Warning, 'likelihood.data and Y_list values are different.' 
+ + X = np.hstack([np.vstack(X_list),likelihood.index]) + + if kernel is None: + original_dim = X.shape[1]-1 + kernel = kern.rbf(original_dim) + kern.white(original_dim) + + mkernel = kernel.prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W + + #kern1 = kern.rbf(1) + kern.white(1) + #kern2 = kern.coregionalise(2,1) + #kern3 = kern1.prod(kern2,tensor=True) + + + GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) + self.ensure_default_constraints() From 7e1e8de5e4f0ededee30109706903bcfa10f69e0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 22 Jul 2013 10:41:12 +0100 Subject: [PATCH 16/49] Plots for multiple outputs --- GPy/core/gp.py | 26 ++++++++++++++++++++++++++ GPy/core/gp_base.py | 31 ++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 607fdc5b..ce2ca0b8 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -184,3 +184,29 @@ class GP(GPBase): # now push through likelihood mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) return mean, var, _025pm, _975pm + + def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False): + """ + Internal helper function for making predictions, does not account + for normalization or likelihood + """ + assert isinstance(self.likelihood,EP_Mixed_Noise) + index = np.ones_like(_Xnew)*output + _Xnew = np.hstack((_Xnew,index)) + + Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T + #KiKx = np.dot(self.Ki, Kx) + KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1) + mu = np.dot(KiKx.T, self.likelihood.Y) + if full_cov: + Kxx = self.kern.K(_Xnew, which_parts=which_parts) + var = Kxx - np.dot(KiKx.T, Kx) + else: + Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) + var = Kxx - np.sum(np.multiply(KiKx, Kx), 0) + var = var[:, None] + if stop: + debug_this # @UndefinedVariable + return mu, var + + diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 609fc500..c3f9d85a 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -34,7 +34,7 @@ class GPBase(Model): # All leaf nodes should call self._set_params(self._get_params()) at # the end - def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): + def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None,output=None): """ Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian. 
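
For orientation, the multioutput kernel assembled in GPMultioutput above
multiplies a base kernel on the inputs by a coregionalisation kernel acting on
the appended output-index column; a minimal sketch mirroring the commented
example in the patch (hypothetical usage, two outputs, rank-1 W):

    from GPy import kern

    base = kern.rbf(1) + kern.white(1)   # kernel over the original 1-D inputs
    B = kern.coregionalise(2, 1)         # coregionalisation over 2 outputs, rank 1
    mkernel = base.prod(B, tensor=True)  # acts on augmented inputs [x, output_index]
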
@@ -62,7 +62,7 @@ class GPBase(Model): fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - if self.X.shape[1] == 1: + if self.X.shape[1] == 1 and not isinstance(self.likelihood,EP_Mixed_Noise): Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits) if samples == 0: m, v = self._raw_predict(Xnew, which_parts=which_parts) @@ -80,7 +80,7 @@ class GPBase(Model): ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2: + elif self.X.shape[1] == 2 and not isinstance(self.likelihood,EP_Mixed_Noise): resolution = resolution or 50 Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution) m, v = self._raw_predict(Xnew, which_parts=which_parts) @@ -89,6 +89,31 @@ class GPBase(Model): ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) + + + elif self.X.shape[1] == 2 and isinstance(self.likelihood,EP_Mixed_Noise): + Xu = self.X[self.X[:,-1]==output ,0:1] + Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) + + if samples == 0: + m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts) + gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax) + #ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) + ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5) + else: + m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True) + Ysim = np.random.multivariate_normal(m.flatten(), v, samples) + gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax) + for i in range(samples): + ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25) + #ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) + #ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5) + ax.set_xlim(xmin, xmax) + ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None]))) + ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) + ax.set_ylim(ymin, ymax) + + else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" From 1c2a4c5c645707c0131a768325605427626ae9e7 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 31 Jul 2013 19:00:54 +0100 Subject: [PATCH 17/49] Changes to allow multiple output plotting --- GPy/core/gp.py | 4 +- GPy/core/gp_base.py | 6 +-- GPy/core/sparse_gp.py | 36 +++++++++++++++- GPy/likelihoods/ep_mixed_noise.py | 17 +++++++- GPy/models/__init__.py | 1 + GPy/models/gp_multioutput.py | 72 +++++++++++++++++++++++-------- 6 files changed, 109 insertions(+), 27 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index ce2ca0b8..bd31edd8 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -190,7 +190,7 @@ class GP(GPBase): Internal helper function for making predictions, does not account for normalization or likelihood """ - assert isinstance(self.likelihood,EP_Mixed_Noise) + assert hasattr(self,'multioutput') index = np.ones_like(_Xnew)*output _Xnew = np.hstack((_Xnew,index)) @@ -208,5 +208,3 @@ class GP(GPBase): if stop: debug_this # @UndefinedVariable return mu, var - - diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index c3f9d85a..8f63e256 100644 --- a/GPy/core/gp_base.py +++ 
b/GPy/core/gp_base.py
@@ -62,7 +62,7 @@ class GPBase(Model):
             fig = pb.figure(num=fignum)
             ax = fig.add_subplot(111)
 
-        if self.X.shape[1] == 1 and not isinstance(self.likelihood,EP_Mixed_Noise):
+        if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
             Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
             if samples == 0:
                 m, v = self._raw_predict(Xnew, which_parts=which_parts)
@@ -80,7 +80,7 @@ class GPBase(Model):
             ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
             ax.set_ylim(ymin, ymax)
 
-        elif self.X.shape[1] == 2 and not isinstance(self.likelihood,EP_Mixed_Noise):
+        elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
             resolution = resolution or 50
             Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
             m, v = self._raw_predict(Xnew, which_parts=which_parts)
@@ -91,7 +91,7 @@ class GPBase(Model):
             ax.set_xlim(xmin[0], xmax[0])
             ax.set_ylim(xmin[1], xmax[1])
 
-        elif self.X.shape[1] == 2 and isinstance(self.likelihood,EP_Mixed_Noise):
+        elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
             Xu = self.X[self.X[:,-1]==output ,0:1]
             Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
 
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 04119071..04762522 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -5,7 +5,7 @@ import numpy as np
 import pylab as pb
 from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
 from scipy import linalg
-from ..likelihoods import Gaussian
+from ..likelihoods import Gaussian, EP,EP_Mixed_Noise
 from gp_base import GPBase
 
 class SparseGP(GPBase):
@@ -314,3 +314,37 @@ class SparseGP(GPBase):
         elif self.X.shape[1] == 2:
             Zu = self.Z * self._Xscale + self._Xoffset
             ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        Predict the function(s) at the new point(s) Xnew.
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+
+        If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
+        This is to allow for different normalizations of the output dimensions.
+ + """ + assert isinstance(self.likelihood,EP_Mixed_Noise) + index = np.ones_like(Xnew)*output + Xnew = np.hstack((Xnew,index)) + + # normalize X values + Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale + mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) + + # now push through likelihood + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + return mean, var, _025pm, _975pm + diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py index 24c5498e..150de09c 100644 --- a/GPy/likelihoods/ep_mixed_noise.py +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -14,8 +14,11 @@ class EP_Mixed_Noise(likelihood): Arguments --------- - epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - noise_model : a likelihood function (see likelihood_functions.py) + :param data_list: list of outputs + :param noise_model_list: a list of noise models + :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations + :type epsilon: float + :param power_ep: list of power ep parameters """ assert len(data_list) == len(noise_model_list) self.noise_model_list = noise_model_list @@ -60,6 +63,16 @@ class EP_Mixed_Noise(likelihood): self.trYYT = 0. def predictive_values(self,mu,var,full_cov,noise_model): + """ + Predicts the output given the GP + + :param mu: GP's mean + :param var: GP's variance + :param full_cov: whether to return the full covariance matrix, or just the diagonal + :type full_cov: False|True + :param noise_model: noise model to use + :type noise_model: integer + """ if full_cov: raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" #_mu = [] diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 093456b6..d0290165 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -12,3 +12,4 @@ from warped_gp import WarpedGP from bayesian_gplvm import BayesianGPLVM from mrd import MRD from gp_multioutput import GPMultioutput +from sparse_gp_multioutput import SparseGPMultioutput diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py index 72427f43..0fdad786 100644 --- a/GPy/models/gp_multioutput.py +++ b/GPy/models/gp_multioutput.py @@ -19,7 +19,7 @@ class GPMultioutput(GP): :param X_list: input observations :param Y_list: observed values :param L_list: a GPy likelihood, defaults to Binomial with probit link_function - :param kernel: a GPy kernel, defaults to rbf + :param kernel_list: a GPy kernel, defaults to rbf :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) @@ -29,28 +29,64 @@ class GPMultioutput(GP): """ - def __init__(self,X_list,Y_list=None,likelihood=None,kernel=None,normalize_X=False,normalize_Y=False,W=1): + def __init__(self,X_list,Y_list,noise_list=[],kernel_list=None,normalize_X=False,normalize_Y=False,W=1): #TODO W - if likelihood is None: - noise_model_list = [likelihoods.gaussian(variance=1.) 
for Y in Y_list] - likelihood = likelihoods.EP_Mixed_Noise(Y_list, noise_model_list) + assert len(X_list) == len(Y_list) + index = [] + i = 0 + for x,y in zip(X_list,Y_list): + assert x.shape[0] == y.shape[0] + index.append(np.repeat(i,y.size)[:,None]) + i += 1 + index = np.vstack(index) - elif Y_list is not None: - if not all(np.vstack(Y_list).flatten() == likelihood.data.flatten()): - raise Warning, 'likelihood.data and Y_list values are different.' + if noise_list == []: + likelihood_list = [] + for Y in Y_list: + likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) - X = np.hstack([np.vstack(X_list),likelihood.index]) + Y = np.vstack([l_.Y for l_ in likelihood_list]) + likelihood = likelihoods.Gaussian(Y,normalize=False) + likelihood.index = index - if kernel is None: + X = np.hstack([np.vstack(X_list),index]) + + if kernel_list is None: original_dim = X.shape[1]-1 - kernel = kern.rbf(original_dim) + kern.white(original_dim) - - mkernel = kernel.prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W - - #kern1 = kern.rbf(1) + kern.white(1) - #kern2 = kern.coregionalise(2,1) - #kern3 = kern1.prod(kern2,tensor=True) - + kernel_list = [kern.rbf(original_dim) + kern.white(original_dim)] + mkernel = kernel_list[0].prod(kern.coregionalise(len(X_list),W),tensor=True) + for k in kernel_list[1:]: + mkernel += k.prod(kern.coregionalise(len(X_list),W),tensor=True) + self.multioutput = True GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) self.ensure_default_constraints() + + +""" +if likelihood is None: +noise_model_list = [] +for Y in Y_list: +noise_model_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) +#noise_model_list = [likelihoods.gaussian(variance=1.) for Y in Y_list] +#likelihood = likelihoods.EP_Mixed_Noise(Y_list, noise_model_list) + +elif Y_list is not None: +if not all(np.vstack(Y_list).flatten() == likelihood.data.flatten()): +raise Warning, 'likelihood.data and Y_list values are different.' + +X = np.hstack([np.vstack(X_list),likelihood.index]) + +if kernel_list is None: +original_dim = X.shape[1]-1 +kernel_list = [kern.rbf(original_dim) + kern.white(original_dim)] + +mkernel = kernel_list[0].prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W +for k in kernel_list[1:]: +mkernel += k.prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W + +#kern1 = kern.rbf(1) + kern.white(1) +#kern2 = kern.coregionalise(2,1) +#kern3 = kern1.prod(kern2,tensor=True) +""" + From 4c7ebb6601d11bacd7a68db1c89c86dcca215077 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 2 Aug 2013 20:10:02 +0100 Subject: [PATCH 18/49] Multioutput models added --- GPy/core/gp.py | 7 ++- GPy/core/gp_base.py | 29 ++++++--- GPy/core/sparse_gp.py | 78 +++++++++++++++++++++-- GPy/kern/parts/prod.py | 2 +- GPy/likelihoods/ep_mixed_noise.py | 4 +- GPy/models/gp_multioutput.py | 60 ++++++------------ GPy/models/sparse_gp_multioutput.py | 97 +++++++++++++++++++++++++++++ GPy/util/__init__.py | 1 + GPy/util/multioutput.py | 35 +++++++++++ 9 files changed, 251 insertions(+), 62 deletions(-) create mode 100644 GPy/models/sparse_gp_multioutput.py create mode 100644 GPy/util/multioutput.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index bd31edd8..fb5fe789 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -173,7 +173,7 @@ class GP(GPBase): This is to allow for different normalizations of the output dimensions. 
""" - assert isinstance(self.likelihood,EP_Mixed_Noise) + assert hasattr(self,'multioutput') index = np.ones_like(Xnew)*output Xnew = np.hstack((Xnew,index)) @@ -182,7 +182,10 @@ class GP(GPBase): mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) # now push through likelihood - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + if isinstance(self.likelihood,EP_Mixed_Noise): + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + else: + mean, var, _025pm, _975pm = self.likelihood_list[output].predictive_values(mu, var, full_cov) return mean, var, _025pm, _975pm def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False): diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 8f63e256..2820a447 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -106,13 +106,16 @@ class GPBase(Model): gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax) for i in range(samples): ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25) - #ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) - #ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5) ax.set_xlim(xmin, xmax) ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None]))) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_ylim(ymin, ymax) + if hasattr(self,'Z'): + Zu = self.Z[self.Z[:,-1]==output,:] + Zu = self.Z * self._Xscale + self._Xoffset + Zu = self.Z[self.Z[:,-1]==output ,0:1] #?? + ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12) else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" @@ -120,7 +123,7 @@ class GPBase(Model): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None): """ TODO: Docstrings! 
- + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure """ @@ -132,7 +135,7 @@ class GPBase(Model): fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - if self.X.shape[1] == 1 and not isinstance(self.likelihood,EP_Mixed_Noise): + if self.X.shape[1] == 1 and not hasattr(self,'multioutput'): Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now @@ -146,7 +149,7 @@ class GPBase(Model): ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2 and not isinstance(self.likelihood,EP_Mixed_Noise): # FIXME + elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'): resolution = resolution or 50 Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) @@ -158,17 +161,23 @@ class GPBase(Model): ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) - elif self.X.shape[1] == 2 and isinstance(self.likelihood,EP_Mixed_Noise): - Xu = self.X[self.X[:,-1]==output,:] + elif self.X.shape[1] == 2 and hasattr(self,'multioutput'): + Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest Xu = self.X * self._Xscale + self._Xoffset - Xu = self.X[self.X[:,-1]==output ,0:1] + Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) + m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output) + #if not isinstance(self.likelihood,EP_Mixed_Noise): + # m, _, lower, upper = self.predict(np.hstack([Xnew,np.repeat(output,Xnew.size)[:,None]]), which_parts=which_parts) + #else: + # m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output) + for d in range(m.shape[1]): gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) - #ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) - ax.plot(Xu[which_data], self.likelihood.data[self.likelihood.index==output][:,None], 'kx', mew=1.5) + #ax.plot(Xu[which_data], self.likelihood.data[self.likelihood.index==output][:,None], 'kx', mew=1.5) + ax.plot(Xu[which_data], self.likelihood_list[output].data, 'kx', mew=1.5) ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_xlim(xmin, xmax) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 04762522..6d9761c4 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -293,7 +293,7 @@ class SparseGP(GPBase): return mean, var, _025pm, _975pm - def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None): + def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None): if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) @@ -301,8 +301,8 @@ class SparseGP(GPBase): if which_data is 'all': which_data = slice(None) - GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax) - if self.X.shape[1] == 1: + GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output) + if self.X.shape[1] == 1 and not hasattr(self,'multioutput'): if self.has_uncertain_inputs: Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the 
normalized values now ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], @@ -311,10 +311,31 @@ class SparseGP(GPBase): Zu = self.Z * self._Xscale + self._Xoffset ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12) - elif self.X.shape[1] == 2: + elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'): Zu = self.Z * self._Xscale + self._Xoffset ax.plot(Zu[:, 0], Zu[:, 1], 'wo') + elif self.X.shape[1] == 2 and hasattr(self,'multioutput'): + Xu = self.X[self.X[:,-1]==output,:] + if self.has_uncertain_inputs: + Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now + + Xu = self.X[self.X[:,-1]==output ,0:1] #?? + + ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], + xerr=2 * np.sqrt(self.X_variance[which_data, 0]), + ecolor='k', fmt=None, elinewidth=.5, alpha=.5) + + Zu = self.Z[self.Z[:,-1]==output,:] + Zu = self.Z * self._Xscale + self._Xoffset + Zu = self.Z[self.Z[:,-1]==output ,0:1] #?? + ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12) + #ax.set_ylim(ax.get_ylim()[0],) + + else: + raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + + def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False): """ @@ -336,7 +357,7 @@ class SparseGP(GPBase): This is to allow for different normalizations of the output dimensions. """ - assert isinstance(self.likelihood,EP_Mixed_Noise) + assert hasattr(self,'multioutput') index = np.ones_like(Xnew)*output Xnew = np.hstack((Xnew,index)) @@ -345,6 +366,51 @@ class SparseGP(GPBase): mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) # now push through likelihood - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + if isinstance(self.likelihood,EP_Mixed_Noise): + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + else: + mean, var, _025pm, _975pm = self.likelihood_list[output].predictive_values(mu, var, full_cov) return mean, var, _025pm, _975pm + + + def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False): + """ + Internal helper function for making predictions, does not account + for normalization or likelihood + """ + Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! 
+ symmetrify(Bi) + Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi) + + if self.Cpsi1V is None: + psi1V = np.dot(self.psi1.T,self.likelihood.V) + tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0) + tmp, _ = dpotrs(self.LB, tmp, lower=1) + self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1) + + assert hasattr(self,'multioutput') + index = np.ones_like(_Xnew)*output + _Xnew = np.hstack((_Xnew,index)) + + if X_variance_new is None: + Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts) + mu = np.dot(Kx.T, self.Cpsi1V) + if full_cov: + Kxx = self.kern.K(_Xnew, which_parts=which_parts) + var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting + else: + Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) + var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) + else: + # assert which_parts=='all', "switching out parts of variational kernels is not implemented" + Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + mu = np.dot(Kx, self.Cpsi1V) + if full_cov: + raise NotImplementedError, "TODO" + else: + Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new) + psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new) + var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) + + return mu, var[:, None] diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py index db31c626..8b4e832b 100644 --- a/GPy/kern/parts/prod.py +++ b/GPy/kern/parts/prod.py @@ -18,7 +18,7 @@ class Prod(Kernpart): """ def __init__(self,k1,k2,tensor=False): self.num_params = k1.num_params + k2.num_params - self.name = k1.name + '' + k2.name + self.name = '['+k1.name + '(x)' + k2.name +']' self.k1 = k1 self.k2 = k2 if tensor: diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py index 150de09c..a00b0643 100644 --- a/GPy/likelihoods/ep_mixed_noise.py +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -249,7 +249,7 @@ class EP_Mixed_Noise(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) @@ -344,7 +344,7 @@ class EP_Mixed_Noise(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py index 0fdad786..96138bbf 100644 --- a/GPy/models/gp_multioutput.py +++ b/GPy/models/gp_multioutput.py @@ -6,6 +6,7 @@ import numpy as np from ..core import GP from .. import likelihoods from ..
import kern +from ..util import multioutput import pylab as pb @@ -29,7 +30,7 @@ class GPMultioutput(GP): """ - def __init__(self,X_list,Y_list,noise_list=[],kernel_list=None,normalize_X=False,normalize_Y=False,W=1): #TODO W + def __init__(self,X_list,Y_list,kernel_list=None,normalize_X=False,normalize_Y=False,W=1,mixed_noise_list=[]): #TODO W assert len(X_list) == len(Y_list) index = [] @@ -40,53 +41,30 @@ class GPMultioutput(GP): i += 1 index = np.vstack(index) - if noise_list == []: - likelihood_list = [] + self.likelihood_list = [] + if mixed_noise_list == []: for Y in Y_list: - likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) + self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) + + Y = np.vstack([l_.Y for l_ in self.likelihood_list]) + likelihood = likelihoods.Gaussian(Y,normalize=False) + likelihood.index = index + + else: + assert len(Y_list) == len(mixed_noise_list) + for noise,Y in zip(mixed_noise_list,Y_list): + self.likelihood_list.append(likelihoods.EP(Y,noise)) + likelihood = likelihoods.EP_Mixed_Noise(Y_list, mixed_noise_list) - Y = np.vstack([l_.Y for l_ in likelihood_list]) - likelihood = likelihoods.Gaussian(Y,normalize=False) - likelihood.index = index X = np.hstack([np.vstack(X_list),index]) + original_dim = X.shape[1] - 1 if kernel_list is None: - original_dim = X.shape[1]-1 - kernel_list = [kern.rbf(original_dim) + kern.white(original_dim)] + kernel_list = [[kern.rbf(original_dim)],[kern.white(original_dim+1)]] + + mkernel = multioutput.build_cor_kernel(input_dim=original_dim, Nout=len(X_list), CK = kernel_list[0], NC = kernel_list[1], W=1) - mkernel = kernel_list[0].prod(kern.coregionalise(len(X_list),W),tensor=True) - for k in kernel_list[1:]: - mkernel += k.prod(kern.coregionalise(len(X_list),W),tensor=True) self.multioutput = True GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) self.ensure_default_constraints() - - -""" -if likelihood is None: -noise_model_list = [] -for Y in Y_list: -noise_model_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) -#noise_model_list = [likelihoods.gaussian(variance=1.) for Y in Y_list] -#likelihood = likelihoods.EP_Mixed_Noise(Y_list, noise_model_list) - -elif Y_list is not None: -if not all(np.vstack(Y_list).flatten() == likelihood.data.flatten()): -raise Warning, 'likelihood.data and Y_list values are different.' - -X = np.hstack([np.vstack(X_list),likelihood.index]) - -if kernel_list is None: -original_dim = X.shape[1]-1 -kernel_list = [kern.rbf(original_dim) + kern.white(original_dim)] - -mkernel = kernel_list[0].prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W -for k in kernel_list[1:]: -mkernel += k.prod(kern.coregionalise(len(X_list),W),tensor=True) #TODO W - -#kern1 = kern.rbf(1) + kern.white(1) -#kern2 = kern.coregionalise(2,1) -#kern3 = kern1.prod(kern2,tensor=True) -""" - diff --git a/GPy/models/sparse_gp_multioutput.py b/GPy/models/sparse_gp_multioutput.py new file mode 100644 index 00000000..ae99421f --- /dev/null +++ b/GPy/models/sparse_gp_multioutput.py @@ -0,0 +1,97 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from ..core import SparseGP +from .. import likelihoods +from .. 
import kern +from ..util import multioutput + + +import pylab as pb + +class SparseGPMultioutput(SparseGP): + """ + Multiple output Gaussian process + + This is a thin wrapper around the models.GP class, with a set of sensible defaults + + :param X_list: input observations + :param Y_list: observed values + :param L_list: a GPy likelihood, defaults to Binomial with probit link_function + :param kernel_list: a GPy kernel, defaults to rbf + :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_X: False|True + :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_Y: False|True + + .. Note:: Multiple independent outputs are allowed using columns of Y + + """ + + def __init__(self,X_list,Y_list,kernel_list=None,normalize_X=False,normalize_Y=False,Z_list=None,num_inducing_list=10,X_variance=None,W=1,mixed_noise_list=[]): #TODO W + + assert len(X_list) == len(Y_list) + index = [] + for x,y,j in zip(X_list,Y_list,range(len(X_list))): + assert x.shape[0] == y.shape[0] + index.append(np.repeat(j,y.size)[:,None]) + index = np.vstack(index) + + + self.likelihood_list = [] + if mixed_noise_list == []: + for Y in Y_list: + self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) + + Y = np.vstack([l_.Y for l_ in self.likelihood_list]) + likelihood = likelihoods.Gaussian(Y,normalize=False) + likelihood.index = index + + else: + assert len(Y_list) == len(mixed_noise_list) + for noise,Y in zip(mixed_noise_list,Y_list): + self.likelihood_list.append(likelihoods.EP(Y,noise)) + likelihood = likelihoods.EP_Mixed_Noise(Y_list, mixed_noise_list) + + """ + if noise_list == []: + self.likelihood_list = [] + for Y in Y_list: + self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) + + Y = np.vstack([l_.Y for l_ in self.likelihood_list]) + likelihood = likelihoods.Gaussian(Y,normalize=False) + likelihood.index = index + """ + X = np.hstack([np.vstack(X_list),index]) + original_dim = X.shape[1] - 1 + + if kernel_list is None: + kernel_list = [[kern.rbf(original_dim)],[kern.white(original_dim+1)]] + + mkernel = multioutput.build_cor_kernel(input_dim=original_dim, Nout=len(X_list), CK = kernel_list[0], NC = kernel_list[1], W=1) + + z_index = [] + if Z_list is None: + if isinstance(num_inducing_list,int): + num_inducing_list = [num_inducing_list for Xj in X_list] + Z_list = [] + for Xj,nj,j in zip(X_list,num_inducing_list,range(len(X_list))): + i = np.random.permutation(Xj.shape[0])[:nj] + z_index.append(np.repeat(j,nj)[:,None]) + Z_list.append(Xj[i].copy()) + else: + assert len(Z_list) == len(X_list) + for Zj,Xj,j in zip(Z_list,X_list,range(len(Z_list))): + assert Zj.shape[1] == Xj.shape[1] + z_index.append(np.repeat(j,Zj.shape[0])[:,None]) + + Z = np.hstack([np.vstack(Z_list),np.vstack(z_index)]) + + + self.multioutput = True + SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) + self.constrain_fixed('.*iip_\d+_1') + self.ensure_default_constraints() diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 27d25518..f5384356 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -14,3 +14,4 @@ import mocap import visualize import decorators import classification +import multioutput diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py new file mode 100644 index 00000000..44b70b6f --- /dev/null +++ b/GPy/util/multioutput.py @@ -0,0 +1,35 @@ +import 
numpy as np +import warnings +from .. import kern + +def build_cor_kernel(input_dim, Nout, CK = [], NC = [], W=1): + """ + Builds an appropriate coregionalized kernel + + :input_dim: Input dimensionality + :Nout: Number of outputs + :param CK: List of coregionalized kernels (i.e., this will be multiplied by a coregionalise kernel). + :param NC: List of kernels that won't be multiplied by a coregionalise kernel + :W: + """ + + for k in CK: + if k.input_dim <> input_dim: + k.input_dim = input_dim + #raise Warning("kernel's input dimension overwritten to fit input_dim parameter.") + warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") + + for k in NC: + if k.input_dim <> input_dim + 1: + k.input_dim = input_dim + 1 + #raise Warning("kernel's input dimension overwritten to fit input_dim parameter.") + warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") + + kernel = CK[0].prod(kern.coregionalise(Nout,W),tensor=True) + for k in CK[1:]: + kernel += k.prod(kern.coregionalise(Nout,W),tensor=True) + + for k in NC: + kernel += k + + return kernel From 3dc7574c5009fa60c8b942a48bd06d62640b0e1a Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 31 Aug 2013 01:30:31 +0100 Subject: [PATCH 19/49] Error in plot corrected --- GPy/core/gp_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 2820a447..51a9352c 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -136,10 +136,11 @@ class GPBase(Model): ax = fig.add_subplot(111) if self.X.shape[1] == 1 and not hasattr(self,'multioutput'): + resolution = resolution or 200 Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now - Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) + Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution) m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) for d in range(m.shape[1]): gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) From a3d43553df1668ef34d8ef93065d5681fbbf756d Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 3 Sep 2013 10:04:33 +0100 Subject: [PATCH 20/49] added slicing to kern.py --- GPy/kern/kern.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 90176401..1b9e306d 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -416,19 +416,19 @@ class kern(Parameterized): # TODO: input_slices needed crossterms = 0 - for p1, p2 in itertools.combinations(self.parts, 2): + for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2): + if i_s1 == i_s2: + # TODO psi1 this must be faster/better/precached/more nice + tmp1 = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) + tmp2 = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) + + prod = np.multiply(tmp1, tmp2) + crossterms += prod[:, :, None] + prod[:, None, :] - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z, mu, S, tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - target += crossterms - return target + # target += crossterms + return target + crossterms def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): """Gradient of the psi2 statistics with respect to the parameters.""" From
1d37b31bea07e033b2d5d58ef836de015499369c Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 3 Sep 2013 10:05:42 +0100 Subject: [PATCH 21/49] only compare Z cache once --- GPy/kern/parts/rbf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 345134bd..d0fa9742 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -239,13 +239,14 @@ class RBF(Kernpart): def _psi_computations(self, Z, mu, S): # here are the "statistics" for psi1 and psi2 - if not fast_array_equal(Z, self._Z): + Z_changed = not fast_array_equal(Z, self._Z) + if Z_changed: # Z has changed, compute Z specific stuff self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q - if not fast_array_equal(Z, self._Z) or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S): + if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S): # something's changed. recompute EVERYTHING # psi1 From b42a6da835d3bb75ecfe65807a844f0e7b8fca2b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 3 Sep 2013 10:06:14 +0100 Subject: [PATCH 22/49] expectation test slicing --- GPy/testing/psi_stat_expectation_tests.py | 55 +++++++++++++++-------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py index da71754b..30ca14d6 100644 --- a/GPy/testing/psi_stat_expectation_tests.py +++ b/GPy/testing/psi_stat_expectation_tests.py @@ -7,9 +7,14 @@ import unittest import GPy import numpy as np from GPy import testing +import sys +import numpy +from GPy.kern.parts.rbf import RBF +from GPy.kern.parts.linear import Linear +from copy import deepcopy -__test__ = False -np.random.seed(0) +__test__ = lambda: 'deep' in sys.argv +# np.random.seed(0) def ard(p): try: @@ -19,24 +24,37 @@ def ard(p): pass return "" -@testing.deepTest(__test__) +@testing.deepTest(__test__()) class Test(unittest.TestCase): input_dim = 9 num_inducing = 4 N = 3 - Nsamples = 6e6 + Nsamples = 5e6 def setUp(self): + i_s_dim_list = [2,4,3] + indices = numpy.cumsum(i_s_dim_list).tolist() + input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)] + #input_slices[2] = deepcopy(input_slices[1]) + input_slice_kern = GPy.kern.kern(9, + [ + RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True), + RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True), + Linear(i_s_dim_list[2], np.random.rand(i_s_dim_list[2]), ARD=True) + ], + input_slices = input_slices + ) self.kerns = ( + input_slice_kern, # (GPy.kern.rbf(self.input_dim, ARD=True) + # GPy.kern.linear(self.input_dim, ARD=True) + # GPy.kern.bias(self.input_dim) + # GPy.kern.white(self.input_dim)), - (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + - GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + - GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + - GPy.kern.bias(self.input_dim) + - GPy.kern.white(self.input_dim)), +# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + +# 
GPy.kern.bias(self.input_dim) + +# GPy.kern.white(self.input_dim)), # GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim), @@ -61,22 +79,22 @@ class Test(unittest.TestCase): def test_psi1(self): for kern in self.kerns: - Nsamples = 100 + Nsamples = np.floor(self.Nsamples/300.) psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((Nsamples, self.num_inducing)) diffs = [] for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): - K = kern.K(q_x_sample_stripe, self.Z) + K = kern.K(q_x_sample_stripe[:Nsamples], self.Z) K_ += K - diffs.append(((psi1 - (K_ / (i + 1)))).mean()) + diffs.append((np.abs(psi1 - (K_ / (i + 1)))**2).mean()) K_ /= self.Nsamples / Nsamples msg = "psi1: " + "+".join([p.name + ard(p) for p in kern.parts]) try: import pylab pylab.figure(msg) pylab.plot(diffs) - self.assertTrue(np.allclose(psi1.squeeze(), K_, - rtol=1e-1, atol=.1), +# print msg, ((psi1.squeeze() - K_)**2).mean() < .01 + self.assertTrue(((psi1.squeeze() - K_)**2).mean() < .01, msg=msg + ": not matching") # sys.stdout.write(".") except: @@ -87,7 +105,7 @@ class Test(unittest.TestCase): def test_psi2(self): for kern in self.kerns: - Nsamples = 100 + Nsamples = self.Nsamples/300. psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((self.num_inducing, self.num_inducing)) diffs = [] @@ -95,13 +113,14 @@ class Test(unittest.TestCase): K = kern.K(q_x_sample_stripe, self.Z) K = (K[:, :, None] * K[:, None, :]).mean(0) K_ += K - diffs.append(((psi2 - (K_ / (i + 1)))).mean()) + diffs.append(((psi2 - (K_ / (i + 1)))**2).mean()) K_ /= self.Nsamples / Nsamples msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts])) try: import pylab pylab.figure(msg) pylab.plot(diffs) +# print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1) self.assertTrue(np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1), msg=msg + ": not matching") @@ -114,10 +133,8 @@ class Test(unittest.TestCase): pass if __name__ == "__main__": - import sys - __test__ = 'deep' in sys.argv sys.argv = ['', - 'Test.test_psi0', + #'Test.test_psi0', 'Test.test_psi1', 'Test.test_psi2', ] From 2558418b0836d70103f68b05d54ecbd9ff299f88 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 3 Sep 2013 10:07:03 +0100 Subject: [PATCH 23/49] mrd_sim stable and deprecated --- GPy/examples/dimensionality_reduction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 994e3b48..005b131f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -298,7 +298,7 @@ def bgplvm_simulation(optimize='scg', return m def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): - D1, D2, D3, N, num_inducing, Q = 30, 10, 15, 60, 3, 10 + D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) likelihood_list = [Gaussian(x, normalize=True) for x in Ylist] @@ -321,7 +321,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): if optimize: print "Optimizing Model:" - m.optimize(messages=1, max_iters=8e3, max_f_eval=8e3, gtol=.1) + m.optimize(messages=1, max_iters=8e3, gtol=.1) if plot: m.plot_X_1d("MRD Latent Space 1D") m.plot_scales("MRD Scales") From 
671591fa96c810180e20d03187e05e2b3325b204 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 4 Sep 2013 18:06:14 +0100 Subject: [PATCH 24/49] New Gaussian likelihood for multiple outputs --- GPy/core/gp.py | 2 +- GPy/core/gp_base.py | 2 +- GPy/kern/constructors.py | 13 ++++ GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/gaussian_mixed_noise.py | 99 +++++++++++++++++++++++++ GPy/models/gp_multioutput.py | 12 ++- 6 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 GPy/likelihoods/gaussian_mixed_noise.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index fb5fe789..4edde5cd 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -185,7 +185,7 @@ class GP(GPBase): if isinstance(self.likelihood,EP_Mixed_Noise): mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) else: - mean, var, _025pm, _975pm = self.likelihood_list[output].predictive_values(mu, var, full_cov) + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) return mean, var, _025pm, _975pm def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False): diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 51a9352c..fe297d6b 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -178,7 +178,7 @@ class GPBase(Model): for d in range(m.shape[1]): gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) #ax.plot(Xu[which_data], self.likelihood.data[self.likelihood.index==output][:,None], 'kx', mew=1.5) - ax.plot(Xu[which_data], self.likelihood_list[output].data, 'kx', mew=1.5) + ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5) ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_xlim(xmin, xmax) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 697f3554..e05d0688 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -250,6 +250,19 @@ def symmetric(k): return k_ def coregionalise(Nout,R=1, W=None, kappa=None): + """ + Construct coregionalisation kernel, based on the coregionlisation matrix B = np.dot(W,W.T) + np.eye(Nout)*kappa + + :param Nout: the number of outputs to corregionalise + :type Nout: int + :param R: the number of columns in the W matrix + :type R: int + :param W: W matrix + :type W: numpy array of dimensionality (Nout x R) + :param kappa: kappa vector + :type kappa: numpy array of dimensionality (Nout,) + + """ p = parts.coregionalise.Coregionalise(Nout,R,W,kappa) return kern(1,[p]) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 55f437b1..0cb62eb0 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,6 +1,7 @@ from ep import EP from ep_mixed_noise import EP_Mixed_Noise from gaussian import Gaussian +from gaussian_mixed_noise import Gaussian_Mixed_Noise from noise_model_constructors import * # TODO: from Laplace import Laplace diff --git a/GPy/likelihoods/gaussian_mixed_noise.py b/GPy/likelihoods/gaussian_mixed_noise.py new file mode 100644 index 00000000..cf5e2ca8 --- /dev/null +++ b/GPy/likelihoods/gaussian_mixed_noise.py @@ -0,0 +1,99 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs +from likelihood import likelihood +from . 
import Gaussian + + +class Gaussian_Mixed_Noise(likelihood): + def __init__(self, data_list, noise_params=None, normalize=True): + + if noise_params is None: + noise_params = [1.] * len(data_list) + + assert len(data_list) == len(noise_params) + + self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)] + + + self.n_list = [data.size for data in data_list] + n_models = len(data_list) + self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list] + + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(n_models),self.n_list)]) + + + self.data = np.vstack(data_list) + self.N, self.output_dim = self.data.shape + self._offset = np.zeros((1, self.output_dim)) + self._scale = np.ones((1, self.output_dim)) + + self.is_heteroscedastic = True #TODO check how to deal with this + self.Z = 0. # a correction factor which accounts for the approximation made + + self.set_data(data_list) + #self._variance = np.asarray(variance) + 1. + self._set_params(np.asarray(noise_params)) + + def set_data(self, data_list): + self.data = np.vstack(data_list) + self.N, D = self.data.shape + assert D == self.output_dim + self.Y = (self.data - self._offset) / self._scale + if D > self.N: + self.YYT = np.dot(self.Y, self.Y.T) + self.trYYT = np.trace(self.YYT) + self.YYT_factor = jitchol(self.YYT) + else: + self.YYT = None + self.trYYT = np.sum(np.square(self.Y)) + self.YYT_factor = self.Y + + def predictive_values(self,mu,var,full_cov,noise_model): + """ + Predicts the output given the GP + + :param mu: GP's mean + :param var: GP's variance + :param full_cov: whether to return the full covariance matrix, or just the diagonal + :type full_cov: False|True + :param noise_model: noise model to use + :type noise_model: integer + """ + if full_cov: + raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" + return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov) + + def _get_params(self): + return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list]) + + def _get_param_names(self): + if len(self.noise_model_list) == 1: + names = self.noise_model_list[0]._get_param_names() + else: + names = [] + for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))): + names.append(''.join(noise_model._get_param_names() + ['_%s' %i])) + return names + + def _set_params(self,p): + cs_params = np.cumsum([0]+self.n_params) + for i in range(len(self.n_params)): + self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]]) + self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None] + self.V = (self.precision) * self.Y + self.VVT_factor = self.precision * self.YYT_factor + self.covariance_matrix = np.eye(self.N) * 1./self.precision + #self._variance = x + + def _gradients(self,partial): + #NOTE this is not tested + return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list]) + + + + diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py index 96138bbf..bc696cbd 100644 --- a/GPy/models/gp_multioutput.py +++ b/GPy/models/gp_multioutput.py @@ -31,6 +31,7 @@ class GPMultioutput(GP): """ def __init__(self,X_list,Y_list,kernel_list=None,normalize_X=False,normalize_Y=False,W=1,mixed_noise_list=[]): #TODO W + #TODO: split into 2 models gp_mixed_noise and ep_mixed_noise assert len(X_list) == len(Y_list) index = [] @@ -41,7 +42,8 @@ 
class GPMultioutput(GP): i += 1 index = np.vstack(index) - self.likelihood_list = [] + """ + if mixed_noise_list == []: for Y in Y_list: self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) @@ -49,14 +51,18 @@ class GPMultioutput(GP): Y = np.vstack([l_.Y for l_ in self.likelihood_list]) likelihood = likelihoods.Gaussian(Y,normalize=False) likelihood.index = index - + """ + if mixed_noise_list == []: + likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,normalize=normalize_Y) + #TODO: allow passing the variance parameter into the model else: + self.likelihood_list = [] #TODO this is not needed assert len(Y_list) == len(mixed_noise_list) for noise,Y in zip(mixed_noise_list,Y_list): self.likelihood_list.append(likelihoods.EP(Y,noise)) + #TODO: allow normalization likelihood = likelihoods.EP_Mixed_Noise(Y_list, mixed_noise_list) - X = np.hstack([np.vstack(X_list),index]) original_dim = X.shape[1] - 1 From 3608290350ee5dd4b9316428c89cc8bd88abcfe5 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 11 Sep 2013 16:00:20 +0100 Subject: [PATCH 25/49] Step transformation added --- GPy/likelihoods/noise_model_constructors.py | 9 ++++++++- GPy/likelihoods/noise_models/binomial_noise.py | 17 ++++++++++++----- .../noise_models/gp_transformations.py | 16 ++++++++++++++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index 01f15f71..158f1674 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -17,9 +17,16 @@ def binomial(gp_link=None): if isinstance(gp_link,noise_models.gp_transformations.Probit): analytical_mean = True + analytical_variance = False + + elif isinstance(gp_link,noise_models.gp_transformations.Step): + analytical_mean = True + analytical_variance = True + else: analytical_mean = False - analytical_variance = False + analytical_variance = False + return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance) def exponential(gp_link=None): diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/binomial_noise.py index e47d9211..9ceacf79 100644 --- a/GPy/likelihoods/noise_models/binomial_noise.py +++ b/GPy/likelihoods/noise_models/binomial_noise.py @@ -42,11 +42,18 @@ class Binomial(NoiseDistribution): :param tau_i: precision of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float) """ - z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = std_norm_cdf(z) - phi = std_norm_pdf(z) - mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + if isinstance(self.gp_link,gp_transformations.Probit): + z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) + Z_hat = std_norm_cdf(z) + phi = std_norm_pdf(z) + mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) + sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + + elif isinstance(self.gp_link,gp_transformations.Step): + Z_hat = None + mu_hat = None + sigma2_hat = None + return Z_hat, mu_hat, sigma2_hat def _predictive_mean_analytical(self,mu,sigma): diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py index 5cb08e8e..41cc5790 100644 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -55,6 +55,8 @@ class Probit(GPTransformation): def 
d2transf_df2(self,f): return -f * std_norm_pdf(f) + def + class Log(GPTransformation): """ $$ @@ -108,4 +110,18 @@ class Reciprocal(GPTransformation): def d2transf_df2(self,f): return 2./f**3 +class Step(GPTransformation): + """ + $$ + g(f) = I_{x \in A} + $$ + """ + def transf(self,f): + #transformation goes here + return np.where(f>0, 1, 0) + def dtransf_df(self,f): + pass + + def d2transf_df2(self,f): + pass From ef072f235cfa71570cea99525906afe00a722264 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 11 Sep 2013 16:06:05 +0100 Subject: [PATCH 26/49] Step function modified, now the output is either 1 or -1 --- GPy/likelihoods/noise_models/gp_transformations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py index 41cc5790..73e08cac 100644 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -118,7 +118,7 @@ class Step(GPTransformation): """ def transf(self,f): #transformation goes here - return np.where(f>0, 1, 0) + return np.where(f>0, 1, -1) def dtransf_df(self,f): pass From 822fd4f62292c15dcd7d0cdec1cfd3f324183ae2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 11 Sep 2013 16:36:11 +0100 Subject: [PATCH 27/49] bug fixed --- GPy/likelihoods/noise_models/gp_transformations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py index 73e08cac..5f337aa4 100644 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -55,8 +55,6 @@ class Probit(GPTransformation): def d2transf_df2(self,f): return -f * std_norm_pdf(f) - def - class Log(GPTransformation): """ $$ From 0070a73004a5458b8b616aaedf283db85d06e0c6 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 12 Sep 2013 10:20:28 +0100 Subject: [PATCH 28/49] Fixed Alans checkgrad bug --- GPy/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index c31ea209..2e70e7c2 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -458,7 +458,7 @@ class Model(Parameterized): numerical_gradient = (f1 - f2) / (2 * dx) global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient)) - return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance + return (np.abs(1. 
- global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) else: # check the gradient of each parameter individually, and do some pretty printing try: From a99dccaab7ecb946612f3233175b739b81e32980 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 12 Sep 2013 13:30:20 +0100 Subject: [PATCH 29/49] fixed args to bfgs --- GPy/inference/optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py index 0ef487af..589ec4c7 100644 --- a/GPy/inference/optimization.py +++ b/GPy/inference/optimization.py @@ -130,7 +130,7 @@ class opt_lbfgsb(Optimizer): opt_dict['pgtol'] = self.gtol opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint, - maxfun=self.max_f_eval, **opt_dict) + maxfun=self.max_iters, **opt_dict) self.x_opt = opt_result[0] self.f_opt = f_fp(self.x_opt)[0] self.funct_eval = opt_result[2]['funcalls'] From e35cc76895c1e6a677f9dd402368d062b0aa42db Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 12 Sep 2013 16:04:46 +0100 Subject: [PATCH 30/49] docstrings completed --- GPy/likelihoods/gaussian.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 7665e608..6582ede0 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -7,9 +7,9 @@ class Gaussian(likelihood): """ Likelihood class for doing Expectation propagation - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood_function used - :param variance : + :param data: observed output (Nx1 numpy.darray) + ..Note:: data values allowed depend on the likelihood_function used + :param variance: noise parameter :param normalize: whether to normalize the data before computing (predictions will be in original scales) :type normalize: False|True """ From b739a105627eba0098cd7db85277ef6fd0a7aa7a Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 12 Sep 2013 16:18:29 +0100 Subject: [PATCH 31/49] docstrings completed --- GPy/likelihoods/gaussian.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 6582ede0..cdf54dd6 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -7,8 +7,8 @@ class Gaussian(likelihood): """ Likelihood class for doing Expectation propagation - :param data: observed output (Nx1 numpy.darray) - ..Note:: data values allowed depend on the likelihood_function used + :param data: observed output + :type data: Nx1 numpy.darray :param variance: noise parameter :param normalize: whether to normalize the data before computing (predictions will be in original scales) :type normalize: False|True From 1b54365f7cb61c2053d1d718685781c1a9f230e1 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:25:53 +0100 Subject: [PATCH 32/49] errors fixed --- GPy/testing/unit_tests.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 6e504a69..2aea03ac 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -166,10 +166,7 @@ class GradientTests(unittest.TestCase): X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] kernel = GPy.kern.rbf(1) - distribution = GPy.likelihoods.likelihood_functions.Binomial() - likelihood = GPy.likelihoods.EP(Y, distribution) - m = 
GPy.core.GP(X, likelihood, kernel) - m.ensure_default_constraints() + m = GPy.models.GPClassification(X,Y,kernel=kernel) m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) @@ -179,10 +176,11 @@ class GradientTests(unittest.TestCase): Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] Z = np.linspace(0, 15, 4)[:, None] kernel = GPy.kern.rbf(1) - distribution = GPy.likelihoods.likelihood_functions.Binomial() - likelihood = GPy.likelihoods.EP(Y, distribution) - m = GPy.core.SparseGP(X, likelihood, kernel, Z) - m.ensure_default_constraints() + m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z) + #distribution = GPy.likelihoods.likelihood_functions.Binomial() + #likelihood = GPy.likelihoods.EP(Y, distribution) + #m = GPy.core.SparseGP(X, likelihood, kernel, Z) + #m.ensure_default_constraints() m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) @@ -191,7 +189,7 @@ class GradientTests(unittest.TestCase): X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] - m = GPy.models.FITCClassification(X, Y=Y) + m = GPy.models.FITCClassification(X, Y, kernel = k) m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) From 6533f7792f32ef2a7b99cf47b7d52785a1e546f8 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:26:50 +0100 Subject: [PATCH 33/49] changes according to new likelihoods definition --- GPy/models/fitc_classification.py | 4 ++-- GPy/models/sparse_gp_classification.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py index f4cf4e8d..ee92a1b4 100644 --- a/GPy/models/fitc_classification.py +++ b/GPy/models/fitc_classification.py @@ -31,8 +31,8 @@ class FITCClassification(FITC): kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) if likelihood is None: - distribution = likelihoods.likelihood_functions.Binomial() - likelihood = likelihoods.EP(Y, distribution) + noise_model = likelihoods.binomial() + likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 9228fb89..4e4e4316 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -28,11 +28,11 @@ class SparseGPClassification(SparseGP): def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) + kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3) if likelihood is None: - distribution = likelihoods.likelihood_functions.Binomial() - likelihood = likelihoods.EP(Y, distribution) + noise_model = likelihoods.binomial() + likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' 
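The two patches above move EP classification onto the new noise-model constructors: the convenience models now build likelihoods.EP(Y, likelihoods.binomial()) themselves instead of instantiating likelihood_functions.Binomial directly. A minimal usage sketch distilled from the updated unit tests (the data construction mirrors the tests; the value of N is not shown in this excerpt, so N = 50 here is an assumption, as is everything else not visible in the diffs):

    import numpy as np
    import GPy

    # toy 1D data: two Gaussian clusters, labelled 1 and 0
    N = 50  # assumed sample size; the tests' own N is not shown in this excerpt
    X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
    Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]

    # GPClassification wires up likelihoods.EP(Y, likelihoods.binomial()) internally
    m = GPy.models.GPClassification(X, Y, kernel=GPy.kern.rbf(1))
    m.update_likelihood_approximation()  # run the EP approximation before optimizing
    assert m.checkgrad()

    # sparse variant with explicit inducing inputs
    Z = np.linspace(0, 15, 4)[:, None]
    m_sparse = GPy.models.SparseGPClassification(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
    m_sparse.update_likelihood_approximation()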
From 79aef1707fa4ed534b3592a78a2b397684159a5f Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:28:04 +0100 Subject: [PATCH 34/49] docstring completed --- GPy/models/sparse_gp_regression.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 0dcef3e0..70ad46a6 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -20,7 +20,11 @@ class SparseGPRegression(SparseGP): :type normalize_X: False|True :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_Y: False|True + :param Z: inducing inputs (optional, see note) + :type Z: np.ndarray (num_inducing x input_dim) | None :rtype: model object + :param X_variance: The uncertainty in the measurements of X (Gaussian variance) + :type X_variance: np.ndarray (num_data x input_dim) | None .. Note:: Multiple independent outputs are allowed using columns of Y From cf2cf67ed216120e72cd4a7817226a46cd3be66d Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:29:08 +0100 Subject: [PATCH 35/49] build_cor_kernel is now called build_lcm --- GPy/util/multioutput.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py index 44b70b6f..2b06ba95 100644 --- a/GPy/util/multioutput.py +++ b/GPy/util/multioutput.py @@ -2,33 +2,33 @@ import numpy as np import warnings from .. import kern -def build_cor_kernel(input_dim, Nout, CK = [], NC = [], W=1): +def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None): + #TODO build_icm or build_lcm """ - Builds an appropiate coregionalized kernel + Builds a kernel for a linear coregionalization model :input_dim: Input dimensionality - :Nout: Number of outputs + :num_outputs: Number of outputs :param CK: List of coregionalized kernels (i.e., this will be multiplied by a coregionalise kernel). 
- :param NC: List of kernels that won't be multiplied by a coregionalise kernel - :W: + :param NC: List of kernels that will be added up together with CK, but won't be multiplied by a coregionalise kernel + :param W_columns: number of columns of the coregionalization matrix W (the 'coregion_W' parameters) + :type W_columns: integer """ for k in CK: if k.input_dim <> input_dim: k.input_dim = input_dim - #raise Warning("kernel's input dimension overwritten to fit input_dim parameter.") warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") for k in NC: if k.input_dim <> input_dim + 1: k.input_dim = input_dim + 1 - #raise Warning("kernel's input dimension overwritten to fit input_dim parameter.") warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") - kernel = CK[0].prod(kern.coregionalise(Nout,W),tensor=True) + kernel = CK[0].prod(kern.coregionalise(num_outputs,W_columns,W,kappa),tensor=True) for k in CK[1:]: - kernel += k.prod(kern.coregionalise(Nout,W),tensor=True) - + k_coreg = kern.coregionalise(num_outputs,W_columns,W,kappa) + kernel += k.prod(k_coreg,tensor=True) for k in NC: kernel += k return kernel From c941da9d3c7b31d9249503f9723fb74b373b7905 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:29:53 +0100 Subject: [PATCH 36/49] new files added --- GPy/models/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index d0290165..5c73c4bf 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -12,4 +12,6 @@ from warped_gp import WarpedGP from bayesian_gplvm import BayesianGPLVM from mrd import MRD from gp_multioutput import GPMultioutput +from gp_multioutput_regression import GPMultioutputRegression +from sparse_gp_multioutput_regression import SparseGPMultioutputRegression from sparse_gp_multioutput import SparseGPMultioutput From 81eb22dffd2b011953d4d08faa1e5cbf7c80359a Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:31:16 +0100 Subject: [PATCH 37/49] R parameter renamed as W_columns, and Nout renamed as num_outputs --- GPy/kern/parts/coregionalise.py | 63 ++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/GPy/kern/parts/coregionalise.py b/GPy/kern/parts/coregionalise.py index 8faceafe..e98cd0da 100644 --- a/GPy/kern/parts/coregionalise.py +++ b/GPy/kern/parts/coregionalise.py @@ -9,24 +9,32 @@ from scipy import weave class Coregionalise(Kernpart): """ - Kernel for Intrinsic Corregionalization Models + Kernel for Intrinsic Coregionalization Models + + This kernel has the form: K = np.dot(W,W.T) + np.diag(kappa) + An intrinsic coregionalization kernel is obtained as the tensor product between a different kernel and the coregionalize kernel.
+ + :param num_outputs: number of outputs to coregionalize + :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None) + :param W: array of shape (num_outputs, W_columns) + :param kappa: array of dimensions (num_outputs,) """ - def __init__(self,Nout,R=1, W=None, kappa=None): + def __init__(self,num_outputs,W_columns=1, W=None, kappa=None): self.input_dim = 1 self.name = 'coregion' - self.Nout = Nout - self.R = R + self.num_outputs = num_outputs + self.W_columns = W_columns if W is None: - self.W = np.ones((self.Nout,self.R)) + self.W = np.ones((self.num_outputs,self.W_columns)) else: - assert W.shape==(self.Nout,self.R) + assert W.shape==(self.num_outputs,self.W_columns) self.W = W if kappa is None: - kappa = np.ones(self.Nout) + kappa = np.ones(self.num_outputs) else: - assert kappa.shape==(self.Nout,) + assert kappa.shape==(self.num_outputs,) self.kappa = kappa - self.num_params = self.Nout*(self.R + 1) + self.num_params = self.num_outputs*(self.W_columns + 1) self._set_params(np.hstack([self.W.flatten(),self.kappa])) def _get_params(self): @@ -34,12 +42,12 @@ class Coregionalise(Kernpart): def _set_params(self,x): assert x.size == self.num_params - self.kappa = x[-self.Nout:] - self.W = x[:-self.Nout].reshape(self.Nout,self.R) + self.kappa = x[-self.num_outputs:] + self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns) self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa) def _get_param_names(self): - return sum([['W%i_%i'%(i,j) for j in range(self.R)] for i in range(self.Nout)],[]) + ['kappa_%i'%i for i in range(self.Nout)] + return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)] def K(self,index,index2,target): index = np.asarray(index,dtype=np.int) @@ -57,26 +65,26 @@ class Coregionalise(Kernpart): if index2 is None: code=""" for(int i=0;i [... inline weave C code and the remainder of this patch truncated in the source ...] From [hash truncated] Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:32:11 +0100 Subject: [PATCH 38/49] redundant variable eliminated --- GPy/likelihoods/ep_mixed_noise.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py index a00b0643..ffc8cb51 100644 --- a/GPy/likelihoods/ep_mixed_noise.py +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -23,9 +23,9 @@ class EP_Mixed_Noise(likelihood): assert len(data_list) == len(noise_model_list) self.noise_model_list = noise_model_list n_list = [data.size for data in data_list] - n_models = len(data_list) + self.n_models = len(data_list) self.n_params = [noise_model._get_params().size for noise_model in noise_model_list] - self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(n_models),n_list)]) + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)]) self.epsilon = epsilon self.eta, self.delta = power_ep self.data = np.vstack(data_list) From 86b5b3aa05a919998532b640321af5a74a8e79da Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:33:16 +0100 Subject: [PATCH 39/49] works well now --- GPy/likelihoods/gaussian_mixed_noise.py | 55 ++++++++++++++----------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/GPy/likelihoods/gaussian_mixed_noise.py b/GPy/likelihoods/gaussian_mixed_noise.py index cf5e2ca8..e7df2584 100644 --- a/GPy/likelihoods/gaussian_mixed_noise.py +++ b/GPy/likelihoods/gaussian_mixed_noise.py @@ -10,33 +10,39 @@ from .
import Gaussian class Gaussian_Mixed_Noise(likelihood): + """ + Gaussian Likelihood for multiple outputs + + This is a wrapper around likelihood.Gaussian class + + :param data_list: data observations + :type data_list: list of numpy arrays (num_data_output_i x 1), one array per output + :param noise_params: noise parameters of each output + :type noise_params: list of floats, one per output + :param normalize: whether to normalize the data before computing (predictions will be in original scales) + :type normalize: False|True + """ def __init__(self, data_list, noise_params=None, normalize=True): + self.Nparams = len(data_list) + self.n_list = [data.size for data in data_list] + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)]) if noise_params is None: - noise_params = [1.] * len(data_list) - - assert len(data_list) == len(noise_params) + noise_params = [1.] * self.Nparams + else: + assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.' self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)] - - - self.n_list = [data.size for data in data_list] - n_models = len(data_list) self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list] - - self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(n_models),self.n_list)]) - - self.data = np.vstack(data_list) self.N, self.output_dim = self.data.shape self._offset = np.zeros((1, self.output_dim)) self._scale = np.ones((1, self.output_dim)) - self.is_heteroscedastic = True #TODO check how to deal with this + self.is_heteroscedastic = True self.Z = 0. # a correction factor which accounts for the approximation made self.set_data(data_list) - #self._variance = np.asarray(variance) + 1. 
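+ # _set_params below routes each entry of noise_params to the matching per-output Gaussian model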
self._set_params(np.asarray(noise_params)) def set_data(self, data_list): @@ -45,9 +51,10 @@ class Gaussian_Mixed_Noise(likelihood): assert D == self.output_dim self.Y = (self.data - self._offset) / self._scale if D > self.N: - self.YYT = np.dot(self.Y, self.Y.T) - self.trYYT = np.trace(self.YYT) - self.YYT_factor = jitchol(self.YYT) + raise NotImplementedError + #self.YYT = np.dot(self.Y, self.Y.T) + #self.trYYT = np.trace(self.YYT) + #self.YYT_factor = jitchol(self.YYT) else: self.YYT = None self.trYYT = np.sum(np.square(self.Y)) @@ -82,18 +89,18 @@ class Gaussian_Mixed_Noise(likelihood): def _set_params(self,p): cs_params = np.cumsum([0]+self.n_params) + for i in range(len(self.n_params)): self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]]) self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None] - self.V = (self.precision) * self.Y + + self.V = self.precision * self.Y self.VVT_factor = self.precision * self.YYT_factor self.covariance_matrix = np.eye(self.N) * 1./self.precision - #self._variance = x def _gradients(self,partial): - #NOTE this is not tested - return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list]) - - - - + gradients = [] + aux = np.cumsum([0]+self.n_list) + for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list): + gradients += [noise_model._gradients(partial[ai:af])] + return np.hstack(gradients) From f0569d6d88be82a97683103b8b1664efb8e09892 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:35:12 +0100 Subject: [PATCH 40/49] docstrings in new functions --- GPy/core/gp.py | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 4edde5cd..b51f9ab4 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -20,9 +20,6 @@ class GP(GPBase): :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True :rtype: model object - :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 - :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] - :type powerep: list .. Note:: Multiple independent outputs are allowed using columns of Y @@ -134,7 +131,7 @@ class GP(GPBase): :type Xnew: np.ndarray, Nnew x self.input_dim :param which_parts: specifies which outputs kernel(s) to use in prediction :type which_parts: ('all', list of bools) - :param full_cov: whether to return the folll covariance matrix, or just the diagonal + :param full_cov: whether to return the full covariance matrix, or just the diagonal :type full_cov: bool :rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise @@ -155,23 +152,22 @@ class GP(GPBase): def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False): """ - Predict the function(s) at the new point(s) Xnew. + For a specific output, predict the function at the new point(s) Xnew. 
Arguments --------- :param Xnew: The points at which to make a prediction :type Xnew: np.ndarray, Nnew x self.input_dim + :param output: output to predict + :type output: integer in {0,..., num_outputs-1} :param which_parts: specifies which outputs kernel(s) to use in prediction :type which_parts: ('all', list of bools) - :param full_cov: whether to return the folll covariance matrix, or just the diagonal + :param full_cov: whether to return the full covariance matrix, or just the diagonal :type full_cov: bool :rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim - - If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. - This is to allow for different normalizations of the output dimensions. - + .. Note:: For multiple output models only """ assert hasattr(self,'multioutput') index = np.ones_like(Xnew)*output @@ -182,23 +178,32 @@ class GP(GPBase): mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) # now push through likelihood - if isinstance(self.likelihood,EP_Mixed_Noise): - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) - else: - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) return mean, var, _025pm, _975pm def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False): """ - Internal helper function for making predictions, does not account - for normalization or likelihood + Internal helper function for making predictions for a specific output, + does not account for normalization or likelihood + --------- + + :param Xnew: The points at which to make a prediction + :type Xnew: np.ndarray, Nnew x self.input_dim + :param output: output to predict + :type output: integer in {0,..., num_outputs-1} + :param which_parts: specifies which outputs kernel(s) to use in prediction + :type which_parts: ('all', list of bools) + :param full_cov: whether to return the full covariance matrix, or just the diagonal + + .. Note:: For multiple output models only """ assert hasattr(self,'multioutput') + + # creates an index column and appends it to _Xnew index = np.ones_like(_Xnew)*output _Xnew = np.hstack((_Xnew,index)) Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T - #KiKx = np.dot(self.Ki, Kx) KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1) mu = np.dot(KiKx.T, self.likelihood.Y) if full_cov: From ea7c18fcccfa0e790848b8dd799c49be6bae9131 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:36:32 +0100 Subject: [PATCH 41/49] Changes in plot functions, to allow 1D multiple outputs visualization --- GPy/core/gp_base.py | 103 ++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index fe297d6b..b57fe65a 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -46,6 +46,8 @@ class GPBase(Model): :param which_parts: which of the kernel functions to plot (additively) :type which_parts: 'all', or list of bools :param resolution: the number of intervals to sample the GP on. 
Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+ :param output: which output to plot (for multiple output models only)
+ :type output: integer (first output is 0)
 
 Plot the posterior of the GP.
 - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
 - In two dimsensions, a contour-plot shows the mean predicted function
 - Not implemented in higher dimensions
@@ -92,13 +94,14 @@
 
 elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+ assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
 Xu = self.X[self.X[:,-1]==output ,0:1]
 Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
 if samples == 0:
 m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
 gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
- #ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
 ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
 else:
 m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
@@ -117,6 +120,11 @@
 Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
 ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
 
+ elif self.X.shape[1] == 3 and hasattr(self,'multioutput'):
+ assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+ raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+
 else:
 raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
@@ -126,6 +134,8 @@
 
 :param levels: for 2D plotting, the number of contour levels to use
 is ax is None, create a new figure
+ :param output: which output to plot (for multiple output models only)
+ :type output: integer (first output is 0)
 """
 # TODO include samples
 if which_data == 'all':
@@ -135,60 +145,61 @@
 fig = pb.figure(num=fignum)
 ax = fig.add_subplot(111)
 
- if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
- resolution = resolution or 200
+ if not hasattr(self,'multioutput'):
 
- Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+ if self.X.shape[1] == 1:
+ resolution = resolution or 200
 
- Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution)
- m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
- for d in range(m.shape[1]):
- gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
- ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
- ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
- ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
- ax.set_xlim(xmin, xmax)
- ax.set_ylim(ymin, ymax)
+ Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
 
- elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
- resolution = resolution or 50
- Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
- x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
- m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
- m = m.reshape(resolution, resolution).T
- ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
- Yf = self.likelihood.Y.flatten()
- ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
# @UndefinedVariable - ax.set_xlim(xmin[0], xmax[0]) - ax.set_ylim(xmin[1], xmax[1]) + Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution) + m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) + for d in range(m.shape[1]): + gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) + ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) + ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) + ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2 and hasattr(self,'multioutput'): - Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest - Xu = self.X * self._Xscale + self._Xoffset - Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column - - Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - - m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output) - #if not isinstance(self.likelihood,EP_Mixed_Noise): - # m, _, lower, upper = self.predict(np.hstack([Xnew,np.repeat(output,Xnew.size)[:,None]]), which_parts=which_parts) - #else: - # m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output) - - for d in range(m.shape[1]): - gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) - #ax.plot(Xu[which_data], self.likelihood.data[self.likelihood.index==output][:,None], 'kx', mew=1.5) - ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5) - ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) - ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) - ax.set_xlim(xmin, xmax) - ax.set_ylim(ymin, ymax) + elif self.X.shape[1] == 2: + resolution = resolution or 50 + Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) + x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) + m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) + m = m.reshape(resolution, resolution).T + ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable + Yf = self.likelihood.Y.flatten() + ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable + ax.set_xlim(xmin[0], xmax[0]) + ax.set_ylim(xmin[1], xmax[1]) + else: + raise NotImplementedError, "Cannot define a frame with more than two input dimensions" else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + assert self.num_outputs > output, 'The model has only %s outputs.' 
%self.num_outputs
+ if self.X.shape[1] == 2:
+ resolution = resolution or 200
+ Xu = self.X * self._Xscale + self._Xoffset #undo the normalization
+ Xu = Xu[self.X[:,-1]==output ,0:1] #keep the output of interest and drop the index column
+ Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+ m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output)
+ for d in range(m.shape[1]):
+ gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+ ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
+ ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+ ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+ ax.set_xlim(xmin, xmax)
+ ax.set_ylim(ymin, ymax)
+ elif self.X.shape[1] == 3:
+ raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
+ else:
+ raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

From 62daa978d37f693cde4e126dbd66d15c96d72618 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Fri, 13 Sep 2013 12:45:58 +0100
Subject: [PATCH 42/49] Likelihood gradients for heteroscedastic noise

---
 GPy/core/sparse_gp.py | 61 +++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 22 deletions(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 6d9761c4..e50e818d 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -85,7 +85,6 @@ class SparseGP(GPBase):
 tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)
 self.A = tdot(tmp)
-
 # factor B
 self.B = np.eye(self.num_inducing) + self.A
 self.LB = jitchol(self.B)
@@ -114,6 +113,7 @@
 dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
 
 if self.likelihood.is_heteroscedastic:
+
 if self.has_uncertain_inputs:
 self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
 else:
@@ -135,9 +135,23 @@
 # save computation here.
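 # Editor's note (an interpretation added for clarity, not part of the original commit):
 # partial_for_likelihood appears to hold dL/d(noise variance). With precision
 # beta = 1/sigma**2 and V = beta*Y, the heteroscedastic leading term below,
 # -0.5*beta + 0.5*V**2, equals -beta**2 * (0.5/beta - 0.5*Y**2), i.e. the chain
 # rule d/dsigma**2 = -beta**2 * d/dbeta applied to the Gaussian log-likelihood
 # terms; the likelihood object then chains this through its own parameter gradients.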
self.partial_for_likelihood = None
 elif self.likelihood.is_heteroscedastic:
- raise NotImplementedError, "heteroscedatic derivates not implemented"
+
+ if self.has_uncertain_inputs:
+ raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
+
+ else:
+ Lmi_psi1, nil = dtrtrs(self.Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
+ _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+ _Bi_Lmi_psi1, _ = dtrtrs(self.LB.T, np.asfortranarray(_LBi_Lmi_psi1), lower=1, trans=0)
+
+ self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
+ self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
+ self.partial_for_likelihood += 0.5*np.sum(_Bi_Lmi_psi1*Lmi_psi1,0)[:,None]*self.likelihood.precision**2 #NOTE this term has numerical issues
+ self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
+ self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
+
 else:
- # likelihood is not heterscedatic
+ # likelihood is not heteroscedastic
 self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
 self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
 self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - self.data_fit)
@@ -269,7 +283,7 @@
 :type X_variance_new: np.ndarray, Nnew x self.input_dim
 :param which_parts: specifies which outputs kernel(s) to use in prediction
 :type which_parts: ('all', list of bools)
- :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+ :param full_cov: whether to return the full covariance matrix, or just the diagonal
 :type full_cov: bool
 :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
 :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -335,28 +349,26 @@
 else:
 raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
-
-
 def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
 """
- Predict the function(s) at the new point(s) Xnew.
+ For a specific output, predict the function at the new point(s) Xnew.
 
 Arguments
 ---------
 :param Xnew: The points at which to make a prediction
 :type Xnew: np.ndarray, Nnew x self.input_dim
+ :param output: output to predict
+ :type output: integer in {0,..., num_outputs-1}
 :param which_parts: specifies which outputs kernel(s) to use in prediction
 :type which_parts: ('all', list of bools)
- :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+ :param full_cov: whether to return the full covariance matrix, or just the diagonal
 :type full_cov: bool
 :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
 :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
 :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
-
- If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
- This is to allow for different normalizations of the output dimensions. - + .. Note:: For multiple output models only """ + assert hasattr(self,'multioutput') index = np.ones_like(Xnew)*output Xnew = np.hstack((Xnew,index)) @@ -366,18 +378,24 @@ class SparseGP(GPBase): mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) # now push through likelihood - if isinstance(self.likelihood,EP_Mixed_Noise): - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) - else: - mean, var, _025pm, _975pm = self.likelihood_list[output].predictive_values(mu, var, full_cov) + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) return mean, var, _025pm, _975pm - - def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False): """ - Internal helper function for making predictions, does not account - for normalization or likelihood + Internal helper function for making predictions for a specific output, + does not account for normalization or likelihood + --------- + + :param Xnew: The points at which to make a prediction + :type Xnew: np.ndarray, Nnew x self.input_dim + :param output: output to predict + :type output: integer in {0,..., num_outputs-1} + :param which_parts: specifies which outputs kernel(s) to use in prediction + :type which_parts: ('all', list of bools) + :param full_cov: whether to return the full covariance matrix, or just the diagonal + + .. Note:: For multiple output models only """ Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! symmetrify(Bi) @@ -403,8 +421,7 @@ class SparseGP(GPBase): Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) else: - # assert which_p.Tarts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new) mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" From 277601d55682d1c5d9a41a423afa93015f1d6ddc Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 12:50:41 +0100 Subject: [PATCH 43/49] Multiple outputs model with Gaussian noise --- GPy/models/gp_multioutput_regression.py | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 GPy/models/gp_multioutput_regression.py diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/models/gp_multioutput_regression.py new file mode 100644 index 00000000..c0a5b557 --- /dev/null +++ b/GPy/models/gp_multioutput_regression.py @@ -0,0 +1,59 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from ..core import GP +from .. import likelihoods +from .. 
import kern
+from ..util import multioutput
+
+class GPMultioutputRegression(GP):
+ """
+ Multiple output Gaussian process with Gaussian noise
+
+ This is a wrapper around the models.GP class, with a set of sensible defaults
+
+ :param X_list: input observations
+ :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
+ :param Y_list: observed values
+ :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
+ :param kernel_list: GPy kernels, defaults to rbf
+ :type kernel_list: list of GPy kernels
+ :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
+ :type noise_variance_list: list of floats
+ :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+ :type normalize_X: False|True
+ :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+ :type normalize_Y: False|True
+ :param W_columns: number of columns of the coregionalization matrix W (see coregionalize kernel documentation)
+ :type W_columns: integer
+ """
+
+ def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,W_columns=1):
+
+ self.num_outputs = len(Y_list)
+ assert len(X_list) == self.num_outputs, 'Number of outputs does not match the length of the inputs list.'
+
+ #Inputs indexing
+ i = 0
+ index = []
+ for x,y in zip(X_list,Y_list):
+ assert x.shape[0] == y.shape[0]
+ index.append(np.repeat(i,x.size)[:,None])
+ i += 1
+ index = np.vstack(index)
+ X = np.hstack([np.vstack(X_list),index])
+ original_dim = X.shape[1] - 1
+
+ #Mixed noise likelihood definition
+ likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
+
+ #Coregionalization kernel definition
+ if kernel_list is None:
+ kernel_list = [[kern.rbf(original_dim)],[]]
+ mkernel = multioutput.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, CK = kernel_list[0], NC = kernel_list[1], W_columns=W_columns)
+
+ self.multioutput = True
+ GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
+ self.ensure_default_constraints()

From 34bac4fa40c47ba7391a6de9d01b752971ae908b Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Fri, 13 Sep 2013 12:51:05 +0100
Subject: [PATCH 44/49] Sparse multiple outputs model with Gaussian noise

---
 .../sparse_gp_multioutput_regression.py | 80 +++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 GPy/models/sparse_gp_multioutput_regression.py

diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/models/sparse_gp_multioutput_regression.py
new file mode 100644
index 00000000..62712e15
--- /dev/null
+++ b/GPy/models/sparse_gp_multioutput_regression.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2013, Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from ..core import SparseGP
+from .. import likelihoods
+from ..
import kern
+from ..util import multioutput
+
+class SparseGPMultioutputRegression(SparseGP):
+ """
+ Sparse multiple output Gaussian process with Gaussian noise
+
+ This is a wrapper around the models.SparseGP class, with a set of sensible defaults
+
+ :param X_list: input observations
+ :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
+ :param Y_list: observed values
+ :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
+ :param kernel_list: GPy kernels, defaults to rbf
+ :type kernel_list: list of GPy kernels
+ :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
+ :type noise_variance_list: list of floats
+ :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+ :type normalize_X: False|True
+ :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+ :type normalize_Y: False|True
+ :param Z_list: inducing inputs (optional)
+ :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list
+ :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
+ :type num_inducing: integer
+ :param W_columns: number of columns of the coregionalization matrix W (see coregionalize kernel documentation)
+ :type W_columns: integer
+ """
+ #NOTE not tested with uncertain inputs
+ def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,W_columns=1):
+
+ self.num_outputs = len(Y_list)
+ assert len(X_list) == self.num_outputs, 'Number of outputs does not match the length of the inputs list.'
+
+ #Inducing inputs list
+ if len(Z_list):
+ assert len(Z_list) == self.num_outputs, 'Number of outputs does not match the length of the inducing inputs list.'
+ else:
+ Z_list = [] #NOTE rebind to a fresh list so the mutable default argument is never modified
+ if isinstance(num_inducing,np.int):
+ num_inducing = [num_inducing] * self.num_outputs
+ num_inducing = np.asarray(num_inducing)
+ assert num_inducing.size == self.num_outputs, 'Number of outputs does not match the length of the inducing inputs list.'
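+ # Editor's note (descriptive comment, not part of the original commit): when
+ # Z_list is empty, the loop below initializes the inducing inputs for output i
+ # as a random subset of ni points drawn from that output's training inputs.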
+ for ni,X in zip(num_inducing,X_list): + i = np.random.permutation(X.shape[0])[:ni] + Z_list.append(X[i].copy()) + + #Inputs and inducing inputs indexing + i = 0 + index = [] + index_z = [] + for x,y,z in zip(X_list,Y_list,Z_list): + assert x.shape[0] == y.shape[0] + index.append(np.repeat(i,x.size)[:,None]) + index_z.append(np.repeat(i,z.size)[:,None]) + i += 1 + index = np.vstack(index) + index_z = np.vstack(index_z) + X = np.hstack([np.vstack(X_list),index]) + Z = np.hstack([np.vstack(Z_list),index_z]) + original_dim = X.shape[1] - 1 + + #Mixed noise likelihood definition + likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y) + + #Coregionalization kernel definition + if kernel_list is None: + kernel_list = [[kern.rbf(original_dim)],[]] + mkernel = multioutput.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, CK = kernel_list[0], NC = kernel_list[1], W_columns=W_columns) + + self.multioutput = True + SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X) + self.constrain_fixed('.*iip_\d+_1') + self.ensure_default_constraints() From 329da293f844c826de51963dfc4be28428e251bf Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 13:00:54 +0100 Subject: [PATCH 45/49] duplicated line erased --- GPy/core/fitc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py index ef171459..326cadab 100644 --- a/GPy/core/fitc.py +++ b/GPy/core/fitc.py @@ -140,7 +140,6 @@ class FITC(SparseGP): dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) - dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T) alpha = mdot(LBiLmipsi1,self.V_star) From c01ca1d0076b263a2ff00e3ecf388a518a4fe30a Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 13:03:12 +0100 Subject: [PATCH 46/49] Doesn't matter I'll erase this file --- GPy/models/gp_multioutput.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py index bc696cbd..3551a866 100644 --- a/GPy/models/gp_multioutput.py +++ b/GPy/models/gp_multioutput.py @@ -69,8 +69,9 @@ class GPMultioutput(GP): if kernel_list is None: kernel_list = [[kern.rbf(original_dim)],[kern.white(original_dim+1)]] - mkernel = multioutput.build_cor_kernel(input_dim=original_dim, Nout=len(X_list), CK = kernel_list[0], NC = kernel_list[1], W=1) + mkernel = multioutput.build_cor_kernel(input_dim=original_dim, Nout=len(X_list), CK = kernel_list[0], NC = kernel_list[1], W=W) self.multioutput = True + self.num_outputs = len(Y_list) GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) self.ensure_default_constraints() From f4794fb79d7d0b3b3c65262709f8cd9605045669 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 13:06:01 +0100 Subject: [PATCH 47/49] Moved to (sparse_)gp_multioutput_regression --- GPy/models/gp_multioutput.py | 77 ------------------------------------ 1 file changed, 77 deletions(-) delete mode 100644 GPy/models/gp_multioutput.py diff --git a/GPy/models/gp_multioutput.py b/GPy/models/gp_multioutput.py deleted file mode 100644 index 3551a866..00000000 --- a/GPy/models/gp_multioutput.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2013, Ricardo 
Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from ..core import GP -from .. import likelihoods -from .. import kern -from ..util import multioutput - - -import pylab as pb - -class GPMultioutput(GP): - """ - Multiple output Gaussian process - - This is a thin wrapper around the models.GP class, with a set of sensible defaults - - :param X_list: input observations - :param Y_list: observed values - :param L_list: a GPy likelihood, defaults to Binomial with probit link_function - :param kernel_list: a GPy kernel, defaults to rbf - :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) - :type normalize_X: False|True - :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) - :type normalize_Y: False|True - - .. Note:: Multiple independent outputs are allowed using columns of Y - - """ - - def __init__(self,X_list,Y_list,kernel_list=None,normalize_X=False,normalize_Y=False,W=1,mixed_noise_list=[]): #TODO W - #TODO: split into 2 models gp_mixed_noise and ep_mixed_noise - - assert len(X_list) == len(Y_list) - index = [] - i = 0 - for x,y in zip(X_list,Y_list): - assert x.shape[0] == y.shape[0] - index.append(np.repeat(i,y.size)[:,None]) - i += 1 - index = np.vstack(index) - - """ - - if mixed_noise_list == []: - for Y in Y_list: - self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) - - Y = np.vstack([l_.Y for l_ in self.likelihood_list]) - likelihood = likelihoods.Gaussian(Y,normalize=False) - likelihood.index = index - """ - if mixed_noise_list == []: - likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,normalize=normalize_Y) - #TODO: allow passing the variance parameter into the model - else: - self.likelihood_list = [] #TODO this is not needed - assert len(Y_list) == len(mixed_noise_list) - for noise,Y in zip(mixed_noise_list,Y_list): - self.likelihood_list.append(likelihoods.EP(Y,noise)) - #TODO: allow normalization - likelihood = likelihoods.EP_Mixed_Noise(Y_list, mixed_noise_list) - - X = np.hstack([np.vstack(X_list),index]) - original_dim = X.shape[1] - 1 - - if kernel_list is None: - kernel_list = [[kern.rbf(original_dim)],[kern.white(original_dim+1)]] - - mkernel = multioutput.build_cor_kernel(input_dim=original_dim, Nout=len(X_list), CK = kernel_list[0], NC = kernel_list[1], W=W) - - self.multioutput = True - self.num_outputs = len(Y_list) - GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) - self.ensure_default_constraints() From d653921bf35d1c502c847bd5696479ef719b75ea Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 13:09:55 +0100 Subject: [PATCH 48/49] Useless files deleted --- GPy/models/__init__.py | 2 - GPy/models/sparse_gp_multioutput.py | 97 ----------------------------- 2 files changed, 99 deletions(-) delete mode 100644 GPy/models/sparse_gp_multioutput.py diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 5c73c4bf..443bef10 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -11,7 +11,5 @@ from gplvm import GPLVM from warped_gp import WarpedGP from bayesian_gplvm import BayesianGPLVM from mrd import MRD -from gp_multioutput import GPMultioutput from gp_multioutput_regression import GPMultioutputRegression from sparse_gp_multioutput_regression import SparseGPMultioutputRegression -from sparse_gp_multioutput import SparseGPMultioutput diff --git a/GPy/models/sparse_gp_multioutput.py b/GPy/models/sparse_gp_multioutput.py 
deleted file mode 100644 index ae99421f..00000000 --- a/GPy/models/sparse_gp_multioutput.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) 2013, Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from ..core import SparseGP -from .. import likelihoods -from .. import kern -from ..util import multioutput - - -import pylab as pb - -class SparseGPMultioutput(SparseGP): - """ - Multiple output Gaussian process - - This is a thin wrapper around the models.GP class, with a set of sensible defaults - - :param X_list: input observations - :param Y_list: observed values - :param L_list: a GPy likelihood, defaults to Binomial with probit link_function - :param kernel_list: a GPy kernel, defaults to rbf - :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) - :type normalize_X: False|True - :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) - :type normalize_Y: False|True - - .. Note:: Multiple independent outputs are allowed using columns of Y - - """ - - def __init__(self,X_list,Y_list,kernel_list=None,normalize_X=False,normalize_Y=False,Z_list=None,num_inducing_list=10,X_variance=None,W=1,mixed_noise_list=[]): #TODO W - - assert len(X_list) == len(Y_list) - index = [] - for x,y,j in zip(X_list,Y_list,range(len(X_list))): - assert x.shape[0] == y.shape[0] - index.append(np.repeat(j,y.size)[:,None]) - index = np.vstack(index) - - - self.likelihood_list = [] - if mixed_noise_list == []: - for Y in Y_list: - self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) - - Y = np.vstack([l_.Y for l_ in self.likelihood_list]) - likelihood = likelihoods.Gaussian(Y,normalize=False) - likelihood.index = index - - else: - assert len(Y_list) == len(mixed_noise_list) - for noise,Y in zip(mixed_noise_list,Y_list): - self.likelihood_list.append(likelihoods.EP(Y,noise)) - likelihood = likelihoods.EP_Mixed_Noise(Y_list, mixed_noise_list) - - """ - if noise_list == []: - self.likelihood_list = [] - for Y in Y_list: - self.likelihood_list.append(likelihoods.Gaussian(Y,normalize = normalize_Y)) - - Y = np.vstack([l_.Y for l_ in self.likelihood_list]) - likelihood = likelihoods.Gaussian(Y,normalize=False) - likelihood.index = index - """ - X = np.hstack([np.vstack(X_list),index]) - original_dim = X.shape[1] - 1 - - if kernel_list is None: - kernel_list = [[kern.rbf(original_dim)],[kern.white(original_dim+1)]] - - mkernel = multioutput.build_cor_kernel(input_dim=original_dim, Nout=len(X_list), CK = kernel_list[0], NC = kernel_list[1], W=1) - - z_index = [] - if Z_list is None: - if isinstance(num_inducing_list,int): - num_inducing_list = [num_inducing_list for Xj in X_list] - Z_list = [] - for Xj,nj,j in zip(X_list,num_inducing_list,range(len(X_list))): - i = np.random.permutation(Xj.shape[0])[:nj] - z_index.append(np.repeat(j,nj)[:,None]) - Z_list.append(Xj[i].copy()) - else: - assert len(Z_list) == len(X_list) - for Zj,Xj,j in zip(Z_list,X_list,range(len(Z_list))): - assert Zj.shape[1] == Xj.shape[1] - z_index.append(np.repeat(j,Zj.shape[0])[:,None]) - - Z = np.hstack([np.vstack(Z_list),np.vstack(z_index)]) - - - self.multioutput = True - SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) - self.constrain_fixed('.*iip_\d+_1') - self.ensure_default_constraints() From 1bc93747178b0bab1b7177568388ebd4207647e0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 13 Sep 2013 18:09:59 
+0100
Subject: [PATCH 49/49] linK2_functions2 merged

---
 GPy/core/gp_base.py | 44 ++++++-------
 GPy/core/sparse_gp.py | 8 +--
 GPy/examples/regression.py | 66 +++++++++----------
 GPy/kern/constructors.py | 30 ++++++++-
 GPy/kern/parts/coregionalise.py | 10 +--
 GPy/models/gp_multioutput_regression.py | 6 +-
 .../sparse_gp_multioutput_regression.py | 4 +-
 GPy/testing/kernel_tests.py | 3 -
 GPy/testing/unit_tests.py | 15 ++++-
 GPy/util/__init__.py | 2 +-
 10 files changed, 113 insertions(+), 75 deletions(-)

diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 29e845ad..4513ddac 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -58,30 +58,30 @@ class GPBase(Model):
 Model.setstate(self, state)
 
 def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None,output=None):
- """
- Plot the GP's view of the world, where the data is normalized and the
- - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- - In two dimsensions, a contour-plot shows the mean predicted function
- - Not implemented in higher dimensions
+ """
+ Plot the GP's view of the world, where the data is normalized and the
+ - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+ - In two dimensions, a contour-plot shows the mean predicted function
+ - Not implemented in higher dimensions
 
- :param samples: the number of a posteriori samples to plot
- :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
- :param which_data: which if the training data to plot (default all)
- :type which_data: 'all' or a slice object to slice self.X, self.Y
- :param which_parts: which of the kernel functions to plot (additively)
- :type which_parts: 'all', or list of bools
- :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
- :type resolution: int
- :param full_cov:
- :type full_cov: bool
- :param fignum: figure to plot on.
- :type fignum: figure number
- :param ax: axes to plot on.
- :type ax: axes handle
+ :param samples: the number of a posteriori samples to plot
+ :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+ :param which_data: which of the training data to plot (default all)
+ :type which_data: 'all' or a slice object to slice self.X, self.Y
+ :param which_parts: which of the kernel functions to plot (additively)
+ :type which_parts: 'all', or list of bools
+ :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+ :type resolution: int
+ :param full_cov: whether to return the full covariance matrix, or just the diagonal
+ :type full_cov: bool
+ :param fignum: figure to plot on.
+ :type fignum: figure number
+ :param ax: axes to plot on.
+ :type ax: axes handle
- :param output: which output to plot (for multiple output models only)
- :type output: integer (first output is 0)
- """
+ :param output: which output to plot (for multiple output models only)
+ :type output: integer (first output is 0)
+ """
 if which_data == 'all':
 which_data = slice(None)
 
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 01ae1806..48a7d523 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -165,7 +165,7 @@ class SparseGP(GPBase):
 raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
 
 else:
- Lmi_psi1, nil = dtrtrs(self.Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
+ Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
 _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
 _Bi_Lmi_psi1, _ = dtrtrs(self.LB.T, np.asfortranarray(_LBi_Lmi_psi1), lower=1, trans=0)
@@ -427,13 +427,13 @@
 """
 Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
 symmetrify(Bi)
- Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
+ Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
 
 if self.Cpsi1V is None:
 psi1V = np.dot(self.psi1.T,self.likelihood.V)
- tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+ tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
 tmp, _ = dpotrs(self.LB, tmp, lower=1)
- self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1)
+ self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
 
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index d12643f6..df7b92b8 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -46,32 +46,23 @@
 """
 X1 = np.random.rand(50, 1) * 8
 X2 = np.random.rand(30, 1) * 5
- index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
- X = np.hstack((np.vstack((X1, X2)), index))
+ X = np.vstack((X1, X2))
 Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
 Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
 Y = np.vstack((Y1, Y2))
 
 k1 = GPy.kern.rbf(1)
- k2 = GPy.kern.coregionalise(2, 2)
- k = k1**k2 #k1.prod(k2, tensor=True)
- m = GPy.models.GPRegression(X, Y, kernel=k)
+ m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
 m.constrain_fixed('.*rbf_var', 1.)
- # m.constrain_positive('kappa')
 m.optimize(max_iters=max_iters)
- pb.figure()
- Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
- Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
- mean, var, low, up = m.predict(Xtest1)
- GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
- mean, var, low, up = m.predict(Xtest2)
- GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
- pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
- pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
+ fig, axes = pb.subplots(2,1)
+ m.plot(output=0,ax=axes[0])
+ m.plot(output=1,ax=axes[1])
+ axes[0].set_title('Output 0')
+ axes[1].set_title('Output 1')
 return m
 
-
 def coregionalisation_sparse(max_iters=100):
 """
 A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
@@ -86,30 +77,39 @@
 num_inducing = 40
 Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
 k1 = GPy.kern.rbf(1)
- k2 = GPy.kern.coregionalise(2, 2)
- k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
- m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
+
+ m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=20)
+ #k2 = GPy.kern.coregionalise(2, 2)
+ #k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
+ #m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
 m.constrain_fixed('.*rbf_var', 1.)
+
+ #m.constrain_fixed('iip')
+ #m.constrain_bounded('noise_variance', 1e-3, 1e-1)
- m.constrain_fixed('iip')
- m.constrain_bounded('noise_variance', 1e-3, 1e-1)
 # m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
 m.optimize(max_iters=max_iters)
+ fig, axes = pb.subplots(2,1)
+ m.plot(output=0,ax=axes[0])
+ m.plot(output=1,ax=axes[1])
+ axes[0].set_title('Output 0')
+ axes[1].set_title('Output 1')
 # plotting:
- pb.figure()
- Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
- Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
- mean, var, low, up = m.predict(Xtest1)
- GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
- mean, var, low, up = m.predict(Xtest2)
- GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
- pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
- pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
- y = pb.ylim()[0]
- pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
- pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
+ #pb.figure()
+ #Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
+ #Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
+ #mean, var, low, up = m.predict(Xtest1)
+ #GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
+ #mean, var, low, up = m.predict(Xtest2)
+ #GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
+ #pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
+ #pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
+ #y = pb.ylim()[0]
+ #pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
+ #pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
 return m
 
 def epomeo_gpx(max_iters=100):
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index a90e6ef8..57c264ef 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -340,7 +340,7 @@ def symmetric(k):
 k_.parts = [symmetric.Symmetric(p) for p in k.parts]
 return k_
 
-def coregionalise(num_outpus,W_columns=1, W=None, kappa=None):
+def coregionalise(num_outputs,W_columns=1, W=None, kappa=None):
 """
 Coregionlization matrix B, of the form:
 .. math::
@@ -422,3 +422,31 @@
 # assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices!
(TODO)" _parts = [parts.hierarchical.Hierarchical(k.parts)] return kern(k.input_dim+len(k.parts),_parts) + +def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa=None): + """ + Builds a kernel of a linear coregionalization model + + :input_dim: Input dimensionality + :num_outputs: Number of outputs + :kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different corregionalization matrix + :type kernel_list: list of GPy kernels + :param W_columns: number tuples of the corregionalization parameters 'coregion_W' + :type W_columns: integer + + ..Note the kernels dimensionality is overwritten to fit input_dim + """ + + for k in kernel_list: + if k.input_dim <> input_dim: + k.input_dim = input_dim + warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") + + k_coreg = coregionalise(num_outputs,W_columns,W,kappa) + kernel = kernel_list[0]**k_coreg.copy() + + for k in kernel_list[1:]: + k_coreg = coregionalise(num_outputs,W_columns,W,kappa) + kernel += k**k_coreg.copy() + + return kernel diff --git a/GPy/kern/parts/coregionalise.py b/GPy/kern/parts/coregionalise.py index 9a1c31c6..66e14052 100644 --- a/GPy/kern/parts/coregionalise.py +++ b/GPy/kern/parts/coregionalise.py @@ -38,16 +38,16 @@ class Coregionalise(Kernpart): self.num_outputs = num_outputs self.W_columns = W_columns if W is None: - self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank) + self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns) else: - assert W.shape==(self.output_dim,self.rank) + assert W.shape==(self.num_outputs,self.W_columns) self.W = W if kappa is None: - kappa = 0.5*np.ones(self.output_dim) + kappa = 0.5*np.ones(self.num_outputs) else: - assert kappa.shape==(self.output_dim,) + assert kappa.shape==(self.num_outputs,) self.kappa = kappa - self.num_params = self.output_dim*(self.rank + 1) + self.num_params = self.num_outputs*(self.W_columns + 1) self._set_params(np.hstack([self.W.flatten(),self.kappa])) def _get_params(self): diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/models/gp_multioutput_regression.py index c0a5b557..d51f3bae 100644 --- a/GPy/models/gp_multioutput_regression.py +++ b/GPy/models/gp_multioutput_regression.py @@ -6,7 +6,7 @@ import numpy as np from ..core import GP from .. import likelihoods from .. 
import kern -from ..util import multioutput +#from ..util import multioutput class GPMultioutputRegression(GP): """ @@ -51,8 +51,8 @@ class GPMultioutputRegression(GP): #Coregionalization kernel definition if kernel_list is None: - kernel_list = [[kern.rbf(original_dim)],[]] - mkernel = multioutput.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, CK = kernel_list[0], NC = kernel_list[1], W_columns=W_columns) + kernel_list = [kern.rbf(original_dim)] + mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns) self.multioutput = True GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X) diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/models/sparse_gp_multioutput_regression.py index 62712e15..041204b6 100644 --- a/GPy/models/sparse_gp_multioutput_regression.py +++ b/GPy/models/sparse_gp_multioutput_regression.py @@ -71,8 +71,8 @@ class SparseGPMultioutputRegression(SparseGP): #Coregionalization kernel definition if kernel_list is None: - kernel_list = [[kern.rbf(original_dim)],[]] - mkernel = multioutput.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, CK = kernel_list[0], NC = kernel_list[1], W_columns=W_columns) + kernel_list = [kern.rbf(original_dim)] + mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns) self.multioutput = True SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 65a8da77..49445276 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -4,7 +4,6 @@ import unittest import numpy as np import GPy - class KernelTests(unittest.TestCase): @@ -12,7 +11,6 @@ class KernelTests(unittest.TestCase): K = GPy.kern.rbf(5, ARD=True) K.tie_params('.*[01]') K.constrain_fixed('2') - X = np.random.rand(5,5) Y = np.ones((5,1)) m = GPy.models.GPRegression(X,Y,K) @@ -68,7 +66,6 @@ class KernelTests(unittest.TestCase): self.assertTrue(m.checkgrad()) - if __name__ == "__main__": print "Running unit tests, please be (very) patient..." unittest.main() diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index fd6e8911..6bb624df 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -5,7 +5,6 @@ import unittest import numpy as np import GPy -from GPy.likelihoods.likelihood_functions import Binomial class GradientTests(unittest.TestCase): def setUp(self): @@ -226,6 +225,20 @@ class GradientTests(unittest.TestCase): m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) + def multioutput_regression_1D(self): + X1 = np.random.rand(50, 1) * 8 + X2 = np.random.rand(30, 1) * 5 + X = np.vstack((X1, X2)) + Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 + Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + Y = np.vstack((Y1, Y2)) + + k1 = GPy.kern.rbf(1) + m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) + m.constrain_fixed('.*rbf_var', 1.) + self.assertTrue(m.checkgrad()) + + if __name__ == "__main__": print "Running unit tests, please be (very) patient..." unittest.main() diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 9b21f8f5..c205548b 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -14,4 +14,4 @@ import visualize import decorators import classification import latent_space_visualizations -import multioutput +#import multioutput
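Editor's appendix: a minimal usage sketch for the two models introduced in this series, assembled from the updated coregionalisation examples in GPy/examples/regression.py above (the data, constraint and plotting calls are copied from those examples). This illustrates the intended API at this point in the branch's history; variable names such as sm and axes are illustrative, not shipped code.

import numpy as np
import pylab as pb
import GPy

# Two outputs observed at different input locations
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05

# Full model: one RBF kernel shared across outputs through a coregionalization matrix
k1 = GPy.kern.rbf(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)  # fix the kernel variance, as in the examples
m.optimize(max_iters=100)

# Sparse counterpart: 20 inducing inputs per output, drawn at random from each output's data
k2 = GPy.kern.rbf(1)  # each model gets its own kernel, since build_lcm embeds it
sm = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2],
                                              kernel_list=[k2], num_inducing=20)
sm.constrain_fixed('.*rbf_var', 1.)
sm.optimize(max_iters=100)

# One panel per output; output indices are 0-based
fig, axes = pb.subplots(2, 1)
m.plot(output=0, ax=axes[0])
m.plot(output=1, ax=axes[1])
axes[0].set_title('Output 0')
axes[1].set_title('Output 1')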