very basic functionality is now working

James Hensman 2013-01-31 15:02:34 +00:00
parent bdc89170d4
commit d077d28fd1
10 changed files with 88 additions and 284 deletions

View file

@@ -7,5 +7,5 @@ import models
import inference
import util
import examples
#import examples TODO: discuss!
from core import priors
import likelihoods

View file

@@ -1,12 +1,9 @@
import numpy as np
import random
import pylab as pb #TODO erase me
from scipy import stats, linalg
from .likelihoods import likelihood
from ..core import model
from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot
from .. import kern
class EP:
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
@@ -15,12 +12,8 @@ class EP:
Arguments
---------
X : input observations
likelihood : Output's likelihood (likelihood class)
kernel : a GPy kernel (kern class)
inducing : Either an array specifying the inducing point locations, or a scalar defining their number; use None for a non-sparse model.
power_ep : Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
likelihood_function : a likelihood function (see likelihood_functions.py)
"""
self.likelihood_function = likelihood_function
self.epsilon = epsilon
@@ -48,7 +41,6 @@ class EP:
For nomenclature see Rasmussen & Williams 2006.
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
#self.K = self.kernel.K(self.X,self.X)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu = np.zeros(self.N)
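For context, a minimal numpy sketch (independent of the EP class above) of the initial state these comments describe: with all site parameters zero, the approximate posterior q(f|X,Y) starts at the prior N(f|0,K). The identity kernel matrix here is a stand-in, not the library's kern API.

    import numpy as np

    N = 10
    K = np.eye(N)              # stands in for the prior covariance of p(f|X) = N(f|0,K)
    tau_tilde = np.zeros(N)    # site precisions, all zero before any EP updates
    v_tilde = np.zeros(N)      # site precision-scaled means
    mu = np.zeros(N)           # posterior mean, as in self.mu above
    Sigma = K.copy()           # posterior covariance starts at the prior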

View file

@@ -1,16 +1,39 @@
import numpy as np
class Gaussian:
def __init__(self,data,variance=1.,normalise=False):
def __init__(self,data,variance=1.,normalize=False):
self.data = data
if normalise:
foo
self._variance = variance
self.N,D = data.shape
self.Z = 0. # a correction factor which accounts for the approximation made
#normalisation
if normalize:
self._mean = data.mean(0)[None,:]
self._std = data.std(0)[None,:]
self.Y = (self.data - self._mean)/self._std
else:
self._mean = np.zeros((1,D))
self._std = np.ones((1,D))
self.Y = self.data
self.YYT = np.dot(self.Y,self.Y.T)
self._set_params(np.asarray(variance))
def _get_params(self):
return np.asarray(self.variance)
return np.asarray(self._variance)
def _get_param_names(self):
return ["noise variance"]
def _set_params(self,x):
self._variance = x
self.variance = np.eye(self.N)*self._variance
def fit(self):
"""
No approximations needed
"""
pass
def _gradients(self,foo):
return bar(foo)
def _gradients(self,partial):
return np.sum(np.diag(partial))
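As a standalone illustration of the normalization performed in Gaussian.__init__ above (a sketch in plain numpy, not the class itself):

    import numpy as np

    data = 5.0 + 3.0 * np.random.randn(10, 2)
    mean = data.mean(0)[None, :]
    std = data.std(0)[None, :]
    Y = (data - mean) / std    # zero mean, unit variance per column
    YYT = np.dot(Y, Y.T)       # cached outer product, reused by the GP class

Un-normalizing is the inverse affine map: Y * std + mean recovers the original data.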

View file

@@ -1,3 +1,4 @@
from EP import EP
from Gaussian import Gaussian
# TODO: from Laplace import Laplace
import likelihood_functions as functions

View file

@@ -192,10 +192,10 @@ class poisson(likelihood):
pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
class gaussian(likelihood):
"""
Gaussian likelihood
Y is expected to take values in (-inf,inf)
"""
"""
Gaussian likelihood
Y is expected to take values in (-inf,inf)
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
@@ -221,6 +221,3 @@ class gaussian(likelihood):
def _log_likelihood_gradients():
raise NotImplementedError
else:
var = var[:,None] * np.square(self._Ystd)
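For reference, the Gaussian case of moments_match has a closed form (following Rasmussen & Williams 2006, cited above). A self-contained sketch, under the assumption that tau_i and v_i are the natural parameters of the cavity distribution N(f | v_i/tau_i, 1/tau_i); the function name and signature are illustrative, not the library's:

    import numpy as np
    from scipy import stats

    def gaussian_moments_match(y_i, tau_i, v_i, noise_var):
        # moments of the tilted distribution N(f|v_i/tau_i, 1/tau_i) * N(y_i|f, noise_var)
        sigma2_hat = 1.0 / (tau_i + 1.0 / noise_var)
        mu_hat = sigma2_hat * (v_i + y_i / noise_var)
        Z_hat = stats.norm.pdf(y_i, loc=v_i / tau_i,
                               scale=np.sqrt(1.0 / tau_i + noise_var))
        return Z_hat, mu_hat, sigma2_hat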

View file

@@ -8,8 +8,6 @@ from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot, Tango
from ..inference.EP import Full # TODO: tidy
from ..inference import likelihoods
class GP(model):
"""
@@ -55,8 +53,6 @@ class GP(model):
self._Xstd = np.ones((1,self.X.shape[1]))
self.likelihood = likelihood
self.Y = self.likelihood.Y
self.YYT = self.likelihood.YYT # TODO: this is ugly. what about sufficient_stats?
assert self.X.shape[0] == self.likelihood.Y.shape[0]
self.N, self.D = self.likelihood.Y.shape
@@ -67,18 +63,16 @@ class GP(model):
self.likelihood._set_params(p[self.kern.Nparam:])
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.K += np.diag(self.likelihood_variance)
self.K += self.likelihood.variance
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
#the gradient of the likelihood wrt the covariance matrix
if self.YYT is None:
self._alpha = np.dot(self.Ki,self.Y)
self._alpha2 = np.square(self._alpha)
self.dL_dK = 0.5*(np.dot(self._alpha,self._alpha.T)-self.D*self.Ki)
if self.likelihood.YYT is None:
alpha = np.dot(self.Ki,self.likelihood.Y)
self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
tmp = mdot(self.Ki, self.YYT, self.Ki)
self._alpha2 = np.diag(tmp)
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
self.dL_dK = 0.5*(tmp - self.D*self.Ki)
def _get_params(self):
@@ -95,16 +89,15 @@ class GP(model):
this function does nothing
"""
self.likelihood.fit(self.K)
self.Y, self.YYT, self.likelihood_variance, self.likelihood_Z = self.likelihood.sufficient_stats() # TODO: just store these in the likelihood?
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
if self.likelihood.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
@@ -114,7 +107,7 @@ class GP(model):
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return -0.5*self.D*self.K_logdet + self.model_fit_term() + self.likelihood.Z
return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
def _log_likelihood_gradients(self):
@@ -125,7 +118,7 @@ class GP(model):
For the likelihood parameters, pass in alpha = K^-1 y
"""
return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X), self.likelihood._gradients(self.alpha2)))
return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=self.dL_dK)))
def _raw_predict(self,_Xnew,slices, full_cov=False):
"""
@@ -133,7 +126,7 @@ class GP(model):
for normalisation or likelihood
"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
@@ -177,8 +170,10 @@ class GP(model):
return mean, _5pc, _95pc
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
def raw_plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
:param samples: the number of a posteriori samples to plot
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
@@ -194,19 +189,17 @@ class GP(model):
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.Y[which_data,:]
Xorig = X*self._Xstd + self._Xmean
Yorig = Y*self._Ystd + self._Ymean #NOTE For EP this is v_tilde/beta
Y = self.likelihood.Y[which_data,:]
if plot_limits is None:
xmin,xmax = Xorig.min(0),Xorig.max(0)
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
@@ -215,27 +208,17 @@ class GP(model):
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
m,v,phi = self.predict(Xnew,slices=which_functions,full_cov=full_cov)
if self.EP:
pb.subplot(211)
gpplot(Xnew,m,v)
m,v = self._raw_predict(Xnew,slices=which_functions,full_cov=False)
lower, upper = m.flatten() - 2.*np.sqrt(v) , m.flatten()+ 2.*np.sqrt(v)
gpplot(Xnew,m,lower,upper)
if samples: #NOTE why don't we put samples as a parameter of gpplot
s = np.random.multivariate_normal(m.flatten(),np.diag(v.flatten()),samples)
pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
pb.plot(Xorig,Yorig,'kx',mew=1.5)
pb.plot(X,Y,'kx',mew=1.5)
pb.xlim(xmin,xmax)
if self.EP:
pb.subplot(212)
self.likelihood.plot(Xnew,m,v,phi,self.X,samples=samples)
pb.xlim(xmin,xmax)
elif self.X.shape[1]==2:
resolution = 50 or resolution
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xtest = np.vstack((xx.flatten(),yy.flatten())).T
zz,vv,phi = self.predict(Xtest,slices=which_functions,full_cov=full_cov)
zz,vv = self._raw_predict(Xtest,slices=which_functions,full_cov=False)
zz = zz.reshape(resolution,resolution)
pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
@@ -244,3 +227,10 @@ class GP(model):
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
def plot(self):
"""
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
"""
pass# TODO!!!!!
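The two branches of _model_fit_term above agree: with K = L L^T, Li = inv(L) and Ki = inv(K) (matching how Li and Ki are used in the code), sum((Li Y)**2) equals trace(Ki YYT). A quick numerical check, as a sketch rather than library code:

    import numpy as np

    N, D = 5, 3
    A = np.random.randn(N, N)
    K = np.dot(A, A.T) + N * np.eye(N)    # a positive definite "kernel" matrix
    Y = np.random.randn(N, D)
    L = np.linalg.cholesky(K)
    Li = np.linalg.inv(L)
    Ki = np.linalg.inv(K)
    lhs = -0.5 * np.sum(np.square(np.dot(Li, Y)))
    rhs = -0.5 * np.sum(np.multiply(Ki, np.dot(Y, Y.T)))
    assert np.allclose(lhs, rhs)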

View file

@@ -1,18 +1,18 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from GP import GP
from .. import likelihoods
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot, Tango
class GP_regression(model):
class GP_regression(GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
@@ -29,199 +29,8 @@ class GP_regression(model):
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
kernel = kern.rbf(X.shape[1])
# parse arguments
self.Xslices = Xslices
assert isinstance(kernel, kern.kern)
self.kern = kernel
self.X = X
self.Y = Y
assert len(self.X.shape)==2
assert len(self.Y.shape)==2
assert self.X.shape[0] == self.Y.shape[0]
self.N, self.D = self.Y.shape
self.N, self.Q = self.X.shape
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
#here's some simple normalisation
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
if normalize_Y:
self._Ymean = Y.mean(0)[None,:]
self._Ystd = Y.std(0)[None,:]
self.Y = (Y.copy()- self._Ymean) / self._Ystd
else:
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
if self.D > self.N:
# then it's more efficient to store YYT
self.YYT = np.dot(self.Y, self.Y.T)
else:
self.YYT = None
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p)
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
def _get_params(self):
return self.kern._get_params_transformed()
def _get_param_names(self):
return self.kern._get_param_names_transformed()
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
def log_likelihood(self):
complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
return complexity_term + self._model_fit_term()
def dL_dK(self):
if self.YYT is None:
alpha = np.dot(self.Ki,self.Y)
dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
return dL_dK
def _log_likelihood_gradients(self):
return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
.. Note:: "slices" specifies how the points X_new co-vary with the training points.
- If None, the new points covary through every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
#un-normalise
mu = mu*self._Ystd + self._Ymean
if full_cov:
if self.D==1:
var *= np.square(self._Ystd)
else:
var = var[:,:,None] * np.square(self._Ystd)
else:
if self.D==1:
var *= np.square(np.squeeze(self._Ystd))
else:
var = var[:,None] * np.square(self._Ystd)
return mu,var
def _raw_predict(self,_Xnew,slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
return mu, var
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
:param samples: the number of a posteriori samples to plot
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.Y[which_data,:]
Xorig = X*self._Xstd + self._Xmean
Yorig = Y*self._Ystd + self._Ymean
if plot_limits is None:
xmin,xmax = Xorig.min(0),Xorig.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
m,v = self.predict(Xnew,slices=which_functions)
gpplot(Xnew,m,v)
if samples:
s = np.random.multivariate_normal(m.flatten(),v,samples)
pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
pb.plot(Xorig,Yorig,'kx',mew=1.5)
pb.xlim(xmin,xmax)
elif self.X.shape[1]==2:
resolution = 50 or resolution
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xtest = np.vstack((xx.flatten(),yy.flatten())).T
zz,vv = self.predict(Xtest,slices=which_functions)
zz = zz.reshape(resolution,resolution)
pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
GP.__init__(self, X, kernel, likelihood, normalize_X=normalize_X, Xslices=Xslices)
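A minimal usage sketch of the refactored wrapper, with hyperparameters left at their defaults; the GPy import path is assumed, and raw_plot is the method defined on the GP class above (whether any optimization is expected before plotting is not shown in this diff):

    import numpy as np
    from GPy.models import GP_regression   # package path assumed

    X = np.random.rand(40, 1)
    Y = np.sin(2 * np.pi * X) + 0.05 * np.random.randn(40, 1)
    m = GP_regression(X, Y, normalize_Y=True)   # rbf kernel by default (see __init__)
    m.raw_plot()                                # 1D mean and 2-sigma band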

View file

@@ -2,14 +2,14 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#from GP_regression import GP_regression
#from sparse_GP_regression import sparse_GP_regression
# ^^ remove these?
# TODO ^^ remove these?
from GPLVM import GPLVM
from warped_GP import warpedGP
from generalized_FITC import generalized_FITC
from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP
# TODO: from generalized_FITC import generalized_FITC
#from sparse_GPLVM import sparse_GPLVM
#from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from GP import GP
from sparse_GP import sparse_GP
from BGPLVM import Bayesian_GPLVM
from GP_regression import GP_regression
#from sparse_GP import sparse_GP
#from BGPLVM import Bayesian_GPLVM

View file

@@ -7,8 +7,6 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from GP import GP
from ..inference.EP import Full,DTC,FITC
from ..inference.likelihoods import likelihood,probit,poisson,gaussian
#Still TODO:

View file

@@ -6,7 +6,7 @@ import Tango
import pylab as pb
import numpy as np
def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
if axes is None:
axes = pb.gca()
mu = mu.flatten()
@@ -15,21 +15,15 @@ def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHe
#here's the mean
axes.plot(x,mu,color=edgecol,linewidth=2)
#ensure variance is a vector
if len(var.shape)>1:
err = 2*np.sqrt(np.diag(var))
else:
err = 2*np.sqrt(var)
#here's the 2*std box
#here's the box
kwargs['linewidth']=0.5
if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 0.3
axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs)
axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)
#this is the edge:
axes.plot(x,mu+err,color=edgecol,linewidth=0.2)
axes.plot(x,mu-err,color=edgecol,linewidth=0.2)
axes.plot(x,upper,color=edgecol,linewidth=0.2)
axes.plot(x,lower,color=edgecol,linewidth=0.2)
def removeRightTicks(ax=None):
ax = ax or pb.gca()
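A short usage sketch of the new gpplot signature, with the caller supplying the band edges directly (here a 2-sigma band, matching raw_plot above). It assumes gpplot from this module is in scope, and passes x one-dimensional:

    import numpy as np
    import pylab as pb

    x = np.linspace(0, 1, 50)
    mu = np.sin(2 * np.pi * x)
    sd = 0.1 * np.ones_like(mu)
    gpplot(x, mu, mu - 2 * sd, mu + 2 * sd)   # mean line plus shaded band
    pb.show()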