Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-15 06:52:39 +02:00
Commit d077d28fd1 (parent bdc89170d4): very basic functionality is now working
10 changed files with 88 additions and 284 deletions
@@ -7,5 +7,5 @@ import models
 import inference
 import util
 import examples
-#import examples TODO: discuss!
 from core import priors
+import likelihoods
@@ -1,12 +1,9 @@
 import numpy as np
 import random
-import pylab as pb #TODO erase me
 from scipy import stats, linalg
-from .likelihoods import likelihood
 from ..core import model
 from ..util.linalg import pdinv,mdot,jitchol
 from ..util.plot import gpplot
-from .. import kern
 
 class EP:
     def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
@@ -15,12 +12,8 @@ class EP:
 
         Arguments
         ---------
-        X : input observations
-        likelihood : Output's likelihood (likelihood class)
-        kernel : a GPy kernel (kern class)
-        inducing : Either an array specifying the inducing points location or a sacalar defining their number. None value for using a non-sparse model is used.
-        power_ep : Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
         epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
+        likelihood_function : a likelihood function (see likelihood_functions.py)
         """
         self.likelihood_function = likelihood_function
         self.epsilon = epsilon
@@ -48,7 +41,6 @@ class EP:
         For nomenclature see Rasmussen & Williams 2006.
         """
         #Prior distribution parameters: p(f|X) = N(f|0,K)
-        #self.K = self.kernel.K(self.X,self.X)
 
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
         self.mu = np.zeros(self.N)
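For orientation in the Rasmussen & Williams (2006) nomenclature the docstring points to: EP keeps the Gaussian prior and replaces each likelihood term with a Gaussian site, so the mu and Sigma initialised above parameterise the approximation (a standard statement of the method, not code from this commit):

    p(f \mid X, Y) \;\propto\; \mathcal{N}(f \mid 0, K) \prod_i p(y_i \mid f_i)
    \;\approx\; q(f \mid X, Y) \;=\; \mathcal{N}(f \mid \mu, \Sigma)

The moment-matching loop updates the site parameters \tilde{\tau}_i, \tilde{\nu}_i until the change in \mu falls below epsilon.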
@@ -1,16 +1,39 @@
 import numpy as np
 
 class Gaussian:
-    def __init__(self,data,variance=1.,normalise=False):
+    def __init__(self,data,variance=1.,normalize=False):
         self.data = data
-        if normalise:
-            foo
-        self._variance = variance
+        self.N,D = data.shape
+        self.Z = 0. # a correction factor which accounts for the approximation made
+
+        #normalisation
+        if normalize:
+            self._mean = data.mean(0)[None,:]
+            self._std = data.std(0)[None,:]
+            self.Y = (self.data - self._mean)/self._std
+        else:
+            self._mean = np.zeros((1,D))
+            self._std = np.ones((1,D))
+            self.Y = self.data
+
+        self.YYT = np.dot(self.Y,self.Y.T)
+        self._set_params(np.asarray(variance))
 
     def _get_params(self):
-        return np.asarray(self.variance)
+        return np.asarray(self._variance)
+
+    def _get_param_names(self):
+        return ["noise variance"]
 
     def _set_params(self,x):
         self._variance = x
+        self.variance = np.eye(self.N)*self._variance
 
     def fit(self):
+        """
+        No approximations needed
+        """
         pass
-    def _gradients(self,foo):
-        return bar(foo)
+
+    def _gradients(self,partial):
+        return np.sum(np.diag(partial))
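A minimal sketch of how the rewritten constructor behaves; the GPy.likelihoods import path and the toy data are assumptions for illustration:

    import numpy as np
    from GPy.likelihoods import Gaussian  # assumed import path

    Y = np.random.randn(10, 2)       # N=10 observations, D=2 outputs
    lik = Gaussian(Y, variance=0.1, normalize=True)

    print(lik.Y.mean(0))             # ~zero: columns are centred when normalize=True
    print(lik.variance.shape)        # (10, 10): _set_params builds np.eye(N)*variance
    print(lik._get_params())         # array(0.1): the stored noise variance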
@@ -1,3 +1,4 @@
 from EP import EP
 from Gaussian import Gaussian
 # TODO: from Laplace import Laplace
+import likelihood_functions as functions
@@ -192,10 +192,10 @@ class poisson(likelihood):
         pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
 
 class gaussian(likelihood):
     """
     Gaussian likelihood
     Y is expected to take values in (-inf,inf)
     """
     def moments_match(self,i,tau_i,v_i):
         """
         Moments match of the marginal approximation in EP algorithm
@@ -221,6 +221,3 @@ class gaussian(likelihood):
 
     def _log_likelihood_gradients():
         raise NotImplementedError
-        else:
-            var = var[:,None] * np.square(self._Ystd)
-
@@ -8,8 +8,6 @@ from .. import kern
 from ..core import model
 from ..util.linalg import pdinv,mdot
 from ..util.plot import gpplot, Tango
-from ..inference.EP import Full # TODO: tidy
-from ..inference import likelihoods
 
 class GP(model):
     """
@@ -55,8 +53,6 @@ class GP(model):
         self._Xstd = np.ones((1,self.X.shape[1]))
 
         self.likelihood = likelihood
-        self.Y = self.likelihood.Y
-        self.YYT = self.likelihood.YYT # TODO: this is ugly. what about sufficient_stats?
         assert self.X.shape[0] == self.likelihood.Y.shape[0]
         self.N, self.D = self.likelihood.Y.shape
 
@@ -67,18 +63,16 @@ class GP(model):
         self.likelihood._set_params(p[self.kern.Nparam:])
 
         self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.K += np.diag(self.likelihood_variance)
+        self.K += self.likelihood.variance
 
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
 
         #the gradient of the likelihood wrt the covariance matrix
-        if self.YYT is None:
-            self._alpha = np.dot(self.Ki,self.Y)
-            self._alpha2 = np.square(self._alpha)
-            self.dL_dK = 0.5*(np.dot(self._alpha,self._alpha.T)-self.D*self.Ki)
+        if self.likelihood.YYT is None:
+            alpha = np.dot(self.Ki,self.likelihood.Y)
+            self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
         else:
-            tmp = mdot(self.Ki, self.YYT, self.Ki)
-            self._alpha2 = np.diag(tmp)
+            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
             self.dL_dK = 0.5*(tmp - self.D*self.Ki)
 
     def _get_params(self):
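Both branches of _set_params above compute the same gradient of the log marginal likelihood with respect to the covariance; writing alpha = K^{-1}Y, with D output dimensions, the standard identity is

    \frac{\partial \log p(Y \mid X)}{\partial K}
    \;=\; \frac{1}{2}\left( K^{-1} Y Y^{\top} K^{-1} - D\,K^{-1} \right)
    \;=\; \frac{1}{2}\left( \alpha \alpha^{\top} - D\,K^{-1} \right)

The first form is the YYT branch, the second the alpha branch.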
@@ -95,16 +89,15 @@ class GP(model):
         this function does nothing
         """
         self.likelihood.fit(self.K)
-        self.Y, self.YYT, self.likelihood_variance, self.likelihood_Z = self.likelihood.sufficient_stats() # TODO: just store these in the likelihood?
 
     def _model_fit_term(self):
         """
         Computes the model fit using YYT if it's available
         """
-        if self.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
+        if self.likelihood.YYT is None:
+            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
         else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
+            return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
 
     def log_likelihood(self):
         """
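The two branches of _model_fit_term are algebraically identical: with the Cholesky factor from pdinv (K = L L^{\top}, so Li = L^{-1}),

    -\frac{1}{2} \sum_{ij} \left( L^{-1} Y \right)_{ij}^{2}
    \;=\; -\frac{1}{2} \operatorname{tr}\!\left( K^{-1} Y Y^{\top} \right)

The YYT form avoids touching Y again and is cheaper when there are more output dimensions than data points.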
@@ -114,7 +107,7 @@ class GP(model):
         model for a new variable Y* = v_tilde/tau_tilde, with a covariance
         matrix K* = K + diag(1./tau_tilde) plus a normalization term.
         """
-        return -0.5*self.D*self.K_logdet + self.model_fit_term() + self.likelihood.Z
+        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
 
 
     def _log_likelihood_gradients(self):
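Spelling out the return value in the docstring's terms, a reading rather than anything the commit states explicitly: with Y* = v_tilde/tau_tilde and K* the covariance including the site variances, the approximate log marginal is, up to constants,

    \log p(Y \mid X) \;\approx\; -\frac{D}{2} \log\left|K^{*}\right|
    \;-\; \frac{1}{2} \operatorname{tr}\!\left( (K^{*})^{-1} Y^{*} {Y^{*}}^{\top} \right) \;+\; Z

where the trace term is _model_fit_term() and Z is the likelihood's normalisation correction.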
@@ -125,7 +118,7 @@ class GP(model):
 
         For the likelihood parameters, pass in alpha = K^-1 y
         """
-        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X), self.likelihood._gradients(self.alpha2)))
+        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=self.dL_dK)))
 
     def _raw_predict(self,_Xnew,slices, full_cov=False):
         """
@@ -133,7 +126,7 @@ class GP(model):
         for normalisation or likelihood
         """
         Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
+        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
         KiKx = np.dot(self.Ki,Kx)
         if full_cov:
             Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
@@ -177,8 +170,10 @@ class GP(model):
 
         return mean, _5pc, _95pc
 
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+    def raw_plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
         """
+        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+
         :param samples: the number of a posteriori samples to plot
         :param which_data: which if the training data to plot (default all)
         :type which_data: 'all' or a slice object to slice self.X, self.Y
@@ -194,19 +189,17 @@ class GP(model):
 
         Can plot only part of the data and part of the posterior functions using which_data and which_functions
         """
 
         if which_functions=='all':
             which_functions = [True]*self.kern.Nparts
         if which_data=='all':
             which_data = slice(None)
 
         X = self.X[which_data,:]
-        Y = self.Y[which_data,:]
+        Y = self.likelihood.Y[which_data,:]
 
-        Xorig = X*self._Xstd + self._Xmean
-        Yorig = Y*self._Ystd + self._Ymean #NOTE For EP this is v_tilde/beta
 
         if plot_limits is None:
-            xmin,xmax = Xorig.min(0),Xorig.max(0)
+            xmin,xmax = X.min(0),X.max(0)
             xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
         elif len(plot_limits)==2:
             xmin, xmax = plot_limits
@@ -215,27 +208,17 @@ class GP(model):
 
         if self.X.shape[1]==1:
             Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v,phi = self.predict(Xnew,slices=which_functions,full_cov=full_cov)
-            if self.EP:
-                pb.subplot(211)
-            gpplot(Xnew,m,v)
+            m,v = self._raw_predict(Xnew,slices=which_functions,full_cov=False)
+            lower, upper = m.flatten() - 2.*np.sqrt(v) , m.flatten()+ 2.*np.sqrt(v)
+            gpplot(Xnew,m,lower,upper)
 
-            if samples: #NOTE why don't we put samples as a parameter of gpplot
-                s = np.random.multivariate_normal(m.flatten(),np.diag(v.flatten()),samples)
-                pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
-            pb.plot(Xorig,Yorig,'kx',mew=1.5)
+            pb.plot(X,Y,'kx',mew=1.5)
             pb.xlim(xmin,xmax)
 
-            if self.EP:
-                pb.subplot(212)
-                self.likelihood.plot(Xnew,m,v,phi,self.X,samples=samples)
-                pb.xlim(xmin,xmax)
 
         elif self.X.shape[1]==2:
-            resolution = 50 or resolution
+            resolution = resolution or 50
             xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
             Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv,phi = self.predict(Xtest,slices=which_functions,full_cov=full_cov)
+            zz,vv = self._raw_predict(Xtest,slices=which_functions,full_cov=False)
             zz = zz.reshape(resolution,resolution)
             pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
             pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
@@ -244,3 +227,10 @@ class GP(model):
 
         else:
             raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
+
+    def plot(self):
+        """
+        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
+        """
+        pass# TODO!!!!!
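raw_plot now builds its shaded region from the diagonal predictive variance before calling gpplot; the band computation in isolation (toy arrays for illustration):

    import numpy as np

    m = np.array([[0.1], [0.4], [0.2]])   # posterior mean, as returned by _raw_predict
    v = np.array([0.04, 0.09, 0.01])      # diagonal predictive variance
    # the band handed to gpplot: mean plus/minus two standard deviations
    lower = m.flatten() - 2.*np.sqrt(v)
    upper = m.flatten() + 2.*np.sqrt(v)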
@@ -1,18 +1,18 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
 import numpy as np
-import pylab as pb
+from GP import GP
+from .. import likelihoods
 from .. import kern
-from ..core import model
-from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot, Tango
 
-class GP_regression(model):
+class GP_regression(GP):
     """
     Gaussian Process model for regression
 
+    This is a thin wrapper around the GP class, with a set of sensible defalts
+
     :param X: input observations
     :param Y: observed values
     :param kernel: a GPy kernel, defaults to rbf+white
@@ -29,199 +29,8 @@ class GP_regression(model):
 
     def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
+            kernel = kern.rbf(X.shape[1])
 
-        # parse arguments
+        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
-        self.Xslices = Xslices
-        assert isinstance(kernel, kern.kern)
-        self.kern = kernel
-        self.X = X
-        self.Y = Y
-        assert len(self.X.shape)==2
-        assert len(self.Y.shape)==2
-        assert self.X.shape[0] == self.Y.shape[0]
-        self.N, self.D = self.Y.shape
-        self.N, self.Q = self.X.shape
 
-        #here's some simple normalisation
+        GP.__init__(self, X, kernel, likelihood, normalize_X=normalize_X, Xslices=Xslices)
-        if normalize_X:
-            self._Xmean = X.mean(0)[None,:]
-            self._Xstd = X.std(0)[None,:]
-            self.X = (X.copy() - self._Xmean) / self._Xstd
-            if hasattr(self,'Z'):
-                self.Z = (self.Z - self._Xmean) / self._Xstd
-        else:
-            self._Xmean = np.zeros((1,self.X.shape[1]))
-            self._Xstd = np.ones((1,self.X.shape[1]))
-
-        if normalize_Y:
-            self._Ymean = Y.mean(0)[None,:]
-            self._Ystd = Y.std(0)[None,:]
-            self.Y = (Y.copy()- self._Ymean) / self._Ystd
-        else:
-            self._Ymean = np.zeros((1,self.Y.shape[1]))
-            self._Ystd = np.ones((1,self.Y.shape[1]))
-
-        if self.D > self.N:
-            # then it's more efficient to store YYT
-            self.YYT = np.dot(self.Y, self.Y.T)
-        else:
-            self.YYT = None
-
-        model.__init__(self)
-
-    def _set_params(self,p):
-        self.kern._set_params_transformed(p)
-        self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
-
-    def _get_params(self):
-        return self.kern._get_params_transformed()
-
-    def _get_param_names(self):
-        return self.kern._get_param_names_transformed()
-
-    def _model_fit_term(self):
-        """
-        Computes the model fit using YYT if it's available
-        """
-        if self.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
-        else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
-
-    def log_likelihood(self):
-        complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
-        return complexity_term + self._model_fit_term()
-
-    def dL_dK(self):
-        if self.YYT is None:
-            alpha = np.dot(self.Ki,self.Y)
-            dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
-        else:
-            dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
-
-        return dL_dK
-
-    def _log_likelihood_gradients(self):
-        return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
-
-    def predict(self,Xnew, slices=None, full_cov=False):
-        """
-
-        Predict the function(s) at the new point(s) Xnew.
-
-        Arguments
-        ---------
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.Q
-        :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
-        :type slices: (None, list of slice objects, list of ints)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
-        :type full_cov: bool
-        :rtype: posterior mean, a Numpy array, Nnew x self.D
-        :rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
-
-        .. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
-
-            - If None, the new points covary throigh every kernel part (default)
-            - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
-            - If a list of booleans, specifying which kernel parts are active
-
-        If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
-        This is to allow for different normalisations of the output dimensions.
-
-        """
-
-        #normalise X values
-        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
-        mu, var = self._raw_predict(Xnew, slices, full_cov)
-
-        #un-normalise
-        mu = mu*self._Ystd + self._Ymean
-        if full_cov:
-            if self.D==1:
-                var *= np.square(self._Ystd)
-            else:
-                var = var[:,:,None] * np.square(self._Ystd)
-        else:
-            if self.D==1:
-                var *= np.square(np.squeeze(self._Ystd))
-            else:
-                var = var[:,None] * np.square(self._Ystd)
-
-        return mu,var
-
-    def _raw_predict(self,_Xnew,slices, full_cov=False):
-        """Internal helper function for making predictions, does not account for normalisation"""
-        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
-        KiKx = np.dot(self.Ki,Kx)
-        if full_cov:
-            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
-            var = Kxx - np.dot(KiKx.T,Kx)
-        else:
-            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
-            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
-        return mu, var
-
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
-        """
-        :param samples: the number of a posteriori samples to plot
-        :param which_data: which if the training data to plot (default all)
-        :type which_data: 'all' or a slice object to slice self.X, self.Y
-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :param which_functions: which of the kernel functions to plot (additively)
-        :type which_functions: list of bools
-        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-
-        Plot the posterior of the GP.
-          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-          - In two dimsensions, a contour-plot shows the mean predicted function
-          - In higher dimensions, we've no implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions using which_data and which_functions
-        """
-        if which_functions=='all':
-            which_functions = [True]*self.kern.Nparts
-        if which_data=='all':
-            which_data = slice(None)
-
-        X = self.X[which_data,:]
-        Y = self.Y[which_data,:]
-
-        Xorig = X*self._Xstd + self._Xmean
-        Yorig = Y*self._Ystd + self._Ymean
-        if plot_limits is None:
-            xmin,xmax = Xorig.min(0),Xorig.max(0)
-            xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
-        elif len(plot_limits)==2:
-            xmin, xmax = plot_limits
-        else:
-            raise ValueError, "Bad limits for plotting"
-
-        if self.X.shape[1]==1:
-            Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v = self.predict(Xnew,slices=which_functions)
-            gpplot(Xnew,m,v)
-            if samples:
-                s = np.random.multivariate_normal(m.flatten(),v,samples)
-                pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
-            pb.plot(Xorig,Yorig,'kx',mew=1.5)
-            pb.xlim(xmin,xmax)
-
-        elif self.X.shape[1]==2:
-            resolution = 50 or resolution
-            xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-            Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv = self.predict(Xtest,slices=which_functions)
-            zz = zz.reshape(resolution,resolution)
-            pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
-            pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
-
-        else:
-            raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
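After the refactor, constructing a regression model reduces to building a kernel, wrapping Y in a Gaussian likelihood, and delegating to GP.__init__; a usage sketch on toy data (the top-level GPy import is an assumption):

    import numpy as np
    import GPy  # assumed package name for this repository

    X = np.linspace(0., 1., 20)[:, None]
    Y = np.sin(2. * np.pi * X) + 0.05 * np.random.randn(20, 1)

    # defaults: kern.rbf kernel, Gaussian likelihood, optional normalisation
    m = GPy.models.GP_regression(X, Y, normalize_Y=True)
    print(m.log_likelihood())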
@@ -2,14 +2,14 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-#from GP_regression import GP_regression
 #from sparse_GP_regression import sparse_GP_regression
-# ^^ remove these?
+# TODO ^^ remove these?
 from GPLVM import GPLVM
 from warped_GP import warpedGP
-from generalized_FITC import generalized_FITC
+# TODO: from generalized_FITC import generalized_FITC
-from sparse_GPLVM import sparse_GPLVM
+#from sparse_GPLVM import sparse_GPLVM
-from uncollapsed_sparse_GP import uncollapsed_sparse_GP
+#from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from GP import GP
-from sparse_GP import sparse_GP
+from GP_regression import GP_regression
-from BGPLVM import Bayesian_GPLVM
+#from sparse_GP import sparse_GP
+#from BGPLVM import Bayesian_GPLVM
@@ -7,8 +7,6 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
-from ..inference.EP import Full,DTC,FITC
-from ..inference.likelihoods import likelihood,probit,poisson,gaussian
 
 
 #Still TODO:
@@ -6,7 +6,7 @@ import Tango
 import pylab as pb
 import numpy as np
 
-def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
+def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
     if axes is None:
         axes = pb.gca()
     mu = mu.flatten()
@@ -15,21 +15,15 @@ def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHe
     #here's the mean
     axes.plot(x,mu,color=edgecol,linewidth=2)
 
-    #ensure variance is a vector
+    #here's the box
-    if len(var.shape)>1:
-        err = 2*np.sqrt(np.diag(var))
-    else:
-        err = 2*np.sqrt(var)
-
-    #here's the 2*std box
     kwargs['linewidth']=0.5
     if not 'alpha' in kwargs.keys():
         kwargs['alpha'] = 0.3
-    axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs)
+    axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)
 
     #this is the edge:
-    axes.plot(x,mu+err,color=edgecol,linewidth=0.2)
-    axes.plot(x,mu-err,color=edgecol,linewidth=0.2)
+    axes.plot(x,upper,color=edgecol,linewidth=0.2)
+    axes.plot(x,lower,color=edgecol,linewidth=0.2)
 
 def removeRightTicks(ax=None):
     ax = ax or pb.gca()
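gpplot now takes an explicit lower/upper band instead of a variance, so the caller decides what the envelope means; a direct call might look like this (import path assumed):

    import numpy as np
    from GPy.util.plot import gpplot  # assumed import path

    x = np.linspace(0., 1., 50)
    mu = np.sin(2. * np.pi * x)
    sd = 0.1 * np.ones_like(x)
    # plot the mean with a two-standard-deviation envelope
    gpplot(x, mu, mu - 2. * sd, mu + 2. * sd)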