Merge branch 'master' of github.com:SheffieldML/GPy

commit da51f69ec3
Nicolo Fusi, 2012-12-10 17:26:05 +00:00
15 changed files with 167 additions and 101 deletions

View file

@@ -6,5 +6,5 @@ import kern
 import models
 import inference
 import util
-import examples
+#import examples TODO: discuss!
 from core import priors

View file

@@ -162,6 +162,20 @@ class model(parameterised):
         else:
             self.expand_param(initial_parameters)
 
+    def ensure_default_constraints(self,warn=False):
+        """
+        Ensure that any variables which should clearly be positive have been constrained somehow.
+        """
+        positive_strings = ['variance','lengthscale', 'precision']
+        for s in positive_strings:
+            for i in self.grep_param_names(s):
+                if not (i in self.all_constrained_indices()):
+                    name = self.get_param_names()[i]
+                    self.constrain_positive(name)
+                    if warn:
+                        print "Warning! constraining %s postive"%name
+
     def optimize(self, optimizer=None, start=None, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
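An illustrative usage sketch for the new ensure_default_constraints method (not part of the commit; it reuses the model construction from the unit test changed later in this commit, with toy data):

    import numpy as np
    import GPy

    N = 20
    X = np.hstack([np.random.rand(N/2)+1, np.random.rand(N/2)-1])[:,None]
    Y = np.hstack([np.ones(N/2), -np.ones(N/2)])[:,None]
    k = GPy.kern.rbf(1) + GPy.kern.white(1)
    likelihood = GPy.inference.likelihoods.probit(Y)

    m = GPy.models.GP_EP(X, likelihood, k)
    m.ensure_default_constraints(warn=True)   # constrains any unconstrained variance/lengthscale/precision
    m.approximate_likelihood()
    m.optimize()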

View file

@@ -11,45 +11,21 @@ from ..util.linalg import pdinv,mdot,jitchol
 from ..util.plot import gpplot
 from .. import kern
 
-class EP:
-    def __init__(self,covariance,likelihood,Kmn=None,Knn_diag=None,epsilon=1e-3,powerep=[1.,1.]):
-        """
-        Expectation Propagation
-        Arguments
-        ---------
-        X : input observations
-        likelihood : Output's likelihood (likelihood class)
-        kernel : a GPy kernel (kern class)
-        inducing : Either an array specifying the inducing points location or a sacalar defining their number. None value for using a non-sparse model is used.
-        powerep : Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
-        epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-        """
+class EP_base:
+    """
+    Expectation Propagation.
+
+    This is just the base class for expectation propagation. We'll extend it for full and sparse EP.
+    """
+    def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]):
         self.likelihood = likelihood
-        assert covariance.shape[0] == covariance.shape[1]
-        if Kmn is not None:
-            self.Kmm = covariance
-            self.Kmn = Kmn
-            self.M = self.Kmn.shape[0]
-            self.N = self.Kmn.shape[1]
-            assert self.M < self.N, 'The number of inducing inputs must be smaller than the number of observations'
-        else:
-            self.K = covariance
-            self.N = self.K.shape[0]
-            if Knn_diag is not None:
-                self.Knn_diag = Knn_diag
-                assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N'
         self.epsilon = epsilon
         self.eta, self.delta = powerep
         self.jitter = 1e-12
-        """
-        Initial values - Likelihood approximation parameters:
-        p(y|f) = t(f|tau_tilde,v_tilde)
-        """
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
+        #Initial values - Likelihood approximation parameters:
+        #p(y|f) = t(f|tau_tilde,v_tilde)
+        self.restart_EP()
 
     def restart_EP(self):
         """
@@ -59,8 +35,23 @@ class EP:
         self.v_tilde = np.zeros(self.N)
         self.mu = np.zeros(self.N)
 
-class Full(EP):
-    def fit_EP(self):
+class Full(EP_base):
+    """
+    :param likelihood: Output's likelihood (e.g. probit)
+    :type likelihood: GPy.inference.likelihood instance
+    :param K: prior covariance matrix
+    :type K: np.ndarray (N x N)
+    :param likelihood: Output's likelihood (e.g. probit)
+    :type likelihood: GPy.inference.likelihood instance
+    :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
+    :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
+    """
+    def __init__(self,K,likelihood,*args,**kwargs):
+        assert K.shape[0] == K.shape[1]
+        self.K = K
+        self.N = self.K.shape[0]
+        EP_base.__init__(self,likelihood,*args,**kwargs)
+
+    def fit_EP(self,messages=False):
         """
         The expectation-propagation algorithm.
         For nomenclature see Rasmussen & Williams 2006 (pag. 52-60)
@@ -78,15 +69,16 @@ class Full(EP):
         sigma_ = 1./tau_
         mu_ = v_/tau_
         """
-        self.tau_ = np.empty(self.N,dtype=float)
-        self.v_ = np.empty(self.N,dtype=float)
+        self.tau_ = np.empty(self.N,dtype=np.float64)
+        self.v_ = np.empty(self.N,dtype=np.float64)
 
         #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=float)
-        self.Z_hat = np.empty(self.N,dtype=float)
-        phi = np.empty(self.N,dtype=float)
-        mu_hat = np.empty(self.N,dtype=float)
-        sigma2_hat = np.empty(self.N,dtype=float)
+        z = np.empty(self.N,dtype=np.float64)
+        self.Z_hat = np.empty(self.N,dtype=np.float64)
+        phi = np.empty(self.N,dtype=np.float64)
+        mu_hat = np.empty(self.N,dtype=np.float64)
+        sigma2_hat = np.empty(self.N,dtype=np.float64)
 
         #Approximation
         epsilon_np1 = self.epsilon + 1.
@@ -95,8 +87,7 @@ class Full(EP):
         self.np1 = [self.tau_tilde.copy()]
         self.np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.arange(self.N)
-            random.shuffle(update_order)
+            update_order = np.random.permutation(self.N)
             for i in update_order:
                 #Cavity distribution parameters
                 self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
@@ -120,14 +111,35 @@ class Full(EP):
             V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
             self.Sigma = self.K - np.dot(V.T,V)
             self.mu = np.dot(self.Sigma,self.v_tilde)
-            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
-            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
+            epsilon_np1 = np.mean(self.tau_tilde-self.np1[-1]**2)
+            epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2)
             self.np1.append(self.tau_tilde.copy())
             self.np2.append(self.v_tilde.copy())
+            if messages:
+                print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1)
 
-        self.np2.append(self.v_tilde.copy())
-class FITC(EP):
+class FITC(EP_base):
+    """
+    :param likelihood: Output's likelihood (e.g. probit)
+    :type likelihood: GPy.inference.likelihood instance
+    :param Knn_diag: The diagonal elements of Knn is a 1D vector
+    :param Kmn: The 'cross' variance between inducing inputs and data
+    :param Kmm: the covariance matrix of the inducing inputs
+    :param likelihood: Output's likelihood (e.g. probit)
+    :type likelihood: GPy.inference.likelihood instance
+    :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
+    :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
+    """
+    def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs):
+        self.Knn_diag = Knn_diag
+        self.Kmn = Kmn
+        self.Kmm = Kmm
+        self.M = self.Kmn.shape[0]
+        self.N = self.Kmn.shape[1]
+        assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations'
+        assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N'
+        EP_base.__init__(self,likelihood,*args,**kwargs)
+
     def fit_EP(self):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
@@ -166,15 +178,15 @@ class FITC(EP):
         sigma_ = 1./tau_
         mu_ = v_/tau_
         """
-        self.tau_ = np.empty(self.N,dtype=float)
-        self.v_ = np.empty(self.N,dtype=float)
+        self.tau_ = np.empty(self.N,dtype=np.float64)
+        self.v_ = np.empty(self.N,dtype=np.float64)
 
         #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=float)
-        self.Z_hat = np.empty(self.N,dtype=float)
-        phi = np.empty(self.N,dtype=float)
-        mu_hat = np.empty(self.N,dtype=float)
-        sigma2_hat = np.empty(self.N,dtype=float)
+        z = np.empty(self.N,dtype=np.float64)
+        self.Z_hat = np.empty(self.N,dtype=np.float64)
+        phi = np.empty(self.N,dtype=np.float64)
+        mu_hat = np.empty(self.N,dtype=np.float64)
+        sigma2_hat = np.empty(self.N,dtype=np.float64)
 
         #Approximation
         epsilon_np1 = 1
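An illustrative construction sketch for the refactored classes (not part of the commit; toy data, with module and class names taken from the imports and signatures above):

    import numpy as np
    import GPy
    from GPy.inference.Expectation_Propagation import Full, FITC

    N, M = 20, 5
    X = np.random.randn(N, 1)
    Z = X[:M].copy()                          # illustrative inducing inputs
    Y = np.where(X > 0, 1., -1.)
    k = GPy.kern.rbf(1)
    likelihood = GPy.inference.likelihoods.probit(Y)

    # Full EP works on the dense N x N prior covariance:
    full_ep = Full(k.K(X, X), likelihood)
    full_ep.fit_EP()

    # FITC EP works on the low-rank pieces (Kmn is M x N):
    fitc_ep = FITC(likelihood, k.Kdiag(X), k.K(Z, X), k.K(Z, Z))
    fitc_ep.fit_EP()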

View file

@@ -19,7 +19,7 @@ class finite_dimensional(kernpart):
         self.D = D
         self.F = F
         self.G = G
-        self.G_1 ,tmp = pdinv(G)
+        self.G_1 ,L,Li,logdet = pdinv(G)
         self.n = F.shape[0]
         if weights is not None:
             assert weights.shape==(self.n,)

View file

@@ -99,6 +99,7 @@ class kern(parameterised):
         newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
         newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
         return newkern
+
     def add(self,other):
         """
         Add another kernel to this one. Both kernels are defined on the same _space_

@@ -139,7 +140,13 @@ class kern(parameterised):
         [p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)]
 
     def get_param_names(self):
-        return sum([[k.name+'_'+str(i)+'_'+n for n in k.get_param_names()] for i,k in enumerate(self.parts)],[])
+        #this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
+        part_names = np.array([k.name for k in self.parts],dtype=np.str)
+        counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)]
+        cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)]
+        names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)]
+        return sum([[name+'_'+n for n in k.get_param_names()] for name,k in zip(names,self.parts)],[])
 
     def K(self,X,X2=None,slices1=None,slices2=None):
         assert X.shape[1]==self.D
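An illustrative standalone check of the new counting scheme (not from the commit; the part names are toy values):

    import numpy as np

    # Mirrors the logic in get_param_names above for parts named rbf, rbf, white.
    part_names = np.array(['rbf', 'rbf', 'white'])
    counts = [np.sum(part_names == ni) for ni in part_names]
    cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
    names = [n + '_' + str(cc) if c > 1 else n for n, c, cc in zip(part_names, counts, cum_counts)]
    print names   # ['rbf_2', 'rbf_1', 'white']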

View file

@@ -6,7 +6,7 @@ import numpy as np
 import pylab as pb
 from scipy import stats, linalg
 from .. import kern
-from ..inference.Expectation_Propagation import EP,Full
+from ..inference.Expectation_Propagation import Full
 from ..inference.likelihoods import likelihood,probit#,poisson,gaussian
 from ..core import model
 from ..util.linalg import pdinv,jitchol

View file

@@ -71,7 +71,7 @@ class GP_regression(model):
     def set_param(self,p):
         self.kern.expand_param(p)
         self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.Ki,self.hld = pdinv(self.K)
+        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
 
     def get_param(self):
         return self.kern.extract_param()

@@ -90,7 +90,7 @@ class GP_regression(model):
         return -0.5*np.sum(np.multiply(self.Ki, self.Youter))
 
     def log_likelihood(self):
-        complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - self.D*self.hld
+        complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
         return complexity_term + self._model_fit_term()
 
     def dL_dK(self):
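An illustrative numerical check (not from the commit; toy data, assumes a recent SciPy) that the rewritten complexity term still gives the usual Gaussian log marginal likelihood, since pdinv now returns the full log-determinant rather than half of it:

    import numpy as np
    from scipy import stats

    np.random.seed(0)
    N, D = 6, 1
    A = np.random.randn(N, N)
    K = np.dot(A, A.T) + N * np.eye(N)       # toy positive definite covariance
    Y = np.random.randn(N, D)

    Ki = np.linalg.inv(K)
    K_logdet = np.linalg.slogdet(K)[1]       # what pdinv now returns
    model_fit_term = -0.5 * np.sum(Ki * np.dot(Y, Y.T))
    complexity_term = -0.5 * N * D * np.log(2. * np.pi) - 0.5 * D * K_logdet

    print np.allclose(complexity_term + model_fit_term,
                      stats.multivariate_normal.logpdf(Y.ravel(), np.zeros(N), K))   # True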

View file

@@ -9,4 +9,3 @@ from warped_GP import warpedGP
 from GP_EP import GP_EP
 from generalized_FITC import generalized_FITC
 from sparse_GPLVM import sparse_GPLVM

View file

@@ -9,7 +9,7 @@ from .. import kern
 from ..core import model
 from ..util.linalg import pdinv,mdot
 from ..util.plot import gpplot
-from ..inference.Expectation_Propagation import EP,Full,FITC
+from ..inference.Expectation_Propagation import FITC
 from ..inference.likelihoods import likelihood,probit
 
 class generalized_FITC(model):

@@ -62,7 +62,7 @@ class generalized_FITC(model):
     def posterior_param(self):
         self.Knn_diag = self.kernel.Kdiag(self.X)
         self.Kmm = self.kernel.K(self.Z)
-        self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
+        self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm)
         self.Knm = self.kernel.K(self.X,self.Z)
         self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T)
         self.Qnn = np.dot(self.Knm,self.KmmiKmn)

@@ -72,10 +72,10 @@ class generalized_FITC(model):
         self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0)
         self.KmnTaut = self.Knm.T*self.Taut[None,:]
         self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm)
-        self.Woodbury_inv, self.Woodbury_hld = pdinv(self.Kmm + self.KmnTautKnm)
+        self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm)
         self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde
         self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut)
-        self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - self.Kmm_hld + self.Woodbury_hld
+        self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet
         self.Diag = self.Diag0/(1.+ self.Diag0 * self.ep_approx.tau_tilde)
         self.P = (self.Diag / self.Diag0)[:,None] * self.Knm
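For reference, the half log-determinant assembled in self.hld above follows the matrix determinant lemma (a reading of the code, not text from the commit). Writing D = Diag0 + 1/tau_tilde for the diagonal term, so that Taut = D^{-1} and the Woodbury matrix is Kmm + Kmn D^{-1} Knm:

    \frac{1}{2}\log\det\left(D + K_{nm} K_{mm}^{-1} K_{mn}\right)
      = \frac{1}{2}\sum_i \log D_{ii}
      - \frac{1}{2}\log\det K_{mm}
      + \frac{1}{2}\log\det\left(K_{mm} + K_{mn} D^{-1} K_{nm}\right)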

View file

@@ -87,14 +87,10 @@ class sparse_GP_regression(GP_regression):
         self.V = self.beta*self.Y
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.Lm = jitchol(self.Kmm)
-        self.Lmi = chol_inv(self.Lm)
-        self.Kmmi = np.dot(self.Lmi.T, self.Lmi)
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
         self.A = mdot(self.Lmi, self.psi2, self.Lmi.T)
         self.B = np.eye(self.M) + self.beta * self.A
-        self.LB = jitchol(self.B)
-        self.LBi = chol_inv(self.LB)
-        self.Bi = np.dot(self.LBi.T, self.LBi)
+        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
         self.LLambdai = np.dot(self.LBi, self.Lmi)
         self.trace_K = self.psi0 - np.trace(self.A)
         self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)

@@ -124,7 +120,7 @@ class sparse_GP_regression(GP_regression):
         """
         A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
         B = -0.5*self.beta*self.D*self.trace_K
-        C = -self.D * np.sum(np.log(np.diag(self.LB)))
+        C = -0.5*self.D * self.B_logdet
         D = -0.5*self.beta*self.trYYT
         E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
         return A+B+C+D+E
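The C term rewrite relies on the identity that the sum of the log-diagonal of a Cholesky factor is half the log-determinant; an illustrative check (not from the commit; toy matrix):

    import numpy as np
    from scipy import linalg

    np.random.seed(1)
    A = np.random.randn(5, 5)
    B = np.eye(5) + np.dot(A, A.T)            # stand-in for I + beta*A

    LB = linalg.cholesky(B, lower=True)
    B_logdet = np.linalg.slogdet(B)[1]
    print np.allclose(np.sum(np.log(np.diag(LB))), 0.5 * B_logdet)   # True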

View file

@@ -39,7 +39,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
             M = Z.shape[0]
         else:
             M=M
-        self.set_vb_param(np.hstack((np.ones(M*D)),np.eye(M).flatten()))
+        q_u = np.hstack((np.ones(M*D)),np.eye(M).flatten())
+        self.set_vb_param(q_u)
         sparse_GP_regression.__init__(self, X, Y, *args, **kwargs)
 
     def _computations(self):
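As written, the hstack call above passes two positional arguments; np.hstack expects a single sequence of arrays, so the intended form is presumably a single tuple (an assumption, not part of the commit). An illustrative standalone version with toy sizes:

    import numpy as np

    M, D = 3, 2
    # mean of q(u) followed by the flattened identity covariance
    q_u = np.hstack((np.ones(M * D), np.eye(M).flatten()))
    print q_u.shape   # (15,) == (M*D + M*M,)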

GPy/notes.txt (new file, 42 lines)
View file

@@ -0,0 +1,42 @@
Fails in weird ways if you pass an integer as the input instead of a double to the kernel.
The Matern kernels (at least the 52) still work in the ARD manner, which means they won't run for very large input dimensions. Needs to be fixed to match the RBF.
Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
Missing kernels: polynomial, rational quadratic.
Kernel implementations are far too obscure. They need to be easily readable for a first-time user.
Need an implementation of scaled conjugate gradients for the optimizers.
Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
Change get_param and set_param to get_params and set_params
Get constrain param by default inside model creation.
Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
Change Youter to YYT (Youter doesn't mean anything for matrices).
Bug when running classification.crescent_data()
A dictionary for parameter storage? So we can go through names easily?
A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
Diagonal noise covariance function
Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
When computing kernel.K for kernels like rbf, you can't call rbf.K(X); you have to do rbf.K(X, X) (see the sketch after this list).
The predict method for GP_regression returns a covariance matrix, which is a bad idea: it takes a lot to compute, and it's also confusing for first-time users. It should only be returned if the user explicitly requests it.
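A minimal sketch of the kernel-call point above (not part of the commit; assumes the GPy kern API of this version):

    import numpy as np
    import GPy

    X = np.random.randn(10, 1)
    k = GPy.kern.rbf(1)
    K = k.K(X, X)     # the form that currently works
    # K = k.K(X)      # the one-argument form the note asks for
    print K.shape     # (10, 10)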

View file

@@ -139,13 +139,13 @@ class GradientTests(unittest.TestCase):
         m.constrain_positive('(linear|bias|white)')
         self.assertTrue(m.checkgrad())
 
-    def test_simple_GP_EP(self):
+    def test_GP_EP(self):
         N = 20
         X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)
         Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
         likelihood = GPy.inference.likelihoods.probit(Y)
-        m = GPy.models.simple_GP_EP(X,likelihood,k)
+        m = GPy.models.GP_EP(X,likelihood,k)
         m.constrain_positive('(var|len)')
         m.approximate_likelihood()
         self.assertTrue(m.checkgrad())

View file

@@ -46,19 +46,14 @@ def _mdot_r(a,b):
 def jitchol(A,maxtries=5):
     """
-    Arguments
-    ---------
-    A : An almost pd square matrix
-
-    Returns
-    -------
-    cholesky(K)
-
-    Notes
-    -----
-    Adds jitter to K, to enforce positive-definiteness
-    if stuff breaks, please check:
-    np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
+    :param A : An almost pd square matrix
+
+    :rval L: the Cholesky decomposition of A
+
+    .. Note:
+      Adds jitter to K, to enforce positive-definiteness
+      if stuff breaks, please check:
+      np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
     """
     try:
         return linalg.cholesky(A, lower = True)
@@ -78,23 +73,23 @@ def jitchol(A,maxtries=5):
 def pdinv(A):
     """
-    Arguments
-    ---------
     :param A: A DxD pd numpy array
 
-    Returns
-    -------
-    inv : the inverse of A
-    hld: 0.5* the log of the determinant of A
+    :rval Ai: the inverse of A
+    :rtype Ai: np.ndarray
+    :rval L: the Cholesky decomposition of A
+    :rtype L: np.ndarray
+    :rval Li: the Cholesky decomposition of Ai
+    :rtype Li: np.ndarray
+    :rval logdet: the log of the determinant of A
+    :rtype logdet: float64
     """
     L = jitchol(A)
-    hld = np.sum(np.log(np.diag(L)))
-    inv = sp.lib.lapack.flapack.dpotri(L)[0]
-    # inv = linalg.flapack.dpotri(L,lower = 1)[0]
-    inv = np.tril(inv)+np.tril(inv,-1).T
-    return inv, hld
+    logdet = 2.*np.sum(np.log(np.diag(L)))
+    Li = chol_inv(L)
+    Ai = np.dot(Li.T,Li) #TODO: get the flapack routine form multiplying triangular matrices
+    return Ai, L, Li, logdet
 
 def chol_inv(L):
@@ -139,7 +134,7 @@ def PCA(Y, Q):
     Returns
     -------
-    X - NxQ np.array of dimensionality reduced data
+    :rval X: - NxQ np.array of dimensionality reduced data
     W - QxD mapping from X to Y
     """
     if not np.allclose(Y.mean(axis=0), 0.0):
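An illustrative use of the new four-value pdinv return (not from the commit; toy matrix):

    import numpy as np
    from GPy.util.linalg import pdinv

    A = np.random.randn(4, 4)
    K = np.dot(A, A.T) + 4. * np.eye(4)      # a well-conditioned positive definite matrix

    Ki, L, Li, K_logdet = pdinv(K)
    print np.allclose(np.dot(Ki, K), np.eye(4))            # Ki is the inverse of K
    print np.allclose(np.dot(L, L.T), K)                   # L is the lower Cholesky factor
    print np.allclose(K_logdet, np.linalg.slogdet(K)[1])   # logdet is the full log-determinant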

View file

@@ -23,7 +23,7 @@ setup(name = 'GPy',
       package_dir={'GPy': 'GPy'},
       package_data = {'GPy': ['GPy/examples']},
       py_modules = ['GPy.__init__'],
-      long_description=read('README'),
+      long_description=read('README.md'),
       ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
                                sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
       install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],